From 36c4203cfaf65f0d181c5f563bc134ce8da21602 Mon Sep 17 00:00:00 2001 From: Michiel Cottaar <MichielCottaar@gmail.com> Date: Sat, 17 Feb 2018 15:16:04 +0000 Subject: [PATCH] update notebook --- talks/packages/packages.ipynb | 469 +++++++++++++++++++++++++++------- talks/packages/packages.md | 2 +- 2 files changed, 374 insertions(+), 97 deletions(-) diff --git a/talks/packages/packages.ipynb b/talks/packages/packages.ipynb index 8578a80..7d179c4 100644 --- a/talks/packages/packages.ipynb +++ b/talks/packages/packages.ipynb @@ -68,8 +68,8 @@ "source": [ "from scipy import optimize\n", "def costfunc(params):\n", - " return params[0] ** 2 * (params[1] - 3) ** 2 + (params[0] - 2) ** 2\n", - "optimize.minimize(costfunc, x0=[0, 0], method='l-bfgs-b')" + " return (params[0] - 3) ** 2\n", + "optimize.minimize(costfunc, x0=[0], method='l-bfgs-b')" ] }, { @@ -130,6 +130,91 @@ "- [Bokeh](https://bokeh.pydata.org/en/latest/) among many others: interactive plots in the browser (i.e., in javascript)\n", "\n", "## [Ipython](http://ipython.org/)/[Jupyter](https://jupyter.org/) notebook: interactive python environments\n", + "Supports:\n", + "- run code in multiple languages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "for name in python ruby ; do\n", + " echo $name\n", + "done" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- debugging" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from scipy import optimize\n", + "def costfunc(params):\n", + " return 1 / params[0] ** 2\n", + "optimize.minimize(costfunc, x0=[0], method='l-bfgs-b')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%debug" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- timing/profiling" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "%%prun\n", + "plt.plot([0, 3])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- getting help" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.plot?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- [and much more...](https://ipython.readthedocs.io/en/stable/interactive/magics.html)\n", + "\n", + "The next generation is already out: [jupyterlab](https://jupyterlab.readthedocs.io/en/latest/)\n", + "\n", "There are many [useful extensions available](https://github.com/ipython-contrib/jupyter_contrib_nbextensions).\n", "\n", "## [Pandas](https://pandas.pydata.org/): Analyzing \"clean\" data\n", @@ -139,11 +224,11 @@ "- fast IO to many tabular formats\n", "- accurate handling of missing data\n", "- Many, many routines to handle data\n", - " - group by categorical data (i.e., male/female, or age groups)\n", - " - joining/merging data\n", + " - group by categorical data (e.g., male/female)\n", + " - joining/merging data (all SQL-like operations and much more)\n", " - time series support\n", "- statistical models through [statsmodels](http://www.statsmodels.org/stable/index.html)\n", - "- plotting though seaborn [seaborn](https://seaborn.pydata.org/)\n", + "- plotting through [seaborn](https://seaborn.pydata.org/)\n", "- Use [dask](https://dask.pydata.org/en/latest/) if your data is too big for memory (or if you want to run in parallel)\n", "\n", "You should also install `numexpr` and `bottleneck` for optimal performance.\n", @@ -356,7 +441,7 @@ "metadata": {}, "outputs": [], "source": [ - "%run test_argparse.py 3 8.5 -q" + "%run test_argparse.py 3 8.5" ] }, { @@ -430,7 +515,7 @@ "metadata": {}, "outputs": [], "source": [ - "%run test_gooey.py" + "!python.app test_gooey.py" ] }, { @@ -495,6 +580,10 @@ "metadata": {}, "outputs": [], "source": [ + "import numpy as np\n", + "import 
matplotlib.pyplot as plt\n", + "plt.ioff()\n", + "\n", "def plot_sine(amplitude, frequency):\n", " x = np.linspace(0, 2 * np.pi, 100)\n", " y = amplitude * np.sin(frequency * x)\n", @@ -509,7 +598,8 @@ "\n", "!mkdir plots\n", "amplitudes = [plot_sine(A, 1.) for A in [0.1, 0.3, 0.7, 1.0]]\n", - "frequencies = [plot_sine(1., F) for F in [1, 2, 3, 4, 5, 6]]" + "frequencies = [plot_sine(1., F) for F in [1, 2, 3, 4, 5, 6]]\n", + "plt.ion()" ] }, { @@ -564,7 +654,6 @@ "outputs": [], "source": [ "%%writefile wx_hello_world.py\n", - "#!/usr/bin/env python\n", "\"\"\"\n", "Hello World, but with more meat.\n", "\"\"\"\n", @@ -671,7 +760,7 @@ "metadata": {}, "outputs": [], "source": [ - "%run wx_hello_world.py" + "!python.app wx_hello_world.py" ] }, { @@ -683,6 +772,102 @@ "- theano/tensorflow/pytorch\n", " - keras\n", "\n", + "## [pymc3](http://docs.pymc.io/): Probabilistic programming" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Initialize random number generator\n", + "np.random.seed(123)\n", + "\n", + "# True parameter values\n", + "alpha, sigma = 1, 1\n", + "beta = [1, 2.5]\n", + "\n", + "# Size of dataset\n", + "size = 100\n", + "\n", + "# Predictor variable\n", + "X1 = np.random.randn(size)\n", + "X2 = np.random.randn(size) * 0.2\n", + "\n", + "# Simulate outcome variable\n", + "Y = alpha + beta[0]*X1 + beta[1]*X2 + np.random.randn(size)*sigma" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pymc3 as pm\n", + "basic_model = pm.Model()\n", + "\n", + "with basic_model:\n", + "\n", + " # Priors for unknown model parameters\n", + " alpha = pm.Normal('alpha', mu=0, sd=10)\n", + " beta = pm.Normal('beta', mu=0, sd=10, shape=2)\n", + " sigma = pm.HalfNormal('sigma', sd=1)\n", + "\n", + " # Expected value of outcome\n", + " mu = alpha + beta[0]*X1 + 
beta[1]*X2\n", + "\n", + " # Likelihood (sampling distribution) of observations\n", + " Y_obs = pm.Normal('Y_obs', mu=mu, sd=sigma, observed=Y)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with basic_model:\n", + "\n", + " # obtain starting values via MAP\n", + " start = pm.find_MAP(fmin=optimize.fmin_powell)\n", + "\n", + " # instantiate sampler\n", + " step = pm.Slice()\n", + "\n", + " # draw 5000 posterior samples\n", + " trace = pm.sample(5000, step=step, start=start)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "_ = pm.traceplot(trace)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pm.summary(trace)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternative: [pystan](https://pystan.readthedocs.io/en/latest/): wrapper around the [Stan](http://mc-stan.org/users/) probabilistic programming language.\n", + "\n", + "\n", "## [Pycuda](https://documen.tician.de/pycuda/): Programming the GPU\n", "Wrapper around [Cuda](https://developer.nvidia.com/cuda-zone).\n", "The alternative [Pyopencl](https://documen.tician.de/pyopencl/) provides a very similar wrapper around [OpenCL](https://www.khronos.org/opencl/)." 
@@ -725,89 +910,124 @@ "source": [ "Also see [pyopenGL](http://pyopengl.sourceforge.net/): graphics programming in python (used in FSLeyes)\n", "## Testing\n", - "- [unittest](https://docs.python.org/3.6/library/unittest.html): python built-in testing\n", - "> ```\n", - "> import unittest\n", - ">\n", - "> class TestStringMethods(unittest.TestCase):\n", - ">\n", - "> def test_upper(self):\n", - "> self.assertEqual('foo'.upper(), 'FOO')\n", - ">\n", - "> def test_isupper(self):\n", - "> self.assertTrue('FOO'.isupper())\n", - "> self.assertFalse('Foo'.isupper())\n", - ">\n", - "> def test_split(self):\n", - "> s = 'hello world'\n", - "> self.assertEqual(s.split(), ['hello', 'world'])\n", - "> # check that s.split fails when the separator is not a string\n", - "> with self.assertRaises(TypeError):\n", - "> s.split(2)\n", - ">\n", - "> if __name__ == '__main__':\n", - "> unittest.main()\n", - "> ```\n", - "- [doctest](https://docs.python.org/3.6/library/doctest.html): checks the example usage in the documentation\n", - "> ```\n", - "> def factorial(n):\n", - "> \"\"\"Return the factorial of n, an exact integer >= 0.\n", - ">\n", - "> >>> [factorial(n) for n in range(6)]\n", - "> [1, 1, 2, 6, 24, 120]\n", - "> >>> factorial(30)\n", - "> 265252859812191058636308480000000\n", - "> >>> factorial(-1)\n", - "> Traceback (most recent call last):\n", - "> ...\n", - "> ValueError: n must be >= 0\n", - ">\n", - "> Factorials of floats are OK, but the float must be an exact integer:\n", - "> >>> factorial(30.1)\n", - "> Traceback (most recent call last):\n", - "> ...\n", - "> ValueError: n must be exact integer\n", - "> >>> factorial(30.0)\n", - "> 265252859812191058636308480000000\n", - ">\n", - "> It must also not be ridiculously large:\n", - "> >>> factorial(1e100)\n", - "> Traceback (most recent call last):\n", - "> ...\n", - "> OverflowError: n too large\n", - "> \"\"\"\n", - ">\n", - "> import math\n", - "> if not n >= 0:\n", - "> raise ValueError(\"n must be >= 0\")\n", - 
"> if math.floor(n) != n:\n", - "> raise ValueError(\"n must be exact integer\")\n", - "> if n+1 == n: # catch a value like 1e300\n", - "> raise OverflowError(\"n too large\")\n", - "> result = 1\n", - "> factor = 2\n", - "> while factor <= n:\n", - "> result *= factor\n", - "> factor += 1\n", - "> return result\n", - ">\n", - ">\n", - "> if __name__ == \"__main__\":\n", - "> import doctest\n", - "> doctest.testmod()\n", - "> ```\n", + "- [unittest](https://docs.python.org/3.6/library/unittest.html): python built-in testing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import unittest\n", + "\n", + "class TestStringMethods(unittest.TestCase):\n", + "\n", + " def test_upper(self):\n", + " self.assertEqual('foo'.upper(), 'FOO')\n", + "\n", + " def test_isupper(self):\n", + " self.assertTrue('FOO'.isupper())\n", + " self.assertFalse('Foo'.isupper())\n", + "\n", + " def test_split(self):\n", + " s = 'hello world'\n", + " self.assertEqual(s.split(), ['hello', 'world'])\n", + " # check that s.split fails when the separator is not a string\n", + " with self.assertRaises(TypeError):\n", + " s.split(2)\n", + "\n", + "if __name__ == '__main__':\n", + " unittest.main()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- [doctest](https://docs.python.org/3.6/library/doctest.html): checks the example usage in the documentation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def factorial(n):\n", + " \"\"\"Return the factorial of n, an exact integer >= 0.\n", + "\n", + " >>> [factorial(n) for n in range(6)]\n", + " [1, 1, 2, 6, 24, 120]\n", + " >>> factorial(30)\n", + " 265252859812191058636308480000000\n", + " >>> factorial(-1)\n", + " Traceback (most recent call last):\n", + " ...\n", + " ValueError: n must be >= 0\n", + "\n", + " Factorials of floats are OK, but the float must be an exact integer:\n", + " >>> 
factorial(30.1)\n", + " Traceback (most recent call last):\n", + " ...\n", + " ValueError: n must be exact integer\n", + " >>> factorial(30.0)\n", + " 265252859812191058636308480000000\n", + "\n", + " It must also not be ridiculously large:\n", + " >>> factorial(1e100)\n", + " Traceback (most recent call last):\n", + " ...\n", + " OverflowError: n too large\n", + " \"\"\"\n", + "\n", + " import math\n", + " if not n >= 0:\n", + " raise ValueError(\"n must be >= 0\")\n", + " if math.floor(n) != n:\n", + " raise ValueError(\"n must be exact integer\")\n", + " if n+1 == n: # catch a value like 1e300\n", + " raise OverflowError(\"n too large\")\n", + " result = 1\n", + " factor = 2\n", + " while factor <= n:\n", + " result *= factor\n", + " factor += 1\n", + " return result\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " import doctest\n", + " doctest.testmod()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "Two external packages provide more convenient unit tests:\n", "- [py.test](https://docs.pytest.org/en/latest/)\n", - "- [nose2](http://nose2.readthedocs.io/en/latest/usage.html)\n", - "> ```\n", - "> # content of test_sample.py\n", - "> def inc(x):\n", - "> return x + 1\n", - ">\n", - "> def test_answer():\n", - "> assert inc(3) == 5\n", - "> ```\n", + "- [nose2](http://nose2.readthedocs.io/en/latest/usage.html)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# content of test_sample.py\n", + "def inc(x):\n", + " return x + 1\n", "\n", + "def test_answer():\n", + " assert inc(3) == 5" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "- [coverage](https://coverage.readthedocs.io/en/coverage-4.5.1/): measures which part of the code is covered by the tests\n", "\n", "## Linters\n", @@ -815,7 +1035,38 @@ "- [pylint](https://pypi.python.org/pypi/pylint): most extensive linter\n", "- [pyflake](https://pypi.python.org/pypi/pyflakes): if you 
think pylint is too strict\n", "- [pep8](https://pypi.python.org/pypi/pep8): just checks for style errors\n", - "- [mypy](http://mypy-lang.org/): adding explicit typing to python\n", + "### Optional static typing\n", + "- Document how your method/function should be called\n", + " - Static checking of whether your type hints are still up to date\n", + " - Static checking of whether you call your own function correctly\n", + "- Even if you don't assign types yourself, static type checking can still check whether you call typed functions/methods from other packages correctly." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import List\n", + "\n", + "def greet_all(names: List[str]) -> None:\n", + " for name in names:\n", + " print('Hello, {}'.format(name))\n", + "\n", + "greet_all(['python', 'java', 'C++']) # type checker will be fine with this\n", + "\n", + "greet_all('matlab') # this will actually run fine, but type checker will raise an error" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Packages:\n", + "- [typing](https://docs.python.org/3/library/typing.html): built-in library containing generics, unions, etc.\n", + "- [mypy](http://mypy-lang.org/): linter doing static type checking\n", + "- [pyAnnotate](https://github.com/dropbox/pyannotate): automatically assign types to most of your functions/methods based on runtime\n", "\n", "## Web frameworks\n", "- [Django2](https://www.djangoproject.com/): includes the most features, but also forces you to do things their way\n", @@ -824,15 +1075,13 @@ "\n", "There are also many, many libraries to interact with databases, but you will have to google those yourself.\n", "\n", - "# Several honourable mentions\n", + "# Quick mentions\n", "- [trimesh](https://github.com/mikedh/trimesh): Triangular mesh algorithms\n", "- [Pillow](https://pillow.readthedocs.io/en/latest/): Read/write/manipulate a wide variety of images 
(png, jpg, tiff, etc.)\n", "- [psychopy](http://www.psychopy.org/): equivalent of psychtoolbox (workshop coming up in April in Nottingham)\n", "- [Buit-in libraries](https://docs.python.org/3/py-modindex.html)\n", " - [collections](https://docs.python.org/3.6/library/collections.html): deque, OrderedDict, namedtuple, and more\n", " - [datetime](https://docs.python.org/3/library/datetime.html): Basic date and time types\n", - " - [enum](https://docs.python.org/3/library/enum.html): Enumerators\n", - " - [fractions](https://docs.python.org/3/library/fractions.html): rational numbers\n", " - [functools](https://docs.python.org/3/library/functools.html): caching, decorators, and support for functional programming\n", " - [json](https://docs.python.org/3/library/json.html)/[ipaddress](https://docs.python.org/3/library/ipaddress.html)/[xml](https://docs.python.org/3/library/xml.html#module-xml): parsing/writing\n", " - [itertools](https://docs.python.org/3/library/itertools.html): more tools to loop over sequences\n", @@ -843,10 +1092,38 @@ " - [pickle](https://docs.python.org/3/library/pickle.html): Store/load any python object\n", " - [shutil](https://docs.python.org/3/library/shutil.html): copy/move files\n", " - [subprocess](https://docs.python.org/3/library/subprocess.html): call shell commands\n", - " - [time](https://docs.python.org/3/library/time.html)/[timeit](https://docs.python.org/3/library/timeit.html): keeping track of it\n", - " - [turtule](https://docs.python.org/3/library/turtle.html#module-turtle): teach python to your kids!\n", + " - [time](https://docs.python.org/3/library/time.html)/[timeit](https://docs.python.org/3/library/timeit.html): Timing your code\n", + " - [turtle](https://docs.python.org/3/library/turtle.html#module-turtle): teach python to your kids!\n", " - [warnings](https://docs.python.org/3/library/warnings.html#module-warnings): tell people they are not using your code properly" ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "from turtle import *\n", + "color('red', 'yellow')\n", + "begin_fill()\n", + "speed(10)\n", + "while True:\n", + " forward(200)\n", + " left(170)\n", + " if abs(pos()) < 1:\n", + " break\n", + "end_fill()\n", + "done()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import this" + ] } ], "metadata": {}, diff --git a/talks/packages/packages.md b/talks/packages/packages.md index 7d11138..e72e5bd 100644 --- a/talks/packages/packages.md +++ b/talks/packages/packages.md @@ -743,4 +743,4 @@ done() ``` import this -``` \ No newline at end of file +``` -- GitLab