From 36c4203cfaf65f0d181c5f563bc134ce8da21602 Mon Sep 17 00:00:00 2001
From: Michiel Cottaar <MichielCottaar@gmail.com>
Date: Sat, 17 Feb 2018 15:16:04 +0000
Subject: [PATCH] update notebook

---
 talks/packages/packages.ipynb | 469 +++++++++++++++++++++++++++-------
 talks/packages/packages.md    |   2 +-
 2 files changed, 374 insertions(+), 97 deletions(-)

diff --git a/talks/packages/packages.ipynb b/talks/packages/packages.ipynb
index 8578a80..7d179c4 100644
--- a/talks/packages/packages.ipynb
+++ b/talks/packages/packages.ipynb
@@ -68,8 +68,8 @@
    "source": [
     "from scipy import optimize\n",
     "def costfunc(params):\n",
-    "    return params[0] ** 2 * (params[1] - 3) ** 2 + (params[0] - 2) ** 2\n",
-    "optimize.minimize(costfunc, x0=[0, 0], method='l-bfgs-b')"
+    "    return (params[0] - 3) ** 2\n",
+    "optimize.minimize(costfunc, x0=[0], method='l-bfgs-b')"
    ]
   },
   {
@@ -130,6 +130,91 @@
     "- [Bokeh](https://bokeh.pydata.org/en/latest/) among many others: interactive plots in the browser (i.e., in javascript)\n",
     "\n",
     "## [Ipython](http://ipython.org/)/[Jupyter](https://jupyter.org/) notebook: interactive python environments\n",
+    "Supports:\n",
+    "- run code in multiple languages"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%bash\n",
+    "for name in python ruby ; do\n",
+    "    echo $name\n",
+    "done"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "- debugging"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from scipy import optimize\n",
+    "def costfunc(params):\n",
+    "    return 1 / params[0] ** 2\n",
+    "optimize.minimize(costfunc, x0=[0], method='l-bfgs-b')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%debug"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "- timing/profiling"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%prun\n",
+    "plt.plot([0, 3])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "- getting help"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.plot?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "- [and much more...](https://ipython.readthedocs.io/en/stable/interactive/magics.html)\n",
+    "\n",
+    "The next generation is already out: [jupyterlab](https://jupyterlab.readthedocs.io/en/latest/)\n",
+    "\n",
     "There are many [useful extensions available](https://github.com/ipython-contrib/jupyter_contrib_nbextensions).\n",
     "\n",
     "## [Pandas](https://pandas.pydata.org/): Analyzing \"clean\" data\n",
@@ -139,11 +224,11 @@
     "- fast IO to many tabular formats\n",
     "- accurate handling of missing data\n",
     "- Many, many routines to handle data\n",
-    "  - group by categorical data (i.e., male/female, or age groups)\n",
-    "  - joining/merging data\n",
+    "  - group by categorical data (e.g., male/female)\n",
+    "  - joining/merging data (all SQL-like operations and much more)\n",
     "  - time series support\n",
     "- statistical models through [statsmodels](http://www.statsmodels.org/stable/index.html)\n",
-    "- plotting though seaborn [seaborn](https://seaborn.pydata.org/)\n",
+    "- plotting through [seaborn](https://seaborn.pydata.org/)\n",
     "- Use [dask](https://dask.pydata.org/en/latest/) if your data is too big for memory (or if you want to run in parallel)\n",
     "\n",
     "You should also install `numexpr` and `bottleneck` for optimal performance.\n",
@@ -356,7 +441,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%run test_argparse.py 3 8.5 -q"
+    "%run test_argparse.py 3 8.5"
    ]
   },
   {
@@ -430,7 +515,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%run test_gooey.py"
+    "!python.app test_gooey.py"
    ]
   },
   {
@@ -495,6 +580,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "plt.ioff()\n",
+    "\n",
     "def plot_sine(amplitude, frequency):\n",
     "    x = np.linspace(0, 2 * np.pi, 100)\n",
     "    y = amplitude * np.sin(frequency * x)\n",
@@ -509,7 +598,8 @@
     "\n",
     "!mkdir plots\n",
     "amplitudes = [plot_sine(A, 1.) for A in [0.1, 0.3, 0.7, 1.0]]\n",
-    "frequencies = [plot_sine(1., F) for F in [1, 2, 3, 4, 5, 6]]"
+    "frequencies = [plot_sine(1., F) for F in [1, 2, 3, 4, 5, 6]]\n",
+    "plt.ion()"
    ]
   },
   {
@@ -564,7 +654,6 @@
    "outputs": [],
    "source": [
     "%%writefile wx_hello_world.py\n",
-    "#!/usr/bin/env python\n",
     "\"\"\"\n",
     "Hello World, but with more meat.\n",
     "\"\"\"\n",
@@ -671,7 +760,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%run wx_hello_world.py"
+    "!python.app wx_hello_world.py"
    ]
   },
   {
@@ -683,6 +772,102 @@
     "- theano/tensorflow/pytorch\n",
     "  - keras\n",
     "\n",
+    "## [pymc3](http://docs.pymc.io/): Probabilistic programming"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "# Initialize random number generator\n",
+    "np.random.seed(123)\n",
+    "\n",
+    "# True parameter values\n",
+    "alpha, sigma = 1, 1\n",
+    "beta = [1, 2.5]\n",
+    "\n",
+    "# Size of dataset\n",
+    "size = 100\n",
+    "\n",
+    "# Predictor variable\n",
+    "X1 = np.random.randn(size)\n",
+    "X2 = np.random.randn(size) * 0.2\n",
+    "\n",
+    "# Simulate outcome variable\n",
+    "Y = alpha + beta[0]*X1 + beta[1]*X2 + np.random.randn(size)*sigma"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pymc3 as pm\n",
+    "basic_model = pm.Model()\n",
+    "\n",
+    "with basic_model:\n",
+    "\n",
+    "    # Priors for unknown model parameters\n",
+    "    alpha = pm.Normal('alpha', mu=0, sd=10)\n",
+    "    beta = pm.Normal('beta', mu=0, sd=10, shape=2)\n",
+    "    sigma = pm.HalfNormal('sigma', sd=1)\n",
+    "\n",
+    "    # Expected value of outcome\n",
+    "    mu = alpha + beta[0]*X1 + beta[1]*X2\n",
+    "\n",
+    "    # Likelihood (sampling distribution) of observations\n",
+    "    Y_obs = pm.Normal('Y_obs', mu=mu, sd=sigma, observed=Y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with basic_model:\n",
+    "\n",
+    "    # obtain starting values via MAP\n",
+    "    start = pm.find_MAP(fmin=optimize.fmin_powell)\n",
+    "\n",
+    "    # instantiate sampler\n",
+    "    step = pm.Slice()\n",
+    "\n",
+    "    # draw 5000 posterior samples\n",
+    "    trace = pm.sample(5000, step=step, start=start)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "_ = pm.traceplot(trace)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pm.summary(trace)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Alternative: [pystan](https://pystan.readthedocs.io/en/latest/): wrapper around the [Stan](http://mc-stan.org/users/) probabilistic programming language.\n",
+    "\n",
+    "\n",
     "## [Pycuda](https://documen.tician.de/pycuda/): Programming the GPU\n",
     "Wrapper around [Cuda](https://developer.nvidia.com/cuda-zone).\n",
     "The alternative [Pyopencl](https://documen.tician.de/pyopencl/) provides a very similar wrapper around [OpenCL](https://www.khronos.org/opencl/)."
@@ -725,89 +910,124 @@
    "source": [
     "Also see [pyopenGL](http://pyopengl.sourceforge.net/): graphics programming in python (used in FSLeyes)\n",
     "## Testing\n",
-    "- [unittest](https://docs.python.org/3.6/library/unittest.html): python built-in testing\n",
-    "> ```\n",
-    "> import unittest\n",
-    ">\n",
-    "> class TestStringMethods(unittest.TestCase):\n",
-    ">\n",
-    ">     def test_upper(self):\n",
-    ">         self.assertEqual('foo'.upper(), 'FOO')\n",
-    ">\n",
-    ">     def test_isupper(self):\n",
-    ">         self.assertTrue('FOO'.isupper())\n",
-    ">         self.assertFalse('Foo'.isupper())\n",
-    ">\n",
-    ">     def test_split(self):\n",
-    ">         s = 'hello world'\n",
-    ">         self.assertEqual(s.split(), ['hello', 'world'])\n",
-    ">         # check that s.split fails when the separator is not a string\n",
-    ">         with self.assertRaises(TypeError):\n",
-    ">             s.split(2)\n",
-    ">\n",
-    "> if __name__ == '__main__':\n",
-    ">     unittest.main()\n",
-    "> ```\n",
-    "- [doctest](https://docs.python.org/3.6/library/doctest.html): checks the example usage in the documentation\n",
-    "> ```\n",
-    "> def factorial(n):\n",
-    ">     \"\"\"Return the factorial of n, an exact integer >= 0.\n",
-    ">\n",
-    ">     >>> [factorial(n) for n in range(6)]\n",
-    ">     [1, 1, 2, 6, 24, 120]\n",
-    ">     >>> factorial(30)\n",
-    ">     265252859812191058636308480000000\n",
-    ">     >>> factorial(-1)\n",
-    ">     Traceback (most recent call last):\n",
-    ">         ...\n",
-    ">     ValueError: n must be >= 0\n",
-    ">\n",
-    ">     Factorials of floats are OK, but the float must be an exact integer:\n",
-    ">     >>> factorial(30.1)\n",
-    ">     Traceback (most recent call last):\n",
-    ">         ...\n",
-    ">     ValueError: n must be exact integer\n",
-    ">     >>> factorial(30.0)\n",
-    ">     265252859812191058636308480000000\n",
-    ">\n",
-    ">     It must also not be ridiculously large:\n",
-    ">     >>> factorial(1e100)\n",
-    ">     Traceback (most recent call last):\n",
-    ">         ...\n",
-    ">     OverflowError: n too large\n",
-    ">     \"\"\"\n",
-    ">\n",
-    ">     import math\n",
-    ">     if not n >= 0:\n",
-    ">         raise ValueError(\"n must be >= 0\")\n",
-    ">     if math.floor(n) != n:\n",
-    ">         raise ValueError(\"n must be exact integer\")\n",
-    ">     if n+1 == n:  # catch a value like 1e300\n",
-    ">         raise OverflowError(\"n too large\")\n",
-    ">     result = 1\n",
-    ">     factor = 2\n",
-    ">     while factor <= n:\n",
-    ">         result *= factor\n",
-    ">         factor += 1\n",
-    ">     return result\n",
-    ">\n",
-    ">\n",
-    "> if __name__ == \"__main__\":\n",
-    ">     import doctest\n",
-    ">     doctest.testmod()\n",
-    "> ```\n",
+    "- [unittest](https://docs.python.org/3.6/library/unittest.html): python built-in testing"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import unittest\n",
+    "\n",
+    "class TestStringMethods(unittest.TestCase):\n",
+    "\n",
+    "    def test_upper(self):\n",
+    "        self.assertEqual('foo'.upper(), 'FOO')\n",
+    "\n",
+    "    def test_isupper(self):\n",
+    "        self.assertTrue('FOO'.isupper())\n",
+    "        self.assertFalse('Foo'.isupper())\n",
+    "\n",
+    "    def test_split(self):\n",
+    "        s = 'hello world'\n",
+    "        self.assertEqual(s.split(), ['hello', 'world'])\n",
+    "        # check that s.split fails when the separator is not a string\n",
+    "        with self.assertRaises(TypeError):\n",
+    "            s.split(2)\n",
+    "\n",
+    "if __name__ == '__main__':\n",
+    "    unittest.main()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "- [doctest](https://docs.python.org/3.6/library/doctest.html): checks the example usage in the documentation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def factorial(n):\n",
+    "    \"\"\"Return the factorial of n, an exact integer >= 0.\n",
+    "\n",
+    "    >>> [factorial(n) for n in range(6)]\n",
+    "    [1, 1, 2, 6, 24, 120]\n",
+    "    >>> factorial(30)\n",
+    "    265252859812191058636308480000000\n",
+    "    >>> factorial(-1)\n",
+    "    Traceback (most recent call last):\n",
+    "        ...\n",
+    "    ValueError: n must be >= 0\n",
+    "\n",
+    "    Factorials of floats are OK, but the float must be an exact integer:\n",
+    "    >>> factorial(30.1)\n",
+    "    Traceback (most recent call last):\n",
+    "        ...\n",
+    "    ValueError: n must be exact integer\n",
+    "    >>> factorial(30.0)\n",
+    "    265252859812191058636308480000000\n",
+    "\n",
+    "    It must also not be ridiculously large:\n",
+    "    >>> factorial(1e100)\n",
+    "    Traceback (most recent call last):\n",
+    "        ...\n",
+    "    OverflowError: n too large\n",
+    "    \"\"\"\n",
+    "\n",
+    "    import math\n",
+    "    if not n >= 0:\n",
+    "        raise ValueError(\"n must be >= 0\")\n",
+    "    if math.floor(n) != n:\n",
+    "        raise ValueError(\"n must be exact integer\")\n",
+    "    if n+1 == n:  # catch a value like 1e300\n",
+    "        raise OverflowError(\"n too large\")\n",
+    "    result = 1\n",
+    "    factor = 2\n",
+    "    while factor <= n:\n",
+    "        result *= factor\n",
+    "        factor += 1\n",
+    "    return result\n",
+    "\n",
+    "\n",
+    "if __name__ == \"__main__\":\n",
+    "    import doctest\n",
+    "    doctest.testmod()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
     "Two external packages provide more convenient unit tests:\n",
     "- [py.test](https://docs.pytest.org/en/latest/)\n",
-    "- [nose2](http://nose2.readthedocs.io/en/latest/usage.html)\n",
-    "> ```\n",
-    "> # content of test_sample.py\n",
-    "> def inc(x):\n",
-    ">     return x + 1\n",
-    ">\n",
-    "> def test_answer():\n",
-    ">     assert inc(3) == 5\n",
-    "> ```\n",
+    "- [nose2](http://nose2.readthedocs.io/en/latest/usage.html)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# content of test_sample.py\n",
+    "def inc(x):\n",
+    "    return x + 1\n",
     "\n",
+    "def test_answer():\n",
+    "    assert inc(3) == 5"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
     "- [coverage](https://coverage.readthedocs.io/en/coverage-4.5.1/): measures which part of the code is covered by the tests\n",
     "\n",
     "## Linters\n",
@@ -815,7 +1035,38 @@
     "- [pylint](https://pypi.python.org/pypi/pylint): most extensive linter\n",
     "- [pyflake](https://pypi.python.org/pypi/pyflakes): if you think pylint is too strict\n",
     "- [pep8](https://pypi.python.org/pypi/pep8): just checks for style errors\n",
-    "- [mypy](http://mypy-lang.org/): adding explicit typing to python\n",
+    "### Optional static typing\n",
+    "- Document how your method/function should be called\n",
+    "  - Static checking of whether your type hints are still up to date\n",
+    "  - Static checking of whether you call your own function correctly\n",
+    "- Even if you don't assign types yourself, static type checking can still check whether you call typed functions/methods from other packages correctly."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from typing import List\n",
+    "\n",
+    "def greet_all(names: List[str]) -> None:\n",
+    "    for name in names:\n",
+    "        print('Hello, {}'.format(name))\n",
+    "\n",
+    "greet_all(['python', 'java', 'C++'])  # type checker will be fine with this\n",
+    "\n",
+    "greet_all('matlab')  # this will actually run fine, but type checker will raise an error"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Packages:\n",
+    "- [typing](https://docs.python.org/3/library/typing.html): built-in library containing generics, unions, etc.\n",
+    "- [mypy](http://mypy-lang.org/): linter doing static type checking\n",
+    "- [pyAnnotate](https://github.com/dropbox/pyannotate): automatically adds type annotations to most of your functions/methods based on the types observed at runtime\n",
     "\n",
     "## Web frameworks\n",
     "- [Django2](https://www.djangoproject.com/): includes the most features, but also forces you to do things their way\n",
@@ -824,15 +1075,13 @@
     "\n",
     "There are also many, many libraries to interact with databases, but you will have to google those yourself.\n",
     "\n",
-    "# Several honourable mentions\n",
+    "# Quick mentions\n",
     "- [trimesh](https://github.com/mikedh/trimesh): Triangular mesh algorithms\n",
     "- [Pillow](https://pillow.readthedocs.io/en/latest/): Read/write/manipulate a wide variety of images (png, jpg, tiff, etc.)\n",
     "- [psychopy](http://www.psychopy.org/): equivalent of psychtoolbox (workshop coming up in April in Nottingham)\n",
     "- [Buit-in libraries](https://docs.python.org/3/py-modindex.html)\n",
     "    - [collections](https://docs.python.org/3.6/library/collections.html): deque, OrderedDict, namedtuple, and more\n",
     "    - [datetime](https://docs.python.org/3/library/datetime.html): Basic date and time types\n",
-    "    - [enum](https://docs.python.org/3/library/enum.html): Enumerators\n",
-    "    - [fractions](https://docs.python.org/3/library/fractions.html): rational numbers\n",
     "    - [functools](https://docs.python.org/3/library/functools.html): caching, decorators, and support for functional programming\n",
     "    - [json](https://docs.python.org/3/library/json.html)/[ipaddress](https://docs.python.org/3/library/ipaddress.html)/[xml](https://docs.python.org/3/library/xml.html#module-xml): parsing/writing\n",
     "    - [itertools](https://docs.python.org/3/library/itertools.html): more tools to loop over sequences\n",
@@ -843,10 +1092,38 @@
     "    - [pickle](https://docs.python.org/3/library/pickle.html): Store/load any python object\n",
     "    - [shutil](https://docs.python.org/3/library/shutil.html): copy/move files\n",
     "    - [subprocess](https://docs.python.org/3/library/subprocess.html): call shell commands\n",
-    "    - [time](https://docs.python.org/3/library/time.html)/[timeit](https://docs.python.org/3/library/timeit.html): keeping track of it\n",
-    "    - [turtule](https://docs.python.org/3/library/turtle.html#module-turtle): teach python to your kids!\n",
+    "    - [time](https://docs.python.org/3/library/time.html)/[timeit](https://docs.python.org/3/library/timeit.html): Timing your code\n",
+    "    - [turtle](https://docs.python.org/3/library/turtle.html#module-turtle): teach python to your kids!\n",
     "    - [warnings](https://docs.python.org/3/library/warnings.html#module-warnings): tell people they are not using your code properly"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from turtle import *\n",
+    "color('red', 'yellow')\n",
+    "begin_fill()\n",
+    "speed(10)\n",
+    "while True:\n",
+    "    forward(200)\n",
+    "    left(170)\n",
+    "    if abs(pos()) < 1:\n",
+    "        break\n",
+    "end_fill()\n",
+    "done()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import this"
+   ]
   }
  ],
  "metadata": {},
diff --git a/talks/packages/packages.md b/talks/packages/packages.md
index 7d11138..e72e5bd 100644
--- a/talks/packages/packages.md
+++ b/talks/packages/packages.md
@@ -743,4 +743,4 @@ done()
 
 ```
 import this
-```
\ No newline at end of file
+```
-- 
GitLab