diff --git a/advanced_topics/02_modules_and_packages.ipynb b/advanced_topics/02_modules_and_packages.ipynb index b49f0e5a1d7ce99ed66f00bc1b21c62fd5af88ed..36a56733b97fe4aa43e0676d1c396e30832a7c90 100644 --- a/advanced_topics/02_modules_and_packages.ipynb +++ b/advanced_topics/02_modules_and_packages.ipynb @@ -10,7 +10,7 @@ "Python gives you a lot of flexibility in how you organise your code. If you\n", "want, you can write a Python program just as you would write a Bash script.\n", "You don't _have_ to use functions, classes, modules or packages if you don't\n", - "want to, or if the script's task does not require them.\n", + "want to, or if the script task does not require them.\n", "\n", "\n", "But when your code starts to grow beyond what can reasonably be defined in a\n", @@ -169,7 +169,7 @@ "metadata": {}, "source": [ "__PLEASE DON'T DO THIS!__ Because every time you do, somewhere in the world, a\n", - "software developer will will spontaneously stub his/her toe, and start crying.\n", + "software developer will spontaneously stub his/her toe, and start crying.\n", "Using this approach can make more complicated programs very difficult to read,\n", "because it is not possible to determine the origin of the functions and\n", "attributes that are being used.\n", @@ -349,7 +349,7 @@ "If you are experimenting or developing your program, the quickest and easiest\n", "way to make your module(s) importable is to add their containing directory to\n", "the `PYTHONPATH`. But if you are developing a larger piece of software, you\n", - "should probably organise your modules into _packages_, which are [described\n", + "should probably organise your modules into *packages*, which are [described\n", "below](#what-is-a-package).\n", "\n", "\n", @@ -421,10 +421,10 @@ "\n", "\n", "You now know how to split your Python code up into separate files\n", - "(a.k.a. _modules_). When your code grows beyond a handful of files, you may\n", + "(a.k.a. *modules*). 
When your code grows beyond a handful of files, you may\n", "wish for more fine-grained control over the namespaces in which your modules\n", "live. Python has another feature which allows you to organise your modules\n", - "into _packages_.\n", + "into *packages*.\n", "\n", "\n", "A package in Python is simply a directory which:\n", @@ -507,7 +507,7 @@ "<a class=\"anchor\" id=\"useful-references\"></a>\n", "## Useful references\n", "\n", - "* [Modules and packages in Python](https://docs.python.org/3.5/tutorial/modules.html)\n", + "* [Modules and packages in Python](https://docs.python.org/3/tutorial/modules.html)\n", "* [Using `__init__.py`](http://mikegrouchy.com/blog/2012/05/be-pythonic-__init__py.html)" ] } diff --git a/advanced_topics/02_modules_and_packages.md b/advanced_topics/02_modules_and_packages.md index 1fdccb84963ead3deb0305324be0bb793a3b10ff..9fc75ee338c2913bfb6ec2ad588093a59693c1ea 100644 --- a/advanced_topics/02_modules_and_packages.md +++ b/advanced_topics/02_modules_and_packages.md @@ -4,7 +4,7 @@ Python gives you a lot of flexibility in how you organise your code. If you want, you can write a Python program just as you would write a Bash script. You don't _have_ to use functions, classes, modules or packages if you don't -want to, or if the script's task does not require them. +want to, or if the script task does not require them. But when your code starts to grow beyond what can reasonably be defined in a @@ -113,7 +113,7 @@ print(add(1, 5)) __PLEASE DON'T DO THIS!__ Because every time you do, somewhere in the world, a -software developer will will spontaneously stub his/her toe, and start crying. +software developer will spontaneously stub his/her toe, and start crying. Using this approach can make more complicated programs very difficult to read, because it is not possible to determine the origin of the functions and attributes that are being used. 
@@ -242,7 +242,7 @@ in the following order: If you are experimenting or developing your program, the quickest and easiest way to make your module(s) importable is to add their containing directory to the `PYTHONPATH`. But if you are developing a larger piece of software, you -should probably organise your modules into _packages_, which are [described +should probably organise your modules into *packages*, which are [described below](#what-is-a-package). @@ -298,10 +298,10 @@ mas.main([str(a), str(b)]) You now know how to split your Python code up into separate files -(a.k.a. _modules_). When your code grows beyond a handful of files, you may +(a.k.a. *modules*). When your code grows beyond a handful of files, you may wish for more fine-grained control over the namespaces in which your modules live. Python has another feature which allows you to organise your modules -into _packages_. +into *packages*. A package in Python is simply a directory which: @@ -367,5 +367,5 @@ fsleyes.fsleyes_main() <a class="anchor" id="useful-references"></a> ## Useful references -* [Modules and packages in Python](https://docs.python.org/3.5/tutorial/modules.html) -* [Using `__init__.py`](http://mikegrouchy.com/blog/2012/05/be-pythonic-__init__py.html) \ No newline at end of file +* [Modules and packages in Python](https://docs.python.org/3/tutorial/modules.html) +* [Using `__init__.py`](http://mikegrouchy.com/blog/2012/05/be-pythonic-__init__py.html) diff --git a/advanced_topics/03_object_oriented_programming.ipynb b/advanced_topics/03_object_oriented_programming.ipynb index 40e01b70427412bacbd77909c88b4a88959d999f..7c8eda183d89c5dec9a0357c3c750bb2bd44b6ca 100644 --- a/advanced_topics/03_object_oriented_programming.ipynb +++ b/advanced_topics/03_object_oriented_programming.ipynb @@ -25,6 +25,7 @@ " * [We didn't specify the `self` argument - what gives?!?](#we-didnt-specify-the-self-argument)\n", "* [Attributes](#attributes)\n", "* [Methods](#methods)\n", + "* [Method 
chaining](#method-chaining)\n", "* [Protecting attribute access](#protecting-attribute-access)\n", " * [A better way - properties](#a-better-way-properties])\n", "* [Inheritance](#inheritance)\n", @@ -52,8 +53,8 @@ "\n", "\n", "If you have not done any object-oriented programming before, your first step\n", - "is to understand the difference between _objects_ (also known as\n", - "_instances_) and _classes_ (also known as _types_).\n", + "is to understand the difference between *objects* (also known as\n", + "*instances*) and *classes* (also known as *types*).\n", "\n", "\n", "If you have some experience in C, then you can start off by thinking of a\n", @@ -72,8 +73,8 @@ "> ```\n", "\n", "\n", - "Now, an _object_ is not a definition, but rather a thing which resides in\n", - "memory. An object can have _attributes_ (pieces of information), and _methods_\n", + "Now, an *object* is not a definition, but rather a thing which resides in\n", + "memory. An object can have *attributes* (pieces of information), and *methods*\n", "(functions associated with the object). You can pass objects around your code,\n", "manipulate their attributes, and call their methods.\n", "\n", @@ -98,12 +99,12 @@ "Of course there are many more differences between C structs and classes (most\n", "notably [inheritance](todo), [polymorphism](todo), and [access\n", "protection](todo)). 
But if you can understand the difference between a\n", - "_definition_ of a C struct, and an _instantiation_ of that struct, then you\n", - "are most of the way towards understanding the difference between a _class_,\n", - "and an _object_.\n", + "*definition* of a C struct, and an *instantiation* of that struct, then you\n", + "are most of the way towards understanding the difference between a *class*,\n", + "and an *object*.\n", "\n", "\n", - "> But just to confuse you, remember that in Python, __everything__ is an\n", + "> But just to confuse you, remember that in Python, **everything** is an\n", "> object - even classes!\n", "\n", "\n", @@ -252,7 +253,7 @@ "metadata": {}, "source": [ "Refer to the [official\n", - "docs](https://docs.python.org/3.5/reference/datamodel.html#special-method-names)\n", + "docs](https://docs.python.org/3/reference/datamodel.html#special-method-names)\n", "for details on all of the special methods that can be defined in a class. And\n", "take a look at the appendix for some more details on [how Python objects get\n", "created](appendix-init-versus-new).\n", @@ -438,8 +439,8 @@ "\n", "The idea behind this design is that our `FSLMaths` class will not actually do\n", "anything when we call the `add`, `mul` or `div` methods. Instead, it will\n", - "\"stage\" each operation, and then perform them all in one go. So let's add\n", - "another method, `run`, which actually does the work:" + "*stage* each operation, and then perform them all in one go at a later point\n", + "in time. 
So let's add another method, `run`, which actually does the work:" ] }, { @@ -478,7 +479,6 @@ " if isinstance(value, nib.nifti1.Nifti1Image):\n", " value = value.get_data()\n", "\n", - "\n", " if oper == 'add':\n", " data = data + value\n", " elif oper == 'mul':\n", @@ -532,6 +532,117 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "<a class=\"anchor\" id=\"method-chaining\"></a>\n", + "## Method chaining\n", + "\n", + "\n", + "A neat trick, which is used by all the cool kids these days, is to write\n", + "classes that allow *method chaining* - writing one line of code which\n", + "calls more than one method on an object, e.g.:\n", + "\n", + "> ```\n", + "> fm = FSLMaths(img)\n", + "> result = fm.add(1).mul(10).run()\n", + "> ```\n", + "\n", + "Adding this feature to our budding `FSLMaths` class is easy - all we have\n", + "to do is return `self` from each method:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import nibabel as nib\n", + "\n", + "class FSLMaths(object):\n", + "\n", + " def __init__(self, inimg):\n", + " self.img = inimg\n", + " self.operations = []\n", + "\n", + " def add(self, value):\n", + " self.operations.append(('add', value))\n", + " return self\n", + "\n", + " def mul(self, value):\n", + " self.operations.append(('mul', value))\n", + " return self\n", + "\n", + " def div(self, value):\n", + " self.operations.append(('div', value))\n", + " return self\n", + "\n", + " def run(self, output=None):\n", + "\n", + " data = np.array(self.img.get_data())\n", + "\n", + " for oper, value in self.operations:\n", + "\n", + " # Value could be an image.\n", + " # If not, we assume that\n", + " # it is a scalar/numpy array.\n", + " if isinstance(value, nib.nifti1.Nifti1Image):\n", + " value = value.get_data()\n", + "\n", + " if oper == 'add':\n", + " data = data + value\n", + " elif oper == 'mul':\n", + " data = data * value\n", + " elif oper == 
'div':\n",
+    "                data = data / value\n",
+    "\n",
+    "        # turn final output into a nifti,\n",
+    "        # and save it to disk if an\n",
+    "        # 'output' has been specified.\n",
+    "        outimg = nib.nifti1.Nifti1Image(data, self.img.affine)\n",
+    "\n",
+    "        if output is not None:\n",
+    "            nib.save(outimg, output)\n",
+    "\n",
+    "        return outimg"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we can chain all of our method calls, and even the creation of our\n",
+    "`FSLMaths` object, into a single line:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fpath = op.expandvars('$FSLDIR/data/standard/MNI152_T1_2mm.nii.gz')\n",
+    "fmask = op.expandvars('$FSLDIR/data/standard/MNI152_T1_2mm_brain_mask.nii.gz')\n",
+    "inimg = nib.load(fpath)\n",
+    "mask  = nib.load(fmask)\n",
+    "\n",
+    "outimg = FSLMaths(inimg).mul(mask).add(-10).run()\n",
+    "\n",
+    "norigvox = (inimg .get_data() > 0).sum()\n",
+    "nmaskvox = (outimg.get_data() > 0).sum()\n",
+    "\n",
+    "print('Number of voxels >0 in original image: {}'.format(norigvox))\n",
+    "print('Number of voxels >0 in masked image:   {}'.format(nmaskvox))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> In fact, this is precisely how the\n",
+    "> [`fsl.wrappers.fslmaths`](https://users.fmrib.ox.ac.uk/~paulmc/fsleyes/fslpy/latest/fsl.wrappers.fslmaths.html)\n",
+    "> function works.\n",
+    "\n",
+    "\n",
     "<a class=\"anchor\" id=\"protecting-attribute-access\"></a>\n",
     "## Protecting attribute access\n",
     "\n",
@@ -606,9 +717,8 @@
     "notion of a private attribute or method is strictly enforced by the language.\n",
     "\n",
     "\n",
-    "However, there are a couple of conventions in Python that are [universally\n",
-    "adhered\n",
-    "to](https://docs.python.org/3.5/tutorial/classes.html#private-variables):\n",
+    "However, there are a couple of conventions in Python that are\n",
+    "[universally adhered 
to](https://docs.python.org/3/tutorial/classes.html#private-variables):\n", "\n", "* Class-level attributes and methods, and module-level attributes, functions,\n", " and classes, which begin with a single underscore (`_`), should be\n", @@ -622,14 +732,13 @@ " enforcement for this rule - any attribute or method with such a name will\n", " actually be _renamed_ (in a standardised manner) at runtime, so that it is\n", " not accessible through its original name (it is still accessible via its\n", - " [mangled\n", - " name](https://docs.python.org/3.5/tutorial/classes.html#private-variables)\n", + " [mangled name](https://docs.python.org/3/tutorial/classes.html#private-variables)\n", " though).\n", "\n", "\n", "> <sup>2</sup> With the exception that module-level fields which begin with a\n", "> single underscore will not be imported into the local scope via the\n", - "> `from [module] import *` techinque.\n", + "> `from [module] import *` technique.\n", "\n", "\n", "So with all of this in mind, we can adjust our `FSLMaths` class to discourage\n", @@ -676,7 +785,7 @@ "\n", "\n", "Python has a feature called\n", - "[`properties`](https://docs.python.org/3.5/library/functions.html#property),\n", + "[`properties`](https://docs.python.org/3/library/functions.html#property),\n", "which is a nice way of controlling access to the attributes of an object. 
We\n", "can use properties by defining a \"getter\" method which can be used to access\n", "our attributes, and \"decorating\" them with the `@property` decorator (we will\n", @@ -851,17 +960,17 @@ "metadata": {}, "source": [ "Hopefully this example doesn't need much in the way of explanation - this\n", - "collection of classes captures a hierarchical relationship which exists in the\n", - "real world (and also captures the inherently annoying nature of\n", + "collection of classes represents a hierarchical relationship which exists in\n", + "the real world (and also represents the inherently annoying nature of\n", "chihuahuas). For example, in the real world, all dogs are animals, but not all\n", "animals are dogs. Therefore in our model, the `Dog` class has specified\n", - "`Animal` as its base class. We say that the `Dog` class _extends_, _derives\n", - "from_, or _inherits from_, the `Animal` class, and that all `Dog` instances\n", + "`Animal` as its base class. We say that the `Dog` class *extends*, *derives\n", + "from*, or *inherits from*, the `Animal` class, and that all `Dog` instances\n", "are also `Animal` instances (but not vice-versa).\n", "\n", "\n", "What does that `noiseMade` method do? There is a `noiseMade` method defined\n", - "on the `Animal` class, but it has been re-implemented, or _overridden_ in the\n", + "on the `Animal` class, but it has been re-implemented, or *overridden* in the\n", "`Dog`,\n", "[`TalkingDog`](https://twitter.com/simpsonsqotd/status/427941665836630016?lang=en),\n", "`Cat`, and `Chihuahua` classes (but not on the `Labrador` class). 
We can call\n", @@ -1018,7 +1127,7 @@ "\n", "\n", "In Python, we can use the [built-in `super`\n", - "method](https://docs.python.org/3.5/library/functions.html#super) to take care\n", + "method](https://docs.python.org/3/library/functions.html#super) to take care\n", "of correctly calling methods that are defined in an object's base-class (or\n", "classes, in the case of [multiple inheritance](multiple-inheritance)).\n", "\n", @@ -1136,8 +1245,8 @@ "### Polymorphism\n", "\n", "\n", - "Inheritance also allows us to take advantage of _polymorphism_, which refers\n", - "to idea that, in an object-oriented language, we should be able to use an\n", + "Inheritance also allows us to take advantage of *polymorphism*, which refers\n", + "to the idea that, in an object-oriented language, we should be able to use an\n", "object without having complete knowledge about the class, or type, of that\n", "object. For example, we should be able to write a function which expects an\n", "`Operator` instance, but which will work on an instance of any `Operator`\n", @@ -1371,12 +1480,15 @@ "\n", " def add(self, value):\n", " self.operations.append(('add', value))\n", + " return self\n", "\n", " def mul(self, value):\n", " self.operations.append(('mul', value))\n", + " return self\n", "\n", " def div(self, value):\n", " self.operations.append(('div', value))\n", + " return self\n", "\n", " def run(self, output=None):\n", "\n", @@ -1386,12 +1498,14 @@ "\n", " # Code omitted for brevity\n", "\n", - " # Increment the usage counter\n", - " # for this operation. We can\n", - " # access class attributes (and\n", - " # methods) through the class\n", - " # itself.\n", - " FSLMaths.opCounters[oper] = self.opCounters.get(oper, 0) + 1" + " # Increment the usage counter for this operation. 
We can\n", + " # access class attributes (and methods) through the class\n", + " # itself, as shown here.\n", + " FSLMaths.opCounters[oper] = FSLMaths.opCounters.get(oper, 0) + 1\n", + "\n", + " # It is also possible to access class-level\n", + " # attributes via instances of the class, e.g.\n", + " # self.opCounters[oper] = self.opCounters.get(oper, 0) + 1\n" ] }, { @@ -1412,17 +1526,8 @@ "inimg = nib.load(fpath)\n", "mask = nib.load(fmask)\n", "\n", - "fm1 = FSLMaths(inimg)\n", - "fm2 = FSLMaths(inimg)\n", - "\n", - "fm1.mul(mask)\n", - "fm1.add(15)\n", - "\n", - "fm2.add(25)\n", - "fm1.div(1.5)\n", - "\n", - "fm1.run()\n", - "fm2.run()\n", + "FSLMaths(inimg).mul(mask).add(25).run()\n", + "FSLMaths(inimg).add(15).div(1.5).run()\n", "\n", "print('FSLMaths usage statistics')\n", "for oper in ('add', 'div', 'mul'):\n", @@ -1471,12 +1576,15 @@ "\n", " def add(self, value):\n", " self.operations.append(('add', value))\n", + " return self\n", "\n", " def mul(self, value):\n", " self.operations.append(('mul', value))\n", + " return self\n", "\n", " def div(self, value):\n", " self.operations.append(('div', value))\n", + " return self\n", "\n", " def run(self, output=None):\n", "\n", @@ -1493,11 +1601,11 @@ "> There is another decorator -\n", "> [`@staticmethod`](https://docs.python.org/3.5/library/functions.html#staticmethod) -\n", "> which can be used on methods defined within a class. 
The difference\n", - "> between a `@classmethod` and a `@staticmethod` is that the latter will _not_\n", + "> between a `@classmethod` and a `@staticmethod` is that the latter will *not*\n", "> be passed the class (`cls`).\n", "\n", "\n", - "calling a class method is the same as accessing a class attribute:" + "Calling a class method is the same as accessing a class attribute:" ] }, { @@ -1511,14 +1619,8 @@ "inimg = nib.load(fpath)\n", "mask = nib.load(fmask)\n", "\n", - "fm1 = FSLMaths(inimg)\n", - "fm2 = FSLMaths(inimg)\n", - "\n", - "fm1.mul(mask)\n", - "fm1.add(15)\n", - "\n", - "fm2.add(25)\n", - "fm1.div(1.5)\n", + "fm1 = FSLMaths(inimg).mul(mask).add(25)\n", + "fm2 = FSLMaths(inimg).add(15).div(1.5)\n", "\n", "fm1.run()\n", "fm2.run()\n", @@ -1590,9 +1692,9 @@ "## Appendix: `__init__` versus `__new__`\n", "\n", "\n", - "In Python, object creation is actually a two-stage process - _creation_, and\n", - "then _initialisation_. The `__init__` method gets called during the\n", - "_initialisation_ stage - its job is to initialise the state of the object. But\n", + "In Python, object creation is actually a two-stage process - *creation*, and\n", + "then *initialisation*. The `__init__` method gets called during the\n", + "*initialisation* stage - its job is to initialise the state of the object. 
But\n", "note that, by the time `__init__` gets called, the object has already been\n", "created.\n", "\n", @@ -1610,7 +1712,7 @@ "the difference between `__new__` and `__init__` can be found\n", "[here](https://www.reddit.com/r/learnpython/comments/2s3pms/what_is_the_difference_between_init_and_new/cnm186z/),\n", "and you may also wish to take a look at the [official Python\n", - "docs](https://docs.python.org/3.5/reference/datamodel.html#basic-customization).\n", + "docs](https://docs.python.org/3/reference/datamodel.html#basic-customization).\n", "\n", "\n", "<a class=\"anchor\" id=\"appendix-monkey-patching\"></a>\n", @@ -1618,24 +1720,24 @@ "\n", "\n", "The act of run-time modification of objects or class definitions is referred\n", - "to as [_monkey-patching_](https://en.wikipedia.org/wiki/Monkey_patch) and,\n", + "to as [*monkey-patching*](https://en.wikipedia.org/wiki/Monkey_patch) and,\n", "whilst it is allowed by the Python programming language, it is generally\n", "considered quite bad practice.\n", "\n", "\n", - "Just because you _can_ do something doesn't mean that you _should_. Python\n", + "Just because you *can* do something doesn't mean that you *should*. Python\n", "gives you the flexibility to write your software in whatever manner you deem\n", - "suitable. __But__ if you want to write software that will be used, adopted,\n", + "suitable. 
**But** if you want to write software that will be used, adopted,\n", "maintained, and enjoyed by other people, you should be polite, write your code\n", "in a clear, readable fashion, and avoid the use of devious tactics such as\n", "monkey-patching.\n", "\n", "\n", - "__However__, while monkey-patching may seem like a horrific programming\n", + "**However**, while monkey-patching may seem like a horrific programming\n", "practice to those of you coming from the realms of C++, Java, and the like,\n", - "(and it is horrific in many cases), it can be _extremely_ useful in certain\n", + "(and it is horrific in many cases), it can be *extremely* useful in certain\n", "circumstances. For instance, monkey-patching makes [unit testing a\n", - "breeze in Python](https://docs.python.org/3.5/library/unittest.mock.html).\n", + "breeze in Python](https://docs.python.org/3/library/unittest.mock.html).\n", "\n", "\n", "As another example, consider the scenario where you are dependent on a third\n", @@ -1703,7 +1805,7 @@ "metadata": {}, "source": [ "> <sup>4</sup>Another option is the [`functools.singledispatch`\n", - "> decorator](https://docs.python.org/3.5/library/functools.html#functools.singledispatch),\n", + "> decorator](https://docs.python.org/3/library/functools.html#functools.singledispatch),\n", "> which is more complicated, but may allow you to write your dispatch logic in\n", "> a more concise manner.\n", "\n", @@ -1716,8 +1818,8 @@ "workings of classes and objects, so these pages are worth a read:\n", "\n", "\n", - "* https://docs.python.org/3.5/tutorial/classes.html\n", - "* https://docs.python.org/3.5/reference/datamodel.html" + "* https://docs.python.org/3/tutorial/classes.html\n", + "* https://docs.python.org/3/reference/datamodel.html" ] } ], diff --git a/advanced_topics/03_object_oriented_programming.md b/advanced_topics/03_object_oriented_programming.md index 5ffbfc7c63b3be4b0704a717852a49bdffc1efd7..21ec9a676ead32d7d8f6577e7f91f141f9437d49 100644 --- 
a/advanced_topics/03_object_oriented_programming.md +++ b/advanced_topics/03_object_oriented_programming.md @@ -19,6 +19,7 @@ you use an object-oriented approach. * [We didn't specify the `self` argument - what gives?!?](#we-didnt-specify-the-self-argument) * [Attributes](#attributes) * [Methods](#methods) +* [Method chaining](#method-chaining) * [Protecting attribute access](#protecting-attribute-access) * [A better way - properties](#a-better-way-properties]) * [Inheritance](#inheritance) @@ -46,8 +47,8 @@ section. If you have not done any object-oriented programming before, your first step -is to understand the difference between _objects_ (also known as -_instances_) and _classes_ (also known as _types_). +is to understand the difference between *objects* (also known as +*instances*) and *classes* (also known as *types*). If you have some experience in C, then you can start off by thinking of a @@ -66,8 +67,8 @@ layout of a chunk of memory. For example, here is a typical struct definition: > ``` -Now, an _object_ is not a definition, but rather a thing which resides in -memory. An object can have _attributes_ (pieces of information), and _methods_ +Now, an *object* is not a definition, but rather a thing which resides in +memory. An object can have *attributes* (pieces of information), and *methods* (functions associated with the object). You can pass objects around your code, manipulate their attributes, and call their methods. @@ -92,12 +93,12 @@ you create an object from that class. Of course there are many more differences between C structs and classes (most notably [inheritance](todo), [polymorphism](todo), and [access protection](todo)). But if you can understand the difference between a -_definition_ of a C struct, and an _instantiation_ of that struct, then you -are most of the way towards understanding the difference between a _class_, -and an _object_. 
+*definition* of a C struct, and an *instantiation* of that struct, then you +are most of the way towards understanding the difference between a *class*, +and an *object*. -> But just to confuse you, remember that in Python, __everything__ is an +> But just to confuse you, remember that in Python, **everything** is an > object - even classes! @@ -206,7 +207,7 @@ print(fm) Refer to the [official -docs](https://docs.python.org/3.5/reference/datamodel.html#special-method-names) +docs](https://docs.python.org/3/reference/datamodel.html#special-method-names) for details on all of the special methods that can be defined in a class. And take a look at the appendix for some more details on [how Python objects get created](appendix-init-versus-new). @@ -352,8 +353,8 @@ append a tuple to that `operations` list. The idea behind this design is that our `FSLMaths` class will not actually do anything when we call the `add`, `mul` or `div` methods. Instead, it will -"stage" each operation, and then perform them all in one go. So let's add -another method, `run`, which actually does the work: +*stage* each operation, and then perform them all in one go at a later point +in time. 
So let's add another method, `run`, which actually does the work: ``` @@ -387,7 +388,6 @@ class FSLMaths(object): if isinstance(value, nib.nifti1.Nifti1Image): value = value.get_data() - if oper == 'add': data = data + value elif oper == 'mul': @@ -430,6 +430,99 @@ print('Number of voxels >0 in masked image: {}'.format(nmaskvox)) ``` +<a class="anchor" id="method-chaining"></a> +## Method chaining + + +A neat trick, which is used by all the cool kids these days, is to write +classes that allow *method chaining* - writing one line of code which +calls more than one method on an object, e.g.: + +> ``` +> fm = FSLMaths(img) +> result = fm.add(1).mul(10).run() +> ``` + +Adding this feature to our budding `FSLMaths` class is easy - all we have +to do is return `self` from each method: + +``` +import numpy as np +import nibabel as nib + +class FSLMaths(object): + + def __init__(self, inimg): + self.img = inimg + self.operations = [] + + def add(self, value): + self.operations.append(('add', value)) + return self + + def mul(self, value): + self.operations.append(('mul', value)) + return self + + def div(self, value): + self.operations.append(('div', value)) + return self + + def run(self, output=None): + + data = np.array(self.img.get_data()) + + for oper, value in self.operations: + + # Value could be an image. + # If not, we assume that + # it is a scalar/numpy array. + if isinstance(value, nib.nifti1.Nifti1Image): + value = value.get_data() + + if oper == 'add': + data = data + value + elif oper == 'mul': + data = data * value + elif oper == 'div': + data = data / value + + # turn final output into a nifti, + # and save it to disk if an + # 'output' has been specified. 
+        outimg = nib.nifti1.Nifti1Image(data, self.img.affine)
+
+        if output is not None:
+            nib.save(outimg, output)
+
+        return outimg
+```
+
+
+Now we can chain all of our method calls, and even the creation of our
+`FSLMaths` object, into a single line:
+
+
+```
+fpath = op.expandvars('$FSLDIR/data/standard/MNI152_T1_2mm.nii.gz')
+fmask = op.expandvars('$FSLDIR/data/standard/MNI152_T1_2mm_brain_mask.nii.gz')
+inimg = nib.load(fpath)
+mask  = nib.load(fmask)
+
+outimg = FSLMaths(inimg).mul(mask).add(-10).run()
+
+norigvox = (inimg .get_data() > 0).sum()
+nmaskvox = (outimg.get_data() > 0).sum()
+
+print('Number of voxels >0 in original image: {}'.format(norigvox))
+print('Number of voxels >0 in masked image:   {}'.format(nmaskvox))
+```
+
+> In fact, this is precisely how the
+> [`fsl.wrappers.fslmaths`](https://users.fmrib.ox.ac.uk/~paulmc/fsleyes/fslpy/latest/fsl.wrappers.fslmaths.html)
+> function works.
+
+
 <a class="anchor" id="protecting-attribute-access"></a>
 ## Protecting attribute access
 
@@ -488,9 +581,8 @@ of an object. This is in contrast to languages like C++ and Java, where the
 notion of a private attribute or method is strictly enforced by the language.
 
 
-However, there are a couple of conventions in Python that are [universally -adhered -to](https://docs.python.org/3.5/tutorial/classes.html#private-variables): +However, there are a couple of conventions in Python that are +[universally adhered to](https://docs.python.org/3/tutorial/classes.html#private-variables): * Class-level attributes and methods, and module-level attributes, functions, and classes, which begin with a single underscore (`_`), should be @@ -504,14 +596,13 @@ to](https://docs.python.org/3.5/tutorial/classes.html#private-variables): enforcement for this rule - any attribute or method with such a name will actually be _renamed_ (in a standardised manner) at runtime, so that it is not accessible through its original name (it is still accessible via its - [mangled - name](https://docs.python.org/3.5/tutorial/classes.html#private-variables) + [mangled name](https://docs.python.org/3/tutorial/classes.html#private-variables) though). > <sup>2</sup> With the exception that module-level fields which begin with a > single underscore will not be imported into the local scope via the -> `from [module] import *` techinque. +> `from [module] import *` technique. So with all of this in mind, we can adjust our `FSLMaths` class to discourage @@ -541,7 +632,7 @@ print(fm.__img) Python has a feature called -[`properties`](https://docs.python.org/3.5/library/functions.html#property), +[`properties`](https://docs.python.org/3/library/functions.html#property), which is a nice way of controlling access to the attributes of an object. 
We can use properties by defining a "getter" method which can be used to access our attributes, and "decorating" them with the `@property` decorator (we will @@ -676,17 +767,17 @@ class Chihuahua(Dog): Hopefully this example doesn't need much in the way of explanation - this -collection of classes captures a hierarchical relationship which exists in the -real world (and also captures the inherently annoying nature of +collection of classes represents a hierarchical relationship which exists in +the real world (and also represents the inherently annoying nature of chihuahuas). For example, in the real world, all dogs are animals, but not all animals are dogs. Therefore in our model, the `Dog` class has specified -`Animal` as its base class. We say that the `Dog` class _extends_, _derives -from_, or _inherits from_, the `Animal` class, and that all `Dog` instances +`Animal` as its base class. We say that the `Dog` class *extends*, *derives +from*, or *inherits from*, the `Animal` class, and that all `Dog` instances are also `Animal` instances (but not vice-versa). What does that `noiseMade` method do? There is a `noiseMade` method defined -on the `Animal` class, but it has been re-implemented, or _overridden_ in the +on the `Animal` class, but it has been re-implemented, or *overridden* in the `Dog`, [`TalkingDog`](https://twitter.com/simpsonsqotd/status/427941665836630016?lang=en), `Cat`, and `Chihuahua` classes (but not on the `Labrador` class). We can call @@ -819,7 +910,7 @@ This line invokes `Operator.__init__` - the initialisation method for the In Python, we can use the [built-in `super` -method](https://docs.python.org/3.5/library/functions.html#super) to take care +method](https://docs.python.org/3/library/functions.html#super) to take care of correctly calling methods that are defined in an object's base-class (or classes, in the case of [multiple inheritance](multiple-inheritance)). 
@@ -920,8 +1011,8 @@ print(so.run('python is an ok language')) ### Polymorphism -Inheritance also allows us to take advantage of _polymorphism_, which refers -to idea that, in an object-oriented language, we should be able to use an +Inheritance also allows us to take advantage of *polymorphism*, which refers +to the idea that, in an object-oriented language, we should be able to use an object without having complete knowledge about the class, or type, of that object. For example, we should be able to write a function which expects an `Operator` instance, but which will work on an instance of any `Operator` @@ -1110,12 +1201,15 @@ class FSLMaths(object): def add(self, value): self.operations.append(('add', value)) + return self def mul(self, value): self.operations.append(('mul', value)) + return self def div(self, value): self.operations.append(('div', value)) + return self def run(self, output=None): @@ -1125,12 +1219,15 @@ class FSLMaths(object): # Code omitted for brevity - # Increment the usage counter - # for this operation. We can - # access class attributes (and - # methods) through the class - # itself. - FSLMaths.opCounters[oper] = self.opCounters.get(oper, 0) + 1 + # Increment the usage counter for this operation. We can + # access class attributes (and methods) through the class + # itself, as shown here. + FSLMaths.opCounters[oper] = FSLMaths.opCounters.get(oper, 0) + 1 + + # It is also possible to access class-level + # attributes via instances of the class, e.g. 
+ # self.opCounters[oper] = self.opCounters.get(oper, 0) + 1 + ``` @@ -1143,17 +1240,8 @@ fmask = op.expandvars('$FSLDIR/data/standard/MNI152_T1_2mm_brain_mask.nii.gz') inimg = nib.load(fpath) mask = nib.load(fmask) -fm1 = FSLMaths(inimg) -fm2 = FSLMaths(inimg) - -fm1.mul(mask) -fm1.add(15) - -fm2.add(25) -fm1.div(1.5) - -fm1.run() -fm2.run() +FSLMaths(inimg).mul(mask).add(25).run() +FSLMaths(inimg).add(15).div(1.5).run() print('FSLMaths usage statistics') for oper in ('add', 'div', 'mul'): @@ -1194,12 +1282,15 @@ class FSLMaths(object): def add(self, value): self.operations.append(('add', value)) + return self def mul(self, value): self.operations.append(('mul', value)) + return self def div(self, value): self.operations.append(('div', value)) + return self def run(self, output=None): @@ -1213,11 +1304,11 @@ class FSLMaths(object): > There is another decorator - > [`@staticmethod`](https://docs.python.org/3.5/library/functions.html#staticmethod) - > which can be used on methods defined within a class. The difference -> between a `@classmethod` and a `@staticmethod` is that the latter will _not_ +> between a `@classmethod` and a `@staticmethod` is that the latter will *not* > be passed the class (`cls`). -calling a class method is the same as accessing a class attribute: +Calling a class method is the same as accessing a class attribute: ``` @@ -1226,14 +1317,8 @@ fmask = op.expandvars('$FSLDIR/data/standard/MNI152_T1_2mm_brain_mask.nii.gz') inimg = nib.load(fpath) mask = nib.load(fmask) -fm1 = FSLMaths(inimg) -fm2 = FSLMaths(inimg) - -fm1.mul(mask) -fm1.add(15) - -fm2.add(25) -fm1.div(1.5) +fm1 = FSLMaths(inimg).mul(mask).add(25) +fm2 = FSLMaths(inimg).add(15).div(1.5) fm1.run() fm2.run() @@ -1294,9 +1379,9 @@ always use the new-style format. ## Appendix: `__init__` versus `__new__` -In Python, object creation is actually a two-stage process - _creation_, and -then _initialisation_. 
The `__init__` method gets called during the -_initialisation_ stage - its job is to initialise the state of the object. But +In Python, object creation is actually a two-stage process - *creation*, and +then *initialisation*. The `__init__` method gets called during the +*initialisation* stage - its job is to initialise the state of the object. But note that, by the time `__init__` gets called, the object has already been created. @@ -1314,7 +1399,7 @@ A brief explanation on the difference between `__new__` and `__init__` can be found [here](https://www.reddit.com/r/learnpython/comments/2s3pms/what_is_the_difference_between_init_and_new/cnm186z/), and you may also wish to take a look at the [official Python -docs](https://docs.python.org/3.5/reference/datamodel.html#basic-customization). +docs](https://docs.python.org/3/reference/datamodel.html#basic-customization). <a class="anchor" id="appendix-monkey-patching"></a> @@ -1322,24 +1407,24 @@ docs](https://docs.python.org/3.5/reference/datamodel.html#basic-customization). The act of run-time modification of objects or class definitions is referred -to as [_monkey-patching_](https://en.wikipedia.org/wiki/Monkey_patch) and, +to as [*monkey-patching*](https://en.wikipedia.org/wiki/Monkey_patch) and, whilst it is allowed by the Python programming language, it is generally considered quite bad practice. -Just because you _can_ do something doesn't mean that you _should_. Python +Just because you *can* do something doesn't mean that you *should*. Python gives you the flexibility to write your software in whatever manner you deem -suitable. __But__ if you want to write software that will be used, adopted, +suitable. **But** if you want to write software that will be used, adopted, maintained, and enjoyed by other people, you should be polite, write your code in a clear, readable fashion, and avoid the use of devious tactics such as monkey-patching. 
-__However__, while monkey-patching may seem like a horrific programming +**However**, while monkey-patching may seem like a horrific programming practice to those of you coming from the realms of C++, Java, and the like, -(and it is horrific in many cases), it can be _extremely_ useful in certain +(and it is horrific in many cases), it can be *extremely* useful in certain circumstances. For instance, monkey-patching makes [unit testing a -breeze in Python](https://docs.python.org/3.5/library/unittest.mock.html). +breeze in Python](https://docs.python.org/3/library/unittest.mock.html). As another example, consider the scenario where you are dependent on a third @@ -1398,7 +1483,7 @@ print('Add four: {}'.format(a.add(1, 2, 3, 4))) ``` > <sup>4</sup>Another option is the [`functools.singledispatch` -> decorator](https://docs.python.org/3.5/library/functools.html#functools.singledispatch), +> decorator](https://docs.python.org/3/library/functools.html#functools.singledispatch), > which is more complicated, but may allow you to write your dispatch logic in > a more concise manner. @@ -1411,5 +1496,5 @@ The official Python documentation has a wealth of information on the internal workings of classes and objects, so these pages are worth a read: -* https://docs.python.org/3.5/tutorial/classes.html -* https://docs.python.org/3.5/reference/datamodel.html +* https://docs.python.org/3/tutorial/classes.html +* https://docs.python.org/3/reference/datamodel.html diff --git a/advanced_topics/04_operator_overloading.ipynb b/advanced_topics/04_operator_overloading.ipynb index 567fd7e6ade801b930979254bc95954d723f34d9..c860fb3391cc115296667afe418fa559ef28d323 100644 --- a/advanced_topics/04_operator_overloading.ipynb +++ b/advanced_topics/04_operator_overloading.ipynb @@ -14,13 +14,13 @@ "Operator overloading, in an object-oriented programming language, is the\n", "process of customising the behaviour of _operators_ (e.g. `+`, `*`, `/` and\n", "`-`) on user-defined types. 
This practical aims to show you that operator\n", - "overloading is __very__ easy to do in Python.\n", + "overloading is **very** easy to do in Python.\n", "\n", "\n", "This practical gives a brief overview of the operators which you may be most\n", "interested in implementing. However, there are many operators (and other\n", "special methods) which you can support in your own classes - the [official\n", - "documentation](https://docs.python.org/3.5/reference/datamodel.html#basic-customization)\n", + "documentation](https://docs.python.org/3/reference/datamodel.html#basic-customization)\n", "is the best reference if you are interested in learning more.\n", "\n", "\n", @@ -243,7 +243,7 @@ "operands. For example, in the expression `a + b`, if `a.__add__` is not\n", "implemented, but but `b.__radd__` is implemented, then the latter will be\n", "called. Take a look at the [official\n", - "documentation](https://docs.python.org/3.5/reference/datamodel.html#emulating-numeric-types)\n", + "documentation](https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types)\n", "for further details, including a full list of the arithmetic and logical\n", "operators that your classes can support.\n", "\n", @@ -346,9 +346,9 @@ "metadata": {}, "source": [ "The\n", - "[`@functools.total_ordering`](https://docs.python.org/3.5/library/functools.html#functools.total_ordering)\n", + "[`@functools.total_ordering`](https://docs.python.org/3/library/functools.html#functools.total_ordering)\n", "is a convenience\n", - "[decorator](https://docs.python.org/3.5/glossary.html#term-decorator) which,\n", + "[decorator](https://docs.python.org/3/glossary.html#term-decorator) which,\n", "given a class that implements equality and a single comparison function\n", "(`__lt__` in the above code), will \"fill in\" the remainder of the comparison\n", "operators. 
If you need very specific or complicated behaviour, then you can\n", @@ -365,7 +365,7 @@ "\n", "\n", "Refer to the [official\n", - "documentation](https://docs.python.org/3.5/reference/datamodel.html#object.__lt__)\n", + "documentation](https://docs.python.org/3/reference/datamodel.html#object.__lt__)\n", "for all of the details on supporting comparison operators.\n", "\n", "\n", @@ -387,13 +387,13 @@ "with the `[]` operator. All that is needed to support them are to implement\n", "three special methods in your class, regardless of whether your class will be\n", "indexed by sequential integers (like a `list`) or by\n", - "[hashable](https://docs.python.org/3.5/glossary.html#term-hashable) values\n", + "[hashable](https://docs.python.org/3/glossary.html#term-hashable) values\n", "(like a `dict`):\n", "\n", "\n", - "- __Retrieval__ is performed by the `__getitem__` method\n", - "- __Assignment__ is performed by the `__setitem__` method\n", - "- __Deletion__ is performed by the `__delitem__` method\n", + "- **Retrieval** is performed by the `__getitem__` method\n", + "- **Assignment** is performed by the `__setitem__` method\n", + "- **Deletion** is performed by the `__delitem__` method\n", "\n", "\n", "Note that, if you implement these methods in your own class, there is no\n", @@ -496,7 +496,7 @@ "metadata": {}, "source": [ "If you wish to support the Python `start:stop:step` [slice\n", - "notation](https://docs.python.org/3.5/library/functions.html#slice), you\n", + "notation](https://docs.python.org/3/library/functions.html#slice), you\n", "simply need to write your `__getitem__` and `__setitem__` methods so that they\n", "can detect `slice` objects:" ] @@ -556,7 +556,7 @@ "> different hashing algorithm), the `Sequence` and `MutableMapping` classes\n", "> are [a better choice](https://stackoverflow.com/a/7148602) - you can find\n", "> them in the\n", - "> [`collections.abc`](https://docs.python.org/3.5/library/collections.abc.html)\n", + "> 
[`collections.abc`](https://docs.python.org/3/library/collections.abc.html)\n", "> module.\n", "\n", "\n", @@ -630,7 +630,7 @@ "metadata": {}, "source": [ "> The `TimedFunction` class is conceptually very similar to a\n", - "> [decorator](https://docs.python.org/3.5/glossary.html#term-decorator) -\n", + "> [decorator](https://docs.python.org/3/glossary.html#term-decorator) -\n", "> decorators are covered in another practical.\n", "\n", "\n", @@ -643,7 +643,7 @@ "quite a niche feature, and it is easy to trip yourself up, so if you wish to\n", "use this in your own project, make sure that you carefully read (and\n", "understand) [the\n", - "documentation](https://docs.python.org/3.5/reference/datamodel.html#customizing-attribute-access),\n", + "documentation](https://docs.python.org/3/reference/datamodel.html#customizing-attribute-access),\n", "and test your code comprehensively!\n", "\n", "\n", diff --git a/advanced_topics/04_operator_overloading.md b/advanced_topics/04_operator_overloading.md index 3446401f9fa65815b69b2617ba76edcf85747d9c..ed386e736f32e1466d7d67fc1219ce834c49d07a 100644 --- a/advanced_topics/04_operator_overloading.md +++ b/advanced_topics/04_operator_overloading.md @@ -8,13 +8,13 @@ Operator overloading, in an object-oriented programming language, is the process of customising the behaviour of _operators_ (e.g. `+`, `*`, `/` and `-`) on user-defined types. This practical aims to show you that operator -overloading is __very__ easy to do in Python. +overloading is **very** easy to do in Python. This practical gives a brief overview of the operators which you may be most interested in implementing. 
However, there are many operators (and other
special methods) which you can support in your own classes - the [official
-documentation](https://docs.python.org/3.5/reference/datamodel.html#basic-customization)
+documentation](https://docs.python.org/3/reference/datamodel.html#basic-customization)
is the best reference if you are interested in learning more.


@@ -173,7 +173,7 @@ rules are followed depending on the set of methods implemented on the
operands. For example, in the expression `a + b`, if `a.__add__` is not
-implemented, but but `b.__radd__` is implemented, then the latter will be
+implemented, but `b.__radd__` is implemented, then the latter will be
called. Take a look at the [official
-documentation](https://docs.python.org/3.5/reference/datamodel.html#emulating-numeric-types)
+documentation](https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types)
for further details, including a full list of the arithmetic and logical
operators that your classes can support.


@@ -252,9 +252,9 @@ print(sorted((l3, l1, l2)))


The
-[`@functools.total_ordering`](https://docs.python.org/3.5/library/functools.html#functools.total_ordering)
+[`@functools.total_ordering`](https://docs.python.org/3/library/functools.html#functools.total_ordering)
is a convenience
-[decorator](https://docs.python.org/3.5/glossary.html#term-decorator) which,
+[decorator](https://docs.python.org/3/glossary.html#term-decorator) which,
given a class that implements equality and a single comparison function
(`__lt__` in the above code), will "fill in" the remainder of the comparison
operators. If you need very specific or complicated behaviour, then you can
@@ -271,7 +271,7 @@ and just one of `__lt__`, `__le__`, `__gt__` or `__ge__`.


Refer to the [official
-documentation](https://docs.python.org/3.5/reference/datamodel.html#object.__lt__)
+documentation](https://docs.python.org/3/reference/datamodel.html#object.__lt__)
for all of the details on supporting comparison operators.
@@ -293,13 +293,13 @@ At its essence, there are only three types of behaviours that are possible with the `[]` operator. All that is needed to support them are to implement three special methods in your class, regardless of whether your class will be indexed by sequential integers (like a `list`) or by -[hashable](https://docs.python.org/3.5/glossary.html#term-hashable) values +[hashable](https://docs.python.org/3/glossary.html#term-hashable) values (like a `dict`): -- __Retrieval__ is performed by the `__getitem__` method -- __Assignment__ is performed by the `__setitem__` method -- __Deletion__ is performed by the `__delitem__` method +- **Retrieval** is performed by the `__getitem__` method +- **Assignment** is performed by the `__setitem__` method +- **Deletion** is performed by the `__delitem__` method Note that, if you implement these methods in your own class, there is no @@ -370,7 +370,7 @@ print(tt['12345']) If you wish to support the Python `start:stop:step` [slice -notation](https://docs.python.org/3.5/library/functions.html#slice), you +notation](https://docs.python.org/3/library/functions.html#slice), you simply need to write your `__getitem__` and `__setitem__` methods so that they can detect `slice` objects: @@ -414,7 +414,7 @@ print(tt[::2]) > different hashing algorithm), the `Sequence` and `MutableMapping` classes > are [a better choice](https://stackoverflow.com/a/7148602) - you can find > them in the -> [`collections.abc`](https://docs.python.org/3.5/library/collections.abc.html) +> [`collections.abc`](https://docs.python.org/3/library/collections.abc.html) > module. @@ -472,7 +472,7 @@ inv = tf(data) > The `TimedFunction` class is conceptually very similar to a -> [decorator](https://docs.python.org/3.5/glossary.html#term-decorator) - +> [decorator](https://docs.python.org/3/glossary.html#term-decorator) - > decorators are covered in another practical. @@ -485,7 +485,7 @@ the attributes and methods of an object. 
This is very powerful, but is also quite a niche feature, and it is easy to trip yourself up, so if you wish to use this in your own project, make sure that you carefully read (and understand) [the -documentation](https://docs.python.org/3.5/reference/datamodel.html#customizing-attribute-access), +documentation](https://docs.python.org/3/reference/datamodel.html#customizing-attribute-access), and test your code comprehensively! diff --git a/advanced_topics/05_context_managers.ipynb b/advanced_topics/05_context_managers.ipynb index ad3c231ca860ce526be06337f65cc74a1e09920d..79d81e0697c15f7bb984363f9c69fba28cd4aeb5 100644 --- a/advanced_topics/05_context_managers.ipynb +++ b/advanced_topics/05_context_managers.ipynb @@ -31,8 +31,8 @@ "\n", "\n", "The `with` statement is obviously hiding some internal details from us. But\n", - "these internals are in fact quite straightforward, and are known as [_context\n", - "managers_](https://docs.python.org/3.5/reference/datamodel.html#context-managers).\n", + "these internals are in fact quite straightforward, and are known as [*context\n", + "managers*](https://docs.python.org/3/reference/datamodel.html#context-managers).\n", "\n", "\n", "* [Anatomy of a context manager](#anatomy-of-a-context-manager)\n", @@ -50,7 +50,7 @@ "## Anatomy of a context manager\n", "\n", "\n", - "A _context manager_ is simply an object which has two specially named methods\n", + "A *context manager* is simply an object which has two specially named methods\n", "`__enter__` and `__exit__`. Any object which has these methods can be used in\n", "a `with` statement.\n", "\n", @@ -127,7 +127,7 @@ "\n", "Context managers do not provide anything that cannot be accomplished in other\n", "ways. 
For example, we could accomplish very similar behaviour using\n", - "[`try` - `finally` logic](https://docs.python.org/3.5/tutorial/errors.html#handling-exceptions) -\n", + "[`try` - `finally` logic](https://docs.python.org/3/tutorial/errors.html#handling-exceptions) -\n", "the statements in the `finally` clause will *always* be executed, whether an\n", "error is raised or not:" ] @@ -253,7 +253,7 @@ "By now you must be [panicking](https://youtu.be/cSU_5MgtDc8?t=9) about why I\n", "haven't mentioned those conspicuous `*args` that get passed to the`__exit__`\n", "method. It turns out that a context manager's [`__exit__`\n", - "method](https://docs.python.org/3.5/reference/datamodel.html#object.__exit__)\n", + "method](https://docs.python.org/3/reference/datamodel.html#object.__exit__)\n", "is always passed three arguments.\n", "\n", "\n", @@ -321,10 +321,10 @@ "source": [ "So when an error occurs, the `__exit__` method is passed the following:\n", "\n", - "- The [`Exception`](https://docs.python.org/3.5/tutorial/errors.html)\n", + "- The [`Exception`](https://docs.python.org/3/tutorial/errors.html)\n", " type that was raised.\n", "- The `Exception` instance that was raised.\n", - "- A [`traceback`](https://docs.python.org/3.5/library/traceback.html) object\n", + "- A [`traceback`](https://docs.python.org/3/library/traceback.html) object\n", " which can be used to get more information about the exception (e.g. line\n", " number).\n", "\n", @@ -365,7 +365,7 @@ "> Note that if a function or method does not explicitly return a value, its\n", "> return value is `None` (which would evaluate to `False` when converted to a\n", "> `bool`). 
Also note that we are using the built-in\n", - "> [`issubclass`](https://docs.python.org/3.5/library/functions.html#issubclass)\n", + "> [`issubclass`](https://docs.python.org/3/library/functions.html#issubclass)\n", "> function, which allows us to test the type of a class.\n", "\n", "\n", @@ -440,7 +440,7 @@ "\n", "In fact, there is another way to create context managers in Python. The\n", "built-in [`contextlib`\n", - "module](https://docs.python.org/3.5/library/contextlib.html#contextlib.contextmanager)\n", + "module](https://docs.python.org/3/library/contextlib.html#contextlib.contextmanager)\n", "has a decorator called `@contextmanager`, which allows us to turn __any\n", "function__ into a context manager. The only requirement is that the function\n", "must have a `yield` statement<sup>1</sup>. So we could rewrite our `TempDir`\n", @@ -503,11 +503,11 @@ "the variable `tmp` will be given the value `tdir`.\n", "\n", "\n", - "> <sup>1</sup> The `yield` keyword is used in _generator functions_.\n", + "> <sup>1</sup> The `yield` keyword is used in *generator functions*.\n", "> Functions which are used with the `@contextmanager` decorator must be\n", "> generator functions which yield exactly one value.\n", "> [Generators](https://www.python.org/dev/peps/pep-0289/) and [generator\n", - "> functions](https://docs.python.org/3.5/glossary.html#term-generator) are\n", + "> functions](https://docs.python.org/3/glossary.html#term-generator) are\n", "> beyond the scope of this practical.\n", "\n", "\n", @@ -774,8 +774,7 @@ "metadata": {}, "source": [ "This new `holdUpdates` method allows us to temporarily suppress notifications\n", - "from all `Line` instances. So now, we can update many `Line` properties\n", - "without performing any redundant redraws:" + "from all `Line` instances. 
Let's create a new plot:" ] }, { @@ -787,12 +786,27 @@ "fig = plt.figure()\n", "ax = fig.add_subplot(111)\n", "plotter = Plotter(ax)\n", + "l1 = plotter.addData(np.sin(np.linspace(0, 6 * np.pi, 50)))\n", + "l2 = plotter.addData(np.cos(np.linspace(0, 6 * np.pi, 50)))\n", "\n", - "plt.show()\n", - "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, we can update many `Line` properties without performing any redundant\n", + "redraws:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "with plotter.holdUpdates():\n", - " l1 = plotter.addData(np.sin(np.linspace(0, 6 * np.pi, 50)))\n", - " l2 = plotter.addData(np.cos(np.linspace(0, 6 * np.pi, 50)))\n", " l1.colour = '#0000ff'\n", " l2.colour = '#ffff00'\n", " l1.width = 1\n", @@ -809,8 +823,8 @@ "## Useful references\n", "\n", "\n", - "* [Context manager classes](https://docs.python.org/3.5/reference/datamodel.html#context-managers)\n", - "* The [`contextlib` module](https://docs.python.org/3.5/library/contextlib.html)" + "* [Context manager classes](https://docs.python.org/3/reference/datamodel.html#context-managers)\n", + "* The [`contextlib` module](https://docs.python.org/3/library/contextlib.html)" ] } ], diff --git a/advanced_topics/05_context_managers.md b/advanced_topics/05_context_managers.md index 2229471e09769cb25159ea203f4e598527b33b8a..a06cefae9fc1b96bd8aea05a0cc5e32136b0d093 100644 --- a/advanced_topics/05_context_managers.md +++ b/advanced_topics/05_context_managers.md @@ -17,8 +17,8 @@ automatically, even if an error occurs inside the `with` statement. The `with` statement is obviously hiding some internal details from us. But -these internals are in fact quite straightforward, and are known as [_context -managers_](https://docs.python.org/3.5/reference/datamodel.html#context-managers). 
+these internals are in fact quite straightforward, and are known as [*context +managers*](https://docs.python.org/3/reference/datamodel.html#context-managers). * [Anatomy of a context manager](#anatomy-of-a-context-manager) @@ -36,7 +36,7 @@ managers_](https://docs.python.org/3.5/reference/datamodel.html#context-managers ## Anatomy of a context manager -A _context manager_ is simply an object which has two specially named methods +A *context manager* is simply an object which has two specially named methods `__enter__` and `__exit__`. Any object which has these methods can be used in a `with` statement. @@ -89,7 +89,7 @@ finalisation logic that we always want to have executed. Context managers do not provide anything that cannot be accomplished in other ways. For example, we could accomplish very similar behaviour using -[`try` - `finally` logic](https://docs.python.org/3.5/tutorial/errors.html#handling-exceptions) - +[`try` - `finally` logic](https://docs.python.org/3/tutorial/errors.html#handling-exceptions) - the statements in the `finally` clause will *always* be executed, whether an error is raised or not: @@ -183,7 +183,7 @@ with TempDir(): By now you must be [panicking](https://youtu.be/cSU_5MgtDc8?t=9) about why I haven't mentioned those conspicuous `*args` that get passed to the`__exit__` method. It turns out that a context manager's [`__exit__` -method](https://docs.python.org/3.5/reference/datamodel.html#object.__exit__) +method](https://docs.python.org/3/reference/datamodel.html#object.__exit__) is always passed three arguments. @@ -227,10 +227,10 @@ with MyContextManager(): So when an error occurs, the `__exit__` method is passed the following: -- The [`Exception`](https://docs.python.org/3.5/tutorial/errors.html) +- The [`Exception`](https://docs.python.org/3/tutorial/errors.html) type that was raised. - The `Exception` instance that was raised. 
-- A [`traceback`](https://docs.python.org/3.5/library/traceback.html) object +- A [`traceback`](https://docs.python.org/3/library/traceback.html) object which can be used to get more information about the exception (e.g. line number). @@ -262,7 +262,7 @@ class MyContextManager(object): > Note that if a function or method does not explicitly return a value, its > return value is `None` (which would evaluate to `False` when converted to a > `bool`). Also note that we are using the built-in -> [`issubclass`](https://docs.python.org/3.5/library/functions.html#issubclass) +> [`issubclass`](https://docs.python.org/3/library/functions.html#issubclass) > function, which allows us to test the type of a class. @@ -312,7 +312,7 @@ with open('05_context_managers.md', 'rt') as inf, \ In fact, there is another way to create context managers in Python. The built-in [`contextlib` -module](https://docs.python.org/3.5/library/contextlib.html#contextlib.contextmanager) +module](https://docs.python.org/3/library/contextlib.html#contextlib.contextmanager) has a decorator called `@contextmanager`, which allows us to turn __any function__ into a context manager. The only requirement is that the function must have a `yield` statement<sup>1</sup>. So we could rewrite our `TempDir` @@ -359,11 +359,11 @@ to be passed to the `with` statement, so in the line `with tempdir() as tmp`, the variable `tmp` will be given the value `tdir`. -> <sup>1</sup> The `yield` keyword is used in _generator functions_. +> <sup>1</sup> The `yield` keyword is used in *generator functions*. > Functions which are used with the `@contextmanager` decorator must be > generator functions which yield exactly one value. > [Generators](https://www.python.org/dev/peps/pep-0289/) and [generator -> functions](https://docs.python.org/3.5/glossary.html#term-generator) are +> functions](https://docs.python.org/3/glossary.html#term-generator) are > beyond the scope of this practical. 
@@ -582,20 +582,24 @@ class Plotter(object): This new `holdUpdates` method allows us to temporarily suppress notifications -from all `Line` instances. So now, we can update many `Line` properties -without performing any redundant redraws: +from all `Line` instances. Let's create a new plot: ``` fig = plt.figure() ax = fig.add_subplot(111) plotter = Plotter(ax) +l1 = plotter.addData(np.sin(np.linspace(0, 6 * np.pi, 50))) +l2 = plotter.addData(np.cos(np.linspace(0, 6 * np.pi, 50))) plt.show() +``` +Now, we can update many `Line` properties without performing any redundant +redraws: + +``` with plotter.holdUpdates(): - l1 = plotter.addData(np.sin(np.linspace(0, 6 * np.pi, 50))) - l2 = plotter.addData(np.cos(np.linspace(0, 6 * np.pi, 50))) l1.colour = '#0000ff' l2.colour = '#ffff00' l1.width = 1 @@ -609,5 +613,5 @@ with plotter.holdUpdates(): ## Useful references -* [Context manager classes](https://docs.python.org/3.5/reference/datamodel.html#context-managers) -* The [`contextlib` module](https://docs.python.org/3.5/library/contextlib.html) +* [Context manager classes](https://docs.python.org/3/reference/datamodel.html#context-managers) +* The [`contextlib` module](https://docs.python.org/3/library/contextlib.html) diff --git a/advanced_topics/06_decorators.ipynb b/advanced_topics/06_decorators.ipynb index e7ad3814896813f70b44ee9d026285e87eb598e1..cbbf21f720944dccfe680f8d769aea480702bcff 100644 --- a/advanced_topics/06_decorators.ipynb +++ b/advanced_topics/06_decorators.ipynb @@ -130,7 +130,7 @@ "as its sole argument. It then creates and returns a new function,\n", "`wrapperFunc`. This `wrapperFunc` function calls and times the function that\n", "was passed to `timeFunc`. 
But note that when `timeFunc` is called,\n", - "`wrapperFunc` is _not_ called - it is only created and returned.\n", + "`wrapperFunc` is *not* called - it is only created and returned.\n", "\n", "\n", "Let's use our new `timeFunc` implementation:" @@ -189,9 +189,9 @@ "> [functions are not special](#appendix-functions-are-not-special).\n", "\n", "\n", - "Guess what? We have just created a __decorator__. A decorator is simply a\n", + "Guess what? We have just created a **decorator**. A decorator is simply a\n", "function which accepts a function as its input, and returns another function\n", - "as its output. In the example above, we have _decorated_ the `inverse`\n", + "as its output. In the example above, we have *decorated* the `inverse`\n", "function with the `timeFunc` decorator.\n", "\n", "\n", @@ -306,7 +306,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "So only one `wrapperFunc` function exists, and this function is _shared_ by\n", + "So only one `wrapperFunc` function exists, and this function is *shared* by\n", "all instances of the `MiscMaths` class - (such as the `mm1` and `mm2`\n", "instances in the example above). In many cases this is not a problem, but\n", "there can be situations where you need each instance of your class to have its\n", @@ -519,11 +519,11 @@ "metadata": {}, "source": [ "> We used the handy\n", - "> [`collections.OrderedDict`](https://docs.python.org/3.5/library/collections.html#collections.OrderedDict)\n", + "> [`collections.OrderedDict`](https://docs.python.org/3/library/collections.html#collections.OrderedDict)\n", "> class here which preserves the insertion order of key-value pairs.\n", "\n", "\n", - "This is starting to look a little complicated - we now have _three_ layers of\n", + "This is starting to look a little complicated - we now have *three* layers of\n", "functions. 
This is necessary when you wish to write a decorator which accepts\n", "arguments (refer to the\n", "[appendix](#appendix-decorators-without-arguments-versus-decorators-with-arguments)\n", @@ -656,7 +656,7 @@ "\n", "\n", "By now, you will have gained the impression that a decorator is a function\n", - "which _decorates_ another function. But if you went through the practical on\n", + "which *decorates* another function. But if you went through the practical on\n", "operator overloading, you might remember the special `__call__` method, that\n", "allows an object to be called as if it were a function.\n", "\n", @@ -773,17 +773,17 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "> Unit testing is something which you must do! This is __especially__\n", + "> Unit testing is something which you must do! This is **especially**\n", "> important in an interpreted language such as Python, where there is no\n", "> compiler to catch all of your mistakes.\n", ">\n", "> Python has a built-in\n", - "> [`unittest`](https://docs.python.org/3.5/library/unittest.html) module,\n", + "> [`unittest`](https://docs.python.org/3/library/unittest.html) module,\n", "> however the third-party [`pytest`](https://docs.pytest.org/en/latest/) and\n", "> [`nose`](http://nose2.readthedocs.io/en/latest/) are popular. 
It is also\n", "> wise to combine your unit tests with\n", "> [`coverage`](https://coverage.readthedocs.io/en/coverage-4.5.1/), which\n", - "> tells you how much of your code was executed, or _covered_ when your\n", + "> tells you how much of your code was executed, or *covered* when your\n", "> tests were run.\n", "\n", "\n", @@ -955,7 +955,7 @@ "> If it bothers you that `print(inv2)` resulted in\n", "> `<function inverse at ...>`, and not `<function inv2 at ...>`, then refer to\n", "> the appendix on\n", - "> [preserving function metdata](#appendix-preserving-function-metadata).\n", + "> [preserving function metadata](#appendix-preserving-function-metadata).\n", "\n", "\n", "<a class=\"anchor\" id=\"appendix-closures\"></a>\n", @@ -963,7 +963,7 @@ "\n", "\n", "Whenever we define or use a decorator, we are taking advantage of a concept\n", - "called a [_closure_][wiki-closure]. Take a second to re-familiarise yourself\n", + "called a [*closure*][wiki-closure]. Take a second to re-familiarise yourself\n", "with our `memoize` decorator function from earlier - when `memoize` is called,\n", "it creates and returns a function called `wrapper`:\n", "\n", @@ -1041,7 +1041,7 @@ "\n", "\n", "This is what is known as a\n", - "[_closure_](https://www.geeksforgeeks.org/python-closures/). Closures are a\n", + "[*closure*](https://www.geeksforgeeks.org/python-closures/). Closures are a\n", "fundamental, and extremely powerful, aspect of Python and other high level\n", "languages. 
So there's your answer,\n", "[fishbulb](https://www.youtube.com/watch?v=CiAaEPcnlOg).\n", @@ -1101,7 +1101,7 @@ "\n", "\n", "But if a decorator function is \"called\" (scenarios 2 or 3), both the decorator\n", - "function (`decorator`), __and its return value__ (`wrapper`) are called - the\n", + "function (`decorator`), **and its return value** (`wrapper`) are called - the\n", "decorator function is passed the arguments that were provided, and its return\n", "value is passed the decorated function.\n", "\n", @@ -1281,7 +1281,7 @@ "mean that we couldn't use `ensureNumeric` with standalone functions.\n", "\n", "\n", - "But we _can_ manually apply the `ensureNumeric` decorator to `MiscMaths`\n", + "But we *can* manually apply the `ensureNumeric` decorator to `MiscMaths`\n", "instances when they are initialised. We can't use the nice `@ensureNumeric`\n", "syntax to apply our decorators, but this is a viable approach:" ] @@ -1311,7 +1311,7 @@ "Another approach is to use a second decorator, which dynamically creates the\n", "real decorator when it is accessed on an instance. This requires the use of an\n", "advanced Python technique called\n", - "[_descriptors_](https://docs.python.org/3.5/howto/descriptor.html), which is\n", + "[*descriptors*](https://docs.python.org/3/howto/descriptor.html), which is\n", "beyond the scope of this practical. 
But if you are interested, you can see an\n", "implementation of this approach\n", "[here](https://git.fmrib.ox.ac.uk/fsl/fslpy/blob/1.6.8/fsl/utils/memoize.py#L249).\n", @@ -1408,7 +1408,7 @@ "\n", "\n", "Fortunately, there is a workaround, available in the built-in\n", - "[`functools`](https://docs.python.org/3.5/library/functools.html#functools.wraps)\n", + "[`functools`](https://docs.python.org/3/library/functools.html#functools.wraps)\n", "module:" ] }, @@ -1462,11 +1462,11 @@ "## Appendix: Class decorators\n", "\n", "\n", - "> Not to be confused with [_decorator classes_](#decorator-classes)!\n", + "> Not to be confused with [*decorator classes*](#decorator-classes)!\n", "\n", "\n", "In this practical, we have shown how decorators can be applied to functions\n", - "and methods. But decorators can in fact also be applied to _classes_. This is\n", + "and methods. But decorators can in fact also be applied to *classes*. This is\n", "a fairly niche feature that you are probably not likely to need, so we will\n", "only cover it briefly.\n", "\n", diff --git a/advanced_topics/06_decorators.md b/advanced_topics/06_decorators.md index 8da29fa1998e88afbe115e589d792c31192ba3d4..590f232a57981aa88adac7f839331714dceb866b 100644 --- a/advanced_topics/06_decorators.md +++ b/advanced_topics/06_decorators.md @@ -100,7 +100,7 @@ This new `timeFunc` function is again passed a function `func`, but this time as its sole argument. It then creates and returns a new function, `wrapperFunc`. This `wrapperFunc` function calls and times the function that was passed to `timeFunc`. But note that when `timeFunc` is called, -`wrapperFunc` is _not_ called - it is only created and returned. +`wrapperFunc` is *not* called - it is only created and returned. Let's use our new `timeFunc` implementation: @@ -151,9 +151,9 @@ which holds a reference to the original definition of `inverse`. > [functions are not special](#appendix-functions-are-not-special). -Guess what? 
We have just created a __decorator__. A decorator is simply a +Guess what? We have just created a **decorator**. A decorator is simply a function which accepts a function as its input, and returns another function -as its output. In the example above, we have _decorated_ the `inverse` +as its output. In the example above, we have *decorated* the `inverse` function with the `timeFunc` decorator. @@ -228,7 +228,7 @@ MiscMaths.inverse = timeFunc(MiscMaths.inverse) ``` -So only one `wrapperFunc` function exists, and this function is _shared_ by +So only one `wrapperFunc` function exists, and this function is *shared* by all instances of the `MiscMaths` class - (such as the `mm1` and `mm2` instances in the example above). In many cases this is not a problem, but there can be situations where you need each instance of your class to have its @@ -400,11 +400,11 @@ def limitedMemoize(maxSize): ``` > We used the handy -> [`collections.OrderedDict`](https://docs.python.org/3.5/library/collections.html#collections.OrderedDict) +> [`collections.OrderedDict`](https://docs.python.org/3/library/collections.html#collections.OrderedDict) > class here which preserves the insertion order of key-value pairs. -This is starting to look a little complicated - we now have _three_ layers of +This is starting to look a little complicated - we now have *three* layers of functions. This is necessary when you wish to write a decorator which accepts arguments (refer to the [appendix](#appendix-decorators-without-arguments-versus-decorators-with-arguments) @@ -505,7 +505,7 @@ expensiveFunc(1) By now, you will have gained the impression that a decorator is a function -which _decorates_ another function. But if you went through the practical on +which *decorates* another function. But if you went through the practical on operator overloading, you might remember the special `__call__` method, that allows an object to be called as if it were a function. 
@@ -596,17 +596,17 @@ registry.runTests() ``` -> Unit testing is something which you must do! This is __especially__ +> Unit testing is something which you must do! This is **especially** > important in an interpreted language such as Python, where there is no > compiler to catch all of your mistakes. > > Python has a built-in -> [`unittest`](https://docs.python.org/3.5/library/unittest.html) module, +> [`unittest`](https://docs.python.org/3/library/unittest.html) module, > however the third-party [`pytest`](https://docs.pytest.org/en/latest/) and > [`nose`](http://nose2.readthedocs.io/en/latest/) are popular. It is also > wise to combine your unit tests with > [`coverage`](https://coverage.readthedocs.io/en/coverage-4.5.1/), which -> tells you how much of your code was executed, or _covered_ when your +> tells you how much of your code was executed, or *covered* when your > tests were run. @@ -713,7 +713,7 @@ as we like. > If it bothers you that `print(inv2)` resulted in > `<function inverse at ...>`, and not `<function inv2 at ...>`, then refer to > the appendix on -> [preserving function metdata](#appendix-preserving-function-metadata). +> [preserving function metadata](#appendix-preserving-function-metadata). <a class="anchor" id="appendix-closures"></a> @@ -721,7 +721,7 @@ as we like. Whenever we define or use a decorator, we are taking advantage of a concept -called a [_closure_][wiki-closure]. Take a second to re-familiarise yourself +called a [*closure*][wiki-closure]. Take a second to re-familiarise yourself with our `memoize` decorator function from earlier - when `memoize` is called, it creates and returns a function called `wrapper`: @@ -783,7 +783,7 @@ finished. This is what is known as a -[_closure_](https://www.geeksforgeeks.org/python-closures/). Closures are a +[*closure*](https://www.geeksforgeeks.org/python-closures/). Closures are a fundamental, and extremely powerful, aspect of Python and other high level languages. 
So there's your answer, [fishbulb](https://www.youtube.com/watch?v=CiAaEPcnlOg). @@ -834,7 +834,7 @@ function. But if a decorator function is "called" (scenarios 2 or 3), both the decorator -function (`decorator`), __and its return value__ (`wrapper`) are called - the +function (`decorator`), **and its return value** (`wrapper`) are called - the decorator function is passed the arguments that were provided, and its return value is passed the decorated function. @@ -966,7 +966,7 @@ function, so that the `wrapper` ignores the first argument. But this would mean that we couldn't use `ensureNumeric` with standalone functions. -But we _can_ manually apply the `ensureNumeric` decorator to `MiscMaths` +But we *can* manually apply the `ensureNumeric` decorator to `MiscMaths` instances when they are initialised. We can't use the nice `@ensureNumeric` syntax to apply our decorators, but this is a viable approach: @@ -988,7 +988,7 @@ print(mm.add('5', 10)) Another approach is to use a second decorator, which dynamically creates the real decorator when it is accessed on an instance. This requires the use of an advanced Python technique called -[_descriptors_](https://docs.python.org/3.5/howto/descriptor.html), which is +[*descriptors*](https://docs.python.org/3/howto/descriptor.html), which is beyond the scope of this practical. But if you are interested, you can see an implementation of this approach [here](https://git.fmrib.ox.ac.uk/fsl/fslpy/blob/1.6.8/fsl/utils/memoize.py#L249). @@ -1053,7 +1053,7 @@ documentation](http://www.sphinx-doc.org/) for our code. Fortunately, there is a workaround, available in the built-in -[`functools`](https://docs.python.org/3.5/library/functools.html#functools.wraps) +[`functools`](https://docs.python.org/3/library/functools.html#functools.wraps) module: @@ -1091,11 +1091,11 @@ print('Help: ', add2.__doc__) ## Appendix: Class decorators -> Not to be confused with [_decorator classes_](#decorator-classes)! 
+> Not to be confused with [*decorator classes*](#decorator-classes)! In this practical, we have shown how decorators can be applied to functions -and methods. But decorators can in fact also be applied to _classes_. This is +and methods. But decorators can in fact also be applied to *classes*. This is a fairly niche feature that you are probably not likely to need, so we will only cover it briefly. diff --git a/advanced_topics/07_threading.ipynb b/advanced_topics/07_threading.ipynb index 83167ef78d7d49d3c5106507a48bd58f9b1f38cc..e8a1a36688775ebd745dfc791d472202eda0cf42 100644 --- a/advanced_topics/07_threading.ipynb +++ b/advanced_topics/07_threading.ipynb @@ -8,20 +8,47 @@ "\n", "\n", "The Python language has built-in support for multi-threading in the\n", - "[`threading`](https://docs.python.org/3.5/library/threading.html) module, and\n", + "[`threading`](https://docs.python.org/3/library/threading.html) module, and\n", "true parallelism in the\n", - "[`multiprocessing`](https://docs.python.org/3.5/library/multiprocessing.html)\n", + "[`multiprocessing`](https://docs.python.org/3/library/multiprocessing.html)\n", "module. If you want to be impressed, skip straight to the section on\n", "[`multiprocessing`](todo).\n", "\n", "\n", - "\n", - "\n", - "\n", + "> *Note*: If you are familiar with a \"real\" programming language such as C++\n", + "> or Java, you might be disappointed with the native support for parallelism in\n", + "> Python. 
Python threads do not run in parallel because of the Global\n", + "> Interpreter Lock, and if you use `multiprocessing`, be prepared to either\n", + "> bear the performance hit of copying data between processes, or jump through\n", + "> hoops order to share data between processes.\n", + ">\n", + "> This limitation *might* be solved in a future Python release by way of\n", + "> [*sub-interpreters*](https://www.python.org/dev/peps/pep-0554/), but the\n", + "> author of this practical is not holding his breath.\n", + "\n", + "\n", + "* [Threading](#threading)\n", + " * [Subclassing `Thread`](#subclassing-thread)\n", + " * [Daemon threads](#daemon-threads)\n", + " * [Thread synchronisation](#thread-synchronisation)\n", + " * [`Lock`](#lock)\n", + " * [`Event`](#event)\n", + " * [The Global Interpreter Lock (GIL)](#the-global-interpreter-lock-gil)\n", + "* [Multiprocessing](#multiprocessing)\n", + " * [`threading`-equivalent API](#threading-equivalent-api)\n", + " * [Higher-level API - the `multiprocessing.Pool`](#higher-level-api-the-multiprocessing-pool)\n", + " * [`Pool.map`](#pool-map)\n", + " * [`Pool.apply_async`](#pool-apply-async)\n", + "* [Sharing data between processes](#sharing-data-between-processes)\n", + " * [Read-only sharing](#read-only-sharing)\n", + " * [Read/write sharing](#read-write-sharing)\n", + "\n", + "\n", + "<a class=\"anchor\" id=\"threading\"></a>\n", "## Threading\n", "\n", "\n", - "The [`threading`](https://docs.python.org/3.5/library/threading.html) module\n", + "The [`threading`](https://docs.python.org/3/library/threading.html) module\n", "provides a traditional multi-threading API that should be familiar to you if\n", "you have worked with threads in other languages.\n", "\n", @@ -82,6 +109,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "<a class=\"anchor\" id=\"subclassing-thread\"></a>\n", "### Subclassing `Thread`\n", "\n", "\n", @@ -116,6 +144,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "<a 
class=\"anchor\" id=\"daemon-threads\"></a>\n", "### Daemon threads\n", "\n", "\n", @@ -145,26 +174,28 @@ "metadata": {}, "source": [ "See the [`Thread`\n", - "documentation](https://docs.python.org/3.5/library/threading.html#thread-objects)\n", + "documentation](https://docs.python.org/3/library/threading.html#thread-objects)\n", "for more details.\n", "\n", "\n", + "<a class=\"anchor\" id=\"thread-synchronisation\"></a>\n", "### Thread synchronisation\n", "\n", "\n", "The `threading` module provides some useful thread-synchronisation primitives\n", "- the `Lock`, `RLock` (re-entrant `Lock`), and `Event` classes. The\n", "`threading` module also provides `Condition` and `Semaphore` classes - refer\n", - "to the [documentation](https://docs.python.org/3.5/library/threading.html) for\n", + "to the [documentation](https://docs.python.org/3/library/threading.html) for\n", "more details.\n", "\n", "\n", + "<a class=\"anchor\" id=\"lock\"></a>\n", "#### `Lock`\n", "\n", "\n", - "The [`Lock`](https://docs.python.org/3.5/library/threading.html#lock-objects)\n", + "The [`Lock`](https://docs.python.org/3/library/threading.html#lock-objects)\n", "class (and its re-entrant version, the\n", - "[`RLock`](https://docs.python.org/3.5/library/threading.html#rlock-objects))\n", + "[`RLock`](https://docs.python.org/3/library/threading.html#rlock-objects))\n", "prevents a block of code from being accessed by more than one thread at a\n", "time. For example, if we have multiple threads running this `task` function,\n", "their [outputs](https://www.youtube.com/watch?v=F5fUFnfPpYU) will inevitably\n", @@ -287,11 +318,12 @@ "what it does to the output.\n", "\n", "\n", + "<a class=\"anchor\" id=\"event\"></a>\n", "#### `Event`\n", "\n", "\n", "The\n", - "[`Event`](https://docs.python.org/3.5/library/threading.html#event-objects)\n", + "[`Event`](https://docs.python.org/3/library/threading.html#event-objects)\n", "class is essentially a boolean [semaphore][semaphore-wiki]. 
It can be used to\n", "signal events between threads. Threads can `wait` on the event, and be awoken\n", "when the event is `set` by another thread:\n", @@ -329,11 +361,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "<a class=\"anchor\" id=\"the-global-interpreter-lock-gil\"></a>\n", "### The Global Interpreter Lock (GIL)\n", "\n", "\n", - "The [_Global Interpreter\n", - "Lock_](https://docs.python.org/3/c-api/init.html#thread-state-and-the-global-interpreter-lock)\n", + "The [*Global Interpreter\n", + "Lock*](https://docs.python.org/3/c-api/init.html#thread-state-and-the-global-interpreter-lock)\n", "is an implementation detail of [CPython](https://github.com/python/cpython)\n", "(the official Python interpreter). The GIL means that a multi-threaded\n", "program written in pure Python is not able to take advantage of multiple\n", @@ -349,11 +382,12 @@ "running on another core.\n", "\n", "\n", + "<a class=\"anchor\" id=\"multiprocessing\"></a>\n", "## Multiprocessing\n", "\n", "\n", "For true parallelism, you should check out the\n", - "[`multiprocessing`](https://docs.python.org/3.5/library/multiprocessing.html)\n", + "[`multiprocessing`](https://docs.python.org/3/library/multiprocessing.html)\n", "module.\n", "\n", "\n", @@ -363,15 +397,22 @@ "the `threading` module, and a powerful higher-level API.\n", "\n", "\n", + "> Python also provides the\n", + "> [`concurrent.futures`](https://docs.python.org/3/library/concurrent.futures.html)\n", + "> module, which offers a simpler alternative API to `multiprocessing`. 
It\n", + "> offers no functionality over `multiprocessing`, so is not covered here.\n", + "\n", + "\n", + "<a class=\"anchor\" id=\"threading-equivalent-api\"></a>\n", "### `threading`-equivalent API\n", "\n", "\n", "The\n", - "[`Process`](https://docs.python.org/3.5/library/multiprocessing.html#the-process-class)\n", + "[`Process`](https://docs.python.org/3/library/multiprocessing.html#the-process-class)\n", "class is the `multiprocessing` equivalent of the\n", - "[`threading.Thread`](https://docs.python.org/3.5/library/threading.html#thread-objects)\n", + "[`threading.Thread`](https://docs.python.org/3/library/threading.html#thread-objects)\n", "class. `multprocessing` also has equivalents of the [`Lock` and `Event`\n", - "classes](https://docs.python.org/3.5/library/multiprocessing.html#synchronization-between-processes),\n", + "classes](https://docs.python.org/3/library/multiprocessing.html#synchronization-between-processes),\n", "and the other synchronisation primitives provided by `threading`.\n", "\n", "\n", @@ -380,22 +421,41 @@ "\n", "\n", "Because your \"threads\" are now independent processes, you need to be a little\n", - "careful about how to share information across them. Fortunately, the\n", - "`multiprocessing` module provides [`Queue` and `Pipe`\n", - "classes](https://docs.python.org/3.5/library/multiprocessing.html#exchanging-objects-between-processes)\n", - "which make it easy to share data across processes.\n", + "careful about how to share information across them. If you only need to share\n", + "small amounts of data, you can use the [`Queue` and `Pipe`\n", + "classes](https://docs.python.org/3/library/multiprocessing.html#exchanging-objects-between-processes),\n", + "in the `multiprocessing` module. 
If you are working with large amounts of data\n", + "where copying between processes is not feasible, things become more\n", + "complicated, but read on...\n", "\n", "\n", + "<a class=\"anchor\" id=\"higher-level-api-the-multiprocessing-pool\"></a>\n", "### Higher-level API - the `multiprocessing.Pool`\n", "\n", "\n", "The real advantages of `multiprocessing` lie in its higher level API, centered\n", "around the [`Pool`\n", - "class](https://docs.python.org/3.5/library/multiprocessing.html#using-a-pool-of-workers).\n", + "class](https://docs.python.org/3/library/multiprocessing.html#using-a-pool-of-workers).\n", "\n", "\n", "Essentially, you create a `Pool` of worker processes - you specify the number\n", - "of processes when you create the pool.\n", + "of processes when you create the pool. Once you have created a `Pool`, you can\n", + "use its methods to automatically parallelise tasks. The most useful are the\n", + "`map`, `starmap` and `apply_async` methods.\n", + "\n", + "\n", + "The `Pool` class is a context manager, so can be used in a `with` statement,\n", + "e.g.:\n", + "\n", + "> ```\n", + "> with mp.Pool(processes=16) as pool:\n", + "> # do stuff with the pool\n", + "> ```\n", + "\n", + "It is possible to create a `Pool` outside of a `with` statement, but in this\n", + "case you must ensure that you call its `close` mmethod when you are finished.\n", + "Using a `Pool` in a `with` statement is therefore recommended, because you know\n", + "that it will be shut down correctly, even in the event of an error.\n", "\n", "\n", "> The best number of processes to use for a `Pool` will depend on the system\n", @@ -403,18 +463,14 @@ "> I/O bound or CPU bound).\n", "\n", "\n", - "Once you have created a `Pool`, you can use its methods to automatically\n", - "parallelise tasks. 
The most useful are the `map`, `starmap` and\n", - "`apply_async` methods.\n", - "\n", - "\n", + "<a class=\"anchor\" id=\"pool-map\"></a>\n", "#### `Pool.map`\n", "\n", "\n", "The\n", - "[`Pool.map`](https://docs.python.org/3.5/library/multiprocessing.html#multiprocessing.pool.Pool.map)\n", + "[`Pool.map`](https://docs.python.org/3/library/multiprocessing.html#multiprocessing.pool.Pool.map)\n", "method is the multiprocessing equivalent of the built-in\n", - "[`map`](https://docs.python.org/3.5/library/functions.html#map) function - it\n", + "[`map`](https://docs.python.org/3/library/functions.html#map) function - it\n", "is given a function, and a sequence, and it applies the function to each\n", "element in the sequence." ] @@ -449,13 +505,14 @@ "\n", "imgfiles = ['{:02d}.nii.gz'.format(i) for i in range(20)]\n", "\n", - "p = mp.Pool(processes=16)\n", - "\n", "print('Crunching images...')\n", "\n", - "start = time.time()\n", - "results = p.map(crunchImage, imgfiles)\n", - "end = time.time()\n", + "start = time.time()\n", + "\n", + "with mp.Pool(processes=16) as p:\n", + " results = p.map(crunchImage, imgfiles)\n", + "\n", + "end = time.time()\n", "\n", "print('Total execution time: {:0.2f} seconds'.format(end - start))" ] @@ -467,7 +524,7 @@ "The `Pool.map` method only works with functions that accept one argument, such\n", "as our `crunchImage` function above. 
If you have a function which accepts\n", "multiple arguments, use the\n", - "[`Pool.starmap`](https://docs.python.org/3.5/library/multiprocessing.html#multiprocessing.pool.Pool.starmap)\n", + "[`Pool.starmap`](https://docs.python.org/3/library/multiprocessing.html#multiprocessing.pool.Pool.starmap)\n", "method instead:" ] }, @@ -495,15 +552,16 @@ " ['t2_{:02d}.nii.gz'.format(i) for i in range(10)]\n", "modalities = ['t1'] * 10 + ['t2'] * 10\n", "\n", - "pool = mp.Pool(processes=16)\n", - "\n", "args = [(f, m) for f, m in zip(imgfiles, modalities)]\n", "\n", "print('Crunching images...')\n", "\n", - "start = time.time()\n", - "results = pool.starmap(crunchImage, args)\n", - "end = time.time()\n", + "start = time.time()\n", + "\n", + "with mp.Pool(processes=16) as pool:\n", + " results = pool.starmap(crunchImage, args)\n", + "\n", + "end = time.time()\n", "\n", "print('Total execution time: {:0.2f} seconds'.format(end - start))" ] @@ -514,24 +572,25 @@ "source": [ "The `map` and `starmap` methods also have asynchronous equivalents `map_async`\n", "and `starmap_async`, which return immediately. Refer to the\n", - "[`Pool`](https://docs.python.org/3.5/library/multiprocessing.html#module-multiprocessing.pool)\n", + "[`Pool`](https://docs.python.org/3/library/multiprocessing.html#module-multiprocessing.pool)\n", "documentation for more details.\n", "\n", "\n", + "<a class=\"anchor\" id=\"pool-apply-async\"></a>\n", "#### `Pool.apply_async`\n", "\n", "\n", "The\n", - "[`Pool.apply`](https://docs.python.org/3.5/library/multiprocessing.html#multiprocessing.pool.Pool.apply)\n", + "[`Pool.apply`](https://docs.python.org/3/library/multiprocessing.html#multiprocessing.pool.Pool.apply)\n", "method will execute a function on one of the processes, and block until it has\n", "finished. 
The\n", - "[`Pool.apply_async`](https://docs.python.org/3.5/library/multiprocessing.html#multiprocessing.pool.Pool.apply_async)\n", + "[`Pool.apply_async`](https://docs.python.org/3/library/multiprocessing.html#multiprocessing.pool.Pool.apply_async)\n", "method returns immediately, and is thus more suited to asynchronously\n", "scheduling multiple jobs to run in parallel.\n", "\n", "\n", "`apply_async` returns an object of type\n", - "[`AsyncResult`](https://docs.python.org/3.5/library/multiprocessing.html#multiprocessing.pool.AsyncResult).\n", + "[`AsyncResult`](https://docs.python.org/3/library/multiprocessing.html#multiprocessing.pool.AsyncResult).\n", "An `AsyncResult` object has `wait` and `get` methods which will block until\n", "the job has completed." ] @@ -564,24 +623,24 @@ "t1s = ['{:02d}_t1.nii.gz'.format(i) for i in range(20)]\n", "std = 'MNI152_T1_2mm.nii.gz'\n", "\n", - "pool = mp.Pool(processes=16)\n", - "\n", "print('Running structural-to-standard registration '\n", " 'on {} subjects...'.format(len(t1s)))\n", "\n", "# Run linear registration on all the T1s.\n", - "#\n", - "# We build a list of AsyncResult objects\n", - "linresults = [pool.apply_async(linear_registration, (t1, std))\n", - " for t1 in t1s]\n", - "\n", - "# Then we wait for each job to finish,\n", - "# and replace its AsyncResult object\n", - "# with the actual result - an affine\n", - "# transformation matrix.\n", "start = time.time()\n", - "for i, r in enumerate(linresults):\n", - " linresults[i] = r.get()\n", + "with mp.Pool(processes=16) as pool:\n", + "\n", + " # We build a list of AsyncResult objects\n", + " linresults = [pool.apply_async(linear_registration, (t1, std))\n", + " for t1 in t1s]\n", + "\n", + " # Then we wait for each job to finish,\n", + " # and replace its AsyncResult object\n", + " # with the actual result - an affine\n", + " # transformation matrix.\n", + " for i, r in enumerate(linresults):\n", + " linresults[i] = r.get()\n", + "\n", "end = time.time()\n", 
"\n", "print('Linear registrations completed in '\n", @@ -589,14 +648,16 @@ "\n", "# Run non-linear registration on all the T1s,\n", "# using the linear registrations to initialise.\n", - "nlinresults = [pool.apply_async(nonlinear_registration, (t1, std, aff))\n", - " for (t1, aff) in zip(t1s, linresults)]\n", - "\n", - "# Wait for each non-linear reg to finish,\n", - "# and store the resulting warp field.\n", "start = time.time()\n", - "for i, r in enumerate(nlinresults):\n", - " nlinresults[i] = r.get()\n", + "with mp.Pool(processes=16) as pool:\n", + " nlinresults = [pool.apply_async(nonlinear_registration, (t1, std, aff))\n", + " for (t1, aff) in zip(t1s, linresults)]\n", + "\n", + " # Wait for each non-linear reg to finish,\n", + " # and store the resulting warp field.\n", + " for i, r in enumerate(nlinresults):\n", + " nlinresults[i] = r.get()\n", + "\n", "end = time.time()\n", "\n", "print('Non-linear registrations completed in '\n", @@ -611,7 +672,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Sharing data between processes\n", + "<a class=\"anchor\" id=\"sharing-data-between-processes\"></a>\n", + "## Sharing data between processes\n", "\n", "\n", "When you use the `Pool.map` method (or any of the other methods we have shown)\n", @@ -621,9 +683,9 @@ "\n", "\n", "Any items which you wish to pass to a function that is executed by a `Pool`\n", - "must be - the built-in\n", - "[`pickle`](https://docs.python.org/3.5/library/pickle.html) module is used by\n", - "`multiprocessing` to serialise and de-serialise the data passed into and\n", + "must be *pickleable*<sup>1</sup> - the built-in\n", + "[`pickle`](https://docs.python.org/3/library/pickle.html) module is used by\n", + "`multiprocessing` to serialise and de-serialise the data passed to and\n", "returned from a child process. 
The majority of standard Python types (`list`,\n", "`dict`, `str` etc), and Numpy arrays can be pickled and unpickled, so you only\n", "need to worry about this detail if you are passing objects of a custom type\n", @@ -631,36 +693,202 @@ "third-party library).\n", "\n", "\n", + "> <sup>1</sup>*Pickleable* is the term used in the Python world to refer to\n", + "> something that is *serialisable* - basically, the process of converting an\n", + "> in-memory object into a binary form that can be stored and/or transmitted.\n", + "\n", + "\n", "There is obviously some overhead in copying data back and forth between the\n", - "main process and the worker processes. For most computationally intensive\n", - "tasks, this communication overhead is not important - the performance\n", - "bottleneck is typically going to be the computation time, rather than I/O\n", - "between the parent and child processes. You may need to spend some time\n", - "adjusting the way in which you split up your data, and the number of\n", - "processes, in order to get the best performance.\n", - "\n", - "\n", - "However, if you have determined that copying data between processes is having\n", - "a substantial impact on your performance, the `multiprocessing` module\n", - "provides the [`Value`, `Array`, and `RawArray`\n", - "classes](https://docs.python.org/3.5/library/multiprocessing.html#shared-ctypes-objects),\n", + "main process and the worker processes; this may or may not be a problem. 
For\n",
+    "most computationally intensive tasks, this communication overhead is not\n",
+    "important - the performance bottleneck is typically going to be the\n",
+    "computation time, rather than I/O between the parent and child processes.\n",
+    "\n",
+    "\n",
+    "However, if you are working with a large dataset, have determined that\n",
+    "copying data between processes is having a substantial impact on your\n",
+    "performance, and instead wish to *share* a single copy of the data between\n",
+    "the processes, you will need to:\n",
+    "\n",
+    " 1. Structure your code so that the data you want to share is accessible at\n",
+    "    the *module level*.\n",
+    " 2. Define/create/load the data *before* creating the `Pool`.\n",
+    "\n",
+    "\n",
+    "This is because, when you create a `Pool`, what actually happens is that the\n",
+    "process your Python script is running in will [**fork**][wiki-fork] itself -\n",
+    "the child processes that are created are used as the worker processes by the\n",
+    "`Pool`. And if you create/load your data in your main process *before* this\n",
+    "fork occurs, all of the child processes will inherit the memory space of the\n",
+    "main process, and will therefore have (read-only) access to the data, without\n",
+    "any copying required.\n",
+    "\n",
+    "\n",
+    "[wiki-fork]: https://en.wikipedia.org/wiki/Fork_(system_call)\n",
+    "\n",
+    "\n",
+    "<a class=\"anchor\" id=\"read-only-sharing\"></a>\n",
+    "### Read-only sharing\n",
+    "\n",
+    "\n",
+    "Let's see this in action with a simple example. 
We'll start by defining a\n", + "little helper function which allows us to track the total memory usage, using\n", + "the unix `free` command:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# todo mac version\n", + "import subprocess as sp\n", + "def memusage(msg):\n", + " stdout = sp.run(['free', '--mega'], capture_output=True).stdout.decode()\n", + " stdout = stdout.split('\\n')[1].split()\n", + " total = stdout[1]\n", + " used = stdout[2]\n", + " print('Memory usage {}: {} / {} MB'.format(msg, used, total))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now our task is simply to calculate the sum of a large array of numbers. We're\n", + "going to create a big chunk of data, and process it in chunks, keeping track\n", + "of memory usage as the task progresses:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "import multiprocessing as mp\n", + "import numpy as np\n", + "\n", + "memusage('before creating data')\n", + "\n", + "# allocate 500MB of data\n", + "data = np.random.random(500 * (1048576 // 8))\n", + "\n", + "# Assign nelems values to each worker\n", + "# process (hard-coded so we need 12\n", + "# jobs to complete the task)\n", + "nelems = len(data) // 12\n", + "\n", + "memusage('after creating data')\n", + "\n", + "# Each job process nelems values,\n", + "# starting from the specified offset\n", + "def process_chunk(offset):\n", + " time.sleep(1)\n", + " return data[offset:offset + nelems].sum()\n", + "\n", + "# Generate an offset into the data for each job -\n", + "# we will call process_chunk for each offset\n", + "offsets = range(0, len(data), nelems)\n", + "\n", + "# Create our worker process pool\n", + "with mp.Pool(4) as pool:\n", + "\n", + " results = pool.map_async(process_chunk, offsets)\n", + "\n", + " # Wait for all of the jobs to finish\n", + " elapsed = 0\n", + " 
while not results.ready():\n",
+    "        memusage('after {} seconds'.format(elapsed))\n",
+    "        time.sleep(1)\n",
+    "        elapsed += 1\n",
+    "\n",
+    "    results = results.get()\n",
+    "\n",
+    "print('Total sum:   ', sum(results))\n",
+    "print('Sanity check:', data.sum())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You should be able to see that only one copy of `data` is created, and is\n",
+    "shared by all of the worker processes without any copying taking place.\n",
+    "\n",
+    "So things are reasonably straightforward if you only need read-only access to\n",
+    "your data. But what if your worker processes need to be able to modify the\n",
+    "data? Go back to the code block above and:\n",
+    "\n",
+    "1. Modify the `process_chunk` function so that it modifies every element of\n",
+    "   its assigned portion of the data before calculating and returning the sum.\n",
+    "   For example:\n",
+    "\n",
+    "   > ```\n",
+    "   > data[offset:offset + nelems] += 1\n",
+    "   > ```\n",
+    "\n",
+    "2. Restart the Jupyter notebook kernel (*Kernel -> Restart*) - this example is\n",
+    "   somewhat dependent on the behaviour of the Python garbage collector, so it\n",
+    "   helps to start afresh\n",
+    "\n",
+    "\n",
+    "3. Re-run the two code blocks, and watch what happens to the memory usage.\n",
+    "\n",
+    "\n",
+    "What happened? Well, you are seeing [copy-on-write][wiki-copy-on-write] in\n",
+    "action. When the `process_chunk` function is invoked, it is given a reference\n",
+    "to the original data array in the memory space of the parent process. But as\n",
+    "soon as an attempt is made to modify it, a copy of the data, in the memory\n",
+    "space of the child process, is created. The modifications are then applied to\n",
+    "this child process copy, and not to the original copy. 
So the total memory\n", + "usage has blown out to twice as much as before, and the changes made by each\n", + "child process are being lost!\n", + "\n", + "\n", + "[wiki-copy-on-write]: https://en.wikipedia.org/wiki/Copy-on-write\n", + "\n", + "\n", + "<a class=\"anchor\" id=\"read-write-sharing\"></a>\n", + "### Read/write sharing\n", + "\n", + "\n", + "> If you have worked with a real programming language with true parallelism\n", + "> and shared memory via within-process multi-threading, feel free to take a\n", + "> break at this point. Breathe. Relax. Go punch a hole in a wall. I've been\n", + "> coding in Python for years, and this still makes me angry. Sometimes\n", + "> ... don't tell anyone I said this ... I even find myself wishing I were\n", + "> coding in *Java* instead of Python. Ugh. I need to take a shower.\n", + "\n", + "\n", + "In order to truly share memory between multiple processes, the\n", + "`multiprocessing` module provides the [`Value`, `Array`, and `RawArray`\n", + "classes](https://docs.python.org/3/library/multiprocessing.html#shared-ctypes-objects),\n", "which allow you to share individual values, or arrays of values, respectively.\n", "\n", "\n", "The `Array` and `RawArray` classes essentially wrap a typed pointer (from the\n", - "built-in [`ctypes`](https://docs.python.org/3.5/library/ctypes.html) module)\n", - "to a block of memory. We can use the `Array` or `RawArray` class to share a\n", - "Numpy array between our worker processes. The difference between an `Array`\n", - "and a `RawArray` is that the former offers synchronised (i.e. process-safe)\n", - "access to the shared memory. This is necessary if your child processes will be\n", - "modifying the same parts of your data.\n", + "built-in [`ctypes`](https://docs.python.org/3/library/ctypes.html) module) to\n", + "a block of memory. We can use the `Array` or `RawArray` class to share a Numpy\n", + "array between our worker processes. 
The difference between an `Array` and a\n", + "`RawArray` is that the former offers low-level synchronised\n", + "(i.e. process-safe) access to the shared memory. This is necessary if your\n", + "child processes will be modifying the same parts of your data.\n", + "\n", "\n", + "> If you need fine-grained control over synchronising access to shared data by\n", + "> multiple processes, all of the [synchronisation\n", + "> primitives](https://docs.python.org/3/library/multiprocessing.html#synchronization-between-processes)\n", + "> from the `multiprocessing` module are at your disposal.\n", "\n", - "Due to the way that shared memory works, in order to share a Numpy array\n", - "between different processes you need to structure your code so that the\n", - "array(s) you want to share are accessible at the _module level_. Furthermore,\n", - "we need to make sure that our input and output arrays are located in shared\n", - "memory - we can do this via the `Array` or `RawArray`.\n", + "\n", + "The requirements for sharing memory between processes still apply here - we\n", + "need to make our data accessible at the *module level*, and we need to create\n", + "our data before creating the `Pool`. And to achieve read and write capability,\n", + "we also need to make sure that our input and output arrays are located in\n", + "shared memory - we can do this via the `Array` or `RawArray`.\n", "\n", "\n", "As an example, let's say we want to parallelise processing of an image by\n", @@ -746,11 +974,18 @@ " # Make the input/output data\n", " # accessible to the process_chunk\n", " # function. 
This must be done\n", - " # *before* the worker pool is created.\n", + " # *before* the worker pool is\n", + " # created - even though we are\n", + " # doing things differently to the\n", + " # read-only example, we are still\n", + " # making the data arrays accessible\n", + " # at the *module* level, so the\n", + " # memory they are stored in can be\n", + " # shared with the child processes.\n", " process_chunk.input_data = sindata\n", " process_chunk.output_data = soutdata\n", "\n", - " # number of boxels to be computed\n", + " # number of voxels to be computed\n", " # by each worker process.\n", " nvox = int(data.size / nprocs)\n", "\n", @@ -769,12 +1004,9 @@ " # process a list of indices, which\n", " # specify the data items which that\n", " # worker process needs to compute.\n", - " xs = [xs[nvox * i:nvox * i + nvox] for i in range(nprocs)] + \\\n", - " [xs[nvox * nprocs:]]\n", - " ys = [ys[nvox * i:nvox * i + nvox] for i in range(nprocs)] + \\\n", - " [ys[nvox * nprocs:]]\n", - " zs = [zs[nvox * i:nvox * i + nvox] for i in range(nprocs)] + \\\n", - " [zs[nvox * nprocs:]]\n", + " xs = [xs[nvox * i:nvox * i + nvox] for i in range(nprocs)] + [xs[nvox * nprocs:]]\n", + " ys = [ys[nvox * i:nvox * i + nvox] for i in range(nprocs)] + [ys[nvox * nprocs:]]\n", + " zs = [zs[nvox * i:nvox * i + nvox] for i in range(nprocs)] + [zs[nvox * nprocs:]]\n", "\n", " # Build the argument lists for\n", " # each worker process.\n", @@ -782,9 +1014,8 @@ "\n", " # Create a pool of worker\n", " # processes and run the jobs.\n", - " pool = mp.Pool(processes=nprocs)\n", - "\n", - " pool.starmap(process_chunk, args)\n", + " with mp.Pool(processes=nprocs) as pool:\n", + " pool.starmap(process_chunk, args)\n", "\n", " return outdata" ] @@ -802,12 +1033,11 @@ "metadata": {}, "outputs": [], "source": [ - "data = np.array(np.arange(64).reshape((4, 4, 4)), dtype=np.float64)\n", - "\n", - "outdata = process_dataset(data)\n", + "indata = np.array(np.arange(64).reshape((4, 4, 4)), 
dtype=np.float64)\n",
+    "outdata = process_dataset(indata)\n",
     "\n",
     "print('Input')\n",
-    "print(data)\n",
+    "print(indata)\n",
     "\n",
     "print('Output')\n",
     "print(outdata)"
    ]
   }
diff --git a/advanced_topics/07_threading.md b/advanced_topics/07_threading.md
index 460396ddffb2241a8ef666bbcd3602188cb40d0e..2c09234d29527148586a09133d28a437077dac47 100644
--- a/advanced_topics/07_threading.md
+++ b/advanced_topics/07_threading.md
@@ -2,20 +2,47 @@
 
 The Python language has built-in support for multi-threading in the
-[`threading`](https://docs.python.org/3.5/library/threading.html) module, and
+[`threading`](https://docs.python.org/3/library/threading.html) module, and
 true parallelism in the
-[`multiprocessing`](https://docs.python.org/3.5/library/multiprocessing.html)
+[`multiprocessing`](https://docs.python.org/3/library/multiprocessing.html)
 module. If you want to be impressed, skip straight to the section on
 [`multiprocessing`](todo).
 
 
-
-
-
+> *Note*: If you are familiar with a "real" programming language such as C++
+> or Java, you might be disappointed with the native support for parallelism in
+> Python. Python threads do not run in parallel because of the Global
+> Interpreter Lock, and if you use `multiprocessing`, be prepared to either
+> bear the performance hit of copying data between processes, or jump through
+> hoops in order to share data between processes.
+>
+> This limitation *might* be solved in a future Python release by way of
+> [*sub-interpreters*](https://www.python.org/dev/peps/pep-0554/), but the
+> author of this practical is not holding his breath. 
+ + +* [Threading](#threading) + * [Subclassing `Thread`](#subclassing-thread) + * [Daemon threads](#daemon-threads) + * [Thread synchronisation](#thread-synchronisation) + * [`Lock`](#lock) + * [`Event`](#event) + * [The Global Interpreter Lock (GIL)](#the-global-interpreter-lock-gil) +* [Multiprocessing](#multiprocessing) + * [`threading`-equivalent API](#threading-equivalent-api) + * [Higher-level API - the `multiprocessing.Pool`](#higher-level-api-the-multiprocessing-pool) + * [`Pool.map`](#pool-map) + * [`Pool.apply_async`](#pool-apply-async) +* [Sharing data between processes](#sharing-data-between-processes) + * [Read-only sharing](#read-only-sharing) + * [Read/write sharing](#read-write-sharing) + + +<a class="anchor" id="threading"></a> ## Threading -The [`threading`](https://docs.python.org/3.5/library/threading.html) module +The [`threading`](https://docs.python.org/3/library/threading.html) module provides a traditional multi-threading API that should be familiar to you if you have worked with threads in other languages. @@ -60,6 +87,7 @@ print('Finished!') ``` +<a class="anchor" id="subclassing-thread"></a> ### Subclassing `Thread` @@ -86,6 +114,7 @@ print('Done') ``` +<a class="anchor" id="daemon-threads"></a> ### Daemon threads @@ -107,26 +136,28 @@ t.daemon = True See the [`Thread` -documentation](https://docs.python.org/3.5/library/threading.html#thread-objects) +documentation](https://docs.python.org/3/library/threading.html#thread-objects) for more details. +<a class="anchor" id="thread-synchronisation"></a> ### Thread synchronisation The `threading` module provides some useful thread-synchronisation primitives - the `Lock`, `RLock` (re-entrant `Lock`), and `Event` classes. The `threading` module also provides `Condition` and `Semaphore` classes - refer -to the [documentation](https://docs.python.org/3.5/library/threading.html) for +to the [documentation](https://docs.python.org/3/library/threading.html) for more details. 
+<a class="anchor" id="lock"></a> #### `Lock` -The [`Lock`](https://docs.python.org/3.5/library/threading.html#lock-objects) +The [`Lock`](https://docs.python.org/3/library/threading.html#lock-objects) class (and its re-entrant version, the -[`RLock`](https://docs.python.org/3.5/library/threading.html#rlock-objects)) +[`RLock`](https://docs.python.org/3/library/threading.html#rlock-objects)) prevents a block of code from being accessed by more than one thread at a time. For example, if we have multiple threads running this `task` function, their [outputs](https://www.youtube.com/watch?v=F5fUFnfPpYU) will inevitably @@ -225,11 +256,12 @@ Try removing the `mutex` lock from the two methods in the above code, and see what it does to the output. +<a class="anchor" id="event"></a> #### `Event` The -[`Event`](https://docs.python.org/3.5/library/threading.html#event-objects) +[`Event`](https://docs.python.org/3/library/threading.html#event-objects) class is essentially a boolean [semaphore][semaphore-wiki]. It can be used to signal events between threads. Threads can `wait` on the event, and be awoken when the event is `set` by another thread: @@ -258,11 +290,13 @@ processingFinished.wait() print('Processing finished!') ``` + +<a class="anchor" id="the-global-interpreter-lock-gil"></a> ### The Global Interpreter Lock (GIL) -The [_Global Interpreter -Lock_](https://docs.python.org/3/c-api/init.html#thread-state-and-the-global-interpreter-lock) +The [*Global Interpreter +Lock*](https://docs.python.org/3/c-api/init.html#thread-state-and-the-global-interpreter-lock) is an implementation detail of [CPython](https://github.com/python/cpython) (the official Python interpreter). The GIL means that a multi-threaded program written in pure Python is not able to take advantage of multiple @@ -278,11 +312,12 @@ running on one core, whilst having another thread (e.g. user interaction) running on another core. 
+<a class="anchor" id="multiprocessing"></a> ## Multiprocessing For true parallelism, you should check out the -[`multiprocessing`](https://docs.python.org/3.5/library/multiprocessing.html) +[`multiprocessing`](https://docs.python.org/3/library/multiprocessing.html) module. @@ -292,15 +327,22 @@ from. It provides two APIs - a "traditional" equivalent to that provided by the `threading` module, and a powerful higher-level API. +> Python also provides the +> [`concurrent.futures`](https://docs.python.org/3/library/concurrent.futures.html) +> module, which offers a simpler alternative API to `multiprocessing`. It +> offers no functionality over `multiprocessing`, so is not covered here. + + +<a class="anchor" id="threading-equivalent-api"></a> ### `threading`-equivalent API The -[`Process`](https://docs.python.org/3.5/library/multiprocessing.html#the-process-class) +[`Process`](https://docs.python.org/3/library/multiprocessing.html#the-process-class) class is the `multiprocessing` equivalent of the -[`threading.Thread`](https://docs.python.org/3.5/library/threading.html#thread-objects) +[`threading.Thread`](https://docs.python.org/3/library/threading.html#thread-objects) class. `multprocessing` also has equivalents of the [`Lock` and `Event` -classes](https://docs.python.org/3.5/library/multiprocessing.html#synchronization-between-processes), +classes](https://docs.python.org/3/library/multiprocessing.html#synchronization-between-processes), and the other synchronisation primitives provided by `threading`. @@ -309,22 +351,41 @@ and you will have true parallelism. Because your "threads" are now independent processes, you need to be a little -careful about how to share information across them. Fortunately, the -`multiprocessing` module provides [`Queue` and `Pipe` -classes](https://docs.python.org/3.5/library/multiprocessing.html#exchanging-objects-between-processes) -which make it easy to share data across processes. +careful about how to share information across them. 
If you only need to share
+small amounts of data, you can use the [`Queue` and `Pipe`
+classes](https://docs.python.org/3/library/multiprocessing.html#exchanging-objects-between-processes),
+in the `multiprocessing` module. If you are working with large amounts of data
+where copying between processes is not feasible, things become more
+complicated, but read on...
 
 
+<a class="anchor" id="higher-level-api-the-multiprocessing-pool"></a>
 ### Higher-level API - the `multiprocessing.Pool`
 
 
 The real advantages of `multiprocessing` lie in its higher level API, centered
 around the [`Pool`
-class](https://docs.python.org/3.5/library/multiprocessing.html#using-a-pool-of-workers).
+class](https://docs.python.org/3/library/multiprocessing.html#using-a-pool-of-workers).
 Essentially, you create a `Pool` of worker processes - you specify the number
-of processes when you create the pool.
+of processes when you create the pool. Once you have created a `Pool`, you can
+use its methods to automatically parallelise tasks. The most useful are the
+`map`, `starmap` and `apply_async` methods.
+
+
+The `Pool` class is a context manager, so can be used in a `with` statement,
+e.g.:
+
+> ```
+> with mp.Pool(processes=16) as pool:
+>     # do stuff with the pool
+> ```
+
+It is possible to create a `Pool` outside of a `with` statement, but in this
+case you must ensure that you call its `close` method when you are finished.
+Using a `Pool` in a `with` statement is therefore recommended, because you know
+that it will be shut down correctly, even in the event of an error.
 
 
 > The best number of processes to use for a `Pool` will depend on the system
@@ -332,18 +393,14 @@ of processes when you create the pool.
 > I/O bound or CPU bound).
 
 
-Once you have created a `Pool`, you can use its methods to automatically
-parallelise tasks. The most useful are the `map`, `starmap` and
-`apply_async` methods. 
- - +<a class="anchor" id="pool-map"></a> #### `Pool.map` The -[`Pool.map`](https://docs.python.org/3.5/library/multiprocessing.html#multiprocessing.pool.Pool.map) +[`Pool.map`](https://docs.python.org/3/library/multiprocessing.html#multiprocessing.pool.Pool.map) method is the multiprocessing equivalent of the built-in -[`map`](https://docs.python.org/3.5/library/functions.html#map) function - it +[`map`](https://docs.python.org/3/library/functions.html#map) function - it is given a function, and a sequence, and it applies the function to each element in the sequence. @@ -373,13 +430,14 @@ def crunchImage(imgfile): imgfiles = ['{:02d}.nii.gz'.format(i) for i in range(20)] -p = mp.Pool(processes=16) - print('Crunching images...') -start = time.time() -results = p.map(crunchImage, imgfiles) -end = time.time() +start = time.time() + +with mp.Pool(processes=16) as p: + results = p.map(crunchImage, imgfiles) + +end = time.time() print('Total execution time: {:0.2f} seconds'.format(end - start)) ``` @@ -388,7 +446,7 @@ print('Total execution time: {:0.2f} seconds'.format(end - start)) The `Pool.map` method only works with functions that accept one argument, such as our `crunchImage` function above. 
If you have a function which accepts multiple arguments, use the -[`Pool.starmap`](https://docs.python.org/3.5/library/multiprocessing.html#multiprocessing.pool.Pool.starmap) +[`Pool.starmap`](https://docs.python.org/3/library/multiprocessing.html#multiprocessing.pool.Pool.starmap) method instead: @@ -411,15 +469,16 @@ imgfiles = ['t1_{:02d}.nii.gz'.format(i) for i in range(10)] + \ ['t2_{:02d}.nii.gz'.format(i) for i in range(10)] modalities = ['t1'] * 10 + ['t2'] * 10 -pool = mp.Pool(processes=16) - args = [(f, m) for f, m in zip(imgfiles, modalities)] print('Crunching images...') -start = time.time() -results = pool.starmap(crunchImage, args) -end = time.time() +start = time.time() + +with mp.Pool(processes=16) as pool: + results = pool.starmap(crunchImage, args) + +end = time.time() print('Total execution time: {:0.2f} seconds'.format(end - start)) ``` @@ -427,24 +486,25 @@ print('Total execution time: {:0.2f} seconds'.format(end - start)) The `map` and `starmap` methods also have asynchronous equivalents `map_async` and `starmap_async`, which return immediately. Refer to the -[`Pool`](https://docs.python.org/3.5/library/multiprocessing.html#module-multiprocessing.pool) +[`Pool`](https://docs.python.org/3/library/multiprocessing.html#module-multiprocessing.pool) documentation for more details. +<a class="anchor" id="pool-apply-async"></a> #### `Pool.apply_async` The -[`Pool.apply`](https://docs.python.org/3.5/library/multiprocessing.html#multiprocessing.pool.Pool.apply) +[`Pool.apply`](https://docs.python.org/3/library/multiprocessing.html#multiprocessing.pool.Pool.apply) method will execute a function on one of the processes, and block until it has finished. 
The -[`Pool.apply_async`](https://docs.python.org/3.5/library/multiprocessing.html#multiprocessing.pool.Pool.apply_async) +[`Pool.apply_async`](https://docs.python.org/3/library/multiprocessing.html#multiprocessing.pool.Pool.apply_async) method returns immediately, and is thus more suited to asynchronously scheduling multiple jobs to run in parallel. `apply_async` returns an object of type -[`AsyncResult`](https://docs.python.org/3.5/library/multiprocessing.html#multiprocessing.pool.AsyncResult). +[`AsyncResult`](https://docs.python.org/3/library/multiprocessing.html#multiprocessing.pool.AsyncResult). An `AsyncResult` object has `wait` and `get` methods which will block until the job has completed. @@ -472,24 +532,24 @@ def nonlinear_registration(src, ref, affine): t1s = ['{:02d}_t1.nii.gz'.format(i) for i in range(20)] std = 'MNI152_T1_2mm.nii.gz' -pool = mp.Pool(processes=16) - print('Running structural-to-standard registration ' 'on {} subjects...'.format(len(t1s))) # Run linear registration on all the T1s. -# -# We build a list of AsyncResult objects -linresults = [pool.apply_async(linear_registration, (t1, std)) - for t1 in t1s] - -# Then we wait for each job to finish, -# and replace its AsyncResult object -# with the actual result - an affine -# transformation matrix. start = time.time() -for i, r in enumerate(linresults): - linresults[i] = r.get() +with mp.Pool(processes=16) as pool: + + # We build a list of AsyncResult objects + linresults = [pool.apply_async(linear_registration, (t1, std)) + for t1 in t1s] + + # Then we wait for each job to finish, + # and replace its AsyncResult object + # with the actual result - an affine + # transformation matrix. + for i, r in enumerate(linresults): + linresults[i] = r.get() + end = time.time() print('Linear registrations completed in ' @@ -497,14 +557,16 @@ print('Linear registrations completed in ' # Run non-linear registration on all the T1s, # using the linear registrations to initialise. 
-nlinresults = [pool.apply_async(nonlinear_registration, (t1, std, aff)) - for (t1, aff) in zip(t1s, linresults)] - -# Wait for each non-linear reg to finish, -# and store the resulting warp field. start = time.time() -for i, r in enumerate(nlinresults): - nlinresults[i] = r.get() +with mp.Pool(processes=16) as pool: + nlinresults = [pool.apply_async(nonlinear_registration, (t1, std, aff)) + for (t1, aff) in zip(t1s, linresults)] + + # Wait for each non-linear reg to finish, + # and store the resulting warp field. + for i, r in enumerate(nlinresults): + nlinresults[i] = r.get() + end = time.time() print('Non-linear registrations completed in ' @@ -516,7 +578,8 @@ for t1, result in zip(t1s, nlinresults): ``` -### Sharing data between processes +<a class="anchor" id="sharing-data-between-processes"></a> +## Sharing data between processes When you use the `Pool.map` method (or any of the other methods we have shown) @@ -526,9 +589,9 @@ the data that they return then has to be copied back to the parent process. Any items which you wish to pass to a function that is executed by a `Pool` -must be - the built-in -[`pickle`](https://docs.python.org/3.5/library/pickle.html) module is used by -`multiprocessing` to serialise and de-serialise the data passed into and +must be *pickleable*<sup>1</sup> - the built-in +[`pickle`](https://docs.python.org/3/library/pickle.html) module is used by +`multiprocessing` to serialise and de-serialise the data passed to and returned from a child process. The majority of standard Python types (`list`, `dict`, `str` etc), and Numpy arrays can be pickled and unpickled, so you only need to worry about this detail if you are passing objects of a custom type @@ -536,36 +599,186 @@ need to worry about this detail if you are passing objects of a custom type third-party library). 
+> <sup>1</sup>*Pickleable* is the term used in the Python world to refer to
+> something that is *serialisable* - basically, the process of converting an
+> in-memory object into a binary form that can be stored and/or transmitted.
+
+
 There is obviously some overhead in copying data back and forth between the
-main process and the worker processes. For most computationally intensive
-tasks, this communication overhead is not important - the performance
-bottleneck is typically going to be the computation time, rather than I/O
-between the parent and child processes. You may need to spend some time
-adjusting the way in which you split up your data, and the number of
-processes, in order to get the best performance.
-
-
-However, if you have determined that copying data between processes is having
-a substantial impact on your performance, the `multiprocessing` module
-provides the [`Value`, `Array`, and `RawArray`
-classes](https://docs.python.org/3.5/library/multiprocessing.html#shared-ctypes-objects),
+main process and the worker processes; this may or may not be a problem. For
+most computationally intensive tasks, this communication overhead is not
+important - the performance bottleneck is typically going to be the
+computation time, rather than I/O between the parent and child processes.
+
+
+However, if you are working with a large dataset, have determined that
+copying data between processes is having a substantial impact on your
+performance, and instead wish to *share* a single copy of the data between
+the processes, you will need to:
+
+ 1. Structure your code so that the data you want to share is accessible at
+    the *module level*.
+ 2. Define/create/load the data *before* creating the `Pool`.
+
+
+This is because, when you create a `Pool`, what actually happens is that the
+process your Python script is running in will [**fork**][wiki-fork] itself -
+the child processes that are created are used as the worker processes by the
+`Pool`. 
And if you create/load your data in your main process *before* this +fork occurs, all of the child processes will inherit the memory space of the +main process, and will therefore have (read-only) access to the data, without +any copying required. + + +[wiki-fork]: https://en.wikipedia.org/wiki/Fork_(system_call) + + +<a class="anchor" id="read-only-sharing"></a> +### Read-only sharing + + +Let's see this in action with a simple example. We'll start by defining a +little helper function which allows us to track the total memory usage, using +the unix `free` command: + + +``` +# todo mac version +import subprocess as sp +def memusage(msg): + stdout = sp.run(['free', '--mega'], capture_output=True).stdout.decode() + stdout = stdout.split('\n')[1].split() + total = stdout[1] + used = stdout[2] + print('Memory usage {}: {} / {} MB'.format(msg, used, total)) +``` + + +Now our task is simply to calculate the sum of a large array of numbers. We're +going to create a big chunk of data, and process it in chunks, keeping track +of memory usage as the task progresses: + + +``` +import time +import multiprocessing as mp +import numpy as np + +memusage('before creating data') + +# allocate 500MB of data +data = np.random.random(500 * (1048576 // 8)) + +# Assign nelems values to each worker +# process (hard-coded so we need 12 +# jobs to complete the task) +nelems = len(data) // 12 + +memusage('after creating data') + +# Each job process nelems values, +# starting from the specified offset +def process_chunk(offset): + time.sleep(1) + return data[offset:offset + nelems].sum() + +# Generate an offset into the data for each job - +# we will call process_chunk for each offset +offsets = range(0, len(data), nelems) + +# Create our worker process pool +with mp.Pool(4) as pool: + + results = pool.map_async(process_chunk, offsets) + + # Wait for all of the jobs to finish + elapsed = 0 + while not results.ready(): + memusage('after {} seconds'.format(elapsed)) + time.sleep(1) + elapsed 
+= 1
+
+    results = results.get()
+
+print('Total sum:   ', sum(results))
+print('Sanity check:', data.sum())
+```
+
+
+You should be able to see that only one copy of `data` is created, and is
+shared by all of the worker processes without any copying taking place.
+
+So things are reasonably straightforward if you only need read-only access to
+your data. But what if your worker processes need to be able to modify the
+data? Go back to the code block above and:
+
+1. Modify the `process_chunk` function so that it modifies every element of
+   its assigned portion of the data before calculating and returning the sum.
+   For example:
+
+   > ```
+   > data[offset:offset + nelems] += 1
+   > ```
+
+2. Restart the Jupyter notebook kernel (*Kernel -> Restart*) - this example is
+   somewhat dependent on the behaviour of the Python garbage collector, so it
+   helps to start afresh
+
+
+3. Re-run the two code blocks, and watch what happens to the memory usage.
+
+
+What happened? Well, you are seeing [copy-on-write][wiki-copy-on-write] in
+action. When the `process_chunk` function is invoked, it is given a reference
+to the original data array in the memory space of the parent process. But as
+soon as an attempt is made to modify it, a copy of the data, in the memory
+space of the child process, is created. The modifications are then applied to
+this child process copy, and not to the original copy. So the total memory
+usage has blown out to twice as much as before, and the changes made by each
+child process are being lost!
+
+
+[wiki-copy-on-write]: https://en.wikipedia.org/wiki/Copy-on-write
+
+
+<a class="anchor" id="read-write-sharing"></a>
+### Read/write sharing
+
+
+> If you have worked with a real programming language with true parallelism
+> and shared memory via within-process multi-threading, feel free to take a
+> break at this point. Breathe. Relax. Go punch a hole in a wall. I've been
+> coding in Python for years, and this still makes me angry. Sometimes
+> ... 
don't tell anyone I said this ... I even find myself wishing I were +> coding in *Java* instead of Python. Ugh. I need to take a shower. + + +In order to truly share memory between multiple processes, the +`multiprocessing` module provides the [`Value`, `Array`, and `RawArray` +classes](https://docs.python.org/3/library/multiprocessing.html#shared-ctypes-objects), which allow you to share individual values, or arrays of values, respectively. The `Array` and `RawArray` classes essentially wrap a typed pointer (from the -built-in [`ctypes`](https://docs.python.org/3.5/library/ctypes.html) module) -to a block of memory. We can use the `Array` or `RawArray` class to share a -Numpy array between our worker processes. The difference between an `Array` -and a `RawArray` is that the former offers synchronised (i.e. process-safe) -access to the shared memory. This is necessary if your child processes will be -modifying the same parts of your data. +built-in [`ctypes`](https://docs.python.org/3/library/ctypes.html) module) to +a block of memory. We can use the `Array` or `RawArray` class to share a Numpy +array between our worker processes. The difference between an `Array` and a +`RawArray` is that the former offers low-level synchronised +(i.e. process-safe) access to the shared memory. This is necessary if your +child processes will be modifying the same parts of your data. + + +> If you need fine-grained control over synchronising access to shared data by +> multiple processes, all of the [synchronisation +> primitives](https://docs.python.org/3/library/multiprocessing.html#synchronization-between-processes) +> from the `multiprocessing` module are at your disposal. -Due to the way that shared memory works, in order to share a Numpy array -between different processes you need to structure your code so that the -array(s) you want to share are accessible at the _module level_. 
Furthermore, -we need to make sure that our input and output arrays are located in shared -memory - we can do this via the `Array` or `RawArray`. +The requirements for sharing memory between processes still apply here - we +need to make our data accessible at the *module level*, and we need to create +our data before creating the `Pool`. And to achieve read and write capability, +we also need to make sure that our input and output arrays are located in +shared memory - we can do this via the `Array` or `RawArray`. As an example, let's say we want to parallelise processing of an image by @@ -638,11 +851,18 @@ def process_dataset(data): # Make the input/output data # accessible to the process_chunk # function. This must be done - # *before* the worker pool is created. + # *before* the worker pool is + # created - even though we are + # doing things differently to the + # read-only example, we are still + # making the data arrays accessible + # at the *module* level, so the + # memory they are stored in can be + # shared with the child processes. process_chunk.input_data = sindata process_chunk.output_data = soutdata - # number of boxels to be computed + # number of voxels to be computed # by each worker process. nvox = int(data.size / nprocs) @@ -661,12 +881,9 @@ def process_dataset(data): # process a list of indices, which # specify the data items which that # worker process needs to compute. - xs = [xs[nvox * i:nvox * i + nvox] for i in range(nprocs)] + \ - [xs[nvox * nprocs:]] - ys = [ys[nvox * i:nvox * i + nvox] for i in range(nprocs)] + \ - [ys[nvox * nprocs:]] - zs = [zs[nvox * i:nvox * i + nvox] for i in range(nprocs)] + \ - [zs[nvox * nprocs:]] + xs = [xs[nvox * i:nvox * i + nvox] for i in range(nprocs)] + [xs[nvox * nprocs:]] + ys = [ys[nvox * i:nvox * i + nvox] for i in range(nprocs)] + [ys[nvox * nprocs:]] + zs = [zs[nvox * i:nvox * i + nvox] for i in range(nprocs)] + [zs[nvox * nprocs:]] # Build the argument lists for # each worker process. 
@@ -674,9 +891,8 @@ def process_dataset(data): # Create a pool of worker # processes and run the jobs. - pool = mp.Pool(processes=nprocs) - - pool.starmap(process_chunk, args) + with mp.Pool(processes=nprocs) as pool: + pool.starmap(process_chunk, args) return outdata ``` @@ -686,12 +902,11 @@ Now we can call our `process_data` function just like any other function: ``` -data = np.array(np.arange(64).reshape((4, 4, 4)), dtype=np.float64) - -outdata = process_dataset(data) +indata = np.array(np.arange(64).reshape((4, 4, 4)), dtype=np.float64) +outdata = process_dataset(indata) print('Input') -print(data) +print(indata) print('Output') print(outdata)