From c6e962b843c0be41930944f88a46fec1e1847b51 Mon Sep 17 00:00:00 2001
From: Paul McCarthy <pauldmccarthy@gmail.com>
Date: Fri, 6 Mar 2020 11:09:16 +0000
Subject: [PATCH] script prac

---
 getting_started/08_scripts.ipynb | 91 +++++++++++++++++++++-----------
 getting_started/08_scripts.md    | 35 ++++++++----
 2 files changed, 83 insertions(+), 43 deletions(-)

diff --git a/getting_started/08_scripts.ipynb b/getting_started/08_scripts.ipynb
index 16faa7e..2fa5d9d 100644
--- a/getting_started/08_scripts.ipynb
+++ b/getting_started/08_scripts.ipynb
@@ -6,10 +6,19 @@
    "source": [
     "# Callable scripts in python\n",
     "\n",
-    "In this tutorial we will cover how to write simple stand-alone scripts in python that can be used as alternatives to bash scripts.\n",
+    "In this tutorial we will cover how to write simple stand-alone scripts in\n",
+    "python that can be used as alternatives to bash scripts.\n",
     "\n",
-    "There are some code blocks within this webpage, but for this practical we _**strongly\n",
-    "recommend that you write the code in an IDE or editor**_ instead and then run the scripts from a terminal.\n",
+    "**Important**: Throughout this series of practicals we have been working\n",
+    "entirely within the Jupyter notebook environment. But it's now time to\n",
+    "graduate to writing *real* Python  scripts, and running them within a\n",
+    "*real* enviromnent.\n",
+    "\n",
+    "So within this practical there are some code blocks, but instead of running\n",
+    "them inside the notebook we **strongly recommend that you write the code in\n",
+    "an IDE or editor**,and then run the scripts from a terminal.  [Don't\n",
+    "panic](https://www.youtube.com/watch?v=KojYatpLPSE), we're right here,\n",
+    "ready to help.\n",
     "\n",
     "## Contents\n",
     "\n",
@@ -73,6 +82,7 @@
    "outputs": [],
    "source": [
     "import subprocess as sp\n",
+    "import shlex\n",
     "sp.run(['ls', '-la'])"
    ]
   },
@@ -80,6 +90,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "> Passing the arguments as a list is good practice and improves the safety of\n",
+    "> the call.\n",
+    "\n",
     "To suppress the output do this:"
    ]
   },
@@ -105,7 +118,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "spobj = sp.run('ls -la'.split(), stdout = sp.PIPE)\n",
+    "spobj = sp.run(shlex.split('ls -la'), stdout = sp.PIPE)\n",
     "sout = spobj.stdout.decode('utf-8')\n",
     "print(sout)"
    ]
@@ -114,7 +127,16 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "> Note that the `decode` call in the middle line converts the string from a byte string to a normal string. In Python 3 there is a distinction between strings (sequences of characters, possibly using multiple bytes to store each character) and bytes (sequences of bytes). The world has moved on from ASCII, so in this day and age, this distinction is absolutely necessary, and Python does a fairly good job of it.\n",
+    "> shlex.split and shlex.quote are functions designed to break up and quote\n",
+    "> (respectively) shell command lines and arguments. Quoting of user provided\n",
+    "> arguments helps to prevent unintended consequences from inappropriate inputs.\n",
+    ">\n",
+    "> Note that the `decode` call in the middle line converts the string from a byte\n",
+    "> string to a normal string. In Python 3 there is a distinction between strings\n",
+    "> (sequences of characters, possibly using multiple bytes to store each\n",
+    "> character) and bytes (sequences of bytes). The world has moved on from ASCII,\n",
+    "> so in this day and age, this distinction is absolutely necessary, and Python\n",
+    "> does a fairly good job of it.\n",
     "\n",
     "If the output is numerical then this can be extracted like this:"
    ]
@@ -129,8 +151,9 @@
     "fsldir = os.getenv('FSLDIR')\n",
     "spobj = sp.run([fsldir+'/bin/fslstats', fsldir+'/data/standard/MNI152_T1_1mm_brain', '-V'], stdout = sp.PIPE)\n",
     "sout = spobj.stdout.decode('utf-8')\n",
-    "vol_vox = float(sout.split()[0])\n",
-    "vol_mm = float(sout.split()[1])\n",
+    "results = sout.split()\n",
+    "vol_vox = float(results[0])\n",
+    "vol_mm = float(results[1])\n",
     "print('Volumes are: ', vol_vox, ' in voxels and ', vol_mm, ' in mm')"
    ]
   },
@@ -147,6 +170,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "import shlex\n",
     "commands = \"\"\"\n",
     "{fsldir}/bin/fslmaths {t1} -bin {t1_mask}\n",
     "{fsldir}/bin/fslmaths {t2} -mas {t1_mask} {t2_masked}\n",
@@ -156,10 +180,10 @@
     "commands = commands.format(t1 = 't1.nii.gz', t1_mask = 't1_mask', t2 = 't2', t2_masked = 't2_masked', fsldir = fsldirpath)\n",
     "\n",
     "sout=[]\n",
-    "for cmd in commands.split('\\n'):\n",
+    "for cmd in commands.splitlines():\n",
     "    if cmd:   # avoids empty strings getting passed to sp.run()\n",
     "        print('Running command: ', cmd)\n",
-    "        spobj = sp.run(cmd.split(), stdout = sp.PIPE)\n",
+    "        spobj = sp.run(shlex.split(cmd), stdout = sp.PIPE)\n",
     "        sout.append(spobj.stdout.decode('utf-8'))"
    ]
   },
@@ -167,6 +191,24 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "> Don't be tempted to use the shell=True argument to subprocess.run, especially\n",
+    "> if you are dealing with user input - if the user gave\n",
+    "> *myfile; rm -f ~*\n",
+    "> as a file name and you called the command with shell=True **and** you\n",
+    "> passed the command in as a string then bad things happen!\n",
+    ">\n",
+    "> The safe way to use these kinds of inputs is to pass them through shlex.quote()\n",
+    "> before sending.\n",
+    ">\n",
+    "> ```a = shlex.quote('myfile; rm -f ~')\n",
+    "> cmd = \"ls {}\".format(a)\n",
+    "> sp.run(shlex.split(cmd))```\n",
+    "\n",
+    "\n",
+    "> If you're calling lots of FSL tools, the `fslpy` library has a number of\n",
+    "> *wrapper* functions, which can be used to call an FSL command directly\n",
+    "> from Python - check out `advanced_topics/08_fslpy.ipynb`.\n",
+    "\n",
     "<a class=\"anchor\" id=\"command-line-arguments\"></a>\n",
     "## Command line arguments\n",
     "\n",
@@ -190,6 +232,8 @@
    "source": [
     "For more sophisticated argument parsing you can use `argparse` -  good documentation and examples of this can be found on the web.\n",
     "\n",
+    "> argparse can automatically produce help text for the user, validate input etc., so it is strongly recommended.\n",
+    "\n",
     "---\n",
     "\n",
     "<a class=\"anchor\" id=\"example-script\"></a>\n",
@@ -213,7 +257,7 @@
     "outfile=$2\n",
     "# mask input image with MNI\n",
     "$FSLDIR/bin/fslmaths $infile -mas $FSLDIR/data/standard/MNI152_T1_1mm_brain $outfile\n",
-    "# calculate volumes of masked image  \n",
+    "# calculate volumes of masked image\n",
     "vv=`$FSLDIR/bin/fslstats $outfile -V`\n",
     "vol_vox=`echo $vv | awk '{ print $1 }'`\n",
     "vol_mm=`echo $vv | awk '{ print $2 }'`\n",
@@ -244,11 +288,12 @@
     "outfile = sys.argv[2]\n",
     "# mask input image with MNI\n",
     "spobj = sp.run([fsldir+'/bin/fslmaths', infile, '-mas', fsldir+'/data/standard/MNI152_T1_1mm_brain', outfile], stdout = sp.PIPE)\n",
-    "# calculate volumes of masked image  \n",
+    "# calculate volumes of masked image\n",
     "spobj = sp.run([fsldir+'/bin/fslstats', outfile, '-V'], stdout = sp.PIPE)\n",
     "sout = spobj.stdout.decode('utf-8')\n",
-    "vol_vox = float(sout.split()[0])\n",
-    "vol_mm = float(sout.split()[1])\n",
+    "results = sout.split()\n",
+    "vol_vox = float(results[0])\n",
+    "vol_mm = float(results[1])\n",
     "print('Volumes are: ', vol_vox, ' in voxels and ', vol_mm, ' in mm')"
    ]
   },
@@ -275,25 +320,7 @@
    ]
   }
  ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.5.2"
-  }
- },
+ "metadata": {},
  "nbformat": 4,
  "nbformat_minor": 2
 }
diff --git a/getting_started/08_scripts.md b/getting_started/08_scripts.md
index ec759f3..ab06a61 100644
--- a/getting_started/08_scripts.md
+++ b/getting_started/08_scripts.md
@@ -1,9 +1,18 @@
 # Callable scripts in python
 
-In this tutorial we will cover how to write simple stand-alone scripts in python that can be used as alternatives to bash scripts.
+In this tutorial we will cover how to write simple stand-alone scripts in
+python that can be used as alternatives to bash scripts.
 
-There are some code blocks within this webpage, but for this practical we _**strongly
-recommend that you write the code in an IDE or editor**_ instead and then run the scripts from a terminal.
+**Important**: Throughout this series of practicals we have been working
+entirely within the Jupyter notebook environment. But it's now time to
+graduate to writing *real* Python  scripts, and running them within a
+*real* enviromnent.
+
+So within this practical there are some code blocks, but instead of running
+them inside the notebook we **strongly recommend that you write the code in
+an IDE or editor**,and then run the scripts from a terminal.  [Don't
+panic](https://www.youtube.com/watch?v=KojYatpLPSE), we're right here,
+ready to help.
 
 ## Contents
 
@@ -63,10 +72,10 @@ print(sout)
 > arguments helps to prevent unintended consequences from inappropriate inputs.
 >
 > Note that the `decode` call in the middle line converts the string from a byte
-> string to a normal string. In Python 3 there is a distinction between strings 
-> (sequences of characters, possibly using multiple bytes to store each 
-> character) and bytes (sequences of bytes). The world has moved on from ASCII, 
-> so in this day and age, this distinction is absolutely necessary, and Python 
+> string to a normal string. In Python 3 there is a distinction between strings
+> (sequences of characters, possibly using multiple bytes to store each
+> character) and bytes (sequences of bytes). The world has moved on from ASCII,
+> so in this day and age, this distinction is absolutely necessary, and Python
 > does a fairly good job of it.
 
 If the output is numerical then this can be extracted like this:
@@ -100,7 +109,7 @@ for cmd in commands.splitlines():
         sout.append(spobj.stdout.decode('utf-8'))
 ```
 
-> Don't be tempted to use the shell=True argument to subprocess.run, especially 
+> Don't be tempted to use the shell=True argument to subprocess.run, especially
 > if you are dealing with user input - if the user gave
 > *myfile; rm -f ~*
 > as a file name and you called the command with shell=True **and** you
@@ -113,6 +122,11 @@ for cmd in commands.splitlines():
 > cmd = "ls {}".format(a)
 > sp.run(shlex.split(cmd))```
 
+
+> If you're calling lots of FSL tools, the `fslpy` library has a number of
+> *wrapper* functions, which can be used to call an FSL command directly
+> from Python - check out `advanced_topics/08_fslpy.ipynb`.
+
 <a class="anchor" id="command-line-arguments"></a>
 ## Command line arguments
 
@@ -144,7 +158,7 @@ infile=$1
 outfile=$2
 # mask input image with MNI
 $FSLDIR/bin/fslmaths $infile -mas $FSLDIR/data/standard/MNI152_T1_1mm_brain $outfile
-# calculate volumes of masked image  
+# calculate volumes of masked image
 vv=`$FSLDIR/bin/fslstats $outfile -V`
 vol_vox=`echo $vv | awk '{ print $1 }'`
 vol_mm=`echo $vv | awk '{ print $2 }'`
@@ -166,7 +180,7 @@ infile = sys.argv[1]
 outfile = sys.argv[2]
 # mask input image with MNI
 spobj = sp.run([fsldir+'/bin/fslmaths', infile, '-mas', fsldir+'/data/standard/MNI152_T1_1mm_brain', outfile], stdout = sp.PIPE)
-# calculate volumes of masked image  
+# calculate volumes of masked image
 spobj = sp.run([fsldir+'/bin/fslstats', outfile, '-V'], stdout = sp.PIPE)
 sout = spobj.stdout.decode('utf-8')
 results = sout.split()
@@ -186,4 +200,3 @@ mean or a _sum_ (and hence can do something that fslstats cannot!)
 ```
 # Don't write anything here - do it in a standalone script!
 ```
-
-- 
GitLab