diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index d52a95da30dd45bf3875da42ae51a0d65a11fff7..66c835e87cfb1d58748b00290b3151f2ca4d53f1 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -2,6 +2,13 @@
 =====================
 
 
+0.14.0 (Friday 31st January 2025)
+---------------------------------
+
+* Adjusted the ``pyfeeds.evaluate.eval*`` routines so that they can be called
+  by external scripts.
+
+
 0.13.1 (Saturday 21st December 2024)
 ------------------------------------
 
diff --git a/doc/writing_a_test.md b/doc/writing_a_test.md
index 4d7425d64bb5e5a07f32602adf58bcd98759c9cc..d13f11ef19be30a5434aca1066a24c374ba46b1b 100644
--- a/doc/writing_a_test.md
+++ b/doc/writing_a_test.md
@@ -264,6 +264,10 @@ sp.call('flirt -in {} -ref {} -out {} -omat {}'.format(
     inimg, refimg, outimg, outxform).split())
 ```
 
+If you want to perform your own evaluation within your `feedsRun` script, you
+may find some of the routines in the `pyfeeds.evaluate` module useful. For
+example, the `evalImage` routine can be used to compare two NIfTI images.
+
 
 ## Other examples
 
diff --git a/pyfeeds/__init__.py b/pyfeeds/__init__.py
index 294e66ac205b9a433ee35c468ad686c435e89d2b..5f1fb6a2daa90cfc00842fb239e4cefd75635a2a 100644
--- a/pyfeeds/__init__.py
+++ b/pyfeeds/__init__.py
@@ -5,7 +5,7 @@
 # Author: Paul McCarthy <pauldmccarthy@gmail.com>
 #
 
-__version__ = '0.13.1'
+__version__ = '0.14.0'
 """The pyfeeds version number. """
 
 
@@ -17,9 +17,14 @@ from .common import (
 
 
 from .evaluate import (
+    evalHeader,
+    evalHeaderRestrictDims,
     evalImage,
+    evalImageMaxDiff,
     evalNumericalText,
     evalVectorImage,
+    evalPolarCoordinateImageGroup,
+    evalGiftiVertices,
     evalMD5,
     cmpArrays,
     cmpVectorArrays
diff --git a/pyfeeds/common.py b/pyfeeds/common.py
index 9711f5e3b15b98a2aa673a94e443ccea3aab17c6..33a49f7d807a8d7954cd783993e00d8b9e08f440 100644
--- a/pyfeeds/common.py
+++ b/pyfeeds/common.py
@@ -23,6 +23,7 @@ pyfeeds, and made available to test scripts.
 
 
 import            collections
+import            contextlib
 import            datetime
 import            tempfile
 import            logging
@@ -36,6 +37,23 @@ import numpy   as np
 log = logging.getLogger(__name__)
 
 
+@contextlib.contextmanager
+def tempdir():
+    """Context manager which creates and changes into a temporary directory,
+    On exit, the directory is deleted and the current directory restored.
+    """
+
+    prevdir = os.getcwd()
+
+    with tempfile.TemporaryDirectory(delete=True) as td:
+        try:
+            os.chdir(testdir)
+            yield td.name
+
+        finally:
+            os.chdir(prevdir)
+
+
 def printColumns(titles, columns):
     """Convenience function which pretty-prints a collection of columns in a
     tabular format.
diff --git a/pyfeeds/evaluate.py b/pyfeeds/evaluate.py
index 56dec2f1a86bcee73c71adef78e7a4dd2813d6dc..3f9bc3c19837ebfc79c6586d9b9d998020cc1578 100644
--- a/pyfeeds/evaluate.py
+++ b/pyfeeds/evaluate.py
@@ -13,8 +13,15 @@ identified by their name - any function with a name that begins in ``eval``,
 but not ``evaluate``, is considered to be an evaluation routine.
 
 
-All evaluation routines must accept two parameters, which are the names of two
-files to be compared.
+All evaluation routines must accept two positional arguments:
+ - Path to the file to test
+ - Path to the benchmark file to compare against
+
+And at least one keyword argument called ``pyf``, which may be ``None``. When
+an evaluation routine is called by ``pyfeeds``, this will be a
+:class:`.Pyfeeds` context object which provides access to the
+:class:`.ImageCache`. But when called by Python-based ``feedsRun`` scripts
+which evaluate their own results, this can be set to ``None``.
 
 
 Evaluation routines ending with the word ``Group`` are routines which accept
@@ -23,6 +30,7 @@ two groups of files, and evaluate the groups as a whole.
 
 All evaluation routines must return a numeric value, where 0 indicates that
 the files are identical, and non-0 indicates that the files are different.
+
 """
 
 
@@ -148,7 +156,7 @@ def getEvaluationRoutines(pyf, filename):
     for pattern, rnames in pyf.evalRoutines.items():
         if fnmatch.fnmatch(filename, pattern):
             rfuncs           = [getattr(thismod, n) for n in rnames]
-            rfuncs           = [ft.update_wrapper(ft.partial(f, pyf), f)
+            rfuncs           = [ft.update_wrapper(ft.partial(f, pyf=pyf), f)
                                 for f in rfuncs]
             routines         = list(zip(rnames, rfuncs))
             matches[pattern] = routines
@@ -391,15 +399,36 @@ def compareAllFiles(pyf, test, outputDir, benchmarkDir, logFile=None):
     return finalResult
 
 
-def evalHeader(pyf, testfile, benchmark, alldims=True):
+def loadImage(pyf, filename):
+    """Used by the ``eval*`` functions. If running within ``pyfeeds`` and the
+    ``Pyfeeds`` context object ``pyf`` is not None, loads ``filename`` via the
+    :class:`.ImageCache`. Otherwise, if ``pyf is None``, loads ``filename``
+    using ``nibabel``.
+
+    Returns a tuple containing the ``nibabel`` image, and a ``numpy``array
+    with the image data. If ``filename`` is not a NIfTI image, the second
+    returned value will be ``None``.
+    """
+    if pyf is not None:
+        return pyf.imageCache[filename]
+    else:
+        img = nib.load(filename)
+        if isinstance(image, nib.Nifti1Image):
+            data = np.asanyarray(image.dataobj)
+        else:
+            data = None
+        return img, data
+
+
+def evalHeader(testfile, benchmark, alldims=True, pyf=None):
     """Evaluation routine which compares the header fields of two NIFTI
     images.
 
     Returns 0 if they all match, 1 otherwise.
     """
 
-    img1   = pyf.imageCache[testfile][0]
-    img2   = pyf.imageCache[benchmark][0]
+    img1   = loadImage(testfile)[0]
+    img2   = loadImage(benchmark)[0]
     hdr1   = img1.header
     hdr2   = img2.header
     fields = ['dim',       'pixdim',     'intent_code',
@@ -423,7 +452,7 @@ def evalHeader(pyf, testfile, benchmark, alldims=True):
     return 0
 
 
-def evalHeaderRestrictDims(pyf, testfile, benchmark):
+def evalHeaderRestrictDims(testfile, benchmark, pyf=None):
     """Evaluation routine which compares the header fields of two NIFTI
     images. For the `dim` and `pixdim` fields, only the entries which
     are expected to be valid (e.g. `dim1`, `dim2`, and `dim3` for a 3D image)
@@ -431,33 +460,33 @@ def evalHeaderRestrictDims(pyf, testfile, benchmark):
 
     Returns 0 if they all match, 1 otherwise.
     """
-    return evalHeader(pyf, testfile, benchmark, alldims=False)
+    return evalHeader(testfile, benchmark, alldims=False, pyf=pyf)
 
 
-def evalImage(pyf, testfile, benchmark):
+def evalImage(testfile, benchmark, pyf=None):
     """Evaluation routine which compares the data from two NIFTI images.
     Returns the mean difference between the two images, normalised by
     the combined data range.
 
     The :func:`cmpArrays` function does the calculation.
     """
-    data1  = pyf.imageCache[testfile][1]
-    data2  = pyf.imageCache[benchmark][1]
+    data1 = loadImage(pyf, testfile)[1]
+    data2 = loadImage(pyf, benchmark)[1]
     return cmpArrays(data1, data2, metric='mean')
 
 
-def evalImageMaxDiff(pyf, testfile, benchmark):
+def evalImageMaxDiff(testfile, benchmark, pyf=None):
     """Evaluation routine which compares the data from two NIFTI images.
     Returns the maximum absolute difference between the two images.
 
     The :func:`cmpArrays` function does the calculation.
     """
-    data1  = pyf.imageCache[testfile][1]
-    data2  = pyf.imageCache[benchmark][1]
+    data1 = loadImage(pyf, testfile)[1]
+    data2 = loadImage(pyf, benchmark)[1]
     return cmpArrays(data1, data2, metric='max')
 
 
-def evalNumericalText(pyf, testfile, benchmark):
+def evalNumericalText(testfile, benchmark, pyf=None):
     """Evaluation routine which compares the numerical data from two text
     files.
 
@@ -470,7 +499,7 @@ def evalNumericalText(pyf, testfile, benchmark):
     return cmpArrays(data1, data2)
 
 
-def evalMD5(pyf, testfile, benchmark):
+def evalMD5(testfile, benchmark, pyf=None):
     """Compares the given files by calculating their MD5 hash. Returns
     0 if the hashes match, 1 otherwise.
     """
@@ -482,19 +511,19 @@ def evalMD5(pyf, testfile, benchmark):
     else:              return 0
 
 
-def evalVectorImage(pyf, testfile, benchmark):
+def evalVectorImage(testfile, benchmark, pyf=None):
     """Compares the given NIFTI images. It is assumed that they are
     both vector images, where each voxel contains a 3-dimensional
     vector, undirected, and centered at 0.
     """
 
-    data1 = pyf.imageCache[testfile][ 1]
-    data2 = pyf.imageCache[benchmark][1]
+    data1 = loadImage(pyf, testfile)[ 1]
+    data2 = loadImage(pyf, benchmark)[1]
 
     return cmpVectorArrays(data1, data2)
 
 
-def evalPolarCoordinateImageGroup(pyf, testfiles, benchmarks):
+def evalPolarCoordinateImageGroup(testfiles, benchmarks, pyf=None):
     """Compares the values in a set of files containing 3D polar coordinates.
 
     Currently only the ``phi`` and ``theta`` outputs of ``bedpostx`` are
@@ -531,10 +560,10 @@ def evalPolarCoordinateImageGroup(pyf, testfiles, benchmarks):
         raise ValueError('Wrong number of files: {} <-> {}'
                          ''.format(testfiles, benchmarks))
 
-    testtheta  = pyf.imageCache[testtheta[ 0]][1]
-    testphi    = pyf.imageCache[testphi[   0]][1]
-    benchtheta = pyf.imageCache[benchtheta[0]][1]
-    benchphi   = pyf.imageCache[benchphi[  0]][1]
+    testtheta  = loadImage(pyf, testtheta[ 0])[1]
+    testphi    = loadImage(pyf, testphi[   0])[1]
+    benchtheta = loadImage(pyf, benchtheta[0])[1]
+    benchphi   = loadImage(pyf, benchphi[  0])[1]
 
     if any((testphi   .shape != testtheta.shape,
             benchtheta.shape != testtheta.shape,
@@ -554,21 +583,19 @@ def evalPolarCoordinateImageGroup(pyf, testfiles, benchmarks):
     testvec  = nib.Nifti1Image(testvec,  np.eye(4), None)
     benchvec = nib.Nifti1Image(benchvec, np.eye(4), None)
 
-    name = 'evalPolarCoordinateImageGroup_' + '_'.join(testfiles)
-
-    pyf.imageCache[name + '_test']      = testvec
-    pyf.imageCache[name + '_benchmark'] = benchvec
-
-    return evalVectorImage(pyf, name + '_test', name + '_benchmark')
+    with common.tempdir():
+        nib.save(testvec,  'testvec.nii.gz')
+        nib.save(benchvec, 'benchvec.nii.gz')
+        return evalVectorImage('testvec.nii.gz', 'benchvec.nii.gz')
 
 
-def evalGiftiVertices(pyf, testfile, benchmark):
+def evalGiftiVertices(testfile, benchmark, pyf=None):
     """Compare the vertices of two GIFTI surfaces. Uses the :func:`cmpArrays`
     function.
     """
 
-    surf1 = pyf.imageCache[testfile][0]
-    surf2 = pyf.imageCache[benchmark][0]
+    surf1 = loadImage(pyf, testfile)[0]
+    surf2 = loadImage(pyf, benchmark)[0]
 
     # NIFTI_INTENT_POINTSET == 1008
     verts1 = [d for d in surf1.darrays if d.intent == 1008][0].data
diff --git a/pyfeeds/imagecache.py b/pyfeeds/imagecache.py
index a9a9a30a40f074e001d1a120d2f6c37c4e017cc5..aefca31b1b4cf479d1170892676466dd2d89aae8 100644
--- a/pyfeeds/imagecache.py
+++ b/pyfeeds/imagecache.py
@@ -72,8 +72,9 @@ class ImageCache:
     def __getitem__(self, imagefile):
         """Return the ``nibabel`` image corresponding to the given
         ``imagefile``, loading it if necessary. A tuple is returned,
-        containing the ``nibabel`` image, and a ``numpy``array with
-        the image data.
+        containing the ``nibabel`` image, and a ``numpy``array with the
+        image data. If ``filename`` is not a NIfTI image, the second
+        returned value will be ``None``.
         """
 
         imagefile = op.realpath(op.abspath(imagefile))
diff --git a/pyfeeds/tests/test_evaluate.py b/pyfeeds/tests/test_evaluate.py
index c016e787f1be52fc113e7a6d095b90031be2f691..4c9b4a0b82c1c52fcc0ef77b4439f8b29e61aa8a 100644
--- a/pyfeeds/tests/test_evaluate.py
+++ b/pyfeeds/tests/test_evaluate.py
@@ -76,9 +76,9 @@ def test_evalVectorImage():
         nib.Nifti1Image(vecarr1, np.eye(4)).to_filename(fname1)
         nib.Nifti1Image(vecarr2, np.eye(4)).to_filename(fname2)
 
-        assert evaluate.evalVectorImage(pyf, fname1, fname1) == 0
-        assert evaluate.evalVectorImage(pyf, fname2, fname2) == 0
-        assert evaluate.evalVectorImage(pyf, fname1, fname2) != 0
+        assert evaluate.evalVectorImage(fname1, fname1, pyf=pyf) == 0
+        assert evaluate.evalVectorImage(fname2, fname2, pyf=pyf) == 0
+        assert evaluate.evalVectorImage(fname1, fname2, pyf=pyf) != 0
 
 
 def test_evalImage():
@@ -95,6 +95,6 @@ def test_evalImage():
         nib.Nifti1Image(arr1, np.eye(4)).to_filename(fname1)
         nib.Nifti1Image(arr2, np.eye(4)).to_filename(fname2)
 
-        assert evaluate.evalImage(pyf, fname1, fname1) == 0
-        assert evaluate.evalImage(pyf, fname2, fname2) == 0
-        assert evaluate.evalImage(pyf, fname1, fname2) != 0
+        assert evaluate.evalImage(fname1, fname1, pyf=pyf) == 0
+        assert evaluate.evalImage(fname2, fname2, pyf=pyf) == 0
+        assert evaluate.evalImage(fname1, fname2, pyf=pyf) != 0