From dedb67a0b741322bae73e3cc5451472787916609 Mon Sep 17 00:00:00 2001
From: Paul McCarthy <pauldmccarthy@gmail.com>
Date: Wed, 30 Oct 2019 06:29:23 +0000
Subject: [PATCH] ENH: New bids metadata reader.

---
 fsl/data/bids.py  | 204 ++++++++++++++++++++++++++++++++++++++++++++++
 fsl/data/image.py |   5 +-
 2 files changed, 206 insertions(+), 3 deletions(-)
 create mode 100644 fsl/data/bids.py

diff --git a/fsl/data/bids.py b/fsl/data/bids.py
new file mode 100644
index 000000000..46595141d
--- /dev/null
+++ b/fsl/data/bids.py
@@ -0,0 +1,204 @@
+#!/usr/bin/env python
+#
+# bids.py - Simple BIDS metadata reader.
+#
+# Author: Paul McCarthy <pauldmccarthy@gmail.com>
+#
+"""This module provides a few functions for working with BIDS data sets.
+
+.. autosummary::
+   :nosignatures:
+
+   isBIDSDir
+   inBIDSDir
+   isBIDSFile
+   loadMetadata
+
+
+.. note::  The `pybids <https://bids-standard.github.io/pybids/>`_ library is
+           a more suitable choice if you are after a more robust and featured
+           interface for working with BIDS datasets.
+"""
+
+
+import os.path   as op
+import itertools as it
+import              re
+import              glob
+import              json
+
+import fsl.utils.memoize as memoize
+import fsl.utils.path    as fslpath
+
+
+class BIDSFile(object):
+    """The ``BIDSFile`` class parses and stores the entities and suffix contained
+    in a BIDS file. See the :func:`parseFilename` function.
+
+    The :meth:`match` method can be used to compare two ``BIDSFile`` instances.
+
+    The following attributes are available on a ``BIDSFile`` instance:
+
+     - ``filename``: Absolute path to the file
+     - ``entities``: Dict of ``key : value`` pairs, the entities that are
+       present in the file name (e.g. ``{'sub' : '01}``)
+     - ``suffix``: File suffix (e.g. ``T1w``, ``bold``, etc.)
+    """
+
+
+    def __init__(self, filename):
+        """Create a ``BIDSFile``. """
+        entities, suffix = parseFilename(filename)
+        self.filename    = op.abspath(filename)
+        self.entities    = entities
+        self.suffix      = suffix
+
+
+    def match(self, other):
+        """Compare this ``BIDSFile`` to ``other``.
+
+        :arg other: ``BIDSFile`` to compare
+        :returns:   ``True`` if ``self.suffix == other.suffix`` and if
+                    all of the entities in ``other`` are present in ``self``,
+                    ``False`` otherwise.
+        """
+
+        suffix   = self.suffix == other.suffix
+        entities = True
+
+        for key, value in other.entities.items():
+            entities = entities and self.entities.get(key, None) == value
+
+        return suffix and entities
+
+
+def parseFilename(filename):
+    """Parses a BIDS-like file name. The file name is assumed to consist of
+    zero or more "entities" (alpha-numeric ``name-value`` pairs), a "suffix",
+    all separated by underscores, and a regular file extension. For example,
+    the following file::
+
+        sub-01_ses-01_task-stim_bold.nii.gz
+
+    has suffix ``bold``, and entities ``sub=01``, ``ses=01`` and ``task=stim``.
+
+    :returns: A tuple containing:
+               - A dict containing the entities.
+               - The suffix, or ``None`` if there is no suffix.
+    """
+
+    suffix   = None
+    entities = []
+    filename = op.basename(filename)
+    filename = fslpath.removeExt(filename, ['.nii', '.nii.gz', '.json'])
+    parts    = filename.split('_')
+
+    for part in parts[:-1]:
+        entities.append(part.split('-'))
+
+    part = parts[-1].split('-')
+
+    if len(part) == 1: suffix = part[0]
+    else:              entities.append(part.split('-'))
+
+    entities = dict(entities)
+
+    return entities, suffix
+
+
+def isBIDSDir(dirname):
+    """Returns ``True`` if ``dirname`` is the root directory of a BIDS dataset.
+    """
+    return op.exists(op.join(dirname, 'dataset_description.json'))
+
+
+def inBIDSDir(filename):
+    """Returns ``True`` if ``filename`` looks like it is within a BIDS dataset
+    directory, ``False`` otherwise.
+    """
+
+    dirname = op.abspath(op.dirname(filename))
+    inBIDS  = False
+
+    while True:
+
+        if isBIDSDir(dirname):
+            inBIDS = True
+            break
+
+        prevdir = dirname
+        dirname = op.dirname(dirname)
+
+        # at filesystem root
+        if prevdir == dirname:
+            break
+
+    return inBIDS
+
+
+def isBIDSFile(filename):
+    """Returns ``True`` if ``filename`` looks like a BIDS image or JSON file.
+    """
+
+    filename  = op.basename(filename)
+    pattern   = r'([a-z0-9]+-[a-z0-9]+_)*([a-z0-9])+\.(nii|nii\.gz|json)'
+    flags     = re.ASCII | re.IGNORECASE
+
+    return inBIDSDir(filename) and re.fullmatch(pattern, filename, flags)
+
+
+@memoize.memoize
+def loadMetadataFile(filename):
+    """Load ``filename`` (assumed to be JSON), returning its contents. """
+    with open(filename, 'rt') as f:
+        return json.load(f)
+
+
+def loadMetadata(filename):
+    """Load all of the metadata associated with ``filename``.
+
+    :arg filename: Path to a data file in a BIDS dataset.
+    :returns:      A dict containing all of the metadata associated with
+                   ``filename``
+    """
+
+    filename  = op.realpath(op.abspath(filename))
+    bfile     = BIDSFile(filename)
+    dirname   = op.dirname(filename)
+    prevdir   = filename
+    metafiles = []
+    metadata  = {}
+
+    # Walk up the directory tree until
+    # we hit the BIDS dataset root, or
+    # the filesystem root
+    while True:
+
+        # Gather all json files in this
+        # directory with matching entities
+        # and suffix, sorted alphabetically
+        files = sorted(glob.glob(op.join(dirname, '*.json')))
+        files = [BIDSFile(f) for f in files if isBIDSFile(f)]
+        files = [f.filename  for f in files if bfile.match(f)]
+
+        # build a list of all files
+        metafiles.append(files)
+
+        # move to the next dir up
+        prevdir = dirname
+        dirname = op.dirname(dirname)
+
+        # stop when we hit the dataset or filesystem root
+        if isBIDSDir(prevdir) or dirname == prevdir:
+            break
+
+    # Load in each json file, from
+    # shallowest to deepest, so entries
+    # in deeper files take precedence
+    # over shallower ones.
+    for f in it.chain(*reversed(metafiles)):
+
+        # assuming here that every file contains a dict
+        metadata.update(loadMetadataFile(f))
+
+    return metadata
diff --git a/fsl/data/image.py b/fsl/data/image.py
index 188167b9a..a31ab3173 100644
--- a/fsl/data/image.py
+++ b/fsl/data/image.py
@@ -61,8 +61,8 @@ log = logging.getLogger(__name__)
 
 ALLOWED_EXTENSIONS = ['.nii.gz', '.nii', '.img', '.hdr', '.img.gz', '.hdr.gz']
 """The file extensions which we understand. This list is used as the default
-if the ``allowedExts`` parameter is not passed to any of the functions
-below.
+if the ``allowedExts`` parameter is not passed to any of the ``*Ext``
+functions, or the :func:`looksLikeImage` function.
 """
 
 
@@ -246,7 +246,6 @@ class Nifti(notifier.Notifier, meta.Meta):
         if not isinstance(header, nib.analyze.AnalyzeHeader):
             raise ValueError('Unrecognised header: {}'.format(header))
 
-        header                   = header
         origShape, shape, pixdim = Nifti.determineShape(header)
         voxToWorldMat            = Nifti.determineAffine(header)
         affines, isneuro         = Nifti.generateAffines(voxToWorldMat,
-- 
GitLab