diff --git a/CHANGELOG.rst b/CHANGELOG.rst index be2e3f0f6e4ba72357d2fec32f715c2cb9a58b63..b2a848897d812547adb0c8a97c735a880fbb7140 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -15,13 +15,18 @@ Added tasks passed to :func:`.idle.idle` to be executed synchronously. * New :meth:`.IdleLoop.synchronous` context manager, to temporarily change the value of :meth:`.IdleLoop.neverQueue`. +* New :mod:`.bids` module, containing a few simple functions for working with + `BIDS <https://bids.neuroimaging.io>`_ datasets. +* New :func:`.image.loadMetadata` function, and ``loadMeta`` option to the + :class:`.Image` class, to automatically find and load any sidecar JSON files + associated with an image file. Changed ^^^^^^^ -* Internal reorganisation inm the :mod:`.idle` module. +* Internal reorganisation in the :mod:`.idle` module. Deprecated diff --git a/doc/fsl.utils.bids.rst b/doc/fsl.utils.bids.rst new file mode 100644 index 0000000000000000000000000000000000000000..4fa875b6cd71eaa72ad6ef8c2455472a162281d4 --- /dev/null +++ b/doc/fsl.utils.bids.rst @@ -0,0 +1,7 @@ +``fsl.utils.bids`` +================== + +.. 
automodule:: fsl.utils.bids + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/fsl.utils.rst b/doc/fsl.utils.rst index f253bac7e11914f3d5ccebcd152a576cd326ce66..d4d921736d3427664f1a6d514e7969bd458443c8 100644 --- a/doc/fsl.utils.rst +++ b/doc/fsl.utils.rst @@ -5,6 +5,7 @@ :hidden: fsl.utils.assertions + fsl.utils.bids fsl.utils.cache fsl.utils.deprecated fsl.utils.ensure diff --git a/fsl/data/image.py b/fsl/data/image.py index 188167b9ae417e7e8ba75a59d9f4bc158cf34cac..4cde726519f32c255c17a37dfc27f9452f4f67da 100644 --- a/fsl/data/image.py +++ b/fsl/data/image.py @@ -35,6 +35,7 @@ and file names: import os import os.path as op import itertools as it +import json import string import logging import tempfile @@ -52,6 +53,7 @@ import fsl.utils.notifier as notifier import fsl.utils.memoize as memoize import fsl.utils.path as fslpath import fsl.utils.deprecated as deprecated +import fsl.utils.bids as fslbids import fsl.data.constants as constants import fsl.data.imagewrapper as imagewrapper @@ -61,8 +63,8 @@ log = logging.getLogger(__name__) ALLOWED_EXTENSIONS = ['.nii.gz', '.nii', '.img', '.hdr', '.img.gz', '.hdr.gz'] """The file extensions which we understand. This list is used as the default -if the ``allowedExts`` parameter is not passed to any of the functions -below. +if the ``allowedExts`` parameter is not passed to any of the ``*Ext`` +functions, or the :func:`looksLikeImage` function. """ @@ -246,7 +248,6 @@ class Nifti(notifier.Notifier, meta.Meta): if not isinstance(header, nib.analyze.AnalyzeHeader): raise ValueError('Unrecognised header: {}'.format(header)) - header = header origShape, shape, pixdim = Nifti.determineShape(header) voxToWorldMat = Nifti.determineAffine(header) affines, isneuro = Nifti.generateAffines(voxToWorldMat, @@ -905,6 +906,7 @@ class Image(Nifti): indexed=False, threaded=False, dataSource=None, + loadMeta=False, **kwargs): """Create an ``Image`` object with the given image data or file name. 
@@ -954,6 +956,12 @@ class Image(Nifti): used to specify the file from which the image was loaded. + :arg loadMeta: If ``True``, any metadata contained in JSON sidecar + files is loaded and attached to this ``Image`` via + the :class:`.Meta` interface. If ``False``, metadata + can be loaded at a later stage via the + :func:`loadMetadata` function. Defaults to ``False``. + All other arguments are passed through to the ``nibabel.load`` function (if it is called). """ @@ -1078,6 +1086,9 @@ class Image(Nifti): if calcRange: self.calcRange() + if self.dataSource is not None and loadMeta: + self.updateMeta(loadMetadata(self)) + self.__imageWrapper.register(self.__lName, self.__dataRangeChanged) @@ -1419,6 +1430,36 @@ def canonicalShape(shape): return shape +def loadMetadata(image): + """Searches for and loads any sidecar JSON files associated with the given + :class:`.Image`. + + If the image looks to be part of a BIDS data set, + :func:`.bids.loadMetadata` is used. Otherwise, if a JSON file with the same + file prefix is present alongside the image, it is directly loaded. + + :arg image: :class:`.Image` instance + :returns: Dict containing any metadata that was loaded. + """ + + if image.dataSource is None: + return {} + + filename = image.dataSource + basename = op.basename(removeExt(filename)) + dirname = op.dirname(filename) + + if fslbids.inBIDSDir(image.dataSource): + return fslbids.loadMetadata(image.dataSource) + + jsonfile = op.join(dirname, '{}.json'.format(basename)) + if op.exists(jsonfile): + with open(jsonfile, 'rt') as f: + return json.load(f) + + return {} + + def looksLikeImage(filename, allowedExts=None): """Returns ``True`` if the given file looks like a NIFTI image, ``False`` otherwise. 
diff --git a/fsl/utils/bids.py b/fsl/utils/bids.py new file mode 100644 index 0000000000000000000000000000000000000000..17f428f5c8a8927a542187658af8a17c49a57960 --- /dev/null +++ b/fsl/utils/bids.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python +# +# bids.py - Simple BIDS metadata reader. +# +# Author: Paul McCarthy <pauldmccarthy@gmail.com> +# +"""This module provides a few functions for working with BIDS data sets. + +.. autosummary:: + :nosignatures: + + isBIDSDir + inBIDSDir + isBIDSFile + loadMetadata + +All of the other functions in this module should not be considered part of the +public API. + + +.. note:: The `pybids <https://bids-standard.github.io/pybids/>`_ library is + a more suitable choice if you are after a more robust and featured + interface for working with BIDS datasets. +""" + + +import os.path as op +import itertools as it +import re +import glob +import json + +import fsl.utils.memoize as memoize +import fsl.utils.path as fslpath + + +class BIDSFile(object): + """The ``BIDSFile`` class parses and stores the entities and suffix contained + in a BIDS file. See the :func:`parseFilename` function. + + The :meth:`match` method can be used to compare two ``BIDSFile`` instances. + + The following attributes are available on a ``BIDSFile`` instance: + + - ``filename``: Absolute path to the file + - ``entities``: Dict of ``key : value`` pairs, the entities that are + present in the file name (e.g. ``{'sub' : '01'}``) + - ``suffix``: File suffix (e.g. ``T1w``, ``bold``, etc.) + """ + + + def __init__(self, filename): + """Create a ``BIDSFile``. """ + entities, suffix = parseFilename(filename) + self.filename = op.abspath(filename) + self.entities = entities + self.suffix = suffix + + + def match(self, other): + """Compare this ``BIDSFile`` to ``other``. + + :arg other: ``BIDSFile`` to compare + :returns: ``True`` if ``self.suffix == other.suffix`` and if + all of the entities in ``other`` are present in ``self``, + ``False`` otherwise. 
+ """ + + suffix = self.suffix == other.suffix + entities = True + + for key, value in other.entities.items(): + entities = entities and self.entities.get(key, None) == value + + return suffix and entities + + +def parseFilename(filename): + """Parses a BIDS-like file name. The file name must consist of zero or more + "entities" (alpha-numeric ``name-value`` pairs), a "suffix", all separated + by underscores, and a regular file extension. For example, the following + file:: + + sub-01_ses-01_task-stim_bold.nii.gz + + has suffix ``bold``, and entities ``sub=01``, ``ses=01`` and ``task=stim``. + + :returns: A tuple containing: + - A dict containing the entities + - The suffix + """ + + if not isBIDSFile(filename, strict=False): + raise ValueError('Does not look like a BIDS ' + 'file: {}'.format(filename)) + + suffix = None + entities = [] + filename = op.basename(filename) + filename = fslpath.removeExt(filename, ['.nii', '.nii.gz', '.json']) + parts = filename.split('_') + + for part in parts[:-1]: + entities.append(part.split('-')) + + suffix = parts[-1] + entities = dict(entities) + + return entities, suffix + + +def isBIDSDir(dirname): + """Returns ``True`` if ``dirname`` is the root directory of a BIDS dataset. + """ + return op.exists(op.join(dirname, 'dataset_description.json')) + + +def inBIDSDir(filename): + """Returns ``True`` if ``filename`` looks like it is within a BIDS dataset + directory, ``False`` otherwise. + """ + + dirname = op.abspath(op.dirname(filename)) + inBIDS = False + + while True: + + if isBIDSDir(dirname): + inBIDS = True + break + + prevdir = dirname + dirname = op.dirname(dirname) + + # at filesystem root + if prevdir == dirname: + break + + return inBIDS + + +def isBIDSFile(filename, strict=True): + """Returns ``True`` if ``filename`` looks like a BIDS image or JSON file. 
+ + :arg filename: Name of file to check + :arg strict: If ``True`` (the default), the file must be within a BIDS + dataset directory, as defined by :func:`inBIDSDir`. + """ + + name = op.basename(filename) + pattern = r'([a-z0-9]+-[a-z0-9]+_)*([a-z0-9])+\.(nii|nii\.gz|json)' + flags = re.ASCII | re.IGNORECASE + match = re.fullmatch(pattern, name, flags) + + return ((not strict) or inBIDSDir(filename)) and match + + +@memoize.memoize +def loadMetadataFile(filename): + """Load ``filename`` (assumed to be JSON), returning its contents. """ + with open(filename, 'rt') as f: + return json.load(f) + + +def loadMetadata(filename): + """Load all of the metadata associated with ``filename``. + + :arg filename: Path to a data file in a BIDS dataset. + :returns: A dict containing all of the metadata associated with + ``filename`` + """ + + filename = op.realpath(op.abspath(filename)) + bfile = BIDSFile(filename) + dirname = op.dirname(filename) + prevdir = filename + metafiles = [] + metadata = {} + + # Walk up the directory tree until + # we hit the BIDS dataset root, or + # the filesystem root + while True: + + # Gather all json files in this + # directory with matching entities + # and suffix, sorted alphabetically + # and reversed, so that earlier + # ones take precedence + files = reversed(sorted(glob.glob(op.join(dirname, '*.json')))) + files = [BIDSFile(f) for f in files if isBIDSFile(f)] + files = [f.filename for f in files if bfile.match(f)] + + # build a list of all files + metafiles.append(files) + + # move to the next dir up + prevdir = dirname + dirname = op.dirname(dirname) + + # stop when we hit the dataset or filesystem root + if isBIDSDir(prevdir) or dirname == prevdir: + break + + # Load in each json file, from + # shallowest to deepest, so entries + # in deeper files take precedence + # over shallower ones. 
+ for f in it.chain(*reversed(metafiles)): + + # assuming here that every file contains a dict + metadata.update(loadMetadataFile(f)) + + return metadata diff --git a/fsl/utils/meta.py b/fsl/utils/meta.py index 3e65d398ec078817242dd5c8ee205db7b7e957e0..30d18a567021f014f9ae41fcbcb381f43b47155d 100644 --- a/fsl/utils/meta.py +++ b/fsl/utils/meta.py @@ -25,6 +25,7 @@ class Meta(object): metaItems getMeta setMeta + updateMeta """ def __new__(cls, *args, **kwargs): @@ -67,3 +68,8 @@ class Meta(object): """Add some metadata with the specified key (``dict.__setitem__``). """ self.__meta.__setitem__(*args, **kwargs) + + + def updateMeta(self, *args, **kwargs): + """Update the metadata dict (``dict.update``). """ + self.__meta.update(*args, **kwargs) diff --git a/tests/test_bids.py b/tests/test_bids.py new file mode 100644 index 0000000000000000000000000000000000000000..58bb08f516554d1c58d85991a75ee4d9c38f7bb9 --- /dev/null +++ b/tests/test_bids.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python +# +# test_bids.py - +# +# Author: Paul McCarthy <pauldmccarthy@gmail.com> +# + + +import json +import os.path as op +import itertools as it +from pathlib import Path + +import pytest + +from fsl.utils.tempdir import tempdir +import fsl.utils.bids as fslbids + + +def test_parseFilename(): + with pytest.raises(ValueError): + fslbids.parseFilename('bad.txt') + + tests = [ + ('sub-01_ses-01_t1w.nii.gz', + ({'sub' : '01', 'ses' : '01'}, 't1w')), + ('a-1_b-2_c-3_d-4_e.json', + ({'a' : '1', 'b' : '2', 'c' : '3', 'd' : '4'}, 'e')), + ] + + for filename, expect in tests: + assert fslbids.parseFilename(filename) == expect + + +def test_isBIDSDir(): + with tempdir(): + assert not fslbids.isBIDSDir('.') + with tempdir(): + Path('dataset_description.json').touch() + assert fslbids.isBIDSDir('.') + + +def test_inBIDSDir(): + with tempdir(): + Path('a/b/c').mkdir(parents=True) + Path('dataset_description.json').touch() + assert fslbids.inBIDSDir(Path('.')) + assert fslbids.inBIDSDir(Path('a')) + assert 
fslbids.inBIDSDir(Path('a/b')) + assert fslbids.inBIDSDir(Path('a/b/c')) + with tempdir(): + Path('a/b/c').mkdir(parents=True) + assert not fslbids.inBIDSDir(Path('.')) + assert not fslbids.inBIDSDir(Path('a')) + assert not fslbids.inBIDSDir(Path('a/b')) + assert not fslbids.inBIDSDir(Path('a/b/c')) + + +def test_isBIDSFile(): + goodfiles = [ + Path('sub-01_ses-01_t1w.nii.gz'), + Path('sub-01_ses-01_t1w.nii'), + Path('sub-01_ses-01_t1w.json'), + Path('a-1_b-2_c-3_d-4_e.nii.gz'), + ] + badfiles = [ + Path('sub-01_ses-01.nii.gz'), + Path('sub-01_ses-01_t1w'), + Path('sub-01_ses-01_t1w.'), + Path('sub-01_ses-01_t1w.txt'), + Path('sub_ses-01_t1w.nii.gz'), + Path('sub-01_ses_t1w.nii.gz'), + ] + with tempdir(): + Path('dataset_description.json').touch() + for f in goodfiles: assert fslbids.isBIDSFile(f) + for f in badfiles: assert not fslbids.isBIDSFile(f) + with tempdir(): + for f in it.chain(goodfiles, badfiles): + assert not fslbids.isBIDSFile(f) + + +def test_loadMetadata(): + dd = Path('dataset_description.json') + t1 = Path('sub-01/func/sub-01_task-stim_bold.nii.gz') + json1 = Path('sub-01/func/sub-01_task-stim_bold.json') + json2 = Path('sub-01/sub-01_bold.json') + json3 = Path('sub-01_t1w.json') + json4 = Path('sub-01/task-stim_bold.json') + meta1 = {'a' : '1', 'b' : '2'} + meta2 = {'a' : '10', 'c' : '3'} + meta3 = {'a' : '109', 'b' : '99'} + meta4 = {'c' : '9', 'd' : '5'} + + with tempdir(): + dd.touch() + Path(op.dirname(t1)).mkdir(parents=True) + t1.touch() + assert fslbids.loadMetadata(t1) == {} + json1.write_text(json.dumps(meta1)) + assert fslbids.loadMetadata(t1) == meta1 + json2.write_text(json.dumps(meta2)) + assert fslbids.loadMetadata(t1) == {**meta2, **meta1} + json3.write_text(json.dumps(meta3)) + assert fslbids.loadMetadata(t1) == {**meta2, **meta1} + json4.write_text(json.dumps(meta4)) + assert fslbids.loadMetadata(t1) == {**meta4, **meta2, **meta1} diff --git a/tests/test_image.py b/tests/test_image.py index 
22402c4c2c4b3d5ec3255259f9d606d22d463024..ace4dcb510006bc71acf2c9aceba4493d2944c9c 100644 --- a/tests/test_image.py +++ b/tests/test_image.py @@ -8,6 +8,7 @@ import os +import json import os.path as op import itertools as it @@ -1382,3 +1383,50 @@ def test_complex(): assert image[3, 3, 3] == data[3, 3, 3] assert dmin == data.min() assert dmax == data.max() + + +def test_loadMeta(): + with tempdir(): + make_image('image.nii.gz') + + meta = {'a' : 1, 'b' : 2} + with open('image.json', 'wt') as f: + json.dump(meta, f) + + img = fslimage.Image('image.nii.gz', loadMeta=True) + + assert img.getMeta('a') == 1 + assert img.getMeta('b') == 2 + + +def test_loadMetadata(): + with tempdir(): + make_image('image.nii.gz') + + meta = {'a' : 1, 'b' : 2} + with open('image.json', 'wt') as f: + json.dump(meta, f) + + img = fslimage.Image('image.nii.gz') + gotmeta = fslimage.loadMetadata(img) + + assert gotmeta == meta + + with tempdir(): + imgfile = op.join('data', 'sub-01', 'anat', 'sub-01_T1w.nii.gz') + metafile = op.join('data', 'T1w.json') + + os.makedirs(op.dirname(imgfile)) + make_image(imgfile) + + with open(op.join('data', 'dataset_description.json'), 'wt') as f: + pass + + meta = {'a' : 1, 'b' : 2} + with open(metafile, 'wt') as f: + json.dump(meta, f) + + img = fslimage.Image(imgfile) + gotmeta = fslimage.loadMetadata(img) + + assert gotmeta == meta diff --git a/tests/test_meta.py b/tests/test_meta.py index 2d323d066e3d8ddd822d226a96031a4f10486265..791f1873b0b070e8b3a61c78aa0e8ae58917f541 100644 --- a/tests/test_meta.py +++ b/tests/test_meta.py @@ -22,3 +22,8 @@ def test_meta(): assert list(data.keys()) == list(m.metaKeys()) assert list(data.values()) == list(m.metaValues()) assert list(data.items()) == list(m.metaItems()) + + data.update( {'d' : 4, 'e' : 5}) + m.updateMeta({'d' : 4, 'e' : 5}) + + assert list(data.items()) == list(m.metaItems())