Commit eab4ee98 authored by Paul McCarthy's avatar Paul McCarthy 🚵
Browse files

Merge branch 'enh/imagetree' into 'master'

filetree query

See merge request fsl/fslpy!100
parents dc3ee77e dc8fa25c
Pipeline #3497 failed with stages
in 79 minutes and 33 seconds
......@@ -28,7 +28,7 @@ if [ "$TEST_STYLE"x != "x" ]; then exit 0; fi
# tests, and need $FSLDIR to be defined
export FSLDIR=/fsl/
mkdir -p $FSLDIR/data/
rsync -rv "fsldownload:data/atlases/" "$FSLDIR/data/atlases/"
rsync -rv "fsldownload:$FSL_ATLAS_DIR" "$FSLDIR/data/atlases/"
# Finally, run the damned tests.
TEST_OPTS="--cov-report= --cov-append"
......@@ -72,6 +72,9 @@ stages:
# - FSL_HOST: - Username@host to download FSL data from
# (e.g. "")
# - FSL_ATLAS_DIR: - Location of the FSL atlas data on
# - TWINE_USERNAME: - Username to use when uploading to pypi
# - TWINE_PASSWORD: - Password to use when uploading to pypi
......@@ -2,8 +2,8 @@ This document contains the ``fslpy`` release history in reverse chronological
2.0.0 (Under development)
2.0.0 (Wednesday March 20th 2019)
......@@ -13,18 +13,23 @@ Added
file/directory templates (Michiel Cottaar).
* Simple built-in :mod:`.deprecated` decorator.
* New :mod:`fsl.data.utils` module, which currently contains one function
:func:`.guessType`, which guess the data type of a file/directory path.
:func:`.guessType`, which guesses the data type of a file/directory path.
* New :func:`.commonBase` function for finding the common prefix of a set of
file/directory paths.
* Removed support for Python 2.7 and 3.4.
* Minimum required version of ``nibabel`` is now 2.3.
* The :class:`.Image` class now fully delegates to ``nibabel`` for managing
file handles.
* The :class:`.GiftiMesh` class can now load surface files which contain
vertex data.
vertex data, and will accept surface files which end in ``.gii``, rather
than requiring files which end in ``.surf.gii``.
* The ``name`` property of :class:`.Mesh` instances can now be updated.
......@@ -33,6 +38,14 @@ Removed
* Many deprecated items removed.
* Deprecated the :func:`.loadIndexedImageFile` function, and the ``indexed``
flag to the :class:`.Image` constructor.
1.13.3 (Friday February 8th 2019)
.. automodule:: fsl.utils.filetree.query
......@@ -6,6 +6,7 @@
.. automodule:: fsl.utils.filetree
......@@ -241,6 +241,12 @@ class Mesh(notifier.Notifier, meta.Meta):
return self.__name
def name(self, name):
"""Set the name of this ``Mesh``. """
self.__name = name
def dataSource(self):
"""Returns the data source of this ``Mesh``. """
......@@ -277,4 +277,5 @@ of the short variable names defined in the
__author__ = 'Michiel Cottaar <>'
from .filetree import FileTree, register_tree, MissingVariable
from .parse import tree_directories
from .parse import tree_directories, list_all_trees
from .query import FileTreeQuery
from pathlib import Path, PurePath
from typing import Tuple, Optional, Dict, Any, Set
from typing import Tuple, Optional, List, Dict, Any, Set
from copy import deepcopy
from . import parse
import pickle
import glob
import os.path as op
from . import filetree
from pathlib import PurePath
from typing import Tuple
from typing import Tuple, List
import re
......@@ -26,6 +27,17 @@ def search_tree(name: str) -> str:
raise ValueError("No file tree found for %s" % name)
def list_all_trees() -> List[str]:
    """Return a list containing paths to all tree files that can be found in
    the directories listed in ``tree_directories``.

    Each directory is made absolute, and every file within it whose name
    ends in ``.tree`` is included. Directories are visited in the order in
    which they appear in ``tree_directories``.

    :returns: list of absolute paths to ``.tree`` files.
    """
    trees = []
    for directory in tree_directories:
        directory = op.abspath(directory)
        trees.extend(glob.glob(op.join(directory, '*.tree')))
    return trees
def read_line(line: str) -> Tuple[int, PurePath, str]:
Parses line from the tree file
#!/usr/bin/env python
# - The FileTreeQuery class
# Author: Paul McCarthy <>
# Author: Michiel Cottaar <>
"""This module contains the :class:`FileTreeQuery` class, which can be used to
search for files in a directory described by a :class:`.FileTree`. A
``FileTreeQuery`` object returns :class:`Match` objects which each represent a
file that is described by the ``FileTree``, and which is present in the
The following utility functions, used by the ``FileTreeQuery`` class, are also
defined in this module:
.. autosummary::
import logging
import collections
import os.path as op
from typing import Dict, List, Tuple
import numpy as np
from . import FileTree
log = logging.getLogger(__name__)
class FileTreeQuery(object):
    """The ``FileTreeQuery`` class uses a :class:`.FileTree` to search
    a directory for files which match a specific query.

    A ``FileTreeQuery`` scans the contents of a directory which is described
    by a :class:`.FileTree`, and identifies all file types (a.k.a. *templates*
    or *short names*) that are present, and the values of variables within
    each short name that are present. The :meth:`query` method can be used to
    retrieve files which match a specific short name, and variable values.

    The :meth:`query` method can return a multi-dimensional ``numpy.array``
    which contains :class:`Match` objects, where each dimension represents
    one variable for the short name in question.

    Example usage::

        >>> from fsl.utils.filetree import FileTree, FileTreeQuery

        >>> tree  = FileTree.read('bids_raw', './my_bids_data')
        >>> query = FileTreeQuery(tree)

        >>> query.axes('anat_image')
        ['acq', 'ext', 'modality', 'participant', 'rec', 'run_index',
         'session']

        >>> query.variables('anat_image')
        {'acq': [None],
         'ext': ['.nii.gz'],
         'modality': ['T1w', 'T2w'],
         'participant': ['01', '02', '03'],
         'rec': [None],
         'run_index': [None, '01', '02', '03'],
         'session': [None]}

        >>> # A list of Match objects (the default), or with asarray=True
        >>> # an object array with one axis per entry of axes('anat_image'),
        >>> # holding Match objects where a file exists and nan elsewhere.
        >>> query.query('anat_image', participant='01')
    """

    def __init__(self, tree):
        """Create a ``FileTreeQuery``. The contents of the tree directory are
        scanned via the :func:`scan` function, which may take some time for
        large data sets.

        :arg tree: The :class:`.FileTree` object
        """

        # Find all files present in the directory
        # (as Match objects), and find all variables,
        # plus their values, and all short names,
        # that are present in the directory.
        matches = scan(tree)
        allvars, shortnamevars = allVariables(tree, matches)

        # Now we are going to build a series of ND
        # arrays to store Match objects. We create
        # one array for each short name. Each axis
        # in an array corresponds to a variable
        # present in files of that short name type,
        # and each position along an axis corresponds
        # to one value of that variable.
        #
        # These arrays will be used to store and
        # retrieve Match objects - given a short
        # name and a set of variable values, we
        # can quickly find the corresponding Match
        # object (or objects).

        # matcharrays contains {shortname : ndarray}
        # mappings, and varidxs contains
        # {shortname : {variable : {value : index}}}
        # mappings
        matcharrays = {}
        varidxs = {}

        for shortname, snvars in shortnamevars.items():

            snvarlens = [len(allvars[v]) for v in snvars]

            # An ND array for this short name. Each
            # element is a Match object, or nan. The
            # builtin ``object`` is used as the dtype,
            # because the ``np.object`` alias has been
            # removed from recent NumPy releases.
            matcharrays[shortname] = np.full(snvarlens, np.nan, dtype=object)

            # indices into the match array
            # for each variable value
            varidxs[shortname] = {
                v: {val: i for i, val in enumerate(allvars[v])}
                for v in snvars}

        # Populate the match arrays
        for match in matches:
            snvars = shortnamevars[match.short_name]
            snvaridxs = varidxs[match.short_name]
            snarr = matcharrays[match.short_name]

            # Match.variables is read once per match
            # rather than once per variable.
            mvars = match.variables

            # NOTE(review): the step which fills in the index
            # was missing from the listing this was restored
            # from - one index per axis, in axis order, is the
            # only reading consistent with the assignment below.
            idx = tuple(snvaridxs[var][mvars[var]] for var in snvars)

            snarr[idx] = match

        self.__allvars = allvars
        self.__shortnamevars = shortnamevars
        self.__matches = matches
        self.__matcharrays = matcharrays
        self.__varidxs = varidxs

    def axes(self, short_name) -> List[str]:
        """Returns a list containing the names of variables present in files
        of the given ``short_name`` type, in the same order of the axes of
        :class:`Match` arrays that are returned by the :meth:`query` method.
        """
        return self.__shortnamevars[short_name]

    def variables(self, short_name=None) -> Dict[str, List]:
        """Return a dict of ``{variable : [values]}`` mappings.
        This dict describes all variables and their possible values in
        the tree.

        If a ``short_name`` is specified, only variables which are present in
        files of that ``short_name`` type are returned.
        """
        if short_name is None:
            return {var: list(vals) for var, vals in self.__allvars.items()}

        varnames = self.__shortnamevars[short_name]
        return {var: list(self.__allvars[var]) for var in varnames}

    def short_names(self) -> List[str]:
        """Returns a list containing all short names of the ``FileTree`` that
        are present in the directory.
        """
        return list(self.__shortnamevars.keys())

    def query(self, short_name, asarray=False, **variables):
        """Search for files of the given ``short_name``, which match
        the specified ``variables``. All hits are returned for variables
        that are unspecified.

        :arg short_name: Short name of files to search for.

        :arg asarray:    If ``True``, the relevant :class:`Match` objects are
                         returned in a ND ``numpy.array`` where each
                         dimension corresponds to a variable for the
                         ``short_name`` in question (as returned by
                         :meth:`axes`). Otherwise (the default), they are
                         returned in a list.

        All other arguments are assumed to be ``variable=value`` pairs,
        used to restrict which matches are returned. All values are returned
        for variables that are not specified, or variables which are given a
        value of ``'*'``.

        :returns: A list of ``Match`` objects, (or a ``numpy.array`` if
                  ``asarray=True``).
        """

        allvarnames = self.__shortnamevars[short_name]
        varidxs = self.__varidxs[short_name]
        matcharray = self.__matcharrays[short_name]
        slc = []

        for var in allvarnames:

            val = variables.get(var, '*')

            # We're using np.newaxis to retain
            # the full dimensionality of the
            # array, so that the axis labels
            # returned by the axes() method
            # are valid.
            if val == '*':
                slc.append(slice(None))
            else:
                slc.extend([np.newaxis, varidxs[var][val]])

        result = matcharray[tuple(slc)]

        if asarray:
            return result

        # Empty cells hold nan, so keep only the real matches
        return [m for m in result.flat if isinstance(m, Match)]
class Match(object):
    """A ``Match`` object represents a file with a name matching a template in
    a ``FileTree``. The :func:`scan` function and :meth:`FileTreeQuery.query`
    method both return ``Match`` objects.

    ``Match`` objects are compared for equality on their file name, short
    name and variables, and are ordered by file name.
    """

    def __init__(self, filename, short_name, variables):
        """Create a ``Match`` object. All arguments are added as attributes.

        :arg filename:   name of existing file
        :arg short_name: template identifier
        :arg variables:  Dictionary of ``{variable : value}`` mappings
                         containing all variables present in the file name.
        """
        self.__filename = filename
        self.__short_name = short_name
        # Take a copy, so that later changes to the
        # caller's dict do not affect this Match.
        self.__variables = dict(variables)

    @property
    def filename(self):
        """The name of the file represented by this ``Match``. """
        return self.__filename

    @property
    def short_name(self):
        """The short name (template identifier) of this ``Match``. """
        return self.__short_name

    @property
    def variables(self):
        """A copy of the ``{variable : value}`` mappings of this ``Match``.
        Modifying the returned dict has no effect on the ``Match``.
        """
        return dict(self.__variables)

    def __eq__(self, other):
        """Two ``Match`` objects are equal if their file names, short names,
        and variables are all equal.
        """
        return (isinstance(other, Match) and
                self.filename == other.filename and
                self.short_name == other.short_name and
                self.variables == other.variables)

    def __hash__(self):
        """Hash on the file name and short name. Defining ``__eq__`` alone
        would make instances unhashable; objects which compare equal always
        share these two fields, so the hash is consistent with ``__eq__``.
        """
        return hash((self.filename, self.short_name))

    def __lt__(self, other):
        """``Match`` objects are ordered by file name. """
        return isinstance(other, Match) and self.filename < other.filename

    def __le__(self, other):
        """``Match`` objects are ordered by file name. """
        return isinstance(other, Match) and self.filename <= other.filename

    def __repr__(self):
        """Returns a string representation of this ``Match``. """
        return 'Match({})'.format(self.filename)

    def __str__(self):
        """Returns a string representation of this ``Match``. """
        return repr(self)
def scan(tree: FileTree) -> List[Match]:
    """Scans the directory of the given ``FileTree`` to find all files which
    match a tree template.

    :arg tree: :class:`.FileTree` to scan
    :return:   list of :class:`Match` objects
    """

    matches = []

    for template in tree.templates:
        for filename in tree.get_all(template, glob_vars='all'):

            # NOTE(review): the body of this branch was missing
            # from the listing this was restored from - skipping
            # paths which are not regular files is assumed.
            if not op.isfile(filename):
                continue

            variables = dict(tree.extract_variables(template, filename))
            matches.append(Match(filename, template, variables))

    # NOTE(review): the body of this loop was also missing -
    # recursing into each sub-tree and collecting its matches
    # is assumed; confirm against the upstream implementation.
    for tree_name, sub_tree in tree.sub_trees.items():
        matches.extend(scan(sub_tree))

    return matches
def allVariables(
        tree: FileTree,
        matches: List[Match]) -> Tuple[Dict[str, List], Dict[str, List]]:
    """Identifies the ``FileTree`` variables which are actually represented
    in files in the directory.

    :arg tree:    The ``FileTree`` object (not used by the current
                  implementation).
    :arg matches: list of ``Match`` objects (e.g. as returned by :func:`scan`)

    :returns: a tuple containing two dicts:

               - A dict of ``{ variable : [values] }`` mappings containing all
                 variables and their possible values present in the given list
                 of ``Match`` objects. Values are sorted, with ``None``
                 ordered as if it were an empty string.

               - A dict of ``{ short_name : [variables] }`` mappings,
                 containing the (sorted) variables which are relevant to each
                 short name.
    """
    allvars = collections.defaultdict(set)
    allshortnames = collections.defaultdict(set)

    for m in matches:
        for var, val in m.variables.items():
            allvars[var].add(val)
            # NOTE(review): this line was missing from the listing
            # this was restored from; without it the second dict
            # would always be empty.
            allshortnames[m.short_name].add(var)

    # allow us to compare None with strings
    def key(v):
        return '' if v is None else v

    allvars = {var: sorted(vals, key=key)
               for var, vals in allvars.items()}
    allshortnames = {sn: sorted(varnames)
                     for sn, varnames in allshortnames.items()}

    return allvars, allshortnames
......@@ -158,6 +158,7 @@ def extract_variables(template, filename, known_vars=None):
while '//' in sub_re:
sub_re = sub_re.replace('//', '/')
sub_re = sub_re.replace('.', '\.')
if re.match(sub_re, filename) is None:
......@@ -22,12 +22,14 @@ paths.
import os.path as op
import os
import glob
import operator
class PathError(Exception):
......@@ -471,3 +473,30 @@ def uniquePrefix(path):
hits = [h for h in hits if h.startswith(prefix)]
return prefix
def commonBase(paths):
    """Identifies the deepest common base directory shared by all files
    in ``paths``.

    The search starts from the parent directory of the deepest path, and
    walks upwards until a directory is found which contains (or is equal to)
    every path. Containment is tested on whole path components, so
    ``/a/bcd/file`` is not considered to be inside ``/a/b``.

    Raises a :exc:`PathError` if the paths have no common base. This will
    never happen for absolute paths (as the base will be e.g. ``'/'``).

    :arg paths: Sequence of file/directory paths.
    :returns:   The deepest directory shared by all of ``paths``.
    """

    # Start from the path with the most components
    # (the first such path, if there are several).
    base = max(paths, key=lambda p: len(p.split(op.sep)))
    last = base

    while True:

        base = op.split(base)[0]

        # Stop when we run out of parent directories, i.e.
        # the path is exhausted, or op.split no longer
        # changes it (which happens at a file system root).
        if base == last or len(base) == 0:
            break

        last = base

        # Only accept the base at a path component boundary -
        # a plain startswith test would wrongly accept a
        # sibling such as "/a/bcd" for the base "/a/b".
        prefix = base if base.endswith(op.sep) else base + op.sep

        if all(p == base or p.startswith(prefix) for p in paths):
            return base

    raise PathError('No common base')
#!/usr/bin/env python
# -
# Author: Paul McCarthy <>
import os
import glob
import shutil
import os.path as op
import contextlib
import textwrap as tw
import itertools as it
from .. import testdir
import fsl.utils.filetree as filetree
import fsl.utils.filetree.query as ftquery
_test_tree = """
T1w.nii.gz (T1w)
T2w.nii.gz (T2w)
{hemi}.{surf}.gii (surface)
_subjs = ['01', '02', '03']
_sess = ['1', '2']
_hemis = ['L', 'R']
_surfs = ['midthickness', 'pial', 'white']
def _test_data():
files = []
for subj, ses in it.product(_subjs, _sess):
sesdir = op.join('subj-{}'.format(subj), 'ses-{}'.format(ses))
files.append(op.join(sesdir, 'T1w.nii.gz'))
files.append(op.join(sesdir, 'T2w.nii.gz'))
for hemi, surf in it.product(_hemis, _surfs):
files.append(op.join(sesdir, '{}.{}.gii'.format(hemi, surf)))
with testdir(files):
with open('_test_tree.tree', 'wt') as f:
def _expected_matches(short_name, **kwargs):
matches = []
subjs = kwargs.get('participant', _subjs)
sess = kwargs.get('session', _sess)
surfs = kwargs.get('surf', _surfs)
hemis = kwargs.get('hemi', _hemis)