diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 0cc4dabc4521544b65fb56a788c02018a60da80e..4ff49b20c5fd9ccfcf6c5d0605f036e1c0ef01c8 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -12,6 +12,8 @@ Changed * The :class:`.Cache` class has a new ``lru`` option, allowing it to be used as a least-recently-used cache. +* The :mod:`.filetree` module has been refactored to make it easier for the + :mod:`.query` module to work with file tree hierarchies. 2.5.0 (Tuesday 6th August 2019) diff --git a/fsl/utils/filetree/filetree.py b/fsl/utils/filetree/filetree.py index a58744668d07767d477b4021e33fab50c10368e7..cf21f34b9d777ea87d9cd35a39a99f61cfd88d70 100644 --- a/fsl/utils/filetree/filetree.py +++ b/fsl/utils/filetree/filetree.py @@ -25,12 +25,14 @@ class FileTree(object): - ``variables``: dictionary mapping variables in the templates to specific values (variables set to None are explicitly unset) - ``sub_trees``: filename trees describing specific sub-directories - ``parent``: parent FileTree, of which this sub-tree is a sub-directory + - ``name``: descriptive name of the tree """ def __init__(self, templates: Dict[str, str], variables: Dict[str, Any], - sub_trees: Dict[str, "FileTree"]=None, - parent: Optional["FileTree"]=None): + sub_trees: Dict[str, "FileTree"] = None, + parent: Optional["FileTree"] = None, + name: str = None): """ Creates a new filename tree. """ @@ -40,6 +42,7 @@ class FileTree(object): sub_trees = {} self.sub_trees = sub_trees self._parent = parent + self._name = name @property def parent(self, ): @@ -48,6 +51,15 @@ class FileTree(object): """ return self._parent + + @property + def name(self, ): + """ + Name of this ``FileTree``, or ``None`` if it has no name. 
+ """ + return self._name + + @property def all_variables(self, ): """ @@ -346,6 +358,7 @@ class FileTree(object): filename = tree_name + '.tree' else: filename = parse.search_tree(tree_name) + tree_name = op.splitext(op.basename(filename))[0] filename = Path(filename) templates = {} @@ -384,6 +397,7 @@ class FileTree(object): raise ValueError("Name of sub_tree {short_name} used multiple times in {tree_name}.tree".format(**locals())) sub_trees[short_name] = sub_tree + sub_tree._name = short_name elif '=' in line: key, value = line.split('=') if len(key.split()) != 1: @@ -413,7 +427,7 @@ class FileTree(object): templates[short_name] = str(current_filename) file_variables.update(variables) - res = get_registered(tree_name, cls)(templates, variables=file_variables, sub_trees=sub_trees) + res = get_registered(tree_name, cls)(templates, variables=file_variables, sub_trees=sub_trees, name=tree_name) for tree in sub_trees.values(): tree._parent = res return res diff --git a/fsl/utils/filetree/query.py b/fsl/utils/filetree/query.py index e54968515d7e3e061370ce263f8afedc39d3e25a..49433fa546384f4ce4f03b95bf786ce0f6d64a95 100644 --- a/fsl/utils/filetree/query.py +++ b/fsl/utils/filetree/query.py @@ -30,7 +30,8 @@ from typing import Dict, List, Tuple import numpy as np -from . import FileTree +from fsl.utils.deprecated import deprecated +from . import FileTree log = logging.getLogger(__name__) @@ -44,11 +45,11 @@ class FileTreeQuery(object): by a :class:`.FileTree`, and identifies all file types (a.k.a. *templates* or *short names*) that are present, and the values of variables within each short name that are present. The :meth:`query` method can be used to - retrieve files which match a specific short name, and variable values. + retrieve files which match a specific template, and variable values. 
The :meth:`query` method returns a multi-dimensional ``numpy.array`` which contains :class:`Match` objects, where each dimension one - represents variable for the short name in question. + represents variable for the template in question. Example usage:: @@ -71,15 +72,13 @@ class FileTreeQuery(object): 'session': [None]} >>> query.query('anat_image', participant='01') - array([[[[[[[Match(./my_bids_data/sub-01/anat/sub-01_T1w.nii.gz)], - [nan], - [nan], - [nan]]]], - - [[[[Match(./my_bids_data/sub-01/anat/sub-01_T2w.nii.gz)], - [nan], - [nan], - [nan]]]]]]], dtype=object) + [Match(./my_bids_data/sub-01/anat/sub-01_T1w.nii.gz), + Match(./my_bids_data/sub-01/anat/sub-01_T2w.nii.gz)] + + + Matches for templates contained within sub-trees are referred to by + constructing a hierarchical path from the sub-tree template name(s), + and the template name - see the :meth:`Match.full_name` method. """ @@ -93,115 +92,132 @@ class FileTreeQuery(object): # Find all files present in the directory # (as Match objects), and find all variables, - # plus their values, and all short names, + # plus their values, and all templates, # that are present in the directory. - matches = scan(tree) - allvars, shortnamevars = allVariables(tree, matches) + matches = scan(tree) + allvars, templatevars = allVariables(tree, matches) # Now we are going to build a series of ND # arrays to store Match objects. We create - # one array for each short name. Each axis + # one array for each template. Each axis # in an array corresponds to a variable - # present in files of that short name type, + # present in files of that template type, # and each position along an axis corresponds # to one value of that variable. 
# # These arrays will be used to store and - # retrieve Match objects - given a short - # name and a set of variable values, we - # can quickly find the corresponding Match + # retrieve Match objects - given a template + # and a set of variable values, we can + # quickly find the corresponding Match # object (or objects). - # matcharrays contains {shortname : ndarray} + # matcharrays contains {template : ndarray} # mappings, and varidxs contains - # {shortname : {varvalue : index}} mappings + # {template : {varvalue : index}} mappings matcharrays = {} varidxs = {} - for shortname in shortnamevars.keys(): + for template, tvars in templatevars.items(): - snvars = shortnamevars[shortname] - snvarlens = [len(allvars[v]) for v in snvars] + tvarlens = [len(allvars[v]) for v in tvars] # An ND array for this short # name. Each element is a # Match object, or nan. - matcharray = np.zeros(snvarlens, dtype=np.object) + matcharray = np.zeros(tvarlens, dtype=np.object) matcharray[:] = np.nan # indices into the match array # for each variable value - snvaridxs = {} - for v in snvars: - snvaridxs[v] = {n : i for i, n in enumerate(allvars[v])} + tvaridxs = {} + for v in tvars: + tvaridxs[v] = {n : i for i, n in enumerate(allvars[v])} - matcharrays[shortname] = matcharray - varidxs[ shortname] = snvaridxs + matcharrays[template] = matcharray + varidxs[ template] = tvaridxs # Populate the match arrays for match in matches: - snvars = shortnamevars[match.short_name] - snvaridxs = varidxs[ match.short_name] - snarr = matcharrays[ match.short_name] - idx = [] - for var in snvars: + tvars = templatevars[match.full_name] + tvaridxs = varidxs[ match.full_name] + tarr = matcharrays[ match.full_name] + idx = [] + for var in tvars: val = match.variables[var] - idx.append(snvaridxs[var][val]) + idx.append(tvaridxs[var][val]) - snarr[tuple(idx)] = match + tarr[tuple(idx)] = match + self.__tree = tree self.__allvars = allvars - self.__shortnamevars = shortnamevars + self.__templatevars = 
templatevars self.__matches = matches self.__matcharrays = matcharrays self.__varidxs = varidxs - def axes(self, short_name) -> List[str]: + def axes(self, template) -> List[str]: """Returns a list containing the names of variables present in files - of the given ``short_name`` type, in the same order of the axes of + of the given ``template`` type, in the same order as the axes of :class:`Match` arrays that are returned by the :meth:`query` method. """ - return self.__shortnamevars[short_name] + return self.__templatevars[template] - def variables(self, short_name=None) -> Dict[str, List]: + def variables(self, template=None) -> Dict[str, List]: """Return a dict of ``{variable : [values]}`` mappings. This dict describes all variables and their possible values in the tree. - If a ``short_name`` is specified, only variables which are present in - files of that ``short_name`` type are returned. + If a ``template`` is specified, only variables which are present in + files of that ``template`` type are returned. """ - if short_name is None: + if template is None: return {var : list(vals) for var, vals in self.__allvars.items()} else: - varnames = self.__shortnamevars[short_name] + varnames = self.__templatevars[template] return {var : list(self.__allvars[var]) for var in varnames} @property + def tree(self): + """Returns the :class:`.FileTree` associated with this + ``FileTreeQuery``. + """ + return self.__tree + + + @property + def templates(self) -> List[str]: + """Returns a list containing all templates of the ``FileTree`` that + are present in the directory. + """ + return list(self.__templatevars.keys()) + + + @property + @deprecated('2.6.0', '3.0.0', 'Use templates instead') def short_names(self) -> List[str]: - """Returns a list containing all short names of the ``FileTree`` that + """Returns a list containing all templates of the ``FileTree`` that are present in the directory. 
""" - return list(self.__shortnamevars.keys()) + return self.templates - def query(self, short_name, asarray=False, **variables): - """Search for files of the given ``short_name``, which match + def query(self, template, asarray=False, **variables): + """Search for files of the given ``template``, which match the specified ``variables``. All hits are returned for variables that are unspecified. - :arg short_name: Short name of files to search for. + :arg template: Template of files to search for. - :arg asarray: If ``True``, the relevant :class:`Match` objects are - returned in a in a ND ``numpy.array`` where each - dimension corresponds to a variable for the - ``short_name`` in question (as returned by - :meth:`axes`). Otherwise (the default), they are - returned in a list. + :arg asarray: If ``True``, the relevant :class:`Match` objects are + returned in an ND ``numpy.array`` where each + dimension corresponds to a variable for the + ``template`` in question (as returned by + :meth:`axes`). Otherwise (the default), they are + returned in a list. All other arguments are assumed to be ``variable=value`` pairs, used to restrict which matches are returned. All values are returned @@ -213,9 +229,9 @@ class FileTreeQuery(object): """ varnames = list(variables.keys()) - allvarnames = self.__shortnamevars[short_name] - varidxs = self.__varidxs[ short_name] - matcharray = self.__matcharrays[short_name] + allvarnames = self.__templatevars[template] + varidxs = self.__varidxs[ template] + matcharray = self.__matcharrays[ template] slc = [] for var in allvarnames: @@ -244,16 +260,18 @@ class Match(object): """ - def __init__(self, filename, short_name, variables): + def __init__(self, filename, template, tree, variables): """Create a ``Match`` object. All arguments are added as attributes. 
:arg filename: name of existing file - :arg short_name: template identifier + :arg template: template identifier + :arg tree: :class:`.FileTree` which contains this ``Match`` :arg variables: Dictionary of ``{variable : value}`` mappings containing all variables present in the file name. """ self.__filename = filename - self.__short_name = short_name + self.__template = template + self.__tree = tree self.__variables = dict(variables) @@ -263,8 +281,44 @@ class Match(object): @property + @deprecated('2.6.0', '3.0.0', 'Use template instead') def short_name(self): - return self.__short_name + return self.template + + + @property + def template(self): + return self.__template + + + @property + def full_name(self): + """The ``full_name`` of a ``Match`` is a combination of the + ``template`` (i.e. the matched template), and the name(s) of + the relevant ``FileTree`` objects. + + It allows one to unambiguously identify the location of a ``Match`` + in a ``FileTree`` hierarchy, where the same ``template`` may be + used in different sub-trees. + """ + + def parents(tree): + if tree.parent is None: + return [] + else: + return [tree.parent] + parents(tree.parent) + + trees = [self.tree] + parents(self.tree) + + # Drop the root tree + trees = list(reversed(trees))[1:] + + return '/'.join([t.name for t in trees] + [self.template]) + + + @property + def tree(self): + return self.__tree @property @@ -275,7 +329,8 @@ class Match(object): def __eq__(self, other): return (isinstance(other, Match) and self.filename == other.filename and - self.short_name == other.short_name and + self.template == other.template and + self.tree is other.tree and self.variables == other.variables) @@ -289,7 +344,7 @@ class Match(object): def __repr__(self): """Returns a string representation of this ``Match``. 
""" - return 'Match({})'.format(self.filename) + return 'Match({}: {})'.format(self.full_name, self.filename) def __str__(self): @@ -301,11 +356,13 @@ def scan(tree : FileTree) -> List[Match]: """Scans the directory of the given ``FileTree`` to find all files which match a tree template. - :return: list of :class:`Match` objects + :arg tree: :class:`.FileTree` to scan + :returns: list of :class:`Match` objects """ matches = [] for template in tree.templates: + for filename in tree.get_all(template, glob_vars='all'): if not op.isfile(filename): @@ -313,7 +370,7 @@ def scan(tree : FileTree) -> List[Match]: variables = dict(tree.extract_variables(template, filename)) - matches.append(Match(filename, template, variables)) + matches.append(Match(filename, template, tree, variables)) for tree_name, sub_tree in tree.sub_trees.items(): matches.extend(scan(sub_tree)) @@ -336,26 +393,25 @@ def allVariables( variables and their possible values present in the given list of ``Match`` objects. - - A dict of ``{ short_name : [variables] }`` mappings, - containing the variables which are relevant to each short - name. + - A dict of ``{ full_name : [variables] }`` mappings, + containing the variables which are relevant to each template. 
""" - allvars = collections.defaultdict(set) - allshortnames = collections.defaultdict(set) + allvars = collections.defaultdict(set) + alltemplates = collections.defaultdict(set) for m in matches: for var, val in m.variables.items(): - allvars[ var] .add(val) - allshortnames[m.short_name].add(var) + allvars[ var] .add(val) + alltemplates[m.full_name].add(var) # allow us to compare None with strings def key(v): if v is None: return '' else: return v - allvars = {var : list(sorted(vals, key=key)) - for var, vals in allvars.items()} - allshortnames = {sn : list(sorted(vars)) - for sn, vars in allshortnames.items()} + allvars = {var : list(sorted(vals, key=key)) + for var, vals in allvars.items()} + alltemplates = {sn : list(sorted(vars)) + for sn, vars in alltemplates.items()} - return allvars, allshortnames + return allvars, alltemplates diff --git a/tests/test_filetree/test_query.py b/tests/test_filetree/test_query.py index 66e50c1f8d0c494b721e2e2ad0846e0629ef4b73..274604312e5fdbe536abc3d24bb8ec8f4a662606 100644 --- a/tests/test_filetree/test_query.py +++ b/tests/test_filetree/test_query.py @@ -52,7 +52,7 @@ def _test_data(): yield -def _expected_matches(short_name, **kwargs): +def _expected_matches(template, tree, **kwargs): matches = [] subjs = kwargs.get('participant', _subjs) @@ -64,18 +64,20 @@ def _expected_matches(short_name, **kwargs): sesdir = op.join('subj-{}'.format(subj), 'ses-{}'.format(ses)) - if short_name in ('T1w', 'T2w'): - f = op.join(sesdir, '{}.nii.gz'.format(short_name)) + if template in ('T1w', 'T2w'): + f = op.join(sesdir, '{}.nii.gz'.format(template)) matches.append(ftquery.Match(f, - short_name, + template, + tree, {'participant' : subj, 'session' : ses})) - elif short_name == 'surface': + elif template == 'surface': for hemi, surf in it.product(hemis, surfs): f = op.join(sesdir, '{}.{}.gii'.format(hemi, surf)) matches.append(ftquery.Match(f, - short_name, + template, + tree, {'participant' : subj, 'session' : ses, 'hemi' : hemi, @@ -84,19 
+86,19 @@ def _expected_matches(short_name, **kwargs): return matches -def _run_and_check_query(query, short_name, asarray=False, **vars): +def _run_and_check_query(query, template, asarray=False, **vars): - gotmatches = query.query( short_name, asarray=asarray, **vars) - expmatches = _expected_matches(short_name, **{k : [v] - for k, v - in vars.items()}) + gotmatches = query.query( template, asarray=asarray, **vars) + expmatches = _expected_matches(template, query.tree, **{k : [v] + for k, v + in vars.items()}) if not asarray: assert len(gotmatches) == len(expmatches) for got, exp in zip(sorted(gotmatches), sorted(expmatches)): assert got == exp else: - snvars = query.variables(short_name) + snvars = query.variables(template) assert len(snvars) == len(gotmatches.shape) @@ -108,7 +110,7 @@ def _run_and_check_query(query, short_name, asarray=False, **vars): for expmatch in expmatches: slc = [] - for var in query.axes(short_name): + for var in query.axes(template): if var not in vars or vars[var] == '*': vidx = snvars[var].index(expmatch.variables[var]) slc.append(vidx) @@ -128,7 +130,7 @@ def test_query_properties(): 'participant', 'session', 'surf'] - assert sorted(query.short_names) == ['T1w', 'T2w', 'surface'] + assert sorted(query.templates) == ['T1w', 'T2w', 'surface'] assert query.variables('T1w') == {'participant' : ['01', '02', '03'], 'session' : ['1', '2']} @@ -318,7 +320,7 @@ def test_query_subtree(): tree = filetree.FileTree.read('tree1.tree', '.') query = filetree.FileTreeQuery(tree) - assert sorted(query.short_names) == ['T1w', 'surface'] + assert sorted(query.templates) == ['T1w', 'surfdir/surface'] qvars = query.variables() assert sorted(qvars.keys()) == ['hemi', 'participant', 'surf'] @@ -330,7 +332,7 @@ def test_query_subtree(): assert sorted(qvars.keys()) == ['participant'] assert qvars['participant'] == ['01', '02', '03'] - qvars = query.variables('surface') + qvars = query.variables('surfdir/surface') assert sorted(qvars.keys()) == ['hemi', 
'participant', 'surf'] assert qvars['hemi'] == ['L', 'R'] assert qvars['participant'] == ['01', '02', '03'] @@ -346,7 +348,7 @@ def test_query_subtree(): assert [m.filename for m in sorted(got)] == [ op.join('subj-01', 'T1w.nii.gz')] - got = query.query('surface') + got = query.query('surfdir/surface') assert [m.filename for m in sorted(got)] == [ op.join('subj-01', 'surf', 'L.pial.gii'), op.join('subj-01', 'surf', 'L.white.gii'), @@ -361,7 +363,7 @@ def test_query_subtree(): op.join('subj-03', 'surf', 'R.pial.gii'), op.join('subj-03', 'surf', 'R.white.gii')] - got = query.query('surface', hemi='L') + got = query.query('surfdir/surface', hemi='L') assert [m.filename for m in sorted(got)] == [ op.join('subj-01', 'surf', 'L.pial.gii'), op.join('subj-01', 'surf', 'L.white.gii'), @@ -370,7 +372,7 @@ def test_query_subtree(): op.join('subj-03', 'surf', 'L.pial.gii'), op.join('subj-03', 'surf', 'L.white.gii')] - got = query.query('surface', surf='white') + got = query.query('surfdir/surface', surf='white') assert [m.filename for m in sorted(got)] == [ op.join('subj-01', 'surf', 'L.white.gii'), op.join('subj-01', 'surf', 'R.white.gii'), @@ -394,16 +396,17 @@ def test_scan(): t1wf = op.join(sesdir, 'T1w.nii.gz') t2wf = op.join(sesdir, 'T2w.nii.gz') - expmatches.append(ftquery.Match(t1wf, 'T1w', {'participant' : subj, - 'session' : ses})) - expmatches.append(ftquery.Match(t2wf, 'T2w', {'participant' : subj, - 'session' : ses})) + expmatches.append(ftquery.Match(t1wf, 'T1w', tree, {'participant' : subj, + 'session' : ses})) + expmatches.append(ftquery.Match(t2wf, 'T2w', tree, {'participant' : subj, + 'session' : ses})) for hemi, surf in it.product(_hemis, _surfs): surff = op.join(sesdir, '{}.{}.gii'.format(hemi, surf)) expmatches.append(ftquery.Match(surff, 'surface', + tree, {'participant' : subj, 'session' : ses, 'surf' : surf, @@ -414,7 +417,7 @@ def test_scan(): for got, exp in zip(sorted(gotmatches), sorted(expmatches)): assert got.filename == exp.filename - assert 
got.short_name == exp.short_name + assert got.template == exp.template assert got.variables == exp.variables