Commit 287207e5 authored by Paul McCarthy's avatar Paul McCarthy 🚵
Browse files

Merge branch 'rf/filetree_match' into 'master'

Rf/filetree match

See merge request fsl/fslpy!153
parents 8cd0f064 7c3ca51d
Pipeline #4121 passed with stages
in 15 minutes and 1 second
...@@ -12,6 +12,8 @@ Changed ...@@ -12,6 +12,8 @@ Changed
* The :class:`.Cache` class has a new ``lru`` option, allowing it to be used * The :class:`.Cache` class has a new ``lru`` option, allowing it to be used
as a least-recently-used cache. as a least-recently-used cache.
* The :mod:`.filetree` module has been refactored to make it easier for the
:mod:`.query` module to work with file tree hierarchies.
2.5.0 (Tuesday 6th August 2019) 2.5.0 (Tuesday 6th August 2019)
......
...@@ -25,12 +25,14 @@ class FileTree(object): ...@@ -25,12 +25,14 @@ class FileTree(object):
- ``variables``: dictionary mapping variables in the templates to specific values (variables set to None are explicitly unset) - ``variables``: dictionary mapping variables in the templates to specific values (variables set to None are explicitly unset)
- ``sub_trees``: filename trees describing specific sub-directories - ``sub_trees``: filename trees describing specific sub-directories
- ``parent``: parent FileTree, of which this sub-tree is a sub-directory - ``parent``: parent FileTree, of which this sub-tree is a sub-directory
- ``name``: descriptive name of the tree
""" """
def __init__(self, def __init__(self,
templates: Dict[str, str], templates: Dict[str, str],
variables: Dict[str, Any], variables: Dict[str, Any],
sub_trees: Dict[str, "FileTree"]=None, sub_trees: Dict[str, "FileTree"] = None,
parent: Optional["FileTree"]=None): parent: Optional["FileTree"] = None,
name: str = None):
""" """
Creates a new filename tree. Creates a new filename tree.
""" """
...@@ -40,6 +42,7 @@ class FileTree(object): ...@@ -40,6 +42,7 @@ class FileTree(object):
sub_trees = {} sub_trees = {}
self.sub_trees = sub_trees self.sub_trees = sub_trees
self._parent = parent self._parent = parent
self._name = name
@property @property
def parent(self, ): def parent(self, ):
...@@ -48,6 +51,15 @@ class FileTree(object): ...@@ -48,6 +51,15 @@ class FileTree(object):
""" """
return self._parent return self._parent
@property
def name(self):
    """Return the descriptive name given to this ``FileTree``.

    The value is read straight from ``_name`` and is ``None`` when the
    tree has no name.
    """
    label = self._name
    return label
@property @property
def all_variables(self, ): def all_variables(self, ):
""" """
...@@ -346,6 +358,7 @@ class FileTree(object): ...@@ -346,6 +358,7 @@ class FileTree(object):
filename = tree_name + '.tree' filename = tree_name + '.tree'
else: else:
filename = parse.search_tree(tree_name) filename = parse.search_tree(tree_name)
tree_name = op.splitext(op.basename(filename))[0]
filename = Path(filename) filename = Path(filename)
templates = {} templates = {}
...@@ -384,6 +397,7 @@ class FileTree(object): ...@@ -384,6 +397,7 @@ class FileTree(object):
raise ValueError("Name of sub_tree {short_name} used multiple times in {tree_name}.tree".format(**locals())) raise ValueError("Name of sub_tree {short_name} used multiple times in {tree_name}.tree".format(**locals()))
sub_trees[short_name] = sub_tree sub_trees[short_name] = sub_tree
sub_tree._name = short_name
elif '=' in line: elif '=' in line:
key, value = line.split('=') key, value = line.split('=')
if len(key.split()) != 1: if len(key.split()) != 1:
...@@ -413,7 +427,7 @@ class FileTree(object): ...@@ -413,7 +427,7 @@ class FileTree(object):
templates[short_name] = str(current_filename) templates[short_name] = str(current_filename)
file_variables.update(variables) file_variables.update(variables)
res = get_registered(tree_name, cls)(templates, variables=file_variables, sub_trees=sub_trees) res = get_registered(tree_name, cls)(templates, variables=file_variables, sub_trees=sub_trees, name=tree_name)
for tree in sub_trees.values(): for tree in sub_trees.values():
tree._parent = res tree._parent = res
return res return res
......
...@@ -30,7 +30,8 @@ from typing import Dict, List, Tuple ...@@ -30,7 +30,8 @@ from typing import Dict, List, Tuple
import numpy as np import numpy as np
from . import FileTree from fsl.utils.deprecated import deprecated
from . import FileTree
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
...@@ -44,11 +45,11 @@ class FileTreeQuery(object): ...@@ -44,11 +45,11 @@ class FileTreeQuery(object):
by a :class:`.FileTree`, and identifies all file types (a.k.a. *templates* by a :class:`.FileTree`, and identifies all file types (a.k.a. *templates*
or *short names*) that are present, and the values of variables within each or *short names*) that are present, and the values of variables within each
short name that are present. The :meth:`query` method can be used to short name that are present. The :meth:`query` method can be used to
retrieve files which match a specific short name, and variable values. retrieve files which match a specific template, and variable values.
The :meth:`query` method returns a multi-dimensional ``numpy.array`` The :meth:`query` method returns a multi-dimensional ``numpy.array``
which contains :class:`Match` objects, where each dimension which contains :class:`Match` objects, where each dimension
represents one variable for the short name in question. represents one variable for the template in question.
Example usage:: Example usage::
...@@ -71,15 +72,13 @@ class FileTreeQuery(object): ...@@ -71,15 +72,13 @@ class FileTreeQuery(object):
'session': [None]} 'session': [None]}
>>> query.query('anat_image', participant='01') >>> query.query('anat_image', participant='01')
array([[[[[[[Match(./my_bids_data/sub-01/anat/sub-01_T1w.nii.gz)], [Match(./my_bids_data/sub-01/anat/sub-01_T1w.nii.gz),
[nan], Match(./my_bids_data/sub-01/anat/sub-01_T2w.nii.gz)]
[nan],
[nan]]]],
Matches for templates contained within sub-trees are referred to by
[[[[Match(./my_bids_data/sub-01/anat/sub-01_T2w.nii.gz)], constructing a hierarchical path from the sub-tree template name(s),
[nan], and the template name - see the :meth:`Match.full_name` method.
[nan],
[nan]]]]]]], dtype=object)
""" """
...@@ -93,115 +92,132 @@ class FileTreeQuery(object): ...@@ -93,115 +92,132 @@ class FileTreeQuery(object):
# Find all files present in the directory # Find all files present in the directory
# (as Match objects), and find all variables, # (as Match objects), and find all variables,
# plus their values, and all short names, # plus their values, and all templates,
# that are present in the directory. # that are present in the directory.
matches = scan(tree) matches = scan(tree)
allvars, shortnamevars = allVariables(tree, matches) allvars, templatevars = allVariables(tree, matches)
# Now we are going to build a series of ND # Now we are going to build a series of ND
# arrays to store Match objects. We create # arrays to store Match objects. We create
# one array for each short name. Each axis # one array for each template. Each axis
# in an array corresponds to a variable # in an array corresponds to a variable
# present in files of that short name type, # present in files of that template type,
# and each position along an axis corresponds # and each position along an axis corresponds
# to one value of that variable. # to one value of that variable.
# #
# These arrays will be used to store and # These arrays will be used to store and
# retrieve Match objects - given a short # retrieve Match objects - given a template
# name and a set of variable values, we # and a set of variable values, we can
# can quickly find the corresponding Match # quickly find the corresponding Match
# object (or objects). # object (or objects).
# matcharrays contains {shortname : ndarray} # matcharrays contains {template : ndarray}
# mappings, and varidxs contains # mappings, and varidxs contains
# {shortname : {varvalue : index}} mappings # {template : {varvalue : index}} mappings
matcharrays = {} matcharrays = {}
varidxs = {} varidxs = {}
for shortname in shortnamevars.keys(): for template, tvars in templatevars.items():
snvars = shortnamevars[shortname] tvarlens = [len(allvars[v]) for v in tvars]
snvarlens = [len(allvars[v]) for v in snvars]
# An ND array for this short # An ND array for this short
# name. Each element is a # name. Each element is a
# Match object, or nan. # Match object, or nan.
matcharray = np.zeros(snvarlens, dtype=np.object) matcharray = np.zeros(tvarlens, dtype=np.object)
matcharray[:] = np.nan matcharray[:] = np.nan
# indices into the match array # indices into the match array
# for each variable value # for each variable value
snvaridxs = {} tvaridxs = {}
for v in snvars: for v in tvars:
snvaridxs[v] = {n : i for i, n in enumerate(allvars[v])} tvaridxs[v] = {n : i for i, n in enumerate(allvars[v])}
matcharrays[shortname] = matcharray matcharrays[template] = matcharray
varidxs[ shortname] = snvaridxs varidxs[ template] = tvaridxs
# Populate the match arrays # Populate the match arrays
for match in matches: for match in matches:
snvars = shortnamevars[match.short_name] tvars = templatevars[match.full_name]
snvaridxs = varidxs[ match.short_name] tvaridxs = varidxs[ match.full_name]
snarr = matcharrays[ match.short_name] tarr = matcharrays[ match.full_name]
idx = [] idx = []
for var in snvars: for var in tvars:
val = match.variables[var] val = match.variables[var]
idx.append(snvaridxs[var][val]) idx.append(tvaridxs[var][val])
snarr[tuple(idx)] = match tarr[tuple(idx)] = match
self.__tree = tree
self.__allvars = allvars self.__allvars = allvars
self.__shortnamevars = shortnamevars self.__templatevars = templatevars
self.__matches = matches self.__matches = matches
self.__matcharrays = matcharrays self.__matcharrays = matcharrays
self.__varidxs = varidxs self.__varidxs = varidxs
def axes(self, short_name) -> List[str]: def axes(self, template) -> List[str]:
"""Returns a list containing the names of variables present in files """Returns a list containing the names of variables present in files
of the given ``short_name`` type, in the same order as the axes of of the given ``template`` type, in the same order as the axes of
:class:`Match` arrays that are returned by the :meth:`query` method. :class:`Match` arrays that are returned by the :meth:`query` method.
""" """
return self.__shortnamevars[short_name] return self.__templatevars[template]
def variables(self, short_name=None) -> Dict[str, List]: def variables(self, template=None) -> Dict[str, List]:
"""Return a dict of ``{variable : [values]}`` mappings. """Return a dict of ``{variable : [values]}`` mappings.
This dict describes all variables and their possible values in This dict describes all variables and their possible values in
the tree. the tree.
If a ``short_name`` is specified, only variables which are present in If a ``template`` is specified, only variables which are present in
files of that ``short_name`` type are returned. files of that ``template`` type are returned.
""" """
if short_name is None: if template is None:
return {var : list(vals) for var, vals in self.__allvars.items()} return {var : list(vals) for var, vals in self.__allvars.items()}
else: else:
varnames = self.__shortnamevars[short_name] varnames = self.__templatevars[template]
return {var : list(self.__allvars[var]) for var in varnames} return {var : list(self.__allvars[var]) for var in varnames}
@property @property
def tree(self):
    """Return the :class:`.FileTree` associated with this
    ``FileTreeQuery``.
    """
    associated = self.__tree
    return associated
@property
def templates(self) -> List[str]:
    """Return the templates of the ``FileTree`` that are present in the
    directory, as a new list.
    """
    # Iterating the mapping yields its keys, i.e. the template names.
    known = self.__templatevars
    return list(known)
@property
@deprecated('2.6.0', '3.0.0', 'Use templates instead')
def short_names(self) -> List[str]: def short_names(self) -> List[str]:
"""Returns a list containing all short names of the ``FileTree`` that """Returns a list containing all templates of the ``FileTree`` that
are present in the directory. are present in the directory.
""" """
return list(self.__shortnamevars.keys()) return self.templates
def query(self, short_name, asarray=False, **variables): def query(self, template, asarray=False, **variables):
"""Search for files of the given ``short_name``, which match """Search for files of the given ``template``, which match
the specified ``variables``. All hits are returned for variables the specified ``variables``. All hits are returned for variables
that are unspecified. that are unspecified.
:arg short_name: Short name of files to search for. :arg template: Template of files to search for.
:arg asarray: If ``True``, the relevant :class:`Match` objects are :arg asarray: If ``True``, the relevant :class:`Match` objects are
returned in an ND ``numpy.array`` where each returned in an ND ``numpy.array`` where each
dimension corresponds to a variable for the dimension corresponds to a variable for the
``short_name`` in question (as returned by ``template`` in question (as returned by
:meth:`axes`). Otherwise (the default), they are :meth:`axes`). Otherwise (the default), they are
returned in a list. returned in a list.
All other arguments are assumed to be ``variable=value`` pairs, All other arguments are assumed to be ``variable=value`` pairs,
used to restrict which matches are returned. All values are returned used to restrict which matches are returned. All values are returned
...@@ -213,9 +229,9 @@ class FileTreeQuery(object): ...@@ -213,9 +229,9 @@ class FileTreeQuery(object):
""" """
varnames = list(variables.keys()) varnames = list(variables.keys())
allvarnames = self.__shortnamevars[short_name] allvarnames = self.__templatevars[template]
varidxs = self.__varidxs[ short_name] varidxs = self.__varidxs[ template]
matcharray = self.__matcharrays[short_name] matcharray = self.__matcharrays[ template]
slc = [] slc = []
for var in allvarnames: for var in allvarnames:
...@@ -244,16 +260,18 @@ class Match(object): ...@@ -244,16 +260,18 @@ class Match(object):
""" """
def __init__(self, filename, short_name, variables): def __init__(self, filename, template, tree, variables):
"""Create a ``Match`` object. All arguments are added as attributes. """Create a ``Match`` object. All arguments are added as attributes.
:arg filename: name of existing file :arg filename: name of existing file
:arg short_name: template identifier :arg template: template identifier
:arg tree: :class:`.FileTree` which contains this ``Match``
:arg variables: Dictionary of ``{variable : value}`` mappings :arg variables: Dictionary of ``{variable : value}`` mappings
containing all variables present in the file name. containing all variables present in the file name.
""" """
self.__filename = filename self.__filename = filename
self.__short_name = short_name self.__template = template
self.__tree = tree
self.__variables = dict(variables) self.__variables = dict(variables)
...@@ -263,8 +281,44 @@ class Match(object): ...@@ -263,8 +281,44 @@ class Match(object):
@property @property
@deprecated('2.6.0', '3.0.0', 'Use template instead')
def short_name(self): def short_name(self):
return self.__short_name return self.template
@property
def template(self):
    """Return the template identifier stored on this ``Match``."""
    identifier = self.__template
    return identifier
@property
def full_name(self):
    """Return the hierarchical name of the matched template.

    The name is the ``'/'``-separated names of every ``FileTree`` from
    just below the root tree down to the tree of this ``Match``,
    followed by ``template``. The root tree itself is not included.

    It allows one to unambiguously identify the location of a ``Match``
    in a ``FileTree`` hierarchy, where the same ``short_name`` may be
    used in different sub-trees.
    """
    # Climb from this match's tree towards the root. Only the root has
    # no parent, so stopping there leaves the root out of the path.
    lineage = []
    node = self.tree
    while node.parent is not None:
        lineage.append(node.name)
        node = node.parent

    # Names were gathered bottom-up; the path reads top-down.
    lineage.reverse()
    lineage.append(self.template)
    return '/'.join(lineage)
@property
def tree(self):
    """Return the tree object stored on this ``Match``."""
    owner = self.__tree
    return owner
@property @property
...@@ -275,7 +329,8 @@ class Match(object): ...@@ -275,7 +329,8 @@ class Match(object):
def __eq__(self, other): def __eq__(self, other):
return (isinstance(other, Match) and return (isinstance(other, Match) and
self.filename == other.filename and self.filename == other.filename and
self.short_name == other.short_name and self.template == other.template and
self.tree is other.tree and
self.variables == other.variables) self.variables == other.variables)
...@@ -289,7 +344,7 @@ class Match(object): ...@@ -289,7 +344,7 @@ class Match(object):
def __repr__(self): def __repr__(self):
"""Returns a string representation of this ``Match``. """ """Returns a string representation of this ``Match``. """
return 'Match({})'.format(self.filename) return 'Match({}: {})'.format(self.full_name, self.filename)
def __str__(self): def __str__(self):
...@@ -301,11 +356,13 @@ def scan(tree : FileTree) -> List[Match]: ...@@ -301,11 +356,13 @@ def scan(tree : FileTree) -> List[Match]:
"""Scans the directory of the given ``FileTree`` to find all files which """Scans the directory of the given ``FileTree`` to find all files which
match a tree template. match a tree template.
:return: list of :class:`Match` objects :arg tree: :class:`.FileTree` to scan
:returns: list of :class:`Match` objects
""" """
matches = [] matches = []
for template in tree.templates: for template in tree.templates:
for filename in tree.get_all(template, glob_vars='all'): for filename in tree.get_all(template, glob_vars='all'):
if not op.isfile(filename): if not op.isfile(filename):
...@@ -313,7 +370,7 @@ def scan(tree : FileTree) -> List[Match]: ...@@ -313,7 +370,7 @@ def scan(tree : FileTree) -> List[Match]:
variables = dict(tree.extract_variables(template, filename)) variables = dict(tree.extract_variables(template, filename))
matches.append(Match(filename, template, variables)) matches.append(Match(filename, template, tree, variables))
for tree_name, sub_tree in tree.sub_trees.items(): for tree_name, sub_tree in tree.sub_trees.items():
matches.extend(scan(sub_tree)) matches.extend(scan(sub_tree))
...@@ -336,26 +393,25 @@ def allVariables( ...@@ -336,26 +393,25 @@ def allVariables(
variables and their possible values present in the given list variables and their possible values present in the given list
of ``Match`` objects. of ``Match`` objects.
- A dict of ``{ short_name : [variables] }`` mappings, - A dict of ``{ full_name : [variables] }`` mappings,
containing the variables which are relevant to each short containing the variables which are relevant to each template.
name.
""" """
allvars = collections.defaultdict(set) allvars = collections.defaultdict(set)
allshortnames = collections.defaultdict(set) alltemplates = collections.defaultdict(set)
for m in matches: for m in matches:
for var, val in m.variables.items(): for var, val in m.variables.items():
allvars[ var] .add(val) allvars[ var] .add(val)
allshortnames[m.short_name].add(var) alltemplates[m.full_name].add(var)
# allow us to compare None with strings # allow us to compare None with strings
def key(v): def key(v):
if v is None: return '' if v is None: return ''
else: return v else: return v
allvars = {var : list(sorted(vals, key=key)) allvars = {var : list(sorted(vals, key=key))
for var, vals in allvars.items()} for var, vals in allvars.items()}
allshortnames = {sn : list(sorted(vars)) alltemplates = {sn : list(sorted(vars))
for sn, vars in allshortnames.items()} for sn, vars in alltemplates.items()}
return allvars, allshortnames return allvars, alltemplates
...@@ -52,7 +52,7 @@ def _test_data(): ...@@ -52,7 +52,7 @@ def _test_data():
yield yield
def _expected_matches(short_name, **kwargs): def _expected_matches(template, tree, **kwargs):
matches = [] matches = []
subjs = kwargs.get('participant', _subjs) subjs = kwargs.get('participant', _subjs)
...@@ -64,18 +64,20 @@ def _expected_matches(short_name, **kwargs): ...@@ -64,18 +64,20 @@ def _expected_matches(short_