Skip to content
Snippets Groups Projects
Commit 287207e5 authored by Paul McCarthy's avatar Paul McCarthy :mountain_bicyclist:
Browse files

Merge branch 'rf/filetree_match' into 'master'

Rf/filetree match

See merge request fsl/fslpy!153
parents 8cd0f064 7c3ca51d
No related branches found
No related tags found
No related merge requests found
Pipeline #4121 passed
......@@ -12,6 +12,8 @@ Changed
* The :class:`.Cache` class has a new ``lru`` option, allowing it to be used
as a least-recently-used cache.
* The :mod:`.filetree` module has been refactored to make it easier for the
:mod:`.query` module to work with file tree hierarchies.
2.5.0 (Tuesday 6th August 2019)
......
......@@ -25,12 +25,14 @@ class FileTree(object):
- ``variables``: dictionary mapping variables in the templates to specific values (variables set to None are explicitly unset)
- ``sub_trees``: filename trees describing specific sub-directories
- ``parent``: parent FileTree, of which this sub-tree is a sub-directory
- ``name``: descriptive name of the tree
"""
def __init__(self,
templates: Dict[str, str],
variables: Dict[str, Any],
sub_trees: Dict[str, "FileTree"]=None,
parent: Optional["FileTree"]=None):
sub_trees: Dict[str, "FileTree"] = None,
parent: Optional["FileTree"] = None,
name: str = None):
"""
Creates a new filename tree.
"""
......@@ -40,6 +42,7 @@ class FileTree(object):
sub_trees = {}
self.sub_trees = sub_trees
self._parent = parent
self._name = name
@property
def parent(self, ):
......@@ -48,6 +51,15 @@ class FileTree(object):
"""
return self._parent
@property
def name(self, ):
    """
    Descriptive name given to this ``FileTree``.

    The value is whatever is stored in ``_name``; it is ``None`` when
    the tree has not been given a name.
    """
    return self._name
@property
def all_variables(self, ):
"""
......@@ -346,6 +358,7 @@ class FileTree(object):
filename = tree_name + '.tree'
else:
filename = parse.search_tree(tree_name)
tree_name = op.splitext(op.basename(filename))[0]
filename = Path(filename)
templates = {}
......@@ -384,6 +397,7 @@ class FileTree(object):
raise ValueError("Name of sub_tree {short_name} used multiple times in {tree_name}.tree".format(**locals()))
sub_trees[short_name] = sub_tree
sub_tree._name = short_name
elif '=' in line:
key, value = line.split('=')
if len(key.split()) != 1:
......@@ -413,7 +427,7 @@ class FileTree(object):
templates[short_name] = str(current_filename)
file_variables.update(variables)
res = get_registered(tree_name, cls)(templates, variables=file_variables, sub_trees=sub_trees)
res = get_registered(tree_name, cls)(templates, variables=file_variables, sub_trees=sub_trees, name=tree_name)
for tree in sub_trees.values():
tree._parent = res
return res
......
......@@ -30,7 +30,8 @@ from typing import Dict, List, Tuple
import numpy as np
from . import FileTree
from fsl.utils.deprecated import deprecated
from . import FileTree
log = logging.getLogger(__name__)
......@@ -44,11 +45,11 @@ class FileTreeQuery(object):
by a :class:`.FileTree`, and identifies all file types (a.k.a. *templates*
or *short names*) that are present, and the values of variables within each
short name that are present. The :meth:`query` method can be used to
retrieve files which match a specific short name, and variable values.
retrieve files which match a specific template, and variable values.
The :meth:`query` method returns a multi-dimensional ``numpy.array``
which contains :class:`Match` objects, where each dimension
represents one variable for the short name in question.
represents one variable for the template in question.
Example usage::
......@@ -71,15 +72,13 @@ class FileTreeQuery(object):
'session': [None]}
>>> query.query('anat_image', participant='01')
array([[[[[[[Match(./my_bids_data/sub-01/anat/sub-01_T1w.nii.gz)],
[nan],
[nan],
[nan]]]],
[[[[Match(./my_bids_data/sub-01/anat/sub-01_T2w.nii.gz)],
[nan],
[nan],
[nan]]]]]]], dtype=object)
[Match(./my_bids_data/sub-01/anat/sub-01_T1w.nii.gz),
Match(./my_bids_data/sub-01/anat/sub-01_T2w.nii.gz)]
Matches for templates contained within sub-trees are referred to by
constructing a hierarchical path from the sub-tree template name(s),
and the template name - see the :attr:`Match.full_name` property.
"""
......@@ -93,115 +92,132 @@ class FileTreeQuery(object):
# Find all files present in the directory
# (as Match objects), and find all variables,
# plus their values, and all short names,
# plus their values, and all templates,
# that are present in the directory.
matches = scan(tree)
allvars, shortnamevars = allVariables(tree, matches)
matches = scan(tree)
allvars, templatevars = allVariables(tree, matches)
# Now we are going to build a series of ND
# arrays to store Match objects. We create
# one array for each short name. Each axis
# one array for each template. Each axis
# in an array corresponds to a variable
# present in files of that short name type,
# present in files of that template type,
# and each position along an axis corresponds
# to one value of that variable.
#
# These arrays will be used to store and
# retrieve Match objects - given a short
# name and a set of variable values, we
# can quickly find the corresponding Match
# retrieve Match objects - given a template
# and a set of variable values, we can
# quickly find the corresponding Match
# object (or objects).
# matcharrays contains {shortname : ndarray}
# matcharrays contains {template : ndarray}
# mappings, and varidxs contains
# {shortname : {varvalue : index}} mappings
# {template : {varvalue : index}} mappings
matcharrays = {}
varidxs = {}
for shortname in shortnamevars.keys():
for template, tvars in templatevars.items():
snvars = shortnamevars[shortname]
snvarlens = [len(allvars[v]) for v in snvars]
tvarlens = [len(allvars[v]) for v in tvars]
# An ND array for this short
# name. Each element is a
# Match object, or nan.
matcharray = np.zeros(snvarlens, dtype=np.object)
matcharray = np.zeros(tvarlens, dtype=np.object)
matcharray[:] = np.nan
# indices into the match array
# for each variable value
snvaridxs = {}
for v in snvars:
snvaridxs[v] = {n : i for i, n in enumerate(allvars[v])}
tvaridxs = {}
for v in tvars:
tvaridxs[v] = {n : i for i, n in enumerate(allvars[v])}
matcharrays[shortname] = matcharray
varidxs[ shortname] = snvaridxs
matcharrays[template] = matcharray
varidxs[ template] = tvaridxs
# Populate the match arrays
for match in matches:
snvars = shortnamevars[match.short_name]
snvaridxs = varidxs[ match.short_name]
snarr = matcharrays[ match.short_name]
idx = []
for var in snvars:
tvars = templatevars[match.full_name]
tvaridxs = varidxs[ match.full_name]
tarr = matcharrays[ match.full_name]
idx = []
for var in tvars:
val = match.variables[var]
idx.append(snvaridxs[var][val])
idx.append(tvaridxs[var][val])
snarr[tuple(idx)] = match
tarr[tuple(idx)] = match
self.__tree = tree
self.__allvars = allvars
self.__shortnamevars = shortnamevars
self.__templatevars = templatevars
self.__matches = matches
self.__matcharrays = matcharrays
self.__varidxs = varidxs
def axes(self, short_name) -> List[str]:
def axes(self, template) -> List[str]:
"""Returns a list containing the names of variables present in files
of the given ``short_name`` type, in the same order of the axes of
of the given ``template`` type, in the same order of the axes of
:class:`Match` arrays that are returned by the :meth:`query` method.
"""
return self.__shortnamevars[short_name]
return self.__templatevars[template]
def variables(self, short_name=None) -> Dict[str, List]:
def variables(self, template=None) -> Dict[str, List]:
"""Return a dict of ``{variable : [values]}`` mappings.
This dict describes all variables and their possible values in
the tree.
If a ``short_name`` is specified, only variables which are present in
files of that ``short_name`` type are returned.
If a ``template`` is specified, only variables which are present in
files of that ``template`` type are returned.
"""
if short_name is None:
if template is None:
return {var : list(vals) for var, vals in self.__allvars.items()}
else:
varnames = self.__shortnamevars[short_name]
varnames = self.__templatevars[template]
return {var : list(self.__allvars[var]) for var in varnames}
@property
def tree(self):
    """The :class:`.FileTree` that this ``FileTreeQuery`` was
    built from.
    """
    return self.__tree
@property
def templates(self) -> List[str]:
    """A new list of every template of the ``FileTree`` for which
    at least one file was found in the directory.
    """
    # Iterating the dict yields its keys, i.e. the template names
    return list(self.__templatevars)
@property
@deprecated('2.6.0', '3.0.0', 'Use templates instead')
def short_names(self) -> List[str]:
"""Returns a list containing all short names of the ``FileTree`` that
"""Returns a list containing all templates of the ``FileTree`` that
are present in the directory.
"""
return list(self.__shortnamevars.keys())
return self.templates
def query(self, short_name, asarray=False, **variables):
"""Search for files of the given ``short_name``, which match
def query(self, template, asarray=False, **variables):
"""Search for files of the given ``template``, which match
the specified ``variables``. All hits are returned for variables
that are unspecified.
:arg short_name: Short name of files to search for.
:arg template: Template of files to search for.
:arg asarray: If ``True``, the relevant :class:`Match` objects are
returned in an ND ``numpy.array`` where each
dimension corresponds to a variable for the
``short_name`` in question (as returned by
:meth:`axes`). Otherwise (the default), they are
returned in a list.
:arg asarray: If ``True``, the relevant :class:`Match` objects are
returned in an ND ``numpy.array`` where each
dimension corresponds to a variable for the
``template`` in question (as returned by
:meth:`axes`). Otherwise (the default), they are
returned in a list.
All other arguments are assumed to be ``variable=value`` pairs,
used to restrict which matches are returned. All values are returned
......@@ -213,9 +229,9 @@ class FileTreeQuery(object):
"""
varnames = list(variables.keys())
allvarnames = self.__shortnamevars[short_name]
varidxs = self.__varidxs[ short_name]
matcharray = self.__matcharrays[short_name]
allvarnames = self.__templatevars[template]
varidxs = self.__varidxs[ template]
matcharray = self.__matcharrays[ template]
slc = []
for var in allvarnames:
......@@ -244,16 +260,18 @@ class Match(object):
"""
def __init__(self, filename, short_name, variables):
def __init__(self, filename, template, tree, variables):
"""Create a ``Match`` object. All arguments are added as attributes.
:arg filename: name of existing file
:arg short_name: template identifier
:arg template: template identifier
:arg tree: :class:`.FileTree` which contains this ``Match``
:arg variables: Dictionary of ``{variable : value}`` mappings
containing all variables present in the file name.
"""
self.__filename = filename
self.__short_name = short_name
self.__template = template
self.__tree = tree
self.__variables = dict(variables)
......@@ -263,8 +281,44 @@ class Match(object):
@property
@deprecated('2.6.0', '3.0.0', 'Use template instead')
def short_name(self):
return self.__short_name
return self.template
@property
def template(self):
    """Identifier of the template that this ``Match`` was matched
    against, as passed to the constructor.
    """
    return self.__template
@property
def full_name(self):
    """Hierarchical name of this ``Match``.

    The name is built by joining, with ``'/'``, the ``name`` of every
    ``FileTree`` on the path from the root tree (excluded) down to the
    tree which contains this ``Match``, followed by the ``template``.
    A match found directly in the root tree therefore has a
    ``full_name`` equal to its ``template``, while a ``surface``
    template in a ``surfdir`` sub-tree is named ``surfdir/surface``.

    This unambiguously identifies a ``Match`` within a ``FileTree``
    hierarchy, where the same template name may be used in different
    sub-trees.
    """

    # Climb from the owning tree up to the
    # root, recording each tree on the way.
    node = self.tree
    lineage = [node]
    while node.parent is not None:
        node = node.parent
        lineage.append(node)

    # The root tree is the last entry - leave it
    # out, and order the rest outermost-first.
    parts = [t.name for t in reversed(lineage[:-1])]
    parts.append(self.template)

    return '/'.join(parts)
@property
def tree(self):
    """The :class:`.FileTree` which contains this ``Match``, as
    passed to the constructor.
    """
    return self.__tree
@property
......@@ -275,7 +329,8 @@ class Match(object):
def __eq__(self, other):
return (isinstance(other, Match) and
self.filename == other.filename and
self.short_name == other.short_name and
self.template == other.template and
self.tree is other.tree and
self.variables == other.variables)
......@@ -289,7 +344,7 @@ class Match(object):
def __repr__(self):
"""Returns a string representation of this ``Match``. """
return 'Match({})'.format(self.filename)
return 'Match({}: {})'.format(self.full_name, self.filename)
def __str__(self):
......@@ -301,11 +356,13 @@ def scan(tree : FileTree) -> List[Match]:
"""Scans the directory of the given ``FileTree`` to find all files which
match a tree template.
:return: list of :class:`Match` objects
:arg tree: :class:`.FileTree` to scan
:returns: list of :class:`Match` objects
"""
matches = []
for template in tree.templates:
for filename in tree.get_all(template, glob_vars='all'):
if not op.isfile(filename):
......@@ -313,7 +370,7 @@ def scan(tree : FileTree) -> List[Match]:
variables = dict(tree.extract_variables(template, filename))
matches.append(Match(filename, template, variables))
matches.append(Match(filename, template, tree, variables))
for tree_name, sub_tree in tree.sub_trees.items():
matches.extend(scan(sub_tree))
......@@ -336,26 +393,25 @@ def allVariables(
variables and their possible values present in the given list
of ``Match`` objects.
- A dict of ``{ short_name : [variables] }`` mappings,
containing the variables which are relevant to each short
name.
- A dict of ``{ full_name : [variables] }`` mappings,
containing the variables which are relevant to each template.
"""
allvars = collections.defaultdict(set)
allshortnames = collections.defaultdict(set)
allvars = collections.defaultdict(set)
alltemplates = collections.defaultdict(set)
for m in matches:
for var, val in m.variables.items():
allvars[ var] .add(val)
allshortnames[m.short_name].add(var)
allvars[ var] .add(val)
alltemplates[m.full_name].add(var)
# allow us to compare None with strings
def key(v):
if v is None: return ''
else: return v
allvars = {var : list(sorted(vals, key=key))
for var, vals in allvars.items()}
allshortnames = {sn : list(sorted(vars))
for sn, vars in allshortnames.items()}
allvars = {var : list(sorted(vals, key=key))
for var, vals in allvars.items()}
alltemplates = {sn : list(sorted(vars))
for sn, vars in alltemplates.items()}
return allvars, allshortnames
return allvars, alltemplates
......@@ -52,7 +52,7 @@ def _test_data():
yield
def _expected_matches(short_name, **kwargs):
def _expected_matches(template, tree, **kwargs):
matches = []
subjs = kwargs.get('participant', _subjs)
......@@ -64,18 +64,20 @@ def _expected_matches(short_name, **kwargs):
sesdir = op.join('subj-{}'.format(subj), 'ses-{}'.format(ses))
if short_name in ('T1w', 'T2w'):
f = op.join(sesdir, '{}.nii.gz'.format(short_name))
if template in ('T1w', 'T2w'):
f = op.join(sesdir, '{}.nii.gz'.format(template))
matches.append(ftquery.Match(f,
short_name,
template,
tree,
{'participant' : subj,
'session' : ses}))
elif short_name == 'surface':
elif template == 'surface':
for hemi, surf in it.product(hemis, surfs):
f = op.join(sesdir, '{}.{}.gii'.format(hemi, surf))
matches.append(ftquery.Match(f,
short_name,
template,
tree,
{'participant' : subj,
'session' : ses,
'hemi' : hemi,
......@@ -84,19 +86,19 @@ def _expected_matches(short_name, **kwargs):
return matches
def _run_and_check_query(query, short_name, asarray=False, **vars):
def _run_and_check_query(query, template, asarray=False, **vars):
gotmatches = query.query( short_name, asarray=asarray, **vars)
expmatches = _expected_matches(short_name, **{k : [v]
for k, v
in vars.items()})
gotmatches = query.query( template, asarray=asarray, **vars)
expmatches = _expected_matches(template, query.tree, **{k : [v]
for k, v
in vars.items()})
if not asarray:
assert len(gotmatches) == len(expmatches)
for got, exp in zip(sorted(gotmatches), sorted(expmatches)):
assert got == exp
else:
snvars = query.variables(short_name)
snvars = query.variables(template)
assert len(snvars) == len(gotmatches.shape)
......@@ -108,7 +110,7 @@ def _run_and_check_query(query, short_name, asarray=False, **vars):
for expmatch in expmatches:
slc = []
for var in query.axes(short_name):
for var in query.axes(template):
if var not in vars or vars[var] == '*':
vidx = snvars[var].index(expmatch.variables[var])
slc.append(vidx)
......@@ -128,7 +130,7 @@ def test_query_properties():
'participant',
'session',
'surf']
assert sorted(query.short_names) == ['T1w', 'T2w', 'surface']
assert sorted(query.templates) == ['T1w', 'T2w', 'surface']
assert query.variables('T1w') == {'participant' : ['01', '02', '03'],
'session' : ['1', '2']}
......@@ -318,7 +320,7 @@ def test_query_subtree():
tree = filetree.FileTree.read('tree1.tree', '.')
query = filetree.FileTreeQuery(tree)
assert sorted(query.short_names) == ['T1w', 'surface']
assert sorted(query.templates) == ['T1w', 'surfdir/surface']
qvars = query.variables()
assert sorted(qvars.keys()) == ['hemi', 'participant', 'surf']
......@@ -330,7 +332,7 @@ def test_query_subtree():
assert sorted(qvars.keys()) == ['participant']
assert qvars['participant'] == ['01', '02', '03']
qvars = query.variables('surface')
qvars = query.variables('surfdir/surface')
assert sorted(qvars.keys()) == ['hemi', 'participant', 'surf']
assert qvars['hemi'] == ['L', 'R']
assert qvars['participant'] == ['01', '02', '03']
......@@ -346,7 +348,7 @@ def test_query_subtree():
assert [m.filename for m in sorted(got)] == [
op.join('subj-01', 'T1w.nii.gz')]
got = query.query('surface')
got = query.query('surfdir/surface')
assert [m.filename for m in sorted(got)] == [
op.join('subj-01', 'surf', 'L.pial.gii'),
op.join('subj-01', 'surf', 'L.white.gii'),
......@@ -361,7 +363,7 @@ def test_query_subtree():
op.join('subj-03', 'surf', 'R.pial.gii'),
op.join('subj-03', 'surf', 'R.white.gii')]
got = query.query('surface', hemi='L')
got = query.query('surfdir/surface', hemi='L')
assert [m.filename for m in sorted(got)] == [
op.join('subj-01', 'surf', 'L.pial.gii'),
op.join('subj-01', 'surf', 'L.white.gii'),
......@@ -370,7 +372,7 @@ def test_query_subtree():
op.join('subj-03', 'surf', 'L.pial.gii'),
op.join('subj-03', 'surf', 'L.white.gii')]
got = query.query('surface', surf='white')
got = query.query('surfdir/surface', surf='white')
assert [m.filename for m in sorted(got)] == [
op.join('subj-01', 'surf', 'L.white.gii'),
op.join('subj-01', 'surf', 'R.white.gii'),
......@@ -394,16 +396,17 @@ def test_scan():
t1wf = op.join(sesdir, 'T1w.nii.gz')
t2wf = op.join(sesdir, 'T2w.nii.gz')
expmatches.append(ftquery.Match(t1wf, 'T1w', {'participant' : subj,
'session' : ses}))
expmatches.append(ftquery.Match(t2wf, 'T2w', {'participant' : subj,
'session' : ses}))
expmatches.append(ftquery.Match(t1wf, 'T1w', tree, {'participant' : subj,
'session' : ses}))
expmatches.append(ftquery.Match(t2wf, 'T2w', tree, {'participant' : subj,
'session' : ses}))
for hemi, surf in it.product(_hemis, _surfs):
surff = op.join(sesdir, '{}.{}.gii'.format(hemi, surf))
expmatches.append(ftquery.Match(surff,
'surface',
tree,
{'participant' : subj,
'session' : ses,
'surf' : surf,
......@@ -414,7 +417,7 @@ def test_scan():
for got, exp in zip(sorted(gotmatches), sorted(expmatches)):
assert got.filename == exp.filename
assert got.short_name == exp.short_name
assert got.template == exp.template
assert got.variables == exp.variables
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment