Skip to content
Snippets Groups Projects
Commit 689f5fc3 authored by Paul McCarthy's avatar Paul McCarthy :mountain_bicyclist:
Browse files

ENH: Fleshed out FileTreeQuery implementation. Need to write some tests.

parent 7db08c9c
No related branches found
No related tags found
No related merge requests found
...@@ -5,7 +5,20 @@ ...@@ -5,7 +5,20 @@
# Author: Paul McCarthy <pauldmccarthy@gmail.com> # Author: Paul McCarthy <pauldmccarthy@gmail.com>
# Author: Michiel Cottaar <michiel.cottaar@ndcn.ox.ac.uk> # Author: Michiel Cottaar <michiel.cottaar@ndcn.ox.ac.uk>
# #
""" """This module contains the :class:`FileTreeQuery` class, which can be used to
search for files in a directory described by a `.FileTree`. A
``FileTreeQuery`` object returns :class:`Match` objects which each represent a
file that is described by the ``FileTree``, and which is present in the
directory.
The following utility functions, used by the ``FileTreeQuery`` class, are also
defined in this module:
.. autosummary::
:nosignatures:
scan
allVariables
""" """
...@@ -13,107 +26,244 @@ import logging ...@@ -13,107 +26,244 @@ import logging
import collections import collections
import os.path as op import os.path as op
from typing import Dict, Set, List from typing import Dict, List, Tuple
import numpy as np
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
class FileTreeQuery(object): class FileTreeQuery(object):
"""The ``FileTreeQuery`` class uses a :class:`.FileTree` to search
a directory for files which match a specific query.
"""
def __init__(self, tree): def __init__(self, tree):
"""Create a ``FileTreeQuery``.
:arg tree: The ``FileTree`` object
""" """
# Find all files present in the directory
# (as Match objects), and find all variables,
# plus their values, and all short names,
# that are present in the directory.
matches = scan(tree)
allvars, shortnamevars = allVariables(tree, matches)
# Now we are going to build a series of ND
# arrays to store Match objects. We create
# one array for each short name. Each axis
# in an array corresponds to a variable
# present in files of that short name type,
# and each position along an axis corresponds
# to one value of that variable.
#
# These arrays will be used to store and
# retrieve Match objects - given a short
# name and a set of variable values, we
# can quickly find the corresponding Match
# object (or objects).
# matcharrays contains {shortname : ndarray}
# mappings, and varidxs contains
# {shortname : {variable : {varvalue : index}}} mappings
matcharrays = {}
varidxs = {}
for shortname in shortnamevars.keys():
snvars = shortnamevars[shortname]
snvarlens = [len(allvars[v]) for v in snvars]
# An ND array for this short
# name. Each element is a
# Match object, or nan.
matcharray = np.zeros(snvarlens, dtype=np.object)
matcharray[:] = np.nan
# indices into the match array
# for each variable value
snvaridxs = {}
for v in snvars:
snvaridxs[v] = {n : i for i, n in enumerate(allvars[v])}
matcharrays[shortname] = matcharray
varidxs[ shortname] = snvaridxs
# Populate the match arrays
for match in matches:
snvars = shortnamevars[match.short_name]
snvaridxs = varidxs[ match.short_name]
snarr = matcharrays[ match.short_name]
idx = []
for var in snvars:
# TODO handle optional variables. Need
# an extra element on each axis which
# represents a missing value
val = match.variables[var]
idx.append(snvaridxs[var][val])
snarr[tuple(idx)] = match
self.__allvars = allvars
self.__shortnamevars = shortnamevars
self.__matches = matches
self.__matcharrays = matcharrays
self.__varidxs = varidxs
def axes(self, short_name) -> List[str]:
"""Returns a list containing the names of variables present in files
of the given ``short_name`` type, in the same order as the axes of
:class:`Match` arrays that are returned by the :meth:`query` method.
""" """
self.__tree = tree return self.__shortnamevars[short_name]
self.__matches = Match.scan(tree)
self.__variables = Match.allVariables(tree, self.__matches)
def variables(self) -> Dict[str, Set]: def variables(self, short_name=None) -> Dict[str, List]:
"""Return a dict of ``{variable : [values]}`` mappings. """Return a dict of ``{variable : [values]}`` mappings.
This dict describes all variables and their possible values in This dict describes all variables and their possible values in
the tree. the tree.
If a ``short_name`` is specified, only variables which are present in
files of that ``short_name`` type are returned.
""" """
return dict(self.__variables) if short_name is None:
return dict(self.__allvars)
else:
varnames = self.__shortnamevars[short_name]
return {var : self.__allvars[var] for var in varnames}
def query(self, **variables) -> List[str]: @property
"""Return all ``Match`` objects which match the given set of def short_names(self) -> List[str]:
``variable=value`` arguments. """Returns a list containing all short names of the ``FileTree`` that
are present in the directory.
""" """
hits = [] return list(self.__shortnamevars.keys())
for m in self.__matches:
if all([m.variables.get(n, None) == v
for n, v in variables.items()]):
hits.append(m)
return hits def query(self, short_name, **variables):
"""Search for files of the given ``short_name``, which match
the specified ``variables``.
:arg short_name: Short name of files to search for.
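class Match(object): All other arguments are interpreted as ``variable=value`` pairs. A variable which is omitted, or given the value ``'*'``, matches all values of that variable.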
""" """
Filename matching a template in the file tree
varnames = list(variables.keys())
allvarnames = self.__shortnamevars[short_name]
varidxs = self.__varidxs[ short_name]
matcharray = self.__matcharrays[short_name]
slc = []
for var in allvarnames:
if var in varnames: val = variables[var]
else: val = '*'
# We're using np.newaxis to retain
# the full dimensionality of the
# array, so that the axis labels
# returned by the axes() method
# are valid.
if val == '*': slc.append(slice(None))
else: slc.extend([np.newaxis, varidxs[var][val]])
return matcharray[tuple(slc)]
def scan(tree):
"""Scans the directory of the given ``FileTree`` to find all files which
match a tree template.
:return: list of :class:`Match` objects
""" """
@staticmethod matches = []
def allVariables(tree, matches) -> Dict[str, Set]: for template in tree.templates:
"""Returns a dict of ``{ variable : [values] }`` mappings for filename in tree.get_all(template, glob_vars='all'):
containing all variables and their possible values present
in the given list of ``Match`` objects.
"""
allvars = collections.defaultdict(set)
for m in matches: if not op.isfile(filename):
for var, val in m.variables.items(): continue
allvars[var].add(val)
return allvars
variables = tree.extract_variables(template, filename)
variables = {var : val
for var, val in variables.items()
if val is not None}
@staticmethod matches.append(Match(filename, template, variables))
def scan(tree):
"""
Scans the disk to find any matches
:return: list of :class:`Match` objects for tree_name, sub_tree in tree.sub_trees:
""" matches.extend(Match.scan(sub_tree))
return matches
matches = []
for template in tree.templates:
for filename in tree.get_all(template, glob_vars='all'):
if not op.isfile(filename): def allVariables(tree, matches) -> Tuple[Dict[str, List], Dict[str, List]]:
continue """Identifies the ``FileTree`` variables which are actually represented
in files in the directory.
variables = tree.extract_variables(template, filename) :arg tree: The ``FileTree`` object
variables = {var : val :arg matches: list of ``Match`` objects (e.g. as returned by :func:`scan`)
for var, val in variables.items()
if val is not None}
matches.append(Match(filename, template, variables)) :returns: a tuple containing two dicts:
for tree_name, sub_tree in tree.sub_trees: - A dict of ``{ variable : [values] }`` mappings containing all
matches.extend(Match.scan(sub_tree)) variables and their possible values present in the given list
of ``Match`` objects.
return matches - A dict of ``{ short_name : [variables] }`` mappings,
containing the variables which are relevant to each short
name.
"""
allvars = collections.defaultdict(set)
allshortnames = collections.defaultdict(set)
for m in matches:
for var, val in m.variables.items():
allvars[ var] .add(val)
allshortnames[m.short_name].add(var)
allvars = {var : list(sorted(vals))
for var, vals in allvars.items()}
allshortnames = {sn : list(sorted(vars))
for sn, vars in allshortnames.items()}
return allvars, allshortnames
class Match(object):
"""A ``Match`` object represents a file with a name matching a template in
a ``FileTree``.
"""
def __init__(self, filename, short_name, variables): def __init__(self, filename, short_name, variables):
""" """Create a ``Match`` object. All arguments are added as attributes.
Defines a new match
:param filename: name of existing file :arg filename: name of existing file
:param short_name: template identifier :arg short_name: template identifier
:param variables: variable values :arg variables: Dictionary of ``{variable : value}`` mappings
containing all variables present in the file name.
""" """
self.filename = filename self.filename = filename
self.short_name = short_name self.short_name = short_name
self.variables = dict(variables) self.variables = dict(variables)
def __repr__(self): def __repr__(self):
return self.filename """Returns a string representation of this ``Match``. """
return 'Match({})'.format(self.filename)
def __str__(self): def __str__(self):
"""Returns a string representation of this ``Match``. """
return repr(self) return repr(self)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment