Skip to content
Snippets Groups Projects
Commit 689f5fc3 authored by Paul McCarthy's avatar Paul McCarthy :mountain_bicyclist:
Browse files

ENH: Fleshed out FileTreeQuery implementation. Need to write some tests.

parent 7db08c9c
No related branches found
No related tags found
No related merge requests found
......@@ -5,7 +5,20 @@
# Author: Paul McCarthy <pauldmccarthy@gmail.com>
# Author: Michiel Cottaar <michiel.cottaar@ndcn.ox.ac.uk>
#
"""
"""This module contains the :class:`FileTreeQuery` class, which can be used to
search for files in a directory described by a `.FileTree`. A
``FileTreeQuery`` object returns :class:`Match` objects which each represent a
file that is described by the ``FileTree``, and which is present in the
directory.
The following utility functions, used by the ``FileTreeQuery`` class, are also
defined in this module:
.. autosummary::
:nosignatures:
scan
allVariables
"""
......@@ -13,107 +26,244 @@ import logging
import collections
import os.path as op
from typing import Dict, Set, List
from typing import Dict, List, Tuple
import numpy as np
log = logging.getLogger(__name__)
class FileTreeQuery(object):
    """The ``FileTreeQuery`` class uses a :class:`.FileTree` to search
    a directory for files which match a specific query.

    On creation the directory is scanned once, and every file that was
    found is stored in an N-dimensional array (one array per short name).
    The :meth:`query` method then retrieves files by indexing into those
    arrays.
    """

    def __init__(self, tree):
        """Create a ``FileTreeQuery``.

        :arg tree: The ``FileTree`` object
        """

        # Find all files present in the directory
        # (as Match objects), and find all variables,
        # plus their values, and all short names,
        # that are present in the directory.
        matches = scan(tree)
        allvars, shortnamevars = allVariables(tree, matches)

        # Now we are going to build a series of ND
        # arrays to store Match objects. We create
        # one array for each short name. Each axis
        # in an array corresponds to a variable
        # present in files of that short name type,
        # and each position along an axis corresponds
        # to one value of that variable.
        #
        # These arrays will be used to store and
        # retrieve Match objects - given a short
        # name and a set of variable values, we
        # can quickly find the corresponding Match
        # object (or objects).

        # matcharrays contains {shortname : ndarray}
        # mappings, and varidxs contains
        # {shortname : {var : {varvalue : index}}}
        # mappings
        matcharrays = {}
        varidxs = {}

        for shortname, snvars in shortnamevars.items():

            snvarlens = [len(allvars[v]) for v in snvars]

            # An ND array for this short name. Each
            # element is a Match object, or nan. The
            # builtin ``object`` is used as the dtype
            # because the ``np.object`` alias has been
            # removed from NumPy; np.full also copes
            # with an empty shape, which slice
            # assignment (``arr[:] = ...``) does not.
            matcharrays[shortname] = np.full(snvarlens, np.nan, dtype=object)

            # indices into the match array
            # for each variable value
            varidxs[shortname] = {
                v: {val: i for i, val in enumerate(allvars[v])}
                for v in snvars}

        # Populate the match arrays
        for match in matches:
            snvars = shortnamevars[match.short_name]
            snvaridxs = varidxs[match.short_name]
            snarr = matcharrays[match.short_name]

            # TODO handle optional variables. Need
            # an extra element on each axis which
            # represents a missing value
            idx = tuple(snvaridxs[var][match.variables[var]]
                        for var in snvars)

            snarr[idx] = match

        self.__allvars = allvars
        self.__shortnamevars = shortnamevars
        self.__matches = matches
        self.__matcharrays = matcharrays
        self.__varidxs = varidxs

    def axes(self, short_name) -> List[str]:
        """Returns a list containing the names of variables present in files
        of the given ``short_name`` type, in the same order of the axes of
        :class:`Match` arrays that are returned by the :meth:`query` method.

        :arg short_name: Short name to look up. A ``KeyError`` is raised if
                         it is not known to this ``FileTreeQuery``.
        """
        return self.__shortnamevars[short_name]

    def variables(self, short_name=None) -> Dict[str, List]:
        """Return a dict of ``{variable : [values]}`` mappings.
        This dict describes all variables and their possible values in
        the tree.

        If a ``short_name`` is specified, only variables which are present in
        files of that ``short_name`` type are returned.

        :arg short_name: Optional short name used to restrict the result.
        """
        if short_name is None:
            return dict(self.__allvars)

        varnames = self.__shortnamevars[short_name]
        return {var: self.__allvars[var] for var in varnames}

    @property
    def short_names(self) -> List[str]:
        """Returns a list containing all short names of the ``FileTree`` that
        are present in the directory.
        """
        return list(self.__shortnamevars.keys())

    def query(self, short_name, **variables):
        """Search for files of the given ``short_name``, which match
        the specified ``variables``.

        :arg short_name: Short name of files to search for.

        All other arguments are ``variable=value`` pairs which restrict the
        search to that value of the variable. A variable which is not
        specified, or which is given the value ``'*'``, matches every value.
        Arguments naming a variable that is not one of the :meth:`axes` of
        ``short_name`` are ignored.

        :returns: A ``numpy.ndarray`` holding the stored :class:`Match`
                  objects (``nan`` at positions for which no file was
                  stored). Every variable that was given a specific value
                  is kept as an axis of length 1, so the axes of the result
                  line up with the names returned by :meth:`axes`.

        A ``KeyError`` is raised if ``short_name``, or a requested variable
        value, is not known.
        """
        allvarnames = self.__shortnamevars[short_name]
        varidxs = self.__varidxs[short_name]
        matcharray = self.__matcharrays[short_name]
        slc = []

        for var in allvarnames:

            val = variables.get(var, '*')

            # We're using np.newaxis to retain
            # the full dimensionality of the
            # array, so that the axis labels
            # returned by the axes() method
            # are valid.
            if val == '*':
                slc.append(slice(None))
            else:
                slc.extend([np.newaxis, varidxs[var][val]])

        return matcharray[tuple(slc)]
def scan(tree):
    """Scans the directory of the given ``FileTree`` to find all files which
    match a tree template.

    Sub-trees of ``tree`` are scanned recursively, and their matches are
    appended after the matches of ``tree`` itself.

    :arg tree: The ``FileTree`` object to scan.

    :return: list of :class:`Match` objects
    """

    matches = []

    for template in tree.templates:
        for filename in tree.get_all(template, glob_vars='all'):

            # Only regular files are of interest
            if not op.isfile(filename):
                continue

            # Variables with a value of None are dropped,
            # so a Match only lists the variables which
            # actually have a value for this file.
            variables = tree.extract_variables(template, filename)
            variables = {var: val
                         for var, val in variables.items()
                         if val is not None}

            matches.append(Match(filename, template, variables))

    # Recurse via this module-level function - the
    # Match class does not provide a scan method.
    #
    # NOTE(review): sub_trees is iterated as a sequence of
    # (name, sub_tree) pairs. If it is a dict, this needs to
    # be tree.sub_trees.items() - confirm against FileTree.
    for _tree_name, sub_tree in tree.sub_trees:
        matches.extend(scan(sub_tree))

    return matches
def allVariables(tree, matches) -> Tuple[Dict[str, List], Dict[str, List]]:
    """Identifies the ``FileTree`` variables which are actually represented
    in files in the directory.

    :arg tree:    The ``FileTree`` object (currently unused).
    :arg matches: list of ``Match`` objects (e.g. as returned by :func:`scan`)

    :returns: a tuple containing two dicts:

               - A dict of ``{ variable : [values] }`` mappings containing all
                 variables and their possible values present in the given list
                 of ``Match`` objects.

               - A dict of ``{ short_name : [variables] }`` mappings,
                 containing the variables which are relevant to each short
                 name. A short name whose matches have no variables is
                 mapped to an empty list.

              Both the value lists and the variable lists are sorted.
    """
    allvars = collections.defaultdict(set)
    allshortnames = collections.defaultdict(set)

    for m in matches:

        # Touch the entry so that a short name is
        # registered even when its matches do not
        # contain any variables.
        snvars = allshortnames[m.short_name]

        for var, val in m.variables.items():
            allvars[var].add(val)
            snvars.add(var)

    # Convert to plain dicts of sorted lists, so that
    # every value/variable has a stable position.
    allvars = {var: sorted(vals)
               for var, vals in allvars.items()}
    allshortnames = {sn: sorted(snvars)
                     for sn, snvars in allshortnames.items()}

    return allvars, allshortnames
class Match(object):
    """A ``Match`` object represents a file with a name matching a template in
    a ``FileTree``.
    """

    def __init__(self, filename, short_name, variables):
        """Create a ``Match`` object. All arguments are added as attributes.

        :arg filename:   name of existing file
        :arg short_name: template identifier
        :arg variables:  Dictionary of ``{variable : value}`` mappings
                         containing all variables present in the file name.
        """
        self.filename = filename
        self.short_name = short_name

        # Store a copy, so that later changes to the
        # caller's dict do not affect this Match.
        self.variables = dict(variables)

    def __repr__(self):
        """Returns a string representation of this ``Match``. """
        return 'Match({})'.format(self.filename)

    def __str__(self):
        """Returns a string representation of this ``Match``. """
        return repr(self)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment