Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
F
fslpy
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Container Registry
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Michiel Cottaar
fslpy
Commits
689f5fc3
Commit
689f5fc3
authored
6 years ago
by
Paul McCarthy
Browse files
Options
Downloads
Patches
Plain Diff
ENH: Fleshed out FileTreeQuery implementation. Need to write some tests.
parent
7db08c9c
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
fsl/utils/filetree/query.py
+207
-57
207 additions, 57 deletions
fsl/utils/filetree/query.py
with
207 additions
and
57 deletions
fsl/utils/filetree/query.py
+
207
−
57
View file @
689f5fc3
...
...
@@ -5,7 +5,20 @@
# Author: Paul McCarthy <pauldmccarthy@gmail.com>
# Author: Michiel Cottaar <michiel.cottaar@ndcn.ox.ac.uk>
#
"""
"""
This module contains the :class:`FileTreeQuery` class, which can be used to
search for files in a directory described by a `.FileTree`. A
``FileTreeQuery`` object returns :class:`Match` objects which each represent a
file that is described by the ``FileTree``, and which is present in the
directory.
The following utility functions, used by the ``FileTreeQuery`` class, are also
defined in this module:
.. autosummary::
:nosignatures:
scan
allVariables
"""
...
...
@@ -13,107 +26,244 @@ import logging
import
collections
import
os.path
as
op
from
typing
import
Dict
,
Set
,
List
from
typing
import
Dict
,
List
,
Tuple
import
numpy
as
np
log
=
logging
.
getLogger
(
__name__
)
class
FileTreeQuery
(
object
):
"""
The ``FileTreeQuery`` class uses a :class:`.FileTree` to search
a directory for files which match a specific query.
"""
def
__init__
(
self
,
tree
):
"""
Create a ``FileTreeQuery``.
:arg tree: The ``FileTree`` object
"""
# Find all files present in the directory
# (as Match objects), and find all variables,
# plus their values, and all short names,
# that are present in the directory.
matches
=
scan
(
tree
)
allvars
,
shortnamevars
=
allVariables
(
tree
,
matches
)
# Now we are going to build a series of ND
# arrays to store Match objects. We create
# one array for each short name. Each axis
# in an array corresponds to a variable
# present in files of that short name type,
# and each position along an axis corresponds
# to one value of that variable.
#
# These arrays will be used to store and
# retrieve Match objects - given a short
# name and a set of variable values, we
# can quickly find the corresponding Match
# object (or objects).
# matcharrays contains {shortname : ndarray}
# mappings, and varidxs contains
# {shortname : {variable : {value : index}}} mappings
matcharrays
=
{}
varidxs
=
{}
for
shortname
in
shortnamevars
.
keys
():
snvars
=
shortnamevars
[
shortname
]
snvarlens
=
[
len
(
allvars
[
v
])
for
v
in
snvars
]
# An ND array for this short
# name. Each element is a
# Match object, or nan.
matcharray
=
np
.
zeros
(
snvarlens
,
dtype
=
np
.
object
)
matcharray
[:]
=
np
.
nan
# indices into the match array
# for each variable value
snvaridxs
=
{}
for
v
in
snvars
:
snvaridxs
[
v
]
=
{
n
:
i
for
i
,
n
in
enumerate
(
allvars
[
v
])}
matcharrays
[
shortname
]
=
matcharray
varidxs
[
shortname
]
=
snvaridxs
# Populate the match arrays
for
match
in
matches
:
snvars
=
shortnamevars
[
match
.
short_name
]
snvaridxs
=
varidxs
[
match
.
short_name
]
snarr
=
matcharrays
[
match
.
short_name
]
idx
=
[]
for
var
in
snvars
:
# TODO handle optional variables. Need
# an extra element on each axis which
# represents a missing value
val
=
match
.
variables
[
var
]
idx
.
append
(
snvaridxs
[
var
][
val
])
snarr
[
tuple
(
idx
)]
=
match
self
.
__allvars
=
allvars
self
.
__shortnamevars
=
shortnamevars
self
.
__matches
=
matches
self
.
__matcharrays
=
matcharrays
self
.
__varidxs
=
varidxs
def
axes
(
self
,
short_name
)
->
List
[
str
]:
"""
Returns a list containing the names of variables present in files
of the given ``short_name`` type, in the same order as the axes of
:class:`Match` arrays that are returned by the :meth:`query` method.
"""
self
.
__tree
=
tree
self
.
__matches
=
Match
.
scan
(
tree
)
self
.
__variables
=
Match
.
allVariables
(
tree
,
self
.
__matches
)
return
self
.
__shortnamevars
[
short_name
]
def
variables
(
self
)
->
Dict
[
str
,
Se
t
]:
def
variables
(
self
,
short_name
=
None
)
->
Dict
[
str
,
Lis
t
]:
"""
Return a dict of ``{variable : [values]}`` mappings.
This dict describes all variables and their possible values in
the tree.
If a ``short_name`` is specified, only variables which are present in
files of that ``short_name`` type are returned.
"""
return
dict
(
self
.
__variables
)
if
short_name
is
None
:
return
dict
(
self
.
__allvars
)
else
:
varnames
=
self
.
__shortnamevars
[
short_name
]
return
{
var
:
self
.
__allvars
[
var
]
for
var
in
varnames
}
def
query
(
self
,
**
variables
)
->
List
[
str
]:
"""
Return all ``Match`` objects which match the given set of
``variable=value`` arguments.
@property
def
short_names
(
self
)
->
List
[
str
]:
"""
Returns a list containing all short names of the ``FileTree`` that
are present in the directory.
"""
hits
=
[]
return
list
(
self
.
__shortnamevars
.
keys
())
for
m
in
self
.
__matches
:
if
all
([
m
.
variables
.
get
(
n
,
None
)
==
v
for
n
,
v
in
variables
.
items
()]):
hits
.
append
(
m
)
return
hits
def
query
(
self
,
short_name
,
**
variables
):
"""
Search for files of the given ``short_name``, which match
the specified ``variables``.
:arg short_name: Short name of files to search for.
class
Match
(
object
):
"""
Filename matching a template in the file tree
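All other arguments are ``variable=value`` pairs which restrict the
search. A variable which is omitted, or which is given a value of
``'*'``, matches all of its values.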
"""
varnames
=
list
(
variables
.
keys
())
allvarnames
=
self
.
__shortnamevars
[
short_name
]
varidxs
=
self
.
__varidxs
[
short_name
]
matcharray
=
self
.
__matcharrays
[
short_name
]
slc
=
[]
for
var
in
allvarnames
:
if
var
in
varnames
:
val
=
variables
[
var
]
else
:
val
=
'
*
'
# We're using np.newaxis to retain
# the full dimensionality of the
# array, so that the axis labels
# returned by the axes() method
# are valid.
if
val
==
'
*
'
:
slc
.
append
(
slice
(
None
))
else
:
slc
.
extend
([
np
.
newaxis
,
varidxs
[
var
][
val
]])
return
matcharray
[
tuple
(
slc
)]
def
scan
(
tree
):
"""
Scans the directory of the given ``FileTree`` to find all files which
match a tree template.
:return: list of :class:`Match` objects
"""
@staticmethod
def
allVariables
(
tree
,
matches
)
->
Dict
[
str
,
Set
]:
"""
Returns a dict of ``{ variable : [values] }`` mappings
containing all variables and their possible values present
in the given list of ``Match`` objects.
"""
allvars
=
collections
.
defaultdict
(
set
)
matches
=
[]
for
template
in
tree
.
templates
:
for
filename
in
tree
.
get_all
(
template
,
glob_vars
=
'
all
'
):
for
m
in
matches
:
for
var
,
val
in
m
.
variables
.
items
():
allvars
[
var
].
add
(
val
)
return
allvars
if
not
op
.
isfile
(
filename
):
continue
variables
=
tree
.
extract_variables
(
template
,
filename
)
variables
=
{
var
:
val
for
var
,
val
in
variables
.
items
()
if
val
is
not
None
}
@staticmethod
def
scan
(
tree
):
"""
Scans the disk to find any matches
matches
.
append
(
Match
(
filename
,
template
,
variables
))
:return: list of :class:`Match` objects
"""
for
tree_name
,
sub_tree
in
tree
.
sub_trees
:
matches
.
extend
(
Match
.
scan
(
sub_tree
))
return
matches
matches
=
[]
for
template
in
tree
.
templates
:
for
filename
in
tree
.
get_all
(
template
,
glob_vars
=
'
all
'
):
if
not
op
.
isfile
(
filename
):
continue
def
allVariables
(
tree
,
matches
)
->
Tuple
[
Dict
[
str
,
List
],
Dict
[
str
,
List
]]:
"""
Identifies the ``FileTree`` variables which are actually represented
in files in the directory.
variables
=
tree
.
extract_variables
(
template
,
filename
)
variables
=
{
var
:
val
for
var
,
val
in
variables
.
items
()
if
val
is
not
None
}
:arg tree: The ``FileTree`` object
:arg matches: list of ``Match`` objects (e.g. as returned by :func:`scan`)
matches
.
append
(
Match
(
filename
,
template
,
variables
))
:returns: a tuple containing two dicts:
for
tree_name
,
sub_tree
in
tree
.
sub_trees
:
matches
.
extend
(
Match
.
scan
(
sub_tree
))
- A dict of ``{ variable : [values] }`` mappings containing all
variables and their possible values present in the given list
of ``Match`` objects.
return
matches
- A dict of ``{ short_name : [variables] }`` mappings,
containing the variables which are relevant to each short
name.
"""
allvars
=
collections
.
defaultdict
(
set
)
allshortnames
=
collections
.
defaultdict
(
set
)
for
m
in
matches
:
for
var
,
val
in
m
.
variables
.
items
():
allvars
[
var
]
.
add
(
val
)
allshortnames
[
m
.
short_name
].
add
(
var
)
allvars
=
{
var
:
list
(
sorted
(
vals
))
for
var
,
vals
in
allvars
.
items
()}
allshortnames
=
{
sn
:
list
(
sorted
(
vars
))
for
sn
,
vars
in
allshortnames
.
items
()}
return
allvars
,
allshortnames
class
Match
(
object
):
"""
A ``Match`` object represents a file with a name matching a template in
a ``FileTree``.
"""
def
__init__
(
self
,
filename
,
short_name
,
variables
):
"""
Defines a new match
"""
Create a ``Match`` object. All arguments are added as attributes.
:param filename: name of existing file
:param short_name: template identifier
:param variables: variable values
:arg filename: name of existing file
:arg short_name: template identifier
:arg variables: Dictionary of ``{variable : value}`` mappings
containing all variables present in the file name.
"""
self
.
filename
=
filename
self
.
filename
=
filename
self
.
short_name
=
short_name
self
.
variables
=
dict
(
variables
)
self
.
variables
=
dict
(
variables
)
def
__repr__
(
self
):
return
self
.
filename
"""
Returns a string representation of this ``Match``.
"""
return
'
Match({})
'
.
format
(
self
.
filename
)
def
__str__
(
self
):
"""
Returns a string representation of this ``Match``.
"""
return
repr
(
self
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment