diff --git a/doc/fsl.utils.rst b/doc/fsl.utils.rst index d7bf157ed3154a6c3abe9fefe100da7548ea4087..76631a31b4156a09fee7ca689bf964d8da6f9d02 100644 --- a/doc/fsl.utils.rst +++ b/doc/fsl.utils.rst @@ -8,6 +8,7 @@ fsl.utils.cache fsl.utils.deprecated fsl.utils.ensure + fsl.utils.filetree fsl.utils.fslsub fsl.utils.idle fsl.utils.imcp diff --git a/fsl/utils/filetree/__init__.py b/fsl/utils/filetree/__init__.py index b5ffec3ef7d33eb05b20f6e0b2a4ae2b2b26f538..ecba03c5e61ffed6a9d8a66c27b5b22741f1c149 100644 --- a/fsl/utils/filetree/__init__.py +++ b/fsl/utils/filetree/__init__.py @@ -1,4 +1,171 @@ -"""mc_filetree - Easy format to define intput/output files in a python pipeline""" +""" +Easy format to define input/output files in a python pipeline. + +The goal is to separate the definition of the input/output filenames from the actual code +by defining a directory tree (i.e., FileTree) in a separate file from the code. + +Loading FileTrees +----------------- +.. code-block:: python + + from fsl.utils.filetree import FileTree, tree_directories + tree = FileTree.read('bids_raw') + +This creates a `tree` object that describes input/output filenames +for your pipeline based on `this file <trees/bids_raw.tree>` + +:py:func:`filetree.FileTree.read` will search through the `filetree.tree_directories` list of directories +for any FileTrees matching the given name. This list by default includes the current directory. Of course, +a full path to the requested FileTree can also be provided. + +FileTree format +--------------- +The FileTrees are defined in a simple-to-type format, where indentation is used to indicate subdirectories, for example: + +:: + + parent + file1.txt + child + file2 + file3.txt + file4.txt + +In the top-level directory this represents one file ("file4.txt") and one directory ("parent"). The directory +contains two files ("file1.txt" and "file3.txt") and one directory ("child") which contains a single file ("file2"). 
+ +Individual aspects of this format are defined in more detail below. + +Short names +^^^^^^^^^^^ +Each directory and file in the FileTree is assigned a short name for convenient access. +For example, for the FileTree + +:: + + parent + file1.txt + child + file2 + file3.txt + file4.txt + +We can load this FileTree using + +.. code-block:: python + + >>> tree = FileTree.read(<tree filename>) + >>> tree.get('file2') + 'parent/child/file2' + >>> tree.get('child') + 'parent/child' + +These filenames will be returned whether the underlying file exists or not (see :py:func:`filetree.FileTree.get`). + +By default the short name will be the name of the file or directory without extension (i.e., everything before the first dot). +The short name can be explicitly set by including it in round brackets behind the filename, +so ``left_hippocampus_segment_from_first.nii.gz (Lhipp)`` will have the short name "Lhipp" +rather than "left_hippocampus_segment_from_first". This allows changing of the filenames +without having to alter the short names used to refer to those filenames in the code. + +Variables +^^^^^^^^^ +FileTrees can have placeholders for variables such as subject id: + +:: + + {subject} + T1w.nii.gz + {hemi}_pial.surf.gii (pial) + +Any part of the directory or file names contained within curly brackets will have to be filled when getting the path: + +.. code-block:: python + + >>> tree = FileTree.read(<tree filename>, subject='A') + >>> tree.get('T1w') + 'A/T1w.nii.gz' + >>> B_tree = tree.update(subject='B') + >>> B_tree.get('T1w') + 'B/T1w.nii.gz' + >>> tree.get('pial') # note that pial was explicitly set as the short name in the file above + # Raises a MissingVariable error as the hemi variable is not defined + +Variables can be either set during initialisation of the FileTree or by :py:func:`filetree.FileTree.update`, which +returns a new `FileTree` rather than updating the existing one. 
+ +Finally, initial values for the variables can be set in the FileTree itself, for example in + +:: + + hemi = left + + {subject} + T1w.nii.gz + {hemi}_pial.surf.gii (pial) + +the variable "hemi" will be "left" unless explicitly set during initialisation or updating of the `FileTree`. + +Optional Variables +^^^^^^^^^^^^^^^^^^ +Normally having undefined variables will lead to :py:exc:`filetree.MissingVariable` being raised. +This can be avoided by putting these variables in square brackets, indicating that they can simply +be skipped. For example, for the FileTree: + +:: + + {subject} + [{session}] + T1w[_{session}].nii.gz (T1w) + +.. code-block:: python + + >>> tree = FileTree.read(<tree filename>, subject='A') + >>> tree.get('T1w') + 'A/T1w.nii.gz' + >>> tree.update(session='test').get('T1w') + 'A/test/T1w_test.nii.gz' + +Note that if any variable within the square brackets is missing, any text within those square brackets is omitted. + +Extensive use of optional variables can be found in the FileTree of the BIDS raw data formatting. + +Sub-trees +^^^^^^^^^ +FileTrees can include other FileTrees within their directory structure. For example, + +:: + + {subject} + topup + b0.nii.gz + ->topup basename=out (topup) + eddy + ->eddy (eddy) + nodif_brain_mask.nii.gz + Diffusion + ->Diffusion (diff) + ->dti (dti) + +which might represent a diffusion MRI pipeline, which contains references to the predefined trees for the +"topup", "eddy", "Diffusion", and "dti" FileTrees describing the input/output of various FSL tools. + +The general format of this is: +``-><tree name> [<variable in sub-tree>=<value>, ...] (<sub-tree short name>)`` + +The filenames defined in the sub-trees can be accessed using a "/" in the short name: + +.. 
code-block:: python + + >>> tree = FileTree.read(<tree filename>, subject='A') + >>> tree.get('dti/FA') + 'A/Diffusion/dti_FA.nii.gz' + >>> tree.get('topup/fieldcoef') + 'A/topup/out_fieldcoef.nii.gz' + +Extensive use of sub-trees can be found in the FileTree of the HCP pre-processed directory structure, +which amongst others refers to the HCP surface directory format FileTree. +""" __author__ = 'Michiel Cottaar <Michiel.Cottaar@ndcn.ox.ac.uk>' diff --git a/fsl/utils/filetree/filetree.py b/fsl/utils/filetree/filetree.py index 15bd8bb8841d49900362be6f2cd53860f05f782b..131ebe55eeec1be78ee4f69cb7f42a1dc12dde4f 100644 --- a/fsl/utils/filetree/filetree.py +++ b/fsl/utils/filetree/filetree.py @@ -18,7 +18,8 @@ class FileTree(object): """ Contains the input/output filename tree - Properties + Properties: + - ``templates``: dictionary mapping short names to filename templates - ``variables``: dictionary mapping variables in the templates to specific values (variables set to None are explicitly unset) - ``sub_trees``: filename trees describing specific sub-directories @@ -41,6 +42,9 @@ class FileTree(object): @property def parent(self, ): + """ + Parent FileTree, of which this sub-tree is a sub-directory + """ return self._parent @property @@ -93,8 +97,9 @@ class FileTree(object): - '../' characters in short_name refer to parents For example: - - eddy/output refers to the output in the eddy sub_tree (i.e. self.sub_trees['eddy'].templates['output'] - - ../other/name refers to the 'other' sub-tree of the parent tree (i.e., self.parent.sub_trees['other'].templates['name'] + + - "eddy/output" refers to the "output" in the "eddy" sub_tree (i.e. 
``self.sub_trees['eddy'].templates['output']``) + - "../other/name" refers to the "other" sub-tree of the parent tree (i.e., ``self.parent.sub_trees['other'].templates['name']``) :param short_name: name of the template :return: tuple with the template and the variables corresponding to the template @@ -160,9 +165,9 @@ class FileTree(object): Gets all existing directory/file names matching a specific pattern :param short_name: short name of the path template - :param glob_vars: sequence of undefined variables that can take any possible values when looking for matches on the disk - Any defined variables in `glob_vars` will be ignored. - If glob_vars is set to 'all', all undefined variables will be used to look up matches + :param glob_vars: sequence of undefined variables that can take any possible values when looking for matches on the disk. + Any defined variables in `glob_vars` will be ignored. + If glob_vars is set to 'all', all undefined variables will be used to look up matches. :return: sorted sequence of paths """ text, variables = self.get_template(short_name) @@ -173,9 +178,9 @@ class FileTree(object): Gets all the parameters that generate existing filenames :param short_name: short name of the path template - :param glob_vars: sequence of undefined variables that can take any possible values when looking for matches on the disk - Any defined variables in `glob_vars` will be ignored. - If glob_vars is set to 'all', all undefined variables will be used to look up matches + :param glob_vars: sequence of undefined variables that can take any possible values when looking for matches on the disk. + Any defined variables in `glob_vars` will be ignored. + If glob_vars is set to 'all', all undefined variables will be used to look up matches. 
:return: sequence of dictionaries with the variables settings used to generate each filename """ return tuple(self.extract_variables(short_name, fn) for fn in self.get_all(short_name, glob_vars=glob_vars)) @@ -184,8 +189,9 @@ class FileTree(object): """ Creates a new filetree with updated variables - :arg variables: new values for the variables - Setting variables to None will explicitly unset them + :param variables: new values for the variables + Setting variables to None will explicitly unset them + :return: New FileTree with same templates for directory names and filenames, but updated variables """ new_tree = deepcopy(self) new_tree.variables.update(variables) @@ -198,7 +204,7 @@ class FileTree(object): :param short_name: short name of the path template :param filename: filename matching the template :return: variables needed to get to the given filename - Variables with None value are optional variables in the template that were not used + Variables with None value are optional variables in the template that were not used """ text, _ = self.get_template(short_name) return utils.extract_variables(text, filename, self.variables) @@ -234,11 +240,11 @@ class FileTree(object): :param on_disk: if True checks whether the files exist on disk :param error: if True raises a helpful error when the check fails :param glob_vars: sequence of undefined variables that can take any possible values when looking for matches on the disk - If `glob_vars` contains any defined variables, it will be ignored. + If `glob_vars` contains any defined variables, it will be ignored. 
:return: True if short names exist and optionally exist on disk (False otherwise) :raise: - - ValueError if error is set and the tree is incomplete - - IOError if error is set and any files are missing from the disk + - ValueError if error is set and the tree is incomplete + - IOError if error is set and any files are missing from the disk """ if isinstance(short_names, str): short_names = (short_names, ) @@ -273,11 +279,11 @@ class FileTree(object): """ Reads a FileTree from a specific file - The return type is ``cls`` unless the tree_name has been previously registered - The return type of any sub-tree is FileTree unless the tree_name has been previously registered + The return type is ``cls`` unless the tree_name has been previously registered. + The return type of any sub-tree is ``FileTree`` unless the tree_name has been previously registered. :param tree_name: file containing the filename tree. - Can provide the filename of a tree file or the name for a tree in the ``filetree.tree_directories``. + Can provide the filename of a tree file or the name for a tree in the ``filetree.tree_directories``. 
:param directory: parent directory of the full tree (defaults to current directory) :param variables: variable settings :return: dictionary from specifier to filename diff --git a/fsl/utils/filetree/parse.py b/fsl/utils/filetree/parse.py index 2e5195361b2fe1b51bb0dcb60219949adaff9a50..c89b381fd38142158f78319c7b070a331bb875a5 100644 --- a/fsl/utils/filetree/parse.py +++ b/fsl/utils/filetree/parse.py @@ -30,11 +30,12 @@ def read_line(line: str) -> Tuple[int, PurePath, str]: """ Parses line from the tree file - :param line: input line from a *.tree file + :param line: input line from a \*.tree file :return: Tuple with: - - number of spaces in front of the name - - name of the file or the sub_tree - - short name of the file + + - number of spaces in front of the name + - name of the file or the sub_tree + - short name of the file """ if line.strip()[:1] == '->': return read_subtree_line(line) @@ -54,12 +55,13 @@ def read_subtree_line(line: str, directory: str) -> Tuple[int, "filetree.FileTre """ Parses the line defining a sub_tree - :param line: input line from a *.tree file + :param line: input line from a \*.tree file :param directory: containing directory :return: Tuple with - - number of spaces in front of the name - - sub_tree - - short name of the sub_tree + + - number of spaces in front of the name + - sub_tree + - short name of the sub_tree """ match = re.match(r'^(\s*)->\s*(\S*)(.*)\((\S*)\)', line) if match is None: diff --git a/fsl/utils/filetree/utils.py b/fsl/utils/filetree/utils.py index f78046fff8f6878309225b55ae22d6c8dab519ff..4ad5f63aff9164531b1a491056f9f9574f963b70 100644 --- a/fsl/utils/filetree/utils.py +++ b/fsl/utils/filetree/utils.py @@ -27,7 +27,7 @@ def get_all(template, variables, glob_vars=()): :param template: template :param variables: (incomplete) mapping of variable names to values :param glob_vars: sequence of undefined variables that can take any possible values when looking for matches on the disk - If `glob_vars` contains any defined 
variables, it will be ignored. + If `glob_vars` contains any defined variables, it will be ignored. :return: sequence of filenames """ filled = fill_known(template, variables)