#!/usr/bin/env python
#
# path.py - Utility functions for working with file/directory paths.
#
# Author: Paul McCarthy <pauldmccarthy@gmail.com>
#
"""This module contains a few utility functions for working with file system
paths.


.. autosummary::
   :nosignatures:

   deepest
   shallowest
   addExt
   removeExt
   getExt
   splitExt
   removeDuplicates
   getFileGroup
"""


import os.path as op


27
class PathError(Exception):
    """``Exception`` class raised by the functions defined in this module
    when something goes wrong.
    """
    pass


34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def deepest(path, suffixes):
    """Finds the deepest directory which ends with one of the given
    sequence of suffixes, or returns ``None`` if no directories end
    with any of the suffixes.

    The search starts at ``path`` itself and moves up one directory at
    a time (via ``os.path.dirname``), so the first match found is the
    deepest one. Leading/trailing whitespace and trailing path separators
    are stripped from each candidate before it is tested.

    :arg path:     Path to search.

    :arg suffixes: Sequence of suffixes to test each directory against.

    :returns:      The deepest matching (stripped) path, or ``None``.
    """

    # Allow one-shot iterables to be
    # tested against every directory level
    suffixes = tuple(suffixes)

    # Iterate rather than recurse, so that very deep
    # paths cannot exhaust the interpreter call stack.
    while True:

        path = path.strip()

        # We've reached the root of the file system
        if path in (op.sep, ''):
            return None

        path = path.rstrip(op.sep)

        if any(path.endswith(s) for s in suffixes):
            return path

        parent = op.dirname(path)

        # dirname has nothing left to remove (e.g. a bare
        # drive specifier) - stop instead of looping forever
        if parent == path:
            return None

        path = parent


def shallowest(path, suffixes):
    """Finds the shallowest directory which ends with one of the given
    sequence of suffixes, or returns ``None`` if no directories end
    with any of the suffixes.

    Every ancestor of ``path`` (including ``path`` itself) is collected
    by repeatedly applying ``os.path.dirname``; the ancestors are then
    tested starting from the one closest to the root, so the first match
    found is the shallowest one. Leading/trailing whitespace and trailing
    path separators are stripped from each candidate before it is tested.

    :arg path:     Path to search.

    :arg suffixes: Sequence of suffixes to test each directory against.

    :returns:      The shallowest matching (stripped) path, or ``None``.
    """

    # Allow one-shot iterables to be
    # tested against every directory level
    suffixes  = tuple(suffixes)
    ancestors = []

    # Walk up towards the root iteratively, so that very deep
    # paths cannot exhaust the interpreter call stack.
    while True:

        path = path.strip()

        # We've reached the root of the file system
        if path in (op.sep, ''):
            break

        path = path.rstrip(op.sep)
        ancestors.append(path)

        parent = op.dirname(path)

        # dirname has nothing left to remove (e.g. a bare
        # drive specifier) - stop instead of looping forever
        if parent == path:
            break

        path = parent

    # Test the directories nearest the root first
    for candidate in reversed(ancestors):
        if any(candidate.endswith(s) for s in suffixes):
            return candidate

    return None
75
76


77
78
79
80
def addExt(prefix,
           allowedExts,
           mustExist=True,
           defaultExt=None,
           fileGroups=None):
    """Adds a file extension to the given file ``prefix``.

    If ``mustExist`` is ``False``, and the file does not already have a
    supported extension, the default extension is appended and the new
    file name returned (``None`` is returned if ``defaultExt`` is
    ``None``). If the prefix already has a supported extension,
    it is returned unchanged.

    If ``mustExist`` is ``True`` (the default), the function checks to see
    if any files exist that have the given prefix, and a supported file
    extension.  A :exc:`PathError` is raised if:

       - No files exist with the given prefix and a supported extension.

       - ``fileGroups`` is ``None``, and more than one file exists with the
         given prefix, and a supported extension.

       - More than one file exists, and their extensions do not match
         exactly one of the ``fileGroups``.

    Otherwise the full file name is returned.

    :arg prefix:      The file name prefix to modify.

    :arg allowedExts: List of allowed file extensions.

    :arg mustExist:   Whether the file must exist or not.

    :arg defaultExt:  Default file extension to use.

    :arg fileGroups:  Recognised file groups - see :func:`getFileGroup`.
    """

    # File groups are indexed by position
    # below, so the empty default is a list.
    if fileGroups is None:
        fileGroups = []

    # Does the provided file name already
    # end with a supported extension?
    hasExt = any(prefix.endswith(ext) for ext in allowedExts)

    if not mustExist:

        if hasExt:
            return prefix

        if defaultExt is not None: return prefix + defaultExt
        else:                      return None

    # If the provided prefix already ends with a
    # supported extension, check to see that it exists
    if hasExt:
        allPaths = [prefix]

    # Otherwise, make a bunch of file names, one per
    # supported extension, and test to see if exactly
    # one of them exists.
    else:
        allPaths = [prefix + ext for ext in allowedExts]

    allPaths = [p for p in allPaths if op.isfile(p)]
    nexists  = len(allPaths)

    # Could not find any supported file
    # with the specified prefix
    if nexists == 0:
        raise PathError('Could not find a supported file '
                        'with prefix "{}"'.format(prefix))

    # Ambiguity! More than one supported
    # file with the specified prefix.
    elif nexists > 1:

        # Get the suffixes of all the files
        # which exist, and see if they match
        # any file groups.
        suffixes     = [getExt(p, allowedExts) for p in allPaths]
        groupMatches = [sorted(suffixes) == sorted(g) for g in fileGroups]

        # Is there a match for a file suffix group?
        # If not, multiple files with the specified
        # prefix exist, and there is no way to
        # resolve the ambiguity.
        if sum(groupMatches) != 1:
            raise PathError('More than one file with '
                            'prefix "{}"'.format(prefix))

        # Otherwise, we return a path
        # to the file which matches the
        # first suffix in the group.
        groupIdx = groupMatches.index(True)
        allPaths = [prefix + fileGroups[groupIdx][0]]

    # Return the full file name of the
    # supported file that was found
    return allPaths[0]
172
173


174
175
def removeExt(filename, allowedExts=None):
    """Returns the base name of the given file name.  See :func:`splitExt`.

    :arg filename:    The file name to strip the extension from.

    :arg allowedExts: Allowed/recognised file extensions, passed through
                      to :func:`splitExt`.
    """

    return splitExt(filename, allowedExts)[0]
178
179


180
181
def getExt(filename, allowedExts=None):
    """Returns the extension of the given file name.  See :func:`splitExt`.

    :arg filename:    The file name to extract the extension from.

    :arg allowedExts: Allowed/recognised file extensions, passed through
                      to :func:`splitExt`.
    """

    return splitExt(filename, allowedExts)[1]
184
185


186
187
def splitExt(filename, allowedExts=None):
    """Returns the base name and the extension from the given file name.

    If ``allowedExts`` is ``None``, this function is equivalent to using::

        os.path.splitext(filename)

    If ``allowedExts`` is provided, but the file does not end with an allowed
    extension, a tuple containing ``(filename, '')`` is returned. If more
    than one allowed extension matches, the first match in ``allowedExts``
    is used.

    :arg filename:    The file name to split.

    :arg allowedExts: Allowed/recognised file extensions.

    :returns:         A tuple containing ``(base, extension)``.
    """

    # If allowedExts is not specified,
    # we just use op.splitext
    if allowedExts is None:
        return op.splitext(filename)

    # Otherwise, try and find a suffix match, and
    # split the filename into its base and extension
    for ext in allowedExts:
        if filename.endswith(ext):

            # Slice from the front: a negative index of
            # -0 would wrongly select the whole string
            # when the matched extension is empty.
            cut = len(filename) - len(ext)
            return filename[:cut], filename[cut:]

    # No match, assume there is no extension
    return filename, ''


221
222
223
224
225
226
227
228
229
230
def removeDuplicates(paths, allowedExts=None, fileGroups=None):
    """Filters ``paths`` so that at most one path is kept from each
    file group (as defined by ``fileGroups``).

    Paths are considered in order; a path is kept only if neither it, nor
    any other member of its file group, has been kept already.

    For example, given a directory containing::

        001.hdr
        001.img
        002.hdr
        002.img
        003.hdr
        003.img

    the following call::

         paths       = ['001.img', '001.hdr',
                        '002.img', '002.hdr',
                        '003.img', '003.hdr']

         allowedExts = ['.img',  '.hdr']
         fileGroups  = [('.img', '.hdr')]

         removeDuplicates(paths, allowedExts, fileGroups)

    will return::

         ['001.img', '002.img', '003.img']

    :arg paths:       List of paths to reduce.

    :arg allowedExts: Allowed/recognised file extensions.

    :arg fileGroups:  Recognised file groups - see :func:`getFileGroup`.
    """

    kept = []

    for candidate in paths:

        # All files which belong to the same
        # group as this path (itself included)
        siblings = getFileGroup(candidate, allowedExts, fileGroups)

        # Skip this path if a member of
        # its group has already been kept
        if all(sibling not in kept for sibling in siblings):
            kept.append(candidate)

    return kept


268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
def getFileGroup(path, allowedExts=None, fileGroups=None, fullPaths=True):
    """Returns the files which make up the file group that ``path``
    belongs to.

    If the ``path`` is part of exactly one file group for which every
    member file exists, a list containing the paths to all files in that
    group (including the ``path`` itself) is returned. Otherwise (no file
    groups given, no matching group, or more than one matching group), a
    list containing only the ``path`` is returned.

    File groups can be used to specify a collection of file suffixes which
    should always exist alongside each other. This can be used to resolve
    ambiguity when multiple files exist with the same ``prefix`` and supported
    extensions (e.g. ``file.hdr`` and ``file.img``). The file groups are
    specified as a list of sequences, for example::

        [('.img',    '.hdr'),
         ('.img.gz', '.hdr.gz')]

    If you specify ``fileGroups=[('.img', '.hdr')]`` and ``prefix='file'``,
    and both ``file.img`` and ``file.hdr`` exist, the :func:`addExt` function
    would return ``file.img`` (i.e. the file which matches the first
    extension in the group).

    Similarly, if you call the :func:`.imcp.imcp` or :func:`.imcp.immv`
    functions with the above parameters, both ``file.img`` and ``file.hdr``
    will be moved.

    .. note:: The primary use-case of file groups is to resolve ambiguity with
              respect to NIFTI and ANALYZE75 image pairs. By specifying
              ``fileGroups=[('.img', '.hdr'), ('.img.gz', '.hdr.gz')]``, the
              :func:`addExt`, :func:`.imcp.immv` and :func:`.imcp.imcp`
              functions are able to figure out what you mean when you specify
              ``file``, and both ``file.hdr`` and ``file.img`` (or
              ``file.hdr.gz`` and ``file.img.gz``) exist.

    :arg path:        Path to the file. Must contain the file extension.

    :arg allowedExts: Allowed/recognised file extensions.

    :arg fileGroups:  Recognised file groups.

    :arg fullPaths:   If ``True`` (the default), full file paths (relative to
                      the ``path``) are returned. Otherwise, only the file
                      extensions in the group are returned (the matching
                      entry of ``fileGroups`` itself, or ``[ext]`` when
                      there is no unique match).
    """

    # Without any file groups, the
    # path is in a group of its own
    if fileGroups is None:
        return [path]

    base, ext = splitExt(path, allowedExts)

    # Gather every group which contains this
    # extension, and whose files all exist
    candidates = []

    for group in fileGroups:
        if ext in group:
            members = [base + suffix for suffix in group]
            if all(op.exists(member) for member in members):
                candidates.append((group, members))

    # Exactly one group matched - return
    # either its files or its extensions
    if len(candidates) == 1:
        group, members = candidates[0]
        return members if fullPaths else group

    # No group matched, or the path is part of
    # more than one existing group, in which
    # case the ambiguity cannot be resolved.
    return [path] if fullPaths else [ext]