path.py 12.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
#!/usr/bin/env python
#
# path.py - Utility functions for working with file/directory paths.
#
# Author: Paul McCarthy <pauldmccarthy@gmail.com>
#
"""This module contains a few utility functions for working with file system
paths.


.. autosummary::
   :nosignatures:

   deepest
   shallowest
   addExt
   removeExt
   getExt
   splitExt
   getFileGroup
   removeDuplicates
"""


import os.path as op


28
class PathError(Exception):
    """``Exception`` class raised by the functions defined in this module
    when something goes wrong (e.g. a file cannot be found, or a file name
    prefix cannot be unambiguously resolved to a single file).
    """
    pass


35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
def deepest(path, suffixes):
    """Finds the deepest directory which ends with one of the given
    sequence of suffixes, or returns ``None`` if no directories end
    with any of the suffixes.

    The ``path`` itself is tested first, and then each of its parent
    directories in turn, until a match is found or the top of the path
    is reached.

    :arg path:     Path to search. Leading/trailing whitespace and trailing
                   path separators are ignored.

    :arg suffixes: Sequence of suffixes to test each directory against.

    :returns:      The deepest matching directory (without any trailing
                   separator), or ``None`` if there is no match.
    """

    # str.endswith accepts a tuple of candidates
    suffixes = tuple(suffixes)

    while True:

        path = path.strip()

        # We've reached the root of the file
        # system, or the top of a relative path
        if path == op.sep or path == '':
            return None

        path = path.rstrip(op.sep)

        if path.endswith(suffixes):
            return path

        parent = op.dirname(path)

        # dirname cannot reduce the path any
        # further (for example a bare drive
        # specifier such as "C:" on Windows),
        # so stop rather than looping forever.
        if parent == path:
            return None

        path = parent


def shallowest(path, suffixes):
    """Finds the shallowest directory which ends with one of the given
    sequence of suffixes, or returns ``None`` if no directories end
    with any of the suffixes.

    The ``path`` and each of its parent directories are tested; the match
    which is closest to the top of the path is the one that is returned.

    :arg path:     Path to search. Leading/trailing whitespace and trailing
                   path separators are ignored.

    :arg suffixes: Sequence of suffixes to test each directory against.

    :returns:      The shallowest matching directory (without any trailing
                   separator), or ``None`` if there is no match.
    """

    # str.endswith accepts a tuple of candidates
    suffixes = tuple(suffixes)
    match    = None
    path     = path.strip()

    # Walk upwards until we reach the root of
    # the file system (or the top of a relative
    # path). Every match overwrites the previous
    # one, so we end up with the shallowest.
    while path != op.sep and path != '':

        path = path.rstrip(op.sep)

        if path.endswith(suffixes):
            match = path

        parent = op.dirname(path)

        # dirname cannot reduce the path any
        # further (for example a bare drive
        # specifier such as "C:" on Windows),
        # so stop rather than looping forever.
        if parent == path:
            break

        path = parent.strip()

    return match
76
77


78
def addExt(prefix,
           allowedExts=None,
           mustExist=True,
           defaultExt=None,
           fileGroups=None):
    """Adds a file extension to the given file ``prefix``.

    If ``mustExist`` is False, and the file does not already have a
    supported extension, the default extension is appended and the new
    file name returned. If the prefix already has a supported extension,
    it is returned unchanged.

    If ``mustExist`` is ``True`` (the default), the function checks to see
    if any files exist that have the given prefix, and a supported file
    extension.  A :exc:`PathError` is raised if:

       - No files exist with the given prefix and a supported extension.

       - More than one file exists with the given prefix and a supported
         extension, and the extensions of those files do not correspond
         to exactly one of the ``fileGroups`` (which is always the case
         when ``fileGroups`` is ``None``).

    Otherwise the full file name is returned. When several files are
    resolved through a file group, the file with the first extension in
    that group is the one that is returned.

    The ``allowedExts`` and ``fileGroups`` arguments are never modified.

    :arg prefix:      The file name prefix to modify.

    :arg allowedExts: List of allowed file extensions.

    :arg mustExist:   Whether the file must exist or not.

    :arg defaultExt:  Default file extension to use. It is treated as an
                      allowed extension, even if it is not in
                      ``allowedExts``.

    :arg fileGroups:  Recognised file groups - see :func:`getFileGroup`.

    :returns:         The file name, including its extension.
    """

    # Work on copies, so that the default
    # extension is not appended to a list
    # which is owned by the caller.
    if allowedExts is None: allowedExts = []
    else:                   allowedExts = list(allowedExts)
    if fileGroups  is None: fileGroups  = []
    else:                   fileGroups  = list(fileGroups)

    if defaultExt is not None and defaultExt not in allowedExts:
        allowedExts.append(defaultExt)

    # Does the provided file name already
    # end with a supported extension?
    hasExt = prefix.endswith(tuple(allowedExts))

    if not mustExist:

        if hasExt or defaultExt is None:
            return prefix

        return prefix + defaultExt

    # If no allowed extensions were
    # provided, or the provided prefix
    # already ends with a supported
    # extension, check to see that it
    # exists as-is (i.e. with nothing
    # appended to it).
    if hasExt or len(allowedExts) == 0:
        candidates = ['']

    # Otherwise, make a bunch of file names, one per
    # supported extension, and test to see if exactly
    # one of them exists.
    else:
        candidates = allowedExts

    # Keep track of the extension which was
    # appended to each existing file. Working
    # it out again from the file name would be
    # unreliable when one allowed extension is
    # a suffix of another (e.g. ".gz" and
    # ".img.gz").
    existing = [ext for ext in candidates if op.isfile(prefix + ext)]

    # Could not find any supported file
    # with the specified prefix
    if len(existing) == 0:
        raise PathError('Could not find a supported file '
                        'with prefix "{}"'.format(prefix))

    # Exactly one supported file exists
    if len(existing) == 1:
        return prefix + existing[0]

    # Ambiguity! More than one supported
    # file with the specified prefix. See
    # if the extensions of the files that
    # exist match any file groups.
    suffixes = sorted(existing)
    matches  = [g for g in fileGroups if sorted(g) == suffixes]

    # Is there a match for a file suffix group?
    # If not, multiple files with the specified
    # prefix exist, and there is no way to
    # resolve the ambiguity.
    if len(matches) != 1:
        raise PathError('More than one file with '
                        'prefix "{}"'.format(prefix))

    # Otherwise, we return a path
    # to the file which matches the
    # first suffix in the group.
    return prefix + matches[0][0]
180
181


182
183
def removeExt(filename, allowedExts=None):
    """Returns the base name of the given file name, i.e. the first element
    of the tuple returned by :func:`splitExt`.

    :arg filename:    The file name to remove the extension from.

    :arg allowedExts: Allowed/recognised file extensions - passed through
                      to :func:`splitExt`.
    """

    return splitExt(filename, allowedExts)[0]
186
187


188
189
def getExt(filename, allowedExts=None):
    """Returns the extension of the given file name, i.e. the second element
    of the tuple returned by :func:`splitExt`.

    :arg filename:    The file name to extract the extension from.

    :arg allowedExts: Allowed/recognised file extensions - passed through
                      to :func:`splitExt`.
    """

    return splitExt(filename, allowedExts)[1]
192
193


194
195
def splitExt(filename, allowedExts=None):
    """Returns the base name and the extension from the given file name.

    If ``allowedExts`` is ``None``, this function is equivalent to using::

        os.path.splitext(filename)

    If ``allowedExts`` is provided, but the file does not end with an allowed
    extension, a tuple containing ``(filename, '')`` is returned.

    If the file name ends with more than one of the allowed extensions
    (e.g. ``.gz`` and ``.nii.gz``), the one which comes first in
    ``allowedExts`` is used.

    :arg filename:    The file name to split.

    :arg allowedExts: Allowed/recognised file extensions.

    :returns:         A tuple containing ``(base, extension)``.
    """

    # If allowedExts is not specified,
    # we just use op.splitext
    if allowedExts is None:
        return op.splitext(filename)

    # Otherwise, try and find a suffix match
    for ext in allowedExts:

        if not filename.endswith(ext):
            continue

        # Split at an explicit position - slicing
        # with a negative length would give the
        # wrong answer for a zero-length extension,
        # as filename[:-0] is the empty string.
        cut = len(filename) - len(ext)

        return filename[:cut], filename[cut:]

    # No match, assume there is no extension
    return filename, ''


229
230
231
232
233
def getFileGroup(path,
                 allowedExts=None,
                 fileGroups=None,
                 fullPaths=True,
                 unambiguous=False):
    """If the given ``path`` is part of a ``fileGroup``, returns a list
    containing the paths to all other files in the group (including the
    ``path`` itself).

    If the ``path`` does not appear to be part of a file group, or appears to
    be part of an incomplete file group, a list containing only the ``path``
    is returned.

    If the ``path`` does not exist, or appears to be part of more than one
    file group, a :exc:`PathError` is raised.

    File groups can be used to specify a collection of file suffixes which
    should always exist alongside each other. This can be used to resolve
    ambiguity when multiple files exist with the same ``prefix`` and supported
    extensions (e.g. ``file.hdr`` and ``file.img``). The file groups are
    specified as a list of sequences, for example::

        [('.img',    '.hdr'),
         ('.img.gz', '.hdr.gz')]

    If you specify ``fileGroups=[('.img', '.hdr')]`` and ``prefix='file'``,
    and both ``file.img`` and ``file.hdr`` exist, the :func:`addExt` function
    would return ``file.img`` (i.e. the file which matches the first extension
    in the group).

    Similarly, if you call the :func:`.imcp.imcp` or :func:`.imcp.immv`
    functions with the above parameters, both ``file.img`` and ``file.hdr``
    will be moved.

    .. note:: The primary use-case of file groups is to resolve ambiguity with
              respect to NIFTI and ANALYZE75 image pairs. By specifying
              ``fileGroups=[('.img', '.hdr'), ('.img.gz', '.hdr.gz')]``, the
              :func:`addExt`, :func:`.imcp.immv` and :func:`.imcp.imcp`
              functions are able to figure out what you mean when you specify
              ``file``, and both ``file.hdr`` and ``file.img`` (or
              ``file.hdr.gz`` and ``file.img.gz``) exist.

    :arg path:        Path to the file. Must contain the file extension.

    :arg allowedExts: Allowed/recognised file extensions.

    :arg fileGroups:  Recognised file groups.

    :arg fullPaths:   If ``True`` (the default), full file paths (relative to
                      the ``path``) are returned. Otherwise, only the file
                      extensions in the group are returned.

    :arg unambiguous: Defaults to ``False``. If ``True``, a :exc:`PathError`
                      is raised whenever the path is part of an incomplete
                      file group (i.e. some, but not all, of the files in the
                      group exist). If ``False``, incomplete file groups are
                      ignored.

    :returns:         A list containing the files (or the extensions, if
                      ``fullPaths`` is ``False``) in the group, ordered as
                      they are in the group; or a list containing just the
                      ``path`` (or its extension) if it is not part of a
                      complete file group.
    """

    path      = addExt(path, allowedExts, mustExist=True, fileGroups=fileGroups)
    base, ext = splitExt(path, allowedExts)

    # This is what gets returned when the
    # path is not resolved to a file group
    if fullPaths: ungrouped = [path]
    else:         ungrouped = [ext]

    if fileGroups is None:
        return ungrouped

    matchedGroups     = []
    matchedGroupFiles = []
    partialMatches    = 0

    for group in fileGroups:

        # The path has a recognised extension,
        # but it is not a member of this group
        if ext != '' and ext not in group:
            continue

        groupFiles = [base + s for s in group]
        exist      = [op.exists(f) for f in groupFiles]

        # None of the files in this group exist
        if not any(exist):
            continue

        # Complete groups and incomplete groups
        # are counted separately - if complete
        # groups were also counted as partial
        # matches, an unambiguous match could
        # never be distinguished from an
        # ambiguous one below.
        if all(exist):
            matchedGroups    .append(group)
            matchedGroupFiles.append(groupFiles)
        else:
            partialMatches += 1

    fullMatches = len(matchedGroups)

    # Path is not part of any group
    if fullMatches == 0 and partialMatches == 0:
        return ungrouped

    # If the given path is part of more
    # than one existing file group, we
    # can't resolve this ambiguity.
    if fullMatches > 1:
        raise PathError('Path is part of multiple '
                        'file groups: {}'.format(path))

    # If the unambiguous flag is not set,
    # we don't care about partial matches
    if not unambiguous:
        partialMatches = 0

    # The path appears to be part of
    # an incomplete group - this is
    # potentially ambiguous, so give
    # up (but see the partialMatches
    # clobber above).
    if partialMatches > 0:
        raise PathError('Path is part of an incomplete '
                        'file group: {}'.format(path))

    # The path is unambiguously part of
    # a complete file group - return the
    # members of that group
    if fullMatches == 1:
        if fullPaths: return matchedGroupFiles[0]
        else:         return matchedGroups[    0]

    return ungrouped

351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406

def removeDuplicates(paths, allowedExts=None, fileGroups=None):
    """Filters the given ``paths`` so that each file group (as defined by
    ``fileGroups``) is represented at most once in the result.

    As an example, imagine a directory which contains these files::

        001.hdr
        001.img
        002.hdr
        002.img
        003.hdr
        003.img

    Calling ``removeDuplicates`` in this way::

         paths       = ['001.img', '001.hdr',
                        '002.img', '002.hdr',
                        '003.img', '003.hdr']

         allowedExts = ['.img',  '.hdr']
         fileGroups  = [('.img', '.hdr')]

         removeDuplicates(paths, allowedExts, fileGroups)

    will give you back::

         ['001.img', '002.img', '003.img']

    Incomplete ``paths`` (i.e. paths without an extension) may be used when
    ``allowedExts`` is provided, so long as they can be resolved without
    ambiguity.

    A :exc:`PathError` will be raised if any of the ``paths`` do not exist,
    or if an incomplete path is ambiguous.

    :arg paths:       List of paths to reduce.

    :arg allowedExts: Allowed/recognised file extensions.

    :arg fileGroups:  Recognised file groups - see :func:`getFileGroup`.
    """

    unique = []

    for path in paths:

        groupFiles = getFileGroup(path, allowedExts, fileGroups)

        # The first file in the group stands in
        # for the whole group, and is only added
        # if no member of the group has already
        # been added. If there is no group at
        # all, the path stands in for itself.
        if groupFiles:
            representative = groupFiles[0]
            alreadySeen    = any(p in unique for p in groupFiles)
        else:
            representative = path
            alreadySeen    = path in unique

        if not alreadySeen:
            unique.append(representative)

    return unique