#!/usr/bin/env python
#
# path.py - Utility functions for working with file/directory paths.
#
# Author: Paul McCarthy <pauldmccarthy@gmail.com>
#
"""This module contains a few utility functions for working with file system
paths.


.. autosummary::
   :nosignatures:

   deepest
15
16
17
   shallowest
   addExt
   removeExt
18
   getExt
19
20
   splitExt
   getFileGroup
21
   removeDuplicates
22
   uniquePrefix
23
24
25
"""


26
import            glob
27
28
29
import os.path as op


30
class PathError(Exception):
    """Custom ``Exception`` type which the functions in this module raise
    to report a failure.
    """


37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
def deepest(path, suffixes):
    """Finds the deepest directory which ends with one of the given
    sequence of suffixes, or returns ``None`` if no directories end
    with any of the suffixes.

    Leading/trailing whitespace and trailing path separators are removed
    from each candidate before it is tested.

    :arg path:     Path to search, starting at its final component and
                   walking up towards the root.

    :arg suffixes: Sequence of suffix strings to test each candidate
                   against.
    """

    suffixes = tuple(suffixes)

    while True:

        path = path.strip()

        # We've reached the root of the file
        # system (or ran out of components)
        if path == op.sep or path == '':
            return None

        path = path.rstrip(op.sep)

        if path.endswith(suffixes):
            return path

        parent = op.dirname(path)

        # dirname can no longer shorten the
        # path (e.g. a bare drive specifier)
        # - stop, rather than looping forever
        if parent == path:
            return None

        path = parent


def shallowest(path, suffixes):
    """Finds the shallowest directory which ends with one of the given
    sequence of suffixes, or returns ``None`` if no directories end
    with any of the suffixes.

    Leading/trailing whitespace and trailing path separators are removed
    from each candidate before it is tested.

    :arg path:     Path to search. Every ancestor of the path (and the
                   path itself) is tested.

    :arg suffixes: Sequence of suffix strings to test each candidate
                   against.
    """

    suffixes = tuple(suffixes)
    match    = None

    while True:

        path = path.strip()

        # We've reached the root of the file system
        if path == op.sep or path == '':
            return match

        path = path.rstrip(op.sep)

        # We are walking upwards, so every new
        # match is shallower than the previous one
        if path.endswith(suffixes):
            match = path

        parent = op.dirname(path)

        # dirname can no longer shorten the
        # path (e.g. a bare drive specifier)
        # - stop, rather than looping forever
        if parent == path:
            return match

        path = parent
78
79


80
def addExt(prefix,
           allowedExts=None,
           mustExist=True,
           defaultExt=None,
           fileGroups=None,
           unambiguous=True):
    """Adds a file extension to the given file ``prefix``.

    If ``mustExist`` is False, and the file does not already have a
    supported extension, the default extension is appended and the new
    file name returned. If the prefix already has a supported extension,
    it is returned unchanged.

    If ``mustExist`` is ``True`` (the default), the function checks to see
    if any files exist that have the given prefix, and a supported file
    extension.  A :exc:`PathError` is raised if:

       - No files exist with the given prefix and a supported extension.

       - ``unambiguous is True``, more than one file exists with the given
         prefix and a supported extension, and the extensions of those
         files do not correspond to exactly one of the ``fileGroups``.

    Otherwise the full file name is returned.

    :arg prefix:      The file name prefix to modify.

    :arg allowedExts: List of allowed file extensions. This sequence is
                      never modified.

    :arg mustExist:   Whether the file must exist or not.

    :arg defaultExt:  Default file extension to use. It is treated as an
                      allowed extension even if it is not present in
                      ``allowedExts``.

    :arg fileGroups:  Recognised file groups - see :func:`getFileGroup`.

    :arg unambiguous: If ``True`` (the default), and more than one file
                      exists with the specified ``prefix``, a
                      :exc:`PathError` is raised (unless the ambiguity can
                      be resolved via ``fileGroups``). Otherwise, a list
                      containing *all* matching files is returned.
    """

    # Take a private copy of the allowed extensions,
    # so that adding the default extension below does
    # not modify the sequence passed in by the caller.
    if allowedExts is None: allowedExts = []
    else:                   allowedExts = list(allowedExts)
    if fileGroups  is None: fileGroups  = []

    if defaultExt is not None and defaultExt not in allowedExts:
        allowedExts.append(defaultExt)

    # Does the provided file name already
    # end with a supported extension?
    hasExt = any(prefix.endswith(ext) for ext in allowedExts)

    if not mustExist:
        if hasExt or defaultExt is None:
            return prefix
        return prefix + defaultExt

    # If no allowed extensions were
    # provided, or the provided prefix
    # already ends with a supported
    # extension, check to see that it
    # exists.
    if len(allowedExts) == 0 or hasExt:
        allPaths = [prefix]

    # Otherwise, make a bunch of file names, one per
    # supported extension, and test to see if exactly
    # one of them exists.
    else:
        allPaths = [prefix + ext for ext in allowedExts]

    allPaths = [p for p in allPaths if op.isfile(p)]

    # Could not find any supported file
    # with the specified prefix
    if len(allPaths) == 0:
        raise PathError('Could not find a supported file '
                        'with prefix "{}"'.format(prefix))

    # If ambiguity is ok, return
    # all matching paths
    if not unambiguous:
        return allPaths

    # Ambiguity is not ok! More than
    # one supported file with the
    # specified prefix.
    if len(allPaths) > 1:

        # Get the suffixes of all existing
        # files, and see if they match any
        # file groups.
        suffixes = sorted(getExt(p, allowedExts) for p in allPaths)
        matches  = [g for g in fileGroups if sorted(g) == suffixes]

        # Is there a match for a file suffix group?
        # If not, multiple files with the specified
        # prefix exist, and there is no way to
        # resolve the ambiguity.
        if len(matches) != 1:
            raise PathError('More than one file with '
                            'prefix "{}"'.format(prefix))

        # Otherwise, we return a path
        # to the file which matches the
        # first suffix in the group.
        return prefix + matches[0][0]

    # Return the full file name of the
    # supported file that was found
    return allPaths[0]
194
195


196
197
def removeExt(filename, allowedExts=None):
    """Strips the extension from the given file name, returning the first
    element of the tuple produced by :func:`splitExt`.
    """

    base, _ = splitExt(filename, allowedExts)
    return base
200
201


202
203
def getExt(filename, allowedExts=None):
    """Extracts the extension from the given file name, returning the second
    element of the tuple produced by :func:`splitExt`.
    """

    _, ext = splitExt(filename, allowedExts)
    return ext
206
207


208
209
def splitExt(filename, allowedExts=None):
    """Returns the base name and the extension from the given file name.

    If ``allowedExts`` is ``None``, this function is equivalent to using::

        os.path.splitext(filename)

    If ``allowedExts`` is provided, but the file does not end with an allowed
    extension, a tuple containing ``(filename, '')`` is returned. If the file
    name ends with more than one of the allowed extensions, the one which
    appears first in ``allowedExts`` is used.

    :arg filename:    The file name to split.

    :arg allowedExts: Allowed/recognised file extensions.

    :returns:         A tuple containing ``(base, extension)``.
    """

    # If allowedExts is not specified,
    # we just use op.splitext
    if allowedExts is None:
        return op.splitext(filename)

    # Otherwise, split on the first
    # allowed extension which matches
    for ext in allowedExts:
        if filename.endswith(ext):

            # Slice from an explicit index - a
            # negative slice would misbehave for
            # a zero-length extension, as
            # filename[:-0] is an empty string.
            cut = len(filename) - len(ext)
            return filename[:cut], filename[cut:]

    # No match, assume there is no extension
    return filename, ''


243
244
245
246
247
def getFileGroup(path,
                 allowedExts=None,
                 fileGroups=None,
                 fullPaths=True,
                 unambiguous=False):
    """Works out which file group (if any) the given ``path`` belongs to,
    and returns a list of all the files in that group (the ``path`` itself
    included).

    A list which contains just the ``path`` is returned when the ``path``
    does not belong to any file group, or (with ``unambiguous=False``) when
    it only belongs to an incomplete file group.

    A :exc:`PathError` is raised if the ``path`` does not exist, or if it
    is part of more than one complete file group.

    A file group is a collection of file suffixes which are expected to
    exist alongside each other. File groups allow the ambiguity which
    arises when several files share a ``prefix`` and have supported
    extensions (e.g. ``file.hdr`` and ``file.img``) to be resolved. They
    are given as a list of sequences, for example::

        [('.img',    '.hdr'),
         ('.img.gz', '.hdr.gz')]

    With ``fileGroups=[('.img', '.hdr')]`` and ``prefix='file'``, and with
    both ``file.img`` and ``file.hdr`` present, :func:`addExt` will return
    ``file.img`` (the file matching the first extension in the group).

    Likewise, calling :func:`.imcp.imcp` or :func:`.imcp.immv` with these
    parameters will result in both ``file.img`` and ``file.hdr`` being moved.

    .. note:: File groups primarily exist to resolve ambiguity with respect
              to NIFTI and ANALYSE75 image pairs. When given
              ``fileGroups=[('.img', '.hdr'), ('.img.gz', '.hdr.gz')]``,
              the :func:`addExt`, :func:`.imcp.immv` and :func:`.imcp.imcp`
              functions can work out what is meant by ``file`` when both
              ``file.hdr`` and ``file.img`` (or ``file.hdr.gz`` and
              ``file.img.gz``) exist.

    :arg path:        Path to the file. Must contain the file extension.

    :arg allowedExts: Allowed/recognised file extensions.

    :arg fileGroups:  Recognised file groups.

    :arg fullPaths:   If ``True`` (the default), full file paths (relative to
                      the ``path``) are returned. Otherwise, only the file
                      extensions in the group are returned.

    :arg unambiguous: Defaults to ``False``, in which case partially
                      existing groups are ignored. If ``True``, a
                      :exc:`PathError` is raised when the path matches an
                      incomplete group, or matches more than one group
                      which (partially) exists.
    """

    path = addExt(path, allowedExts, mustExist=True, fileGroups=fileGroups)
    base, ext = splitExt(path, allowedExts)

    # What gets returned whenever the path
    # cannot be tied to a complete file group
    alone = [path] if fullPaths else [ext]

    if fileGroups is None:
        return alone

    complete = []  # (group, files) for each group which fully exists
    npartial = 0   # number of groups with at least one existing file

    for group in fileGroups:

        # The extension of the path is
        # not a member of this group
        if ext != '' and ext not in group:
            continue

        files   = [base + suffix for suffix in group]
        present = [op.exists(f) for f in files]

        if any(present):
            npartial += 1

        if all(present):
            complete.append((group, files))

    # Path is not part of any group
    if npartial == 0:
        return alone

    # The path belongs to several complete
    # groups - this cannot be resolved.
    if len(complete) > 1:
        raise PathError('Path is part of multiple '
                        'file groups: {}'.format(path))

    # Exactly one complete group. Partially existing
    # groups only count against it when the caller
    # has asked for an unambiguous result.
    if len(complete) == 1 and not (unambiguous and npartial > 1):
        group, files = complete[0]
        return files if fullPaths else group

    # At least one group partially exists
    # here, which is only a problem when
    # an unambiguous result was requested.
    if unambiguous:
        raise PathError('Path is part of an incomplete '
                        'file group: {}'.format(path))

    return alone

366
367

def removeDuplicates(paths, allowedExts=None, fileGroups=None):
    """Reduces the list of ``paths`` down to those which are unique with
    respect to the specified ``fileGroups``.

    For example, if you have a directory containing::

        001.hdr
        001.img
        002.hdr
        002.img
        003.hdr
        003.img

    And you call ``removeDuplicates`` like so::

         paths       = ['001.img', '001.hdr',
                        '002.img', '002.hdr',
                        '003.img', '003.hdr']

         allowedExts = ['.img',  '.hdr']
         fileGroups  = [('.img', '.hdr')]

         removeDuplicates(paths, allowedExts, fileGroups)

    The returned list will be::

         ['001.img', '002.img', '003.img']

    If you provide ``allowedExts``, you may specify incomplete ``paths`` (i.e.
    without extensions), as long as there are no path ambiguities.

    A :exc:`PathError` will be raised if any of the ``paths`` do not exist,
    or if there are any ambiguities with respect to incomplete paths.

    :arg paths:       List of paths to reduce.

    :arg allowedExts: Allowed/recognised file extensions.

    :arg fileGroups:  Recognised file groups - see :func:`getFileGroup`.

    :returns:         A list containing the first file of each distinct
                      group, in the order in which the groups were first
                      encountered.
    """

    unique = []

    # Mirror of the unique list, used
    # for constant-time membership tests
    seen = set()

    for path in paths:

        groupFiles = getFileGroup(path, allowedExts, fileGroups)

        # None of the files in this group
        # have been added yet - the group
        # is represented by its first file
        if seen.isdisjoint(groupFiles):
            first = groupFiles[0]
            unique.append(first)
            seen.add(first)

    return unique
418
419
420
421
422
423


def uniquePrefix(path):
    """Return the shortest prefix for the given file name which unambiguously
    identifies it, relative to the other files in the same directory.

    The prefix is grown one character at a time, starting from the first
    character of the file name, until exactly one file in the directory
    starts with it.

    Raises a :exc:`PathError` if a unique prefix could not be found - for
    example if the path does not exist, has an empty file name, or if the
    whole file name is itself a prefix of another file in the directory.

    :arg path: Path to the file.

    :returns:  The directory of ``path`` joined with the unique file name
               prefix.
    """

    dirname, filename = op.split(path)

    # Nothing to build a prefix from
    # (e.g. path ends with a separator)
    if filename == '':
        raise PathError('No unique prefix for {}'.format(path))

    prefix = op.join(dirname, filename[0])

    # Escape the prefix so that any glob
    # meta-characters in the path ('*',
    # '?', '[') are matched literally.
    hits = glob.glob('{}*'.format(glob.escape(prefix)))

    for char in filename[1:]:

        # Either found a unique prefix,
        # or nothing matches at all
        if len(hits) <= 1:
            break

        # Not unique - extend the prefix and
        # discard hits which no longer match
        prefix = prefix + char
        hits   = [h for h in hits if h.startswith(prefix)]

    # Should never happen if path is valid
    if len(hits) != 1:
        raise PathError('No unique prefix for {}'.format(filename))

    return prefix