#!/usr/bin/env python
#
# path.py - Utility functions for working with file/directory paths.
#
# Author: Paul McCarthy <pauldmccarthy@gmail.com>
#
"""This module contains a few utility functions for working with file system
paths.


.. autosummary::
   :nosignatures:

   deepest
   shallowest
   addExt
   removeExt
   getExt
   splitExt
   getFileGroup
   removeDuplicates
   uniquePrefix
"""


26
import            glob
27
28
29
import os.path as op


30
class PathError(Exception):
    """``Exception`` class raised by the functions defined in this module
    when something goes wrong (for example when a file cannot be found,
    or when a file prefix is ambiguous).
    """


37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
def deepest(path, suffixes):
    """Finds the deepest directory which ends with one of the given
    sequence of suffixes, or returns ``None`` if no directories end
    with any of the suffixes.

    The ``path`` itself is tested first, followed by each of its parent
    directories in turn, so the first match found is the deepest one.

    :arg path:     Path to search. Surrounding white space and trailing
                   path separators are ignored.

    :arg suffixes: Iterable of suffix strings to test each directory
                   against. It is read exactly once, so one-shot
                   iterators are supported.

    :returns:      The matching path (without trailing separators), or
                   ``None`` if nothing matches.
    """

    # Every level of the search needs the suffixes,
    # so they are materialised once. str.endswith
    # accepts a tuple of candidates.
    suffixes = tuple(suffixes)
    path     = path.strip()

    # Stop once the root of the file system
    # (or the start of a relative path) is hit
    while path not in (op.sep, ''):

        path = path.rstrip(op.sep)

        if path.endswith(suffixes):
            return path

        parent = op.dirname(path)

        # op.dirname is no longer making progress
        # (e.g. at a drive root such as "C:"), so
        # there is nothing left to test.
        if parent == path:
            return None

        path = parent.strip()

    return None


def shallowest(path, suffixes):
    """Finds the shallowest directory which ends with one of the given
    sequence of suffixes, or returns ``None`` if no directories end
    with any of the suffixes.

    The ``path`` and all of its parent directories are tested; of those
    which match, the one closest to the root is returned.

    :arg path:     Path to search. Surrounding white space and trailing
                   path separators are ignored.

    :arg suffixes: Iterable of suffix strings to test each directory
                   against. It is read exactly once, so one-shot
                   iterators are supported.

    :returns:      The matching path (without trailing separators), or
                   ``None`` if nothing matches.
    """

    # Every level of the search needs the suffixes,
    # so they are materialised once. str.endswith
    # accepts a tuple of candidates.
    suffixes = tuple(suffixes)
    path     = path.strip()
    match    = None

    # Walk from the path up towards the root. Each
    # new match is shallower than the previous one,
    # so the last match recorded is the shallowest.
    while path not in (op.sep, ''):

        path = path.rstrip(op.sep)

        if path.endswith(suffixes):
            match = path

        parent = op.dirname(path)

        # op.dirname is no longer making progress
        # (e.g. at a drive root such as "C:"), so
        # there is nothing left to test.
        if parent == path:
            break

        path = parent.strip()

    return match
78
79


80
def addExt(prefix,
           allowedExts=None,
           mustExist=True,
           defaultExt=None,
           fileGroups=None):
    """Adds a file extension to the given file ``prefix``.

    If ``mustExist`` is False, and the file does not already have a
    supported extension, the default extension is appended and the new
    file name returned. If the prefix already has a supported extension,
    (or no default extension was given), it is returned unchanged.

    If ``mustExist`` is ``True`` (the default), the function checks to see
    if any files exist that have the given prefix, and a supported file
    extension.  A :exc:`PathError` is raised if:

       - No files exist with the given prefix and a supported extension.

       - More than one file exists with the given prefix and a supported
         extension, and the extensions of those files do not match
         exactly one of the ``fileGroups``.

    Otherwise the full file name is returned. When several files exist
    and their extensions match a file group, the file with the first
    extension in that group is returned.

    :arg prefix:      The file name prefix to modify.

    :arg allowedExts: List of allowed file extensions. This sequence is
                      never modified.

    :arg mustExist:   Whether the file must exist or not.

    :arg defaultExt:  Default file extension to use. It is treated as
                      an allowed extension.

    :arg fileGroups:  Recognised file groups - see :func:`getFileGroup`.
    """

    # Work on private copies - the default extension may
    # be appended below, and the caller's list must not
    # be modified as a side effect of calling this function.
    allowedExts = [] if allowedExts is None else list(allowedExts)
    fileGroups  = [] if fileGroups  is None else list(fileGroups)

    if defaultExt is not None and defaultExt not in allowedExts:
        allowedExts.append(defaultExt)

    # str.endswith accepts a tuple of candidates,
    # and returns False if that tuple is empty.
    hasExt = prefix.endswith(tuple(allowedExts))

    if not mustExist:

        # The provided file name already ends with a
        # supported extension, or there is nothing to add
        if hasExt or defaultExt is None:
            return prefix

        return prefix + defaultExt

    # If no allowed extensions were provided, or
    # the provided prefix already ends with a
    # supported extension, check to see that it
    # exists.
    if hasExt or not allowedExts:
        candidates = [(prefix, None)]

    # Otherwise, make a bunch of file names, one per
    # supported extension, remembering which extension
    # was used for each, and test to see which exist.
    else:
        candidates = [(prefix + ext, ext) for ext in allowedExts]

    existing = [(p, ext) for p, ext in candidates if op.isfile(p)]

    # Could not find any supported file
    # with the specified prefix
    if not existing:
        raise PathError('Could not find a supported file '
                        'with prefix "{}"'.format(prefix))

    # Exactly one file - no ambiguity
    if len(existing) == 1:
        return existing[0][0]

    # Ambiguity! More than one supported file with the
    # specified prefix (this can only happen when the
    # candidates were built from the allowed extensions).
    # See if the extensions of the files that exist
    # match any file group.
    suffixes = sorted(ext for _, ext in existing)
    matches  = [g for g in fileGroups if sorted(g) == suffixes]

    # Is there a match for a file suffix group?
    # If not, multiple files with the specified
    # prefix exist, and there is no way to
    # resolve the ambiguity.
    if len(matches) != 1:
        raise PathError('More than one file with '
                        'prefix "{}"'.format(prefix))

    # Otherwise, we return a path to the file which
    # matches the first suffix in the group.
    return prefix + matches[0][0]
182
183


184
185
def removeExt(filename, allowedExts=None):
    """Returns the base name of the given file name, i.e. the file name
    with its extension removed.  See :func:`splitExt`.

    :arg filename:    The file name to strip the extension from.

    :arg allowedExts: Allowed/recognised file extensions, passed straight
                      through to :func:`splitExt`.
    """

    base, _ = splitExt(filename, allowedExts)
    return base
188
189


190
191
def getExt(filename, allowedExts=None):
    """Returns the extension of the given file name (an empty string if it
    has none).  See :func:`splitExt`.

    :arg filename:    The file name to extract the extension from.

    :arg allowedExts: Allowed/recognised file extensions, passed straight
                      through to :func:`splitExt`.
    """

    _, ext = splitExt(filename, allowedExts)
    return ext
194
195


196
197
def splitExt(filename, allowedExts=None):
    """Returns the base name and the extension from the given file name.

    If ``allowedExts`` is ``None``, this function is equivalent to using::

        os.path.splitext(filename)

    If ``allowedExts`` is provided, but the file does not end with an allowed
    extension, a tuple containing ``(filename, '')`` is returned.

    If the file name ends with more than one of the allowed extensions, the
    one which appears first in ``allowedExts`` is used - list longer
    extensions (e.g. ``.nii.gz``) before shorter ones (e.g. ``.gz``) if
    both are allowed.

    :arg filename:    The file name to split.

    :arg allowedExts: Allowed/recognised file extensions.

    :returns:         A tuple containing ``(base, extension)``.
    """

    # If allowedExts is not specified,
    # we just use op.splitext
    if allowedExts is None:
        return op.splitext(filename)

    # Otherwise, split on the first allowed
    # extension that the file name ends with
    for ext in allowedExts:
        if filename.endswith(ext):

            # Slice from the front rather than with a
            # negative index, as filename[:-0] would be
            # empty for a zero-length extension.
            split = len(filename) - len(ext)
            return filename[:split], filename[split:]

    # No match, assume there is no extension
    return filename, ''


231
232
233
234
235
def getFileGroup(path,
                 allowedExts=None,
                 fileGroups=None,
                 fullPaths=True,
                 unambiguous=False):
    """If the given ``path`` is part of a ``fileGroup``, returns a list
    containing the paths to all other files in the group (including the
    ``path`` itself).

    If the ``path`` does not appear to be part of a file group, or appears to
    be part of an incomplete file group, a list containing only the ``path``
    is returned (but see the ``unambiguous`` argument).

    If the ``path`` does not exist, or appears to be part of more than one
    complete file group, a :exc:`PathError` is raised.

    File groups can be used to specify a collection of file suffixes which
    should always exist alongside each other. This can be used to resolve
    ambiguity when multiple files exist with the same ``prefix`` and supported
    extensions (e.g. ``file.hdr`` and ``file.img``). The file groups are
    specified as a list of sequences, for example::

        [('.img',    '.hdr'),
         ('.img.gz', '.hdr.gz')]

    If you specify ``fileGroups=[('.img', '.hdr')]`` and ``prefix='file'``, and
    both ``file.img`` and ``file.hdr`` exist, the :func:`addExt` function would
    return ``file.img`` (i.e. the file which matches the first extension in
    the group).

    Similarly, if you call the :func:`.imcp.imcp` or :func:`.imcp.immv`
    functions with the above parameters, both ``file.img`` and ``file.hdr``
    will be moved.

    .. note:: The primary use-case of file groups is to resolve ambiguity with
              respect to NIFTI and ANALYSE75 image pairs. By specifying
              ``fileGroups=[('.img', '.hdr'), ('.img.gz', '.hdr.gz')]``, the
              :func:`addExt`, :func:`.imcp.immv` and :func:`.imcp.imcp`
              functions are able to figure out what you mean when you specify
              ``file``, and both ``file.hdr`` and ``file.img`` (or
              ``file.hdr.gz`` and ``file.img.gz``) exist.

    :arg path:        Path to the file. It is passed through :func:`addExt`
                      (with ``mustExist=True``) before being examined.

    :arg allowedExts: Allowed/recognised file extensions.

    :arg fileGroups:  Recognised file groups.

    :arg fullPaths:   If ``True`` (the default), full file paths (relative to
                      the ``path``) are returned. Otherwise, only the file
                      extensions in the group are returned.

    :arg unambiguous: Defaults to ``False``. If ``True``, a :exc:`PathError`
                      is raised when the path is part of an incomplete
                      group, or when it is part of one complete group but
                      also matches another group. If ``False``, such
                      partial matches are ignored - an incomplete group
                      results in just the ``path`` being returned.
    """

    path      = addExt(path, allowedExts, mustExist=True, fileGroups=fileGroups)
    base, ext = splitExt(path, allowedExts)

    # Result used whenever the path is
    # not resolved to a file group.
    def pathOnly():
        if fullPaths: return [path]
        else:         return [ext]

    if fileGroups is None:
        return pathOnly()

    # (group, files) pairs for every group
    # for which all of the files exist
    complete = []

    # Number of groups for which at least one
    # file exists (complete groups included)
    partialMatches = 0

    for group in fileGroups:

        # Only consider groups which contain the
        # extension of this path (all groups are
        # considered if the path has no extension)
        if ext != '' and ext not in group:
            continue

        groupFiles = [base + s for s in group]
        exist      = [op.exists(f) for f in groupFiles]

        if any(exist): partialMatches += 1
        if all(exist): complete.append((group, groupFiles))

    fullMatches = len(complete)

    # Path is not part of any group
    if partialMatches == 0:
        return pathOnly()

    # If the given path is part of more
    # than one existing file group, we
    # can't resolve this ambiguity.
    if fullMatches > 1:
        raise PathError('Path is part of multiple '
                        'file groups: {}'.format(path))

    # If the unambiguous flag is not set,
    # we don't care about partial matches
    if not unambiguous:
        partialMatches = 0

    # The path is unambiguously part of a
    # complete file group - return that group
    if fullMatches == 1 and partialMatches <= 1:
        group, groupFiles = complete[0]
        if fullPaths: return groupFiles
        else:         return group

    # The path appears to be part of an incomplete
    # group - this is potentially ambiguous, so give
    # up. This is only reachable when unambiguous is
    # True, as the count is reset to zero otherwise.
    if partialMatches > 0:
        raise PathError('Path is part of an incomplete '
                        'file group: {}'.format(path))

    return pathOnly()

353
354

def removeDuplicates(paths, allowedExts=None, fileGroups=None):
    """Reduces the list of ``paths`` down to those which are unique with
    respect to the specified ``fileGroups``.

    For example, if you have a directory containing::

        001.hdr
        001.img
        002.hdr
        002.img
        003.hdr
        003.img

    And you call ``removeDuplicates`` like so::

         paths       = ['001.img', '001.hdr',
                        '002.img', '002.hdr',
                        '003.img', '003.hdr']

         allowedExts = ['.img',  '.hdr']
         fileGroups  = [('.img', '.hdr')]

         removeDuplicates(paths, allowedExts, fileGroups)

    The returned list will be::

         ['001.img', '002.img', '003.img']

    If you provide ``allowedExts``, you may specify incomplete ``paths`` (i.e.
    without extensions), as long as there are no path ambiguities.

    A :exc:`PathError` will be raised if any of the ``paths`` do not exist,
    or if there are any ambiguities with respect to incomplete paths.

    :arg paths:       List of paths to reduce.

    :arg allowedExts: Allowed/recognised file extensions.

    :arg fileGroups:  Recognised file groups - see :func:`getFileGroup`.

    :returns:         A new list, in the order in which each file (or
                      file group) was first encountered.
    """

    unique = []

    # Mirrors the contents of unique,
    # for fast membership tests
    seen = set()

    for path in paths:

        groupFiles = getFileGroup(path, allowedExts, fileGroups)

        # Each file (or file group) is represented
        # by the first path that getFileGroup returns.
        # Should it return nothing, fall back to the
        # path as it was given.
        if len(groupFiles) == 0:
            if path in seen:
                continue
            keep = path

        elif any(p in seen for p in groupFiles):
            continue

        else:
            keep = groupFiles[0]

        unique.append(keep)
        seen  .add(   keep)

    return unique
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441


def uniquePrefix(path):
    """Return the shortest prefix for the given file name which unambiguously
    identifies it, relative to the other files in the same directory.

    The prefix is grown one character at a time, starting from the first
    character of the file name, until exactly one file in the directory
    starts with it.

    Raises a :exc:`ValueError` if a unique prefix could not be found. This
    happens if the ``path`` has no file name component, if no file starts
    with the prefix, or if the whole file name is itself a prefix of
    another file in the same directory.

    .. note:: The prefix is passed to ``glob.glob`` as-is, so any glob
              special characters that it contains are interpreted as
              such.

    :arg path: Path to the file.

    :returns:  The directory name of ``path`` joined with the shortest
               unique file name prefix.
    """

    dirname, filename = op.split(path)

    # There is no first character to start
    # from (e.g. the path ends with a separator)
    if filename == '':
        raise ValueError('No unique prefix for {}'.format(path))

    idx    = 0
    prefix = op.join(dirname, filename[0])
    hits   = glob.glob('{}*'.format(prefix))

    # Keep extending the prefix until
    # exactly one file matches it
    while len(hits) != 1:

        # Nothing matches, or we have run
        # out of characters to add
        if len(hits) == 0 or idx >= len(filename) - 1:
            raise ValueError('No unique prefix for {}'.format(filename))

        # Not unique - the new matches are
        # a subset of the previous ones
        idx    += 1
        prefix += filename[idx]
        hits    = [h for h in hits if h.startswith(prefix)]

    return prefix