path.py 13.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
#!/usr/bin/env python
#
# path.py - Utility functions for working with file/directory paths.
#
# Author: Paul McCarthy <pauldmccarthy@gmail.com>
#
"""This module contains a few utility functions for working with file system
paths.


.. autosummary::
   :nosignatures:

   deepest
   shallowest
   hasExt
   addExt
   removeExt
   getExt
   splitExt
   getFileGroup
   removeDuplicates
   uniquePrefix
"""


27
import            glob
28
29
30
import os.path as op


31
class PathError(Exception):
    """Error type raised by the functions in this module to signal that a
    path could not be resolved (e.g. no matching file, or an ambiguous
    match).
    """


38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
def deepest(path, suffixes):
    """Walks from ``path`` towards the file system root, and returns the
    first (i.e. deepest) directory whose name ends with one of the given
    ``suffixes``. Returns ``None`` if the root is reached without a match.

    :arg path:     Path to start searching from.

    :arg suffixes: Sequence of suffixes to test each directory against.
    """

    current = path

    while True:

        current = current.strip()

        # Ran out of path components
        if current in (op.sep, ''):
            return None

        current = current.rstrip(op.sep)

        for suffix in suffixes:
            if current.endswith(suffix):
                return current

        # No match at this level - move up one directory
        current = op.dirname(current)


def shallowest(path, suffixes):
    """Walks from ``path`` towards the file system root, and returns the
    last (i.e. shallowest) directory encountered whose name ends with one
    of the given ``suffixes``. Returns ``None`` if no directory matches.

    :arg path:     Path to start searching from.

    :arg suffixes: Sequence of suffixes to test each directory against.
    """

    match   = None
    current = path

    while True:

        current = current.strip()

        # We've reached the root of the file system -
        # whatever matched most recently is the shallowest
        if current in (op.sep, ''):
            return match

        current = current.rstrip(op.sep)

        # Matches found closer to the root
        # override those found further down
        for suffix in suffixes:
            if current.endswith(suffix):
                match = current
                break

        current = op.dirname(current)
79
80


81
82
83
84
85
86
87
def hasExt(path, allowedExts):
    """Convenience function which returns ``True`` if the given ``path``
    ends with any of the given ``allowedExts``, ``False`` otherwise.

    :arg path:        Path/file name to test.

    :arg allowedExts: Sequence of file extensions to test against.
    """
    # str has no ``endsWith`` method - the correct name is ``endswith``
    return any(path.endswith(e) for e in allowedExts)


88
def addExt(prefix,
           allowedExts=None,
           mustExist=True,
           defaultExt=None,
           fileGroups=None,
           unambiguous=True):
    """Adds a file extension to the given file ``prefix``.

    If ``mustExist`` is False, and the file does not already have a
    supported extension, the default extension is appended and the new
    file name returned. If the prefix already has a supported extension,
    it is returned unchanged.

    If ``mustExist`` is ``True`` (the default), the function checks to see
    if any files exist that have the given prefix, and a supported file
    extension.  A :exc:`PathError` is raised if:

       - No files exist with the given prefix and a supported extension.

       - ``unambiguous is True``, more than one file exists with the given
         prefix and a supported extension, and the extensions of those
         files do not match exactly one of the ``fileGroups``.

    Otherwise the full file name is returned.

    The ``allowedExts`` sequence passed in by the caller is never modified.

    :arg prefix:      The file name prefix to modify.

    :arg allowedExts: List of allowed file extensions.

    :arg mustExist:   Whether the file must exist or not.

    :arg defaultExt:  Default file extension to use.

    :arg fileGroups:  Recognised file groups - see :func:`getFileGroup`.

    :arg unambiguous: If ``True`` (the default), and more than one file
                      exists with the specified ``prefix``, a
                      :exc:`PathError` is raised. Otherwise, a list
                      containing *all* matching files is returned.
    """

    # Take a private copy of the allowed extensions,
    # as the default extension may be appended below
    # and we must not modify the caller's sequence.
    if allowedExts is None: allowedExts = []
    else:                   allowedExts = list(allowedExts)
    if fileGroups  is None: fileGroups  = []

    if defaultExt is not None and defaultExt not in allowedExts:
        allowedExts.append(defaultExt)

    if not mustExist:

        # the provided file name already
        # ends with a supported extension
        if hasExt(prefix, allowedExts):
            return prefix

        if defaultExt is not None: return prefix + defaultExt
        else:                      return prefix

    # If no allowed extensions were
    # provided, or the provided prefix
    # already ends with a supported
    # extension, check to see that it
    # exists.
    if len(allowedExts) == 0 or hasExt(prefix, allowedExts):
        allPaths = [prefix]

    # Otherwise, make a bunch of file names, one per
    # supported extension, and test to see if exactly
    # one of them exists.
    else:
        allPaths = [prefix + ext for ext in allowedExts]

    allPaths = [p for p in allPaths if op.isfile(p)]
    nexists  = len(allPaths)

    # Could not find any supported file
    # with the specified prefix
    if nexists == 0:
        raise PathError('Could not find a supported file '
                        'with prefix "{}"'.format(prefix))

    # If ambiguity is ok, return
    # all matching paths
    elif not unambiguous:
        return allPaths

    # Ambiguity is not ok! More than
    # one supported file with the
    # specified prefix.
    elif nexists > 1:

        # Get the suffixes of all the files
        # that exist, and see if they match
        # any file groups.
        suffixes     = sorted(getExt(p, allowedExts) for p in allPaths)
        groupMatches = [suffixes == sorted(g) for g in fileGroups]

        # Is there a match for a file suffix group?
        # If not, multiple files with the specified
        # prefix exist, and there is no way to
        # resolve the ambiguity.
        if sum(groupMatches) != 1:
            raise PathError('More than one file with '
                            'prefix "{}"'.format(prefix))

        # Otherwise, we return a path
        # to the file which matches the
        # first suffix in the group.
        groupIdx = groupMatches.index(True)
        allPaths = [prefix + fileGroups[groupIdx][0]]

    # Return the full file name of the
    # supported file that was found
    return allPaths[0]
201
202


203
204
def removeExt(filename, allowedExts=None):
    """Strips the extension from the given file name, returning only the
    base name. The split is performed by :func:`splitExt`.

    :arg filename:    The file name to strip.

    :arg allowedExts: Allowed/recognised file extensions.
    """
    base, _ = splitExt(filename, allowedExts)
    return base
207
208


209
210
def getExt(filename, allowedExts=None):
    """Returns only the extension of the given file name. The split is
    performed by :func:`splitExt`.

    :arg filename:    The file name to inspect.

    :arg allowedExts: Allowed/recognised file extensions.
    """
    _, ext = splitExt(filename, allowedExts)
    return ext
213
214


215
216
def splitExt(filename, allowedExts=None):
    """Returns the base name and the extension from the given file name.

    If ``allowedExts`` is ``None``, this function is equivalent to using::

        os.path.splitext(filename)

    If ``allowedExts`` is provided, the first extension in ``allowedExts``
    which the file name ends with is used to split it. If the file does not
    end with an allowed extension, a tuple containing ``(filename, '')`` is
    returned.

    Empty strings in ``allowedExts`` are ignored.

    :arg filename:    The file name to split.

    :arg allowedExts: Allowed/recognised file extensions.

    :returns:         A tuple containing ``(base, extension)``.
    """

    # If allowedExts is not specified,
    # we just use op.splitext
    if allowedExts is None:
        return op.splitext(filename)

    # Otherwise, split on the first allowed extension
    # that matches. Empty extensions are skipped - every
    # file name "ends with" the empty string, and slicing
    # with a length of zero would put the whole file name
    # into the extension.
    for ext in allowedExts:
        if ext and filename.endswith(ext):
            extLen = len(ext)
            return filename[:-extLen], filename[-extLen:]

    # No match, assume there is no extension
    return filename, ''


250
251
252
253
254
def getFileGroup(path,
                 allowedExts=None,
                 fileGroups=None,
                 fullPaths=True,
                 unambiguous=False):
    """Works out which file group (if any) the given ``path`` belongs to,
    and returns the members of that group.

    A file group is a collection of file suffixes which should always exist
    alongside each other. Groups can be used to resolve ambiguity when
    multiple files exist with the same ``prefix`` and supported extensions
    (e.g. ``file.hdr`` and ``file.img``). The file groups are specified as
    a list of sequences, for example::

        [('.img',    '.hdr'),
         ('.img.gz', '.hdr.gz')]

    If you specify ``fileGroups=[('.img', '.hdr')]`` and ``prefix='file'``,
    and both ``file.img`` and ``file.hdr`` exist, the :func:`addExt`
    function would return ``file.img`` (i.e. the file which matches the
    first extension in the group).

    Similarly, if you call the :func:`.imcp.imcp` or :func:`.imcp.immv`
    functions with the above parameters, both ``file.img`` and ``file.hdr``
    will be moved.

    .. note:: The primary use-case of file groups is to resolve ambiguity
              with respect to NIFTI and ANALYSE75 image pairs. By specifying
              ``fileGroups=[('.img', '.hdr'), ('.img.gz', '.hdr.gz')]``, the
              :func:`addExt`, :func:`.imcp.immv` and :func:`.imcp.imcp`
              functions are able to figure out what you mean when you
              specify ``file``, and both ``file.hdr`` and ``file.img`` (or
              ``file.hdr.gz`` and ``file.img.gz``) exist.

    The ``path`` is first resolved with :func:`addExt` (with
    ``mustExist=True``), so a :exc:`PathError` is raised if it cannot be
    resolved to an existing file. The result is then determined as follows:

      - If ``fileGroups`` is ``None``, or no file from any candidate group
        exists, a list containing only the resolved path is returned.

      - If all files of more than one candidate group exist, a
        :exc:`PathError` is raised.

      - If all files of exactly one candidate group exist, the members of
        that group are returned (unless ``unambiguous`` is ``True`` and
        files from more than one candidate group exist, in which case a
        :exc:`PathError` is raised).

      - If only incomplete groups were found, a list containing only the
        resolved path is returned, or a :exc:`PathError` is raised if
        ``unambiguous`` is ``True``.

    :arg path:        Path to the file. Must contain the file extension.

    :arg allowedExts: Allowed/recognised file extensions.

    :arg fileGroups:  Recognised file groups.

    :arg fullPaths:   If ``True`` (the default), full file paths (relative to
                      the ``path``) are returned. Otherwise, only the file
                      extensions in the group are returned.

    :arg unambiguous: Defaults to ``False``. If ``True``, a
                      :exc:`PathError` is raised when the path is part of
                      an incomplete group, or when files from more than one
                      candidate group exist.
    """

    resolved  = addExt(path, allowedExts, mustExist=True,
                       fileGroups=fileGroups)
    base, ext = splitExt(resolved, allowedExts)

    def alone():
        # Result used whenever the path
        # is not resolved to a file group
        return [resolved] if fullPaths else [ext]

    if fileGroups is None:
        return alone()

    # (group, files) pairs for every group
    # for which all member files exist
    complete = []

    # Number of groups for which
    # at least one member file exists
    nPartial = 0

    for group in fileGroups:

        # Only consider groups which
        # contain the extension of the path
        if ext != '' and ext not in group:
            continue

        candidates = [base + suffix for suffix in group]
        present    = [op.exists(c) for c in candidates]

        if any(present):
            nPartial += 1

        if all(present):
            complete.append((group, candidates))

    nFull = len(complete)

    # Path is not part of any group
    if nPartial == 0:
        return alone()

    # The path is part of more than
    # one existing file group - this
    # ambiguity cannot be resolved.
    if nFull > 1:
        raise PathError('Path is part of multiple '
                        'file groups: {}'.format(resolved))

    # Partial matches only matter
    # when the unambiguous flag is set
    if not unambiguous:
        nPartial = 0

    # The path is unambiguously part of
    # a complete file group - return
    # the members of that group
    if nFull == 1 and nPartial <= 1:
        group, files = complete[0]
        return files if fullPaths else group

    # The path appears to be part of an
    # incomplete group, and the caller
    # asked for this to be an error
    if nPartial > 0:
        raise PathError('Path is part of an incomplete '
                        'file group: {}'.format(resolved))

    return alone()

373
374

def removeDuplicates(paths, allowedExts=None, fileGroups=None):
    """Collapses the given ``paths`` so that each file group is represented
    by a single path.

    For example, given a directory which contains::

        001.hdr
        001.img
        002.hdr
        002.img
        003.hdr
        003.img

    the following call::

         paths       = ['001.img', '001.hdr',
                        '002.img', '002.hdr',
                        '003.img', '003.hdr']

         allowedExts = ['.img',  '.hdr']
         fileGroups  = [('.img', '.hdr')]

         removeDuplicates(paths, allowedExts, fileGroups)

    will return::

         ['001.img', '002.img', '003.img']

    If ``allowedExts`` is provided, the ``paths`` may be incomplete (i.e.
    specified without extensions), as long as they are not ambiguous.

    Each path is resolved via :func:`getFileGroup`, so a :exc:`PathError`
    is raised if any of the ``paths`` do not exist, or if an incomplete
    path is ambiguous.

    :arg paths:       List of paths to reduce.

    :arg allowedExts: Allowed/recognised file extensions.

    :arg fileGroups:  Recognised file groups - see :func:`getFileGroup`.
    """

    kept = []

    for candidate in paths:

        members = getFileGroup(candidate, allowedExts, fileGroups)

        # Some member of this group has
        # already been added to the result
        if any(m in kept for m in members):
            continue

        # The first member represents the group
        kept.append(members[0])

    return kept
425
426
427
428
429
430


def uniquePrefix(path):
    """Return the shortest prefix of the given file name which unambiguously
    identifies it, relative to the other files in the same directory.

    The returned prefix includes the directory component of ``path``.

    Raises a :exc:`PathError` if a unique prefix could not be found. This
    happens if ``path`` has no file name component, if no file matches, or
    if the full file name is itself a prefix of another file name in the
    same directory.

    .. note:: The prefix is passed to ``glob.glob`` without escaping, so
              any glob metacharacters in ``path`` are interpreted as
              patterns.

    :arg path: Path to the file to find a unique prefix for.
    """

    dirname, filename = op.split(path)

    # A path with no file name component (e.g. an
    # empty string, or one ending with a separator)
    # has nothing to build a prefix from.
    if filename == '':
        raise PathError('No unique prefix for {}'.format(path))

    idx    = 0
    prefix = op.join(dirname, filename[0])
    hits   = glob.glob('{}*'.format(prefix))

    # Extend the prefix one character at
    # a time until it matches a single file
    while len(hits) != 1:

        # Nothing matches, or the whole file name
        # has been used without becoming unique
        if len(hits) == 0 or idx >= len(filename) - 1:
            raise PathError('No unique prefix for {}'.format(filename))

        # Not unique - narrow down the existing hits
        idx    += 1
        prefix  = prefix + filename[idx]
        hits    = [h for h in hits if h.startswith(prefix)]

    return prefix