diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ce4881625db90f191f964d82c459f4b60ef54a03..7f82d1e5dd44526a94b1c4d7b14d25e5c0e5852d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,19 @@ This document contains the ``fslpy`` release history in reverse chronological order. + +3.21.1 (Friday 27th September 2024) +----------------------------------- + + +Changed +^^^^^^^ + +* The :func:`.fixlabels.loadLabelFile` and :func:`.fixlabels.saveLabelFile` + functions now support FIX label files which contain classification + probabilities, as generated by pyfix >= 0.9.0, and old FIX >= 1.069 (!464). + + 3.21.0 (Tuesday 23rd July 2024) ------------------------------- diff --git a/fsl/data/fixlabels.py b/fsl/data/fixlabels.py index 09918ce8d02e05bce5252547ea8f4e6cc6908577..4d0bd98a2fcaa489de1c208a0143df40e684db1f 100644 --- a/fsl/data/fixlabels.py +++ b/fsl/data/fixlabels.py @@ -17,6 +17,7 @@ import itertools as it +import math import os.path as op @@ -24,12 +25,12 @@ def loadLabelFile(filename, includeLabel=None, excludeLabel=None, returnIndices=False, - missingLabel='Unknown'): - """Loads component labels from the specified file. The file is assuemd + missingLabel='Unknown', + returnProbabilities=False): + """Loads component labels from the specified file. The file is assumed to be of the format generated by FIX, Melview or ICA-AROMA; such a file should have a structure resembling the following:: - filtered_func_data.ica 1, Signal, False 2, Unclassified Noise, True @@ -41,7 +42,6 @@ def loadLabelFile(filename, 8, Signal, False [2, 5, 6, 7] - .. note:: This function will also parse files which only contain a component list, e.g.:: @@ -68,35 +68,46 @@ def loadLabelFile(filename, - One or more labels for the component (multiple labels must be comma-separated). - - ``'True'`` if the component has been classified as *bad*, - ``'False'`` otherwise. 
This field is optional - if the last - comma-separated token on a line is not equal (case-insensitive) - to ``True`` or ``False``, it is interpreted as a component label. + - ``'True'`` if the component has been classified as *bad*, ``'False'`` + otherwise. This field is optional - if the last non-numeric + comma-separated token on a line is not equal to ``True`` or ``False`` + (case-insensitive), it is interpreted as a component label. + + - A value between 0 and 1, which gives the probability of the component + being signal, as generated by an automatic classifier (e.g. FIX). This + field is optional - it is output by some versions of FIX. The last line of the file contains the index (starting from 1) of all *bad* components, i.e. those components which are not classified as signal or unknown. - :arg filename: Name of the label file to load. + :arg filename: Name of the label file to load. - :arg includeLabel: If the file contains a single line containing a list - component indices, this label will be used for the - components in the list. Defaults to 'Unclassified - noise' for FIX-like files, and 'Movement' for - ICA-AROMA-like files. + :arg includeLabel: If the file contains a single line containing a + list of component indices, this label will be used + for the components in the list. Defaults to + ``'Unclassified noise'`` for FIX-like files, and + ``'Movement'`` for ICA-AROMA-like files. - :arg excludeLabel: If the file contains a single line containing component - indices, this label will be used for the components - that are not in the list. Defaults to 'Signal' for - FIX-like files, and 'Unknown' for ICA-AROMA-like files. + :arg excludeLabel: If the file contains a single line containing + component indices, this label will be used for + the components that are not in the list. + Defaults to ``'Signal'`` for FIX-like files, and + ``'Unknown'`` for ICA-AROMA-like files. - :arg returnIndices: Defaults to ``False``. 
If ``True``, a list containing - the noisy component numbers that were listed in the - file is returned. + :arg returnIndices: Defaults to ``False``. If ``True``, a list + containing the noisy component numbers that were + listed in the file is returned. - :arg missingLabel: Label to use for any components which are not present - (only used for label files, not for noise component - files). + :arg missingLabel: Label to use for any components which are not + present (only used for label files, not for noise + component files). + + :arg returnProbabilities: Defaults to ``False``. If ``True``, a list + containing the component classification + probabilities is returned. If the file does not + contain probabilities, every value in this list + will be nan. :returns: A tuple containing: @@ -109,6 +120,9 @@ def loadLabelFile(filename, - If ``returnIndices is True``, a list of the noisy component indices (starting from 1) that were specified in the file. + - If ``returnProbabilities is True``, a list of the component + classification probabilities that were specified in the + file (all nan if they are not in the file). .. note:: Some label files generated by old versions of FIX/Melview do not contain a line for every component (unknown/unlabelled @@ -118,8 +132,9 @@ def loadLabelFile(filename, list may contain fewer entries than there are components. 
""" - signalLabels = None - filename = op.abspath(filename) + filename = op.abspath(filename) + probabilities = None + signalLabels = None with open(filename, 'rt') as f: lines = f.readlines() @@ -136,99 +151,23 @@ def loadLabelFile(filename, # of noise components (possibly preceeded by # the MELODIC directory path) if len(lines) <= 2: + melDir, noisyComps, allLabels, signalLabels = \ + _parseSingleLineLabelFile(lines, includeLabel, excludeLabel) + probabilities = [math.nan] * len(allLabels) - noisyComps = lines[-1] - - if len(lines) == 2: melDir = lines[0] - else: melDir = None - - # if the list is contained in - # square brackets, we assume - # that it is a FIX output file, - # where included components have - # been classified as noise, and - # excluded components as signal. - # - # Otherwise we assume that it - # is an AROMA file, where - # included components have - # been classified as being due - # to motion, and excluded - # components unclassified. - if includeLabel is None: - if noisyComps[0] == '[': includeLabel = 'Unclassified noise' - else: includeLabel = 'Movement' - - if excludeLabel is None: - if noisyComps[0] == '[': excludeLabel = 'Signal' - else: excludeLabel = 'Unknown' - else: - signalLabels = [excludeLabel] - - # Remove any leading/trailing - # whitespace or brackets. - noisyComps = noisyComps.strip(' []') - noisyComps = [int(i) for i in noisyComps.split(',')] - allLabels = [] - - for i in range(max(noisyComps)): - if (i + 1) in noisyComps: allLabels.append([includeLabel]) - else: allLabels.append([excludeLabel]) - - # Otherwise, we assume that - # it is a full label file. + # Otherwise, we assume that it is a full label file. else: - - melDir = lines[0] - noisyComps = lines[-1].strip(' []').split(',') - noisyComps = [c for c in noisyComps if c != ''] - noisyComps = [int(c) for c in noisyComps] - - # Parse the labels for every component. - # Initially store as a {comp : [labels]} dict. 
- allLabels = {} - for i, compLine in enumerate(lines[1:-1]): - - tokens = compLine.split(',') - tokens = [t.strip() for t in tokens] - - if len(tokens) < 3: - raise InvalidLabelFileError( - f'{filename} : Invalid FIX classification ' - f'file - line: {i + 1}: {compLine}') - - try: - compIdx = int(tokens[0]) - if compIdx in allLabels: - raise ValueError() - - except ValueError: - raise InvalidLabelFileError( - f'{filename}: Invalid FIX classification ' - f'file - line {i + 1}: {compLine}') - - if tokens[-1].lower() in ('true', 'false'): - compLabels = tokens[1:-1] - else: - compLabels = tokens[1:] - - allLabels[compIdx] = compLabels - - # Convert {comp : [labels]} into a list - # of lists, filling in missing components - allLabelsList = [] - for i in range(max(it.chain(allLabels.keys(), noisyComps))): - allLabelsList.append(allLabels.get(i + 1, [missingLabel])) - allLabels = allLabelsList + melDir, noisyComps, allLabels, probabilities = \ + _parseFullLabelFile(filename, lines, missingLabel) # There's no way to validate # the melodic directory path, # but let's try anyway. if melDir is not None: if len(melDir.split(',')) >= 3: - raise InvalidLabelFileError( - f'{filename}: First line does not look like ' - f'a MELODIC directory path: {melDir}') + raise InvalidLabelFileError( + f'{filename}: First line does not look like ' + f'a MELODIC directory path: {melDir}') # The melodic directory path should # either be an absolute path, or @@ -260,41 +199,184 @@ def loadLabelFile(filename, raise InvalidLabelFileError(f'{filename}: Noisy component {comp} ' 'is missing a noise label') - if returnIndices: return melDir, allLabels, noisyComps - else: return melDir, allLabels + retval = [melDir, allLabels] + + if returnIndices: retval.append(noisyComps) + if returnProbabilities: retval.append(probabilities) + + return tuple(retval) + + +def _parseSingleLineLabelFile(lines, includeLabel, excludeLabel): + """Called by :func:`loadLabelFile`. 
Parses the contents of an + ICA-AROMA-style label file which just contains a list of noise + components (and possibly the MELODIC directory path), e.g.:: + + filtered_func_data.ica + [2, 5, 6, 7] + """ + signalLabels = None + noisyComps = lines[-1] + + if len(lines) == 2: melDir = lines[0] + else: melDir = None + + # if the list is contained in + # square brackets, we assume + # that it is a FIX output file, + # where included components have + # been classified as noise, and + # excluded components as signal. + # + # Otherwise we assume that it + # is an AROMA file, where + # included components have + # been classified as being due + # to motion, and excluded + # components unclassified. + if includeLabel is None: + if noisyComps[0] == '[': includeLabel = 'Unclassified noise' + else: includeLabel = 'Movement' + + if excludeLabel is None: + if noisyComps[0] == '[': excludeLabel = 'Signal' + else: excludeLabel = 'Unknown' + else: + signalLabels = [excludeLabel] + + # Remove any leading/trailing + # whitespace or brackets. + noisyComps = noisyComps.strip(' []') + noisyComps = [int(i) for i in noisyComps.split(',')] + allLabels = [] + + for i in range(max(noisyComps)): + if (i + 1) in noisyComps: allLabels.append([includeLabel]) + else: allLabels.append([excludeLabel]) + + return melDir, noisyComps, allLabels, signalLabels + + +def _parseFullLabelFile(filename, lines, missingLabel): + """Called by :func:`loadLabelFile`. Parses the contents of a + FIX/Melview-style label file which contains labels for each component, + e.g.: + + filtered_func_data.ica + 1, Signal, False + 2, Unclassified Noise, True + 3, Unknown, False + 4, Signal, False + 5, Unclassified Noise, True + 6, Unclassified Noise, True + 7, Unclassified Noise, True + 8, Signal, False + [2, 5, 6, 7] + """ + melDir = lines[0] + noisyComps = lines[-1].strip(' []').split(',') + noisyComps = [c for c in noisyComps if c != ''] + noisyComps = [int(c) for c in noisyComps] + + # Parse the labels for every component. 
+ # Initially store as a {comp : ([labels], probability)} dict. + allLabels = {} + for i, compLine in enumerate(lines[1:-1]): + + tokens = compLine.split(',') + tokens = [t.strip() for t in tokens] + + if len(tokens) < 3: + raise InvalidLabelFileError( + f'{filename}: Invalid FIX classification ' + f'file - line: {i + 1}: {compLine}') + + try: + compIdx = int(tokens[0]) + if compIdx in allLabels: + raise ValueError() + + except ValueError: + raise InvalidLabelFileError( + f'{filename}: Invalid FIX classification ' + f'file - line {i + 1}: {compLine}') + + tokens = tokens[1:] + probability = math.nan + + # last token could be classification probability + if _isfloat(tokens[-1]): + probability = float(tokens[-1]) + tokens = tokens[:-1] + + # true/false is ignored as it is superfluous + if tokens[-1].lower() in ('true', 'false'): + tokens = tokens[:-1] + + allLabels[compIdx] = tokens, probability + + # Convert {comp : [labels]} into a list + # of lists, filling in missing components + allLabelsList = [] + probabilities = [] + for i in range(max(it.chain(allLabels.keys(), noisyComps))): + labels, prob = allLabels.get(i + 1, ([missingLabel], math.nan)) + allLabelsList.append(labels) + probabilities.append(prob) + allLabels = allLabelsList + + return melDir, noisyComps, allLabels, probabilities + + +def _isfloat(s): + """Returns True if the given string appears to contain a floating + point number, False otherwise. + """ + try: + float(s) + return True + except Exception: + return False def saveLabelFile(allLabels, filename, dirname=None, listBad=True, - signalLabels=None): + signalLabels=None, + probabilities=None): """Saves the given classification labels to the specified file. The classifications are saved in the format described in the :func:`loadLabelFile` method. - :arg allLabels: A list of lists, one list for each component, where - each list contains the labels for the corresponding - component. 
+ :arg allLabels: A list of lists, one list for each component, where + each list contains the labels for the corresponding + component. + + :arg filename: Name of the file to which the labels should be saved. - :arg filename: Name of the file to which the labels should be saved. + :arg dirname: If provided, is output as the first line of the file. + Intended to be a relative path to the MELODIC analysis + directory with which this label file is associated. If + not provided, a ``'.'`` is output as the first line. - :arg dirname: If provided, is output as the first line of the file. - Intended to be a relative path to the MELODIC analysis - directory with which this label file is associated. If - not provided, a ``'.'`` is output as the first line. + :arg listBad: If ``True`` (the default), the last line of the file + will contain a comma separated list of components which + are deemed 'noisy' (see :func:`isNoisyComponent`). - :arg listBad: If ``True`` (the default), the last line of the file - will contain a comma separated list of components which - are deemed 'noisy' (see :func:`isNoisyComponent`). + :arg signalLabels: Labels which should be deemed 'signal' - see the + :func:`isNoisyComponent` function. - :arg signalLabels: Labels which should be deemed 'signal' - see the - :func:`isNoisyComponent` function. + :arg probabilities: Classification probabilities. If provided, the + probability for each component is saved to the file. """ lines = [] noisyComps = [] + if probabilities is not None and len(probabilities) != len(allLabels): + raise ValueError('len(probabilities) != len(allLabels)') + # The first line - the melodic directory name if dirname is None: dirname = '.' 
@@ -312,6 +394,9 @@ def saveLabelFile(allLabels, labels = [l.replace(',', '_') for l in labels] tokens = [str(comp)] + labels + [str(noise)] + if probabilities is not None: + tokens.append(f'{probabilities[i]:0.6f}') + lines.append(', '.join(tokens)) if noise: @@ -347,4 +432,3 @@ class InvalidLabelFileError(Exception): """Exception raised by the :func:`loadLabelFile` function when an attempt is made to load an invalid label file. """ - pass diff --git a/fsl/tests/test_fixlabels.py b/fsl/tests/test_fixlabels.py index 308883e96e3eb7ab6626fac248d167e7569b22fe..2664e982d8aae818356afae53b3a3c3d728729b3 100644 --- a/fsl/tests/test_fixlabels.py +++ b/fsl/tests/test_fixlabels.py @@ -5,8 +5,9 @@ # Author: Paul McCarthy <pauldmccarthy@gmail.com> # -import os.path as op -import textwrap +import math +import os.path as op +import textwrap as tw import pytest @@ -178,10 +179,29 @@ path/to/analysis.ica ['Signal']], [1, 2])) +# Classification probabilities +goodfiles.append((""" +path/to/analysis.ica +1, Unclassified noise, True, 0.2 +2, Unclassified noise, True, 0.1 +3, Signal, False, 0.8 +[1, 2] +""", +'path/to/analysis.ica', +[['Unclassified noise'], + ['Unclassified noise'], + ['Signal']], +[1, 2], +[0.2, 0.1, 0.8])) + def test_loadLabelFile_good(): - for filecontents, expMelDir, expLabels, expIdxs in goodfiles: + for test in goodfiles: + filecontents, expMelDir, expLabels, expIdxs = test[:4] + + if len(test) > 4: probs = test[4] + else: probs = None with tests.testdir() as testdir: @@ -206,6 +226,11 @@ def test_loadLabelFile_good(): for exp, res in zip(expLabels, resLabels): assert exp == res + if probs is not None: + resMelDir, resLabels, resProbs = fixlabels.loadLabelFile( + fname, returnProbabilities=True) + assert resProbs == probs + @@ -316,7 +341,8 @@ def test_loadLabelFile_bad(): def test_loadLabelFile_customLabels(): included = [2, 3, 4, 5] - contents = '[{}]\n'.format([i + 1 for i in included]) + contents = ','.join([str(i + 1) for i in included]) + contents = 
f'[{contents}]\n' defIncLabel = 'Unclassified noise' defExcLabel = 'Signal' @@ -350,16 +376,74 @@ def test_loadLabelFile_customLabels(): assert ilbls[0] == excLabel -def test_saveLabelFile(): +def test_loadLabelFile_probabilities(): + + def lists_equal(a, b): + if len(a) != len(b): + return False + for av, bv in zip(a, b): + if av == bv: + continue + if math.isnan(av) and math.isnan(bv): + continue + if math.isnan(av) and (not math.isnan(bv)): + return False + if (not math.isnan(av)) and math.isnan(bv): + return False + + return True + + nan = math.nan + + testcases = [ + (""" + analysis.ica + 1, Signal, False + 2, Unclassified noise, True + 3, Signal, False + [2] + """, [nan, nan, nan]), + (""" + analysis.ica + 1, Signal, False, 0.1 + 2, Unclassified noise, True, 0.2 + 3, Signal, False, 0.3 + [2] + """, [0.1, 0.2, 0.3]), + (""" + analysis.ica + 1, Signal, False, 0.1 + 2, Unclassified noise, True + 3, Signal, False, 0.3 + [2] + """, [0.1, nan, 0.3]), + (""" + [1, 2, 3] + """, [nan, nan, nan]), + ] + + for contents, expprobs in testcases: + with tests.testdir() as testdir: + fname = op.join(testdir, 'labels.txt') + + with open(fname, 'wt') as f: + f.write(tw.dedent(contents).strip()) + + _, _, gotprobs = fixlabels.loadLabelFile( + fname, returnProbabilities=True) + + assert lists_equal(gotprobs, expprobs) +def test_saveLabelFile(): + labels = [['Label1', 'Label2', 'Label3'], ['Signal'], ['Noise'], ['Label1'], ['Unknown']] - expected = textwrap.dedent(""" + expected = tw.dedent(""" 1, Label1, Label2, Label3, True 2, Signal, False 3, Noise, True @@ -391,7 +475,7 @@ def test_saveLabelFile(): # Custom signal labels sigLabels = ['Label1'] - exp = textwrap.dedent(""" + exp = tw.dedent(""" . 
1, Label1, Label2, Label3, False 2, Signal, True @@ -404,3 +488,31 @@ def test_saveLabelFile(): fixlabels.saveLabelFile(labels, fname, signalLabels=sigLabels) with open(fname, 'rt') as f: assert f.read().strip() == exp + + +def test_saveLabelFile_probabilities(): + + labels = [['Label1', 'Label2', 'Label3'], + ['Signal'], + ['Noise'], + ['Label1'], + ['Unknown']] + probs = [0.1, 0.2, 0.3, 0.4, 0.5] + + expected = tw.dedent(""" + 1, Label1, Label2, Label3, True, 0.100000 + 2, Signal, False, 0.200000 + 3, Noise, True, 0.300000 + 4, Label1, True, 0.400000 + 5, Unknown, False, 0.500000 + [1, 3, 4] + """).strip() + + with tests.testdir() as testdir: + fname = op.join(testdir, 'fname.txt') + + exp = '.\n{}'.format(expected) + fixlabels.saveLabelFile(labels, fname, probabilities=probs) + with open(fname, 'rt') as f: + got = f.read().strip() + assert got == exp diff --git a/fsl/transform/flirt.py b/fsl/transform/flirt.py index e1c84b7617e23eda40aac4692fe586d4942de372..5acd4b6ff86879897be5a108b1ca1bc2a6b5e29c 100644 --- a/fsl/transform/flirt.py +++ b/fsl/transform/flirt.py @@ -63,7 +63,7 @@ def fromFlirt(xform, src, ref, from_='voxel', to='world'): Valid values for the ``from_`` and ``to`` arguments are: - ``voxel``: The voxel coordinate system - - ``fsl``: The FSL coordiante system (voxels scaled by pixdims, with the + - ``fsl``: The FSL coordinate system (voxels scaled by pixdims, with the X axis inverted if the image sform/qform has a positive determinant) - ``world`` The world coordinate system