diff --git a/fsl/data/fixlabels.py b/fsl/data/fixlabels.py index 09918ce8d02e05bce5252547ea8f4e6cc6908577..8388daacef2d07ccdfc4445d10fb0ee35c9e274f 100644 --- a/fsl/data/fixlabels.py +++ b/fsl/data/fixlabels.py @@ -17,6 +17,7 @@ import itertools as it +import math import os.path as op @@ -24,12 +25,12 @@ def loadLabelFile(filename, includeLabel=None, excludeLabel=None, returnIndices=False, - missingLabel='Unknown'): + missingLabel='Unknown', + returnProbabilities=False): """Loads component labels from the specified file. The file is assuemd to be of the format generated by FIX, Melview or ICA-AROMA; such a file should have a structure resembling the following:: - filtered_func_data.ica 1, Signal, False 2, Unclassified Noise, True @@ -41,7 +42,6 @@ def loadLabelFile(filename, 8, Signal, False [2, 5, 6, 7] - .. note:: This function will also parse files which only contain a component list, e.g.:: @@ -68,35 +68,46 @@ def loadLabelFile(filename, - One or more labels for the component (multiple labels must be comma-separated). - - ``'True'`` if the component has been classified as *bad*, - ``'False'`` otherwise. This field is optional - if the last - comma-separated token on a line is not equal (case-insensitive) - to ``True`` or ``False``, it is interpreted as a component label. + - ``'True'`` if the component has been classified as *bad*, ``'False'`` + otherwise. This field is optional - if the last non-numeric + comma-separated token on a line is not equal to ``True`` or ``False`` + (case-insensitive) , it is interpreted as a component label. + + - A value between 0 and 1, which gives the probability of the component + being signal, as generated by an automatic classifier (e.g. FIX). This + field is optional - it is output by some versions of FIX. The last line of the file contains the index (starting from 1) of all *bad* components, i.e. those components which are not classified as signal or unknown. - :arg filename: Name of the label file to load. 
+ :arg filename: Name of the label file to load. - :arg includeLabel: If the file contains a single line containing a list - component indices, this label will be used for the - components in the list. Defaults to 'Unclassified - noise' for FIX-like files, and 'Movement' for - ICA-AROMA-like files. + :arg includeLabel: If the file contains a single line containing a + list of component indices, this label will be used + for the components in the list. Defaults to + ``'Unclassified noise'`` for FIX-like files, and + ``'Movement'`` for ICA-AROMA-like files. - :arg excludeLabel: If the file contains a single line containing component - indices, this label will be used for the components - that are not in the list. Defaults to 'Signal' for - FIX-like files, and 'Unknown' for ICA-AROMA-like files. + :arg excludeLabel: If the file contains a single line containing + component indices, this label will be used for + the components that are not in the list. + Defaults to ``'Signal'`` for FIX-like files, and + ``'Unknown'`` for ICA-AROMA-like files. - :arg returnIndices: Defaults to ``False``. If ``True``, a list containing - the noisy component numbers that were listed in the - file is returned. + :arg returnIndices: Defaults to ``False``. If ``True``, a list + containing the noisy component numbers that were + listed in the file is returned. - :arg missingLabel: Label to use for any components which are not present - (only used for label files, not for noise component - files). + :arg missingLabel: Label to use for any components which are not + present (only used for label files, not for noise + component files). + + :arg returnProbabilities: Defaults to ``False``. If ``True``, a list + containing the component classification + probabilities is returned. If the file does not + contain probabilities, every value in this list + will be nan.
:returns: A tuple containing: @@ -109,6 +120,9 @@ def loadLabelFile(filename, - If ``returnIndices is True``, a list of the noisy component indices (starting from 1) that were specified in the file. + - If ``returnProbabilities is True``, a list of the component + classification probabilities that were specified in the + file (all nan if they are not in the file). .. note:: Some label files generated by old versions of FIX/Melview do not contain a line for every component (unknown/unlabelled @@ -118,8 +132,9 @@ def loadLabelFile(filename, list may contain fewer entries than there are components. """ - signalLabels = None - filename = op.abspath(filename) + filename = op.abspath(filename) + probabilities = None + signalLabels = None with open(filename, 'rt') as f: lines = f.readlines() @@ -136,99 +151,23 @@ def loadLabelFile(filename, # of noise components (possibly preceeded by # the MELODIC directory path) if len(lines) <= 2: + melDir, noisyComps, allLabels, signalLabels = \ + _parseSingleLineLabelFile(lines, includeLabel, excludeLabel) + probabilities = [math.nan] * len(allLabels) - noisyComps = lines[-1] - - if len(lines) == 2: melDir = lines[0] - else: melDir = None - - # if the list is contained in - # square brackets, we assume - # that it is a FIX output file, - # where included components have - # been classified as noise, and - # excluded components as signal. - # - # Otherwise we assume that it - # is an AROMA file, where - # included components have - # been classified as being due - # to motion, and excluded - # components unclassified. - if includeLabel is None: - if noisyComps[0] == '[': includeLabel = 'Unclassified noise' - else: includeLabel = 'Movement' - - if excludeLabel is None: - if noisyComps[0] == '[': excludeLabel = 'Signal' - else: excludeLabel = 'Unknown' - else: - signalLabels = [excludeLabel] - - # Remove any leading/trailing - # whitespace or brackets. 
- noisyComps = noisyComps.strip(' []') - noisyComps = [int(i) for i in noisyComps.split(',')] - allLabels = [] - - for i in range(max(noisyComps)): - if (i + 1) in noisyComps: allLabels.append([includeLabel]) - else: allLabels.append([excludeLabel]) - - # Otherwise, we assume that - # it is a full label file. + # Otherwise, we assume that it is a full label file. else: - - melDir = lines[0] - noisyComps = lines[-1].strip(' []').split(',') - noisyComps = [c for c in noisyComps if c != ''] - noisyComps = [int(c) for c in noisyComps] - - # Parse the labels for every component. - # Initially store as a {comp : [labels]} dict. - allLabels = {} - for i, compLine in enumerate(lines[1:-1]): - - tokens = compLine.split(',') - tokens = [t.strip() for t in tokens] - - if len(tokens) < 3: - raise InvalidLabelFileError( - f'{filename} : Invalid FIX classification ' - f'file - line: {i + 1}: {compLine}') - - try: - compIdx = int(tokens[0]) - if compIdx in allLabels: - raise ValueError() - - except ValueError: - raise InvalidLabelFileError( - f'{filename}: Invalid FIX classification ' - f'file - line {i + 1}: {compLine}') - - if tokens[-1].lower() in ('true', 'false'): - compLabels = tokens[1:-1] - else: - compLabels = tokens[1:] - - allLabels[compIdx] = compLabels - - # Convert {comp : [labels]} into a list - # of lists, filling in missing components - allLabelsList = [] - for i in range(max(it.chain(allLabels.keys(), noisyComps))): - allLabelsList.append(allLabels.get(i + 1, [missingLabel])) - allLabels = allLabelsList + melDir, noisyComps, allLabels, probabilities = \ + _parseFullLabelFile(filename, lines, missingLabel) # There's no way to validate # the melodic directory path, # but let's try anyway. 
def _parseSingleLineLabelFile(lines, includeLabel, excludeLabel):
    """Called by :func:`loadLabelFile`. Parses the contents of a FIX or
    ICA-AROMA-style label file which just contains a list of noise
    components (and possibly the MELODIC directory path), e.g.::

        filtered_func_data.ica
        [2, 5, 6, 7]

    :arg lines:        List of one or two lines from the file. The last
                       line is the comma-separated list of noisy component
                       indices (starting from 1), optionally enclosed in
                       square brackets. If there are two lines, the first
                       is used as the MELODIC directory. NOTE(review): the
                       lines are presumably already stripped of surrounding
                       whitespace/newlines by the caller - confirm.

    :arg includeLabel: Label to give to the components in the list. If
                       ``None``, defaults to ``'Unclassified noise'`` when
                       the list starts with ``[``, ``'Movement'`` otherwise.

    :arg excludeLabel: Label to give to the components which are not in the
                       list. If ``None``, defaults to ``'Signal'`` when the
                       list starts with ``[``, ``'Unknown'`` otherwise.

    :returns: A tuple containing:

               - The MELODIC directory (``None`` if there is only one line)
               - List of noisy component indices (starting from 1)
               - List of lists, one per component, each containing the
                 single label for that component. Empty if the component
                 list is empty.
               - ``[excludeLabel]`` if an ``excludeLabel`` was explicitly
                 provided, ``None`` otherwise.
    """
    signalLabels = None
    noisyComps   = lines[-1]
    melDir       = lines[0] if len(lines) == 2 else None

    # if the list is contained in
    # square brackets, we assume
    # that it is a FIX output file,
    # where included components have
    # been classified as noise, and
    # excluded components as signal.
    #
    # Otherwise we assume that it
    # is an AROMA file, where
    # included components have
    # been classified as being due
    # to motion, and excluded
    # components unclassified.
    #
    # startswith is used rather than
    # indexing so that an empty line
    # does not raise an IndexError.
    isFix = noisyComps.startswith('[')

    if includeLabel is None:
        includeLabel = 'Unclassified noise' if isFix else 'Movement'

    if excludeLabel is None:
        excludeLabel = 'Signal' if isFix else 'Unknown'
    else:
        signalLabels = [excludeLabel]

    # Remove any leading/trailing whitespace
    # or brackets, and skip empty tokens so
    # that an empty list (e.g. "[]") yields
    # no components instead of an error.
    noisyComps = noisyComps.strip(' []').split(',')
    noisyComps = [int(c) for c in noisyComps if c.strip() != '']

    # Use a set for the membership test,
    # and create a new label list for
    # every component.
    noisySet  = set(noisyComps)
    allLabels = []
    for comp in range(1, max(noisyComps, default=0) + 1):
        if comp in noisySet: allLabels.append([includeLabel])
        else:                allLabels.append([excludeLabel])

    return melDir, noisyComps, allLabels, signalLabels


def _parseFullLabelFile(filename, lines, missingLabel):
    """Called by :func:`loadLabelFile`. Parses the contents of a
    FIX/Melview-style label file which contains labels for each component,
    e.g.::

        filtered_func_data.ica
        1, Signal, False
        2, Unclassified Noise, True
        3, Unknown, False
        4, Signal, False
        5, Unclassified Noise, True
        6, Unclassified Noise, True
        7, Unclassified Noise, True
        8, Signal, False
        [2, 5, 6, 7]

    Each component line may optionally end with a number, which is
    interpreted as the classification probability for that component.

    :arg filename:     Name of the file being parsed - only used in error
                       messages.

    :arg lines:        List of lines from the file. The first line is
                       used as the MELODIC directory, the last line as the
                       list of noisy component indices, and every line in
                       between as a component line.

    :arg missingLabel: Label to use for any component which does not have
                       a line in the file.

    :returns: A tuple containing:

               - The MELODIC directory
               - List of noisy component indices (starting from 1)
               - List of lists, one per component, containing the labels
                 for that component
               - List of classification probabilities, one per component
                 (``nan`` for components without a probability).

    :raises InvalidLabelFileError: If a component line has fewer than three
                                   comma-separated tokens, or if its
                                   component index is not an integer or is
                                   duplicated.

    :raises ValueError:            If the noisy component list contains
                                   a token which is not an integer.
    """
    melDir     = lines[0]
    noisyComps = lines[-1].strip(' []').split(',')
    noisyComps = [int(c) for c in noisyComps if c.strip() != '']

    # Parse the labels for every component.
    # Initially store as a {comp : ([labels], probability)} dict.
    allLabels = {}
    for i, compLine in enumerate(lines[1:-1]):

        tokens = [t.strip() for t in compLine.split(',')]

        if len(tokens) < 3:
            raise InvalidLabelFileError(
                f'{filename}: Invalid FIX classification '
                f'file - line: {i + 1}: {compLine}')

        # The component index must be
        # an integer, and must be unique
        try:
            compIdx = int(tokens[0])
        except ValueError:
            compIdx = None

        if compIdx is None or compIdx in allLabels:
            raise InvalidLabelFileError(
                f'{filename}: Invalid FIX classification '
                f'file - line {i + 1}: {compLine}')

        tokens      = tokens[1:]
        probability = math.nan

        # last token could be classification probability.
        # At least two tokens remain at this point, so
        # there is always one left for the check below.
        if _isfloat(tokens[-1]):
            probability = float(tokens[-1])
            tokens      = tokens[:-1]

        # true/false is ignored as it is superfluous
        if tokens[-1].lower() in ('true', 'false'):
            tokens = tokens[:-1]

        allLabels[compIdx] = tokens, probability

    # Convert {comp : ([labels], probability)} into
    # two lists, filling in missing components. A
    # new [missingLabel] list is created for every
    # missing component, so that the returned label
    # lists are never shared between components.
    numComps      = max(it.chain(allLabels, noisyComps), default=0)
    allLabelsList = []
    probabilities = []
    for comp in range(1, numComps + 1):
        labels, prob = allLabels.get(comp, ([missingLabel], math.nan))
        allLabelsList.append(labels)
        probabilities.append(prob)

    return melDir, noisyComps, allLabelsList, probabilities


def _isfloat(s):
    """Returns True if the given string appears to contain a floating
    point number, False otherwise.

    The test is simply whether ``float(s)`` succeeds, so strings such as
    ``'nan'`` and ``'inf'`` are also regarded as floating point numbers.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        return False
    return True