Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env python
#
# fixlabels.py - Functions for loading/saving FIX/ICA-AROMA label files.
#
# Author: Paul McCarthy <pauldmccarthy@gmail.com>
#
"""This module contains functions for loading/saving FIX/ICA-AROMA label files.

.. autosummary::
   :nosignatures:

   loadLabelFile
   saveLabelFile
   isNoisyComponent
   InvalidLabelFileError
"""
import os.path as op
def loadLabelFile(filename, includeLabel=None, excludeLabel=None):
    """Loads component labels from the specified file. The file is assumed
    to be of the format generated by FIX, Melview or ICA-AROMA; such a file
    should have a structure resembling the following::

        filtered_func_data.ica
        1, Signal, False
        2, Unclassified Noise, True
        3, Unknown, False
        4, Signal, False
        5, Unclassified Noise, True
        6, Unclassified Noise, True
        7, Unclassified Noise, True
        8, Signal, False
        [2, 5, 6, 7]

    .. note:: This function will also parse files which only contain a
              component list, e.g.::

                  [2, 5, 6, 7]

              The square brackets may or may not be present, i.e. the
              following format is also accepted (this format is generated
              by ICA-AROMA)::

                  2, 5, 6, 7

              In this case, the returned melodic directory path will be
              ``None``. The ``includeLabel`` and ``excludeLabel`` arguments
              allow you to control the labels assigned to included/excluded
              components.

    The first line of the file contains the name of the melodic directory.
    Then, one line is present for each component, containing the following,
    separated by commas:

      - The component index (starting from 1).
      - One or more labels for the component (multiple labels must be
        comma-separated).
      - ``'True'`` if the component has been classified as *bad*,
        ``'False'`` otherwise. If the last comma-separated token on a line
        is not equal (case-insensitive) to ``True`` or ``False``, it is
        interpreted as a component label.

    Every component line must contain at least three comma-separated
    tokens, otherwise the file is rejected.

    The last line of the file contains the index (starting from 1) of all
    *bad* components, i.e. those components which are not classified as
    signal or unknown.

    Blank lines are ignored.

    :arg filename:     Name of the label file to load.

    :arg includeLabel: If the file contains a single line containing a list
                       component indices, this label will be used for the
                       components in the list. Defaults to 'Unclassified
                       noise' for FIX-like files, and 'Movement' for
                       ICA-AROMA-like files.

    :arg excludeLabel: If the file contains a single line containing
                       component indices, this label will be used for the
                       components that are not in the list. Defaults to
                       'Signal' for FIX-like files, and 'Unknown' for
                       ICA-AROMA-like files.

    :returns: A tuple containing the path to the melodic directory
              as specified in the label file, and a list of lists, one
              list per component, with each list containing the labels for
              the corresponding component.

    :raises InvalidLabelFileError: If the file contains no non-blank lines,
              if a component line or the list of noisy components cannot
              be parsed, if the noisy list refers to a component which
              does not exist, or if the component labels are inconsistent
              with the noisy list.
    """

    signalLabels = None
    filename     = op.abspath(filename)

    with open(filename, 'rt') as f:
        lines = [l.strip() for l in f]

    # Blank lines are discarded *before* the
    # emptiness check, so that a file which
    # only contains whitespace is reported as
    # invalid, rather than causing an IndexError
    # further down.
    lines = [l for l in lines if l != '']

    if len(lines) < 1:
        raise InvalidLabelFileError('Invalid FIX classification '
                                    'file - not enough lines')

    # If the file contains a single
    # line, we assume that it is just
    # a comma-separated list of noise
    # components.
    if len(lines) == 1:

        line = lines[0]

        # if the list is contained in
        # square brackets, we assume
        # that it is a FIX output file,
        # where included components have
        # been classified as noise, and
        # excluded components as signal.
        #
        # Otherwise we assume that it
        # is an AROMA file, where
        # included components have
        # been classified as being due
        # to motion, and excluded
        # components unclassified.
        fixLike = line[0] == '['

        if includeLabel is None:
            if fixLike: includeLabel = 'Unclassified noise'
            else:       includeLabel = 'Movement'

        if excludeLabel is None:
            if fixLike: excludeLabel = 'Signal'
            else:       excludeLabel = 'Unknown'
        else:
            # A custom exclude label must be
            # treated as a signal label when
            # the labels are validated below.
            signalLabels = [excludeLabel]

        melDir = None

        # Remove any leading/trailing
        # whitespace or brackets.
        try:
            noisyComps = [int(i) for i in line.strip(' []').split(',')]
        except ValueError:
            raise InvalidLabelFileError(
                'Invalid FIX classification file - cannot '
                'parse component list: {}'.format(line))

        # Components are numbered from 1 up to
        # the largest index present in the list.
        noisySet  = set(noisyComps)
        allLabels = []

        for i in range(max(noisyComps)):
            if (i + 1) in noisySet: allLabels.append([includeLabel])
            else:                   allLabels.append([excludeLabel])

    # Otherwise, we assume that
    # it is a full label file.
    else:

        melDir     = lines[0]
        noisyComps = lines[-1].strip(' []').split(',')
        noisyComps = [c.strip() for c in noisyComps]
        noisyComps = [c for c in noisyComps if c != '']

        try:
            noisyComps = [int(c) for c in noisyComps]
        except ValueError:
            raise InvalidLabelFileError(
                'Invalid FIX classification file - cannot '
                'parse component list: {}'.format(lines[-1]))

        # The melodic directory path should
        # either be an absolute path, or
        # be specified relative to the location
        # of the label file.
        if not op.isabs(melDir):
            melDir = op.join(op.dirname(filename), melDir)

        # Parse the labels for every component
        allLabels = []
        for i, compLine in enumerate(lines[1:-1]):

            tokens = [t.strip() for t in compLine.split(',')]

            if len(tokens) < 3:
                raise InvalidLabelFileError(
                    'Invalid FIX classification file - '
                    'line {}: {}'.format(i + 1, compLine))

            try:
                compIdx = int(tokens[0])
            except ValueError:
                raise InvalidLabelFileError(
                    'Invalid FIX classification file - '
                    'line {}: {}'.format(i + 1, compLine))

            # The trailing True/False flag
            # is not a label - drop it.
            if tokens[-1].lower() in ('true', 'false'):
                compLabels = tokens[1:-1]
            else:
                compLabels = tokens[1:]

            if compIdx != i + 1:
                raise InvalidLabelFileError(
                    'Invalid FIX classification file - wrong component '
                    'number at line {}: {}'.format(i + 1, compLine))

            allLabels.append(compLabels)

    # Validate the labels against
    # the noisy list - all components
    # in the noisy list should not
    # have 'signal' or 'unknown' labels
    noisySet = set(noisyComps)

    for i, labels in enumerate(allLabels):

        comp  = i + 1
        noise = isNoisyComponent(labels, signalLabels)

        if noise and (comp not in noisySet):
            raise InvalidLabelFileError('Noisy component {} has invalid '
                                        'labels: {}'.format(comp, labels))

    for comp in noisyComps:

        # Guard against indices which do not
        # correspond to a component - without
        # this, 0/negative indices would wrap
        # around to the end of the list, and
        # too-large indices would cause an
        # IndexError.
        if comp < 1 or comp > len(allLabels):
            raise InvalidLabelFileError(
                'Invalid FIX classification file - noisy '
                'component {} does not exist'.format(comp))

        labels = allLabels[comp - 1]
        noise  = isNoisyComponent(labels, signalLabels)

        if not noise:
            raise InvalidLabelFileError('Noisy component {} is missing '
                                        'a noise label'.format(comp))

    return melDir, allLabels
def saveLabelFile(allLabels,
                  filename,
                  dirname=None,
                  listBad=True,
                  signalLabels=None):
    """Writes a set of component classification labels out to a file, using
    the format which is described in the :func:`loadLabelFile` function.

    :arg allLabels:    A list of lists, one per component, each of which
                       contains the labels for that component.

    :arg filename:     Name of the file to write the labels to.

    :arg dirname:      If not ``None``, this is written as the first line
                       of the file. Intended to be a relative path to the
                       MELODIC analysis directory with which this label
                       file is associated.

    :arg listBad:      If ``True`` (the default), the final line of the
                       file is a bracketed, comma-separated list of the
                       components which are considered to be noise (see
                       :func:`isNoisyComponent`).

    :arg signalLabels: Labels which are to be regarded as 'signal' - passed
                       through to the :func:`isNoisyComponent` function.
    """

    # The melodic directory name,
    # if we have been given one,
    # goes on the first line
    if dirname is None: output = []
    else:               output = [dirname]

    badComps = []

    # Then one line per component,
    # numbered starting from 1
    for number, compLabels in enumerate(allLabels, 1):

        isNoise = isNoisyComponent(compLabels, signalLabels)

        # Commas are used as the field
        # separator, so they cannot be
        # allowed in the label names
        sanitised = [lbl.replace(',', '_') for lbl in compLabels]
        fields    = [str(number)] + sanitised + [str(isNoise)]

        output.append(', '.join(fields))

        if isNoise:
            badComps.append(number)

    # And finally the list of bad components
    if listBad:
        output.append('[' + ', '.join(map(str, badComps)) + ']')

    contents = '\n'.join(output) + '\n'

    with open(filename, 'wt') as f:
        f.write(contents)
def isNoisyComponent(labels, signalLabels=None):
    """Decides whether a component with the given labels is classified as
    noise. Labels are compared case-insensitively.

    :arg labels:       Sequence of labels assigned to the component.

    :arg signalLabels: Labels which are deemed signal. A component which
                       has none of these labels is deemed noise. Defaults
                       to ``['signal', 'unknown']``.

    :returns: ``True`` if the component is classified as noise, ``False``
              otherwise.
    """
    if signalLabels is None:
        signalLabels = ['signal', 'unknown']

    wanted  = set(sl.lower() for sl in signalLabels)
    present = set(lbl.lower() for lbl in labels)

    # The component is noise if, and only if,
    # it has not been given any signal label
    return wanted.isdisjoint(present)
class InvalidLabelFileError(Exception):
    """Raised by the :func:`loadLabelFile` function when the label file
    that it has been asked to load is not valid.
    """