Skip to content
Snippets Groups Projects
Commit 431c4d59 authored by Paul McCarthy's avatar Paul McCarthy :mountain_bicyclist:
Browse files

Merge branch 'enh/extract_noise_script' into 'master'

Enh/extract noise script

See merge request fsl/fslpy!58
parents ac3fd5c1 904de71a
No related branches found
No related tags found
No related merge requests found
...@@ -32,17 +32,17 @@ To aid readability, all commit messages should be prefixed with one or more of ...@@ -32,17 +32,17 @@ To aid readability, all commit messages should be prefixed with one or more of
the following labels (this convention has been inherited from `nibabel the following labels (this convention has been inherited from `nibabel
<https://github.com/nipy/nibabel>`_): <https://github.com/nipy/nibabel>`_):
* *BF* : bug fix * *BF* : bug fix
* *RF* : refactoring * *RF* : refactoring
* *NF* : new feature * *NF* : new feature
* *BW* : addresses backward-compatibility * *BW* : addresses backward-compatibility
* *OPT* : optimization * *OPT* : optimization
* *BK* : breaks something and/or tests fail * *BK* : breaks something and/or tests fail
* *PL* : making pylint happier * *PL* : making pylint happier
* *DOC*: for all kinds of documentation related commits * *DOC* : for all kinds of documentation related commits
* *TEST*: for adding or changing tests * *TEST*: for adding or changing tests
* *MAINT*: for administrative/maintenance changes * *MNT* : for administrative/maintenance changes
* *CI*: for continuous-integration changes * *CI* : for continuous-integration changes
Version number Version number
......
...@@ -19,7 +19,10 @@ ...@@ -19,7 +19,10 @@
import os.path as op import os.path as op
def loadLabelFile(filename, includeLabel=None, excludeLabel=None): def loadLabelFile(filename,
includeLabel=None,
excludeLabel=None,
returnIndices=False):
"""Loads component labels from the specified file. The file is assuemd """Loads component labels from the specified file. The file is assuemd
to be of the format generated by FIX, Melview or ICA-AROMA; such a file to be of the format generated by FIX, Melview or ICA-AROMA; such a file
should have a structure resembling the following:: should have a structure resembling the following::
...@@ -70,23 +73,30 @@ def loadLabelFile(filename, includeLabel=None, excludeLabel=None): ...@@ -70,23 +73,30 @@ def loadLabelFile(filename, includeLabel=None, excludeLabel=None):
*bad* components, i.e. those components which are not classified as *bad* components, i.e. those components which are not classified as
signal or unknown. signal or unknown.
:arg filename: Name of the label file to load. :arg filename: Name of the label file to load.
:arg includeLabel: If the file contains a single line containing a list :arg includeLabel: If the file contains a single line containing a list
component indices, this label will be used for the component indices, this label will be used for the
components in the list. Defaults to 'Unclassified components in the list. Defaults to 'Unclassified
noise' for FIX-like files, and 'Movement' for noise' for FIX-like files, and 'Movement' for
ICA-AROMA-like files. ICA-AROMA-like files.
:arg excludeLabel: If the file contains a single line containing component :arg excludeLabel: If the file contains a single line containing component
indices, this label will be used for the components indices, this label will be used for the components
that are not in the list. Defaults to 'Signal' for that are not in the list. Defaults to 'Signal' for
FIX-like files, and 'Unknown' for ICA-AROMA-like files. FIX-like files, and 'Unknown' for ICA-AROMA-like files.
:returns: A tuple containing the path to the melodic directory :arg returnIndices: Defaults to ``False``. If ``True``, a list containing
as specified in the label file, and a list of lists, one the noisy component numbers that were listed in the
list per component, with each list containing the labels for file is returned.
the corresponding component.
:returns: A tuple containing:
- The path to the melodic directory as specified in the label
file
- A list of lists, one list per component, with each list
containing the labels for the corresponding component.
- If ``returnIndices is True``, a list of the noisy component
indices (starting from 1) that were specified in the file.
""" """
signalLabels = None signalLabels = None
...@@ -176,7 +186,7 @@ def loadLabelFile(filename, includeLabel=None, excludeLabel=None): ...@@ -176,7 +186,7 @@ def loadLabelFile(filename, includeLabel=None, excludeLabel=None):
try: try:
compIdx = int(tokens[0]) compIdx = int(tokens[0])
except: except ValueError:
raise InvalidLabelFileError( raise InvalidLabelFileError(
'Invalid FIX classification file - ' 'Invalid FIX classification file - '
'line {}: {}'.format(i + 1, compLine)) 'line {}: {}'.format(i + 1, compLine))
...@@ -203,7 +213,6 @@ def loadLabelFile(filename, includeLabel=None, excludeLabel=None): ...@@ -203,7 +213,6 @@ def loadLabelFile(filename, includeLabel=None, excludeLabel=None):
noise = isNoisyComponent(labels, signalLabels) noise = isNoisyComponent(labels, signalLabels)
if noise and (comp not in noisyComps): if noise and (comp not in noisyComps):
print(signalLabels)
raise InvalidLabelFileError('Noisy component {} has invalid ' raise InvalidLabelFileError('Noisy component {} has invalid '
'labels: {}'.format(comp, labels)) 'labels: {}'.format(comp, labels))
...@@ -217,7 +226,8 @@ def loadLabelFile(filename, includeLabel=None, excludeLabel=None): ...@@ -217,7 +226,8 @@ def loadLabelFile(filename, includeLabel=None, excludeLabel=None):
raise InvalidLabelFileError('Noisy component {} is missing ' raise InvalidLabelFileError('Noisy component {} is missing '
'a noise label'.format(comp)) 'a noise label'.format(comp))
return melDir, allLabels if returnIndices: return melDir, allLabels, noisyComps
else: return melDir, allLabels
def saveLabelFile(allLabels, def saveLabelFile(allLabels,
......
#!/usr/bin/env python
#
# extract_noise.py - Extract ICA component time courses from a MELODIC
# directory.
#
# Author: Paul McCarthy <pauldmccarthy@gmail.com>
#
"""This module defines the ``extract_noise`` script, for extracting component
time series from a MELODIC ``.ica`` directory.
"""
from __future__ import print_function
import os.path as op
import sys
import argparse
import warnings
import numpy as np
# See atlasq.py for explanation
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=FutureWarning)
import fsl.data.fixlabels as fixlabels
import fsl.data.melodicanalysis as melanalysis
DTYPE = np.float64
name = "extract_noise"
desc = 'Extract component time series from a MELODIC .ica directory'
usage = """
{name}: {desc}
Usage:
{name} <.ica directory> [-o outfile] <fixfile>
{name} <.ica directory> [-o outfile] <component> [<component> ...]
{name} <.ica directory> [-o outfile] [-c conffile] [-c conffile] <fixfile>
{name} <.ica directory> [-o outfile] [-c conffile] [-c conffile] <component> [<component> ...]
""".format(name=name, desc=desc).strip() # noqa
helps = {
'outfile' :
'File to save time series to',
'overwrite' :
'Overwrite output file if it exists',
'icadir' :
'.ica directory to extract time series from.',
'component' :
'Component number or FIX/AROMA file specifying components to extract.',
'confound' :
'Extra files to append to output file.',
}
def parseArgs(args):
"""Parses command line arguments.
:arg args: Sequence of command line arguments.
:returns: An ``argparse.Namespace`` object containing parsed arguments.
"""
if len(args) == 0:
print(usage)
sys.exit(0)
parser = argparse.ArgumentParser(prog=name,
usage=usage,
description=desc)
parser.add_argument('-o', '--outfile',
help=helps['outfile'],
default='confound_timeseries.txt')
parser.add_argument('-ow', '--overwrite',
action='store_true',
help=helps['overwrite'])
parser.add_argument('-c', '--conffile',
action='append',
help=helps['confound'])
parser.add_argument('icadir',
help=helps['icadir'])
parser.add_argument('components',
nargs='+',
help=helps['component'])
args = parser.parse_args(args)
# Error if ica directory does not exist
if not op.exists(args.icadir):
print('ICA directory {} does not exist'.format(args.icadir))
sys.exit(1)
# Error if output exists, but overwrite not specified
if op.exists(args.outfile) and not args.overwrite:
print('Output file {} already exists and --overwrite not '
'specified'.format(args.outfile))
sys.exit(1)
# Convert components into integers,
# or absolute file paths, and error
# if any are not one of these.
for i, c in enumerate(args.components):
if op.exists(c):
args.components[i] = op.abspath(c)
else:
try:
args.components[i] = int(c)
except ValueError:
print('Bad component: {}. Components must either be component '
'indices (starting from 1), or paths to FIX/AROMA '
'files.')
sys.exit(1)
# Convert confound files to absolute
# paths, error if any do not exist.
if args.conffile is None:
args.conffile = []
for i, cf in enumerate(args.conffile):
if not op.exists(cf):
print('Confound file does not exist: {}'.format(cf))
sys.exit(1)
args.conffile[i] = op.abspath(cf)
args.outfile = op.abspath(args.outfile)
args.icadir = op.abspath(args.icadir)
return args
def genComponentIndexList(comps, ncomps):
"""Turns the given sequence of integers and file paths into a list
of 0-based component indices.
:arg comps: Sequence containing 1-based component indices, and/or paths
to FIX/AROMA label text files.
:arg ncomps: Number of components in the input data - indices larger than
this will be ignored.
:returns: List of 0-based component indices.
"""
allcomps = []
for c in comps:
if isinstance(c, int):
ccomps = [c]
else:
ccomps = fixlabels.loadLabelFile(c, returnIndices=True)[2]
allcomps.extend([c - 1 for c in ccomps])
if any([c < 0 or c >= ncomps for c in allcomps]):
raise ValueError('Invalid component indices: {}'.format(allcomps))
return list(sorted(set(allcomps)))
def loadConfoundFiles(conffiles, npts):
"""Loads the given confound files, and copies them all into a single 2D
``(npoints, nconfounds)`` matrix.
:arg conffiles: Sequence of paths to files containing confound time series
(where each row corresponds to a time point, and each
column corresponds to a single confound).
:arg npts: Expected number of time points
:returns: A ``(npoints, nconfounds)`` ``numpy`` matrix.
"""
matrices = []
for cfile in conffiles:
mat = np.loadtxt(cfile, dtype=DTYPE)
if len(mat.shape) == 1:
mat = np.atleast_2d(mat).T
if mat.shape[0] != npts:
raise ValueError('Confound file {} does not have correct number '
'of points (expected {}, has {})'.format(
cfile, npts, mat.shape[0]))
matrices.append(mat)
ncols = sum([m.shape[1] for m in matrices])
confounds = np.zeros((npts, ncols), dtype=DTYPE)
coli = 0
for mat in matrices:
matcols = mat.shape[1]
confounds[:, coli:coli + matcols] = mat
coli = coli + matcols
return confounds
def main(argv=None):
"""Entry point for the ``extract_noise`` script.
Identifies component time series to extract, extracts them, loads extra
confound files, and saves them out to a file.
"""
if argv is None:
argv = sys.argv[1:]
args = parseArgs(argv)
try:
ts = melanalysis.getComponentTimeSeries(args.icadir)
npts, ncomps = ts.shape
confs = loadConfoundFiles(args.conffile, npts)
comps = genComponentIndexList(args.components, ncomps)
ts = ts[:, comps]
except Exception as e:
print(e)
sys.exit(1)
ts = np.hstack((ts, confs))
np.savetxt(args.outfile, ts, fmt='%10.5f')
if __name__ == '__main__':
sys.exit(main())
...@@ -124,11 +124,12 @@ setup( ...@@ -124,11 +124,12 @@ setup(
entry_points={ entry_points={
'console_scripts' : [ 'console_scripts' : [
'immv = fsl.scripts.immv:main', 'immv = fsl.scripts.immv:main',
'imcp = fsl.scripts.imcp:main', 'imcp = fsl.scripts.imcp:main',
'imglob = fsl.scripts.imglob:main', 'imglob = fsl.scripts.imglob:main',
'atlasq = fsl.scripts.atlasq:main', 'atlasq = fsl.scripts.atlasq:main',
'atlasquery = fsl.scripts.atlasq:atlasquery_emulation', 'atlasquery = fsl.scripts.atlasq:atlasquery_emulation',
'extract_noise = fsl.scripts.extract_noise:main',
] ]
} }
) )
#!/usr/bin/env python
#
# test_extract_noise.py -
#
# Author: Paul McCarthy <pauldmccarthy@gmail.com>
#
import sys
import numpy as np
import pytest
import fsl.utils.tempdir as tempdir
import fsl.scripts.extract_noise as extn
def test_genComponentIndexList():
with tempdir.tempdir():
# sequence of 1-indexed integers/file paths
icomps = [1, 5, 28, 12, 42, 54]
fcomps1 = [1, 4, 6, 3, 7]
fcomps2 = [12, 42, 31, 1, 4, 8]
with open('comps1.txt', 'wt') as f:
f.write(','.join([str(l) for l in fcomps1]))
with open('comps2.txt', 'wt') as f:
f.write(','.join([str(l) for l in fcomps2]))
ncomps = 60
comps = icomps + ['comps1.txt', 'comps2.txt']
expcomps = list(sorted(set(icomps + fcomps1 + fcomps2)))
expcomps = [c - 1 for c in expcomps]
assert extn.genComponentIndexList(comps, ncomps) == expcomps
with pytest.raises(ValueError):
extn.genComponentIndexList(comps + [-1], 60)
with pytest.raises(ValueError):
extn.genComponentIndexList(comps, 40)
def test_loadConfoundFiles():
with tempdir.tempdir():
npts = 50
confs = [
np.random.randint(1, 100, (50, 10)),
np.random.randint(1, 100, (50, 1)),
np.random.randint(1, 100, (50, 5))]
badconfs = [
np.random.randint(1, 100, (40, 10)),
np.random.randint(1, 100, (60, 10))]
expected = np.empty((50, 16), dtype=np.float64)
expected[:, :] = np.nan
expected[:, :10] = confs[0]
expected[:, 10:11] = confs[1]
expected[:, 11:16] = confs[2]
conffiles = []
for i, c in enumerate(confs):
fname = 'conf{}.txt'.format(i)
conffiles.append(fname)
np.savetxt(fname, c)
result = extn.loadConfoundFiles(conffiles, npts)
amask = ~np.isnan(expected)
assert np.all(~np.isnan(result) == amask)
assert np.all(result[amask] == expected[amask])
assert np.all(result[amask] == expected[amask])
badconfs = [
np.random.randint(1, 100, (40, 10)),
np.random.randint(1, 100, (60, 10))]
conffiles = []
for i, c in enumerate(badconfs):
fname = 'conf{}.txt'.format(i)
conffiles.append(fname)
np.savetxt(fname, c)
with pytest.raises(ValueError):
extn.loadConfoundFiles(conffiles, npts)
def test_extract_noise():
with tempdir.tempdir() as td:
# (npts, ncomps)
melmix = np.random.randint(1, 100, (100, 20))
np.savetxt('melodic_mix', melmix)
sys.argv = ['extract_noise', td] + '-o out.txt 1 2 3'.split()
extn.main()
assert np.all(np.loadtxt('out.txt') == melmix[:, :3])
with open('labels.txt', 'wt') as f:
f.write('4, 5, 6, 7')
extn.main([td] + '-o out.txt -ow 1 2 3 labels.txt'.split())
assert np.all(np.loadtxt('out.txt') == melmix[:, :7])
conf1 = np.random.randint(1, 100, (100, 1))
conf2 = np.random.randint(1, 100, (100, 5))
np.savetxt('conf1.txt', conf1)
np.savetxt('conf2.txt', conf2)
exp = np.hstack((melmix[:, :3], conf1, conf2))
extn.main([td] + '-o out.txt -c conf1.txt -c conf2.txt -ow 1 2 3'.split())
assert np.all(np.loadtxt('out.txt') == exp)
def test_extract_noise_usage():
with pytest.raises(SystemExit) as e:
extn.main([])
assert e.value.code == 0
def test_extract_noise_badargs():
with pytest.raises(SystemExit) as e:
extn.main(['non-existent.ica', '1', '2', '3'])
assert e.value.code != 0
with tempdir.tempdir() as td:
with pytest.raises(SystemExit) as e:
extn.main([td, 'non-existent.txt', '1', '2', '3'])
assert e.value.code != 0
with open('outfile.txt', 'wt') as f:
f.write('a')
# overwrite not specified
with pytest.raises(SystemExit) as e:
extn.main([td, '-o', 'outfile.txt', '1', '2', '3'])
assert e.value.code != 0
with pytest.raises(SystemExit) as e:
extn.main([td, '-c', 'non-existent.txt', '1', '2', '3'])
assert e.value.code != 0
# bad data
melmix = np.random.randint(1, 100, (100, 5))
np.savetxt('melodic_mix', melmix)
with open('labels.txt', 'wt') as f:
f.write('-1, 0, 1, 2')
with pytest.raises(SystemExit) as e:
extn.main([td, 'labels.txt', '1', '2', '3'])
assert e.value.code != 0
...@@ -35,7 +35,8 @@ filtered_func_data.ica ...@@ -35,7 +35,8 @@ filtered_func_data.ica
['Unclassified Noise'], ['Unclassified Noise'],
['Unclassified Noise'], ['Unclassified Noise'],
['Unclassified Noise'], ['Unclassified Noise'],
['Signal']])) ['Signal']],
[2, 5, 6, 7]))
goodfiles.append((""" goodfiles.append(("""
...@@ -92,7 +93,8 @@ REST.ica/filtered_func_data.ica ...@@ -92,7 +93,8 @@ REST.ica/filtered_func_data.ica
['Unclassified noise'], ['Unclassified noise'],
['Unclassified noise'], ['Unclassified noise'],
['Unclassified noise'], ['Unclassified noise'],
['Unknown']])) ['Unknown']],
[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 21, 22, 23, 24]))
goodfiles.append((""" goodfiles.append(("""
[2, 5, 6, 7] [2, 5, 6, 7]
...@@ -104,7 +106,8 @@ None, ...@@ -104,7 +106,8 @@ None,
['Signal'], ['Signal'],
['Unclassified noise'], ['Unclassified noise'],
['Unclassified noise'], ['Unclassified noise'],
['Unclassified noise']])) ['Unclassified noise']],
[2, 5, 6, 7]))
goodfiles.append((""" goodfiles.append(("""
2, 5, 6, 7 2, 5, 6, 7
...@@ -116,7 +119,8 @@ None, ...@@ -116,7 +119,8 @@ None,
['Unknown'], ['Unknown'],
['Movement'], ['Movement'],
['Movement'], ['Movement'],
['Movement']])) ['Movement']],
[2, 5, 6, 7]))
goodfiles.append((""" goodfiles.append(("""
...@@ -127,11 +131,12 @@ path/to/analysis.ica ...@@ -127,11 +131,12 @@ path/to/analysis.ica
""", """,
'path/to/analysis.ica', 'path/to/analysis.ica',
[['Unclassified noise'], [['Unclassified noise'],
['Signal', 'Blob']])) ['Signal', 'Blob']],
[1]))
def test_loadLabelFile_good(): def test_loadLabelFile_good():
for filecontents, expMelDir, expLabels in goodfiles: for filecontents, expMelDir, expLabels, expIdxs in goodfiles:
with tests.testdir() as testdir: with tests.testdir() as testdir:
...@@ -143,9 +148,15 @@ def test_loadLabelFile_good(): ...@@ -143,9 +148,15 @@ def test_loadLabelFile_good():
f.write(filecontents.strip()) f.write(filecontents.strip())
resMelDir, resLabels = fixlabels.loadLabelFile(fname) resMelDir, resLabels = fixlabels.loadLabelFile(fname)
assert resMelDir == expMelDir
assert len(resLabels) == len(expLabels)
for exp, res in zip(expLabels, resLabels):
assert exp == res
resMelDir, resLabels, resIdxs = fixlabels.loadLabelFile(
fname, returnIndices=True)
assert resMelDir == expMelDir assert resMelDir == expMelDir
assert resIdxs == expIdxs
assert len(resLabels) == len(expLabels) assert len(resLabels) == len(expLabels)
for exp, res in zip(expLabels, resLabels): for exp, res in zip(expLabels, resLabels):
assert exp == res assert exp == res
...@@ -309,7 +320,7 @@ def test_loadLabelFile_customLabels(): ...@@ -309,7 +320,7 @@ def test_loadLabelFile_customLabels():
if i in included: if i in included:
assert ilbls[0] == incLabel assert ilbls[0] == incLabel
else: else:
assert ilbls[0] == excLabel assert ilbls[0] == excLabel
def test_saveLabelFile(): def test_saveLabelFile():
...@@ -328,7 +339,7 @@ def test_saveLabelFile(): ...@@ -328,7 +339,7 @@ def test_saveLabelFile():
4, Label1, True 4, Label1, True
5, Unknown, False 5, Unknown, False
""").strip() """).strip()
with tests.testdir() as testdir: with tests.testdir() as testdir:
fname = op.join(testdir, 'fname.txt') fname = op.join(testdir, 'fname.txt')
...@@ -344,12 +355,12 @@ def test_saveLabelFile(): ...@@ -344,12 +355,12 @@ def test_saveLabelFile():
exp = '{}\n{}'.format(dirname, expected) exp = '{}\n{}'.format(dirname, expected)
with open(fname, 'rt') as f: with open(fname, 'rt') as f:
assert f.read().strip() == exp assert f.read().strip() == exp
# dirname=None, listBad=True # dirname=None, listBad=True
fixlabels.saveLabelFile(labels, fname) fixlabels.saveLabelFile(labels, fname)
exp = '.\n{}\n[1, 3, 4]'.format(expected) exp = '.\n{}\n[1, 3, 4]'.format(expected)
with open(fname, 'rt') as f: with open(fname, 'rt') as f:
assert f.read().strip() == exp assert f.read().strip() == exp
# Custom signal labels # Custom signal labels
sigLabels = ['Label1'] sigLabels = ['Label1']
...@@ -364,5 +375,5 @@ def test_saveLabelFile(): ...@@ -364,5 +375,5 @@ def test_saveLabelFile():
""").strip() """).strip()
fixlabels.saveLabelFile(labels, fname, signalLabels=sigLabels) fixlabels.saveLabelFile(labels, fname, signalLabels=sigLabels)
with open(fname, 'rt') as f: with open(fname, 'rt') as f:
assert f.read().strip() == exp assert f.read().strip() == exp
...@@ -204,6 +204,8 @@ def test_loadVertexData_annot(): ...@@ -204,6 +204,8 @@ def test_loadVertexData_annot():
loaded = mesh.loadVertexData(vdfile) loaded = mesh.loadVertexData(vdfile)
ergbal, enames = mesh.getVertexDataColourTable(vdfile) ergbal, enames = mesh.getVertexDataColourTable(vdfile)
enames = [n.decode() for n in enames]
assert np.all(np.isclose(loaded, labels.reshape(-1, 1))) assert np.all(np.isclose(loaded, labels.reshape(-1, 1)))
assert list(enames) == list(names) assert list(enames) == list(names)
assert np.all(np.isclose(ergbal[:, :4], rgba)) assert np.all(np.isclose(ergbal[:, :4], rgba))
......
...@@ -30,6 +30,18 @@ from fsl.utils.tempdir import tempdir ...@@ -30,6 +30,18 @@ from fsl.utils.tempdir import tempdir
from . import make_random_image from . import make_random_image
from . import make_dummy_file from . import make_dummy_file
try:
from unittest import mock
except ImportError:
import mock
try:
import indexed_gzip as igzip
except ImportError:
igzip = mock.MagicMock()
igzip.ZranError = mock.MagicMock()
def make_image(filename=None, def make_image(filename=None,
imgtype=1, imgtype=1,
...@@ -116,8 +128,8 @@ def test_load(): ...@@ -116,8 +128,8 @@ def test_load():
shouldRaise = [('notexist', fslpath.PathError), shouldRaise = [('notexist', fslpath.PathError),
('notexist.nii.gz', fslpath.PathError), ('notexist.nii.gz', fslpath.PathError),
('ambiguous', fslpath.PathError), ('ambiguous', fslpath.PathError),
('notnifti', ImageFileError), ('notnifti', (ImageFileError, igzip.ZranError)),
('notnifti.nii.gz', ImageFileError)] ('notnifti.nii.gz', (ImageFileError, igzip.ZranError))]
with tempdir() as testdir: with tempdir() as testdir:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment