Skip to content
Snippets Groups Projects
Commit 431c4d59 authored by Paul McCarthy's avatar Paul McCarthy :mountain_bicyclist:
Browse files

Merge branch 'enh/extract_noise_script' into 'master'

Enh/extract noise script

See merge request fsl/fslpy!58
parents ac3fd5c1 904de71a
No related branches found
No related tags found
No related merge requests found
Pipeline #
......@@ -32,17 +32,17 @@ To aid readability, all commit messages should be prefixed with one or more of
the following labels (this convention has been inherited from `nibabel
<https://github.com/nipy/nibabel>`_):
* *BF* : bug fix
* *RF* : refactoring
* *NF* : new feature
* *BW* : addresses backward-compatibility
* *BF* : bug fix
* *RF* : refactoring
* *NF* : new feature
* *BW* : addresses backward-compatibility
* *OPT* : optimization
* *BK* : breaks something and/or tests fail
* *PL* : making pylint happier
* *DOC*: for all kinds of documentation related commits
* *BK* : breaks something and/or tests fail
* *PL* : making pylint happier
* *DOC* : for all kinds of documentation related commits
* *TEST*: for adding or changing tests
* *MAINT*: for administrative/maintenance changes
* *CI*: for continuous-integration changes
* *MNT* : for administrative/maintenance changes
* *CI* : for continuous-integration changes
Version number
......
......@@ -19,7 +19,10 @@
import os.path as op
def loadLabelFile(filename, includeLabel=None, excludeLabel=None):
def loadLabelFile(filename,
includeLabel=None,
excludeLabel=None,
returnIndices=False):
"""Loads component labels from the specified file. The file is assuemd
to be of the format generated by FIX, Melview or ICA-AROMA; such a file
should have a structure resembling the following::
......@@ -70,23 +73,30 @@ def loadLabelFile(filename, includeLabel=None, excludeLabel=None):
*bad* components, i.e. those components which are not classified as
signal or unknown.
:arg filename: Name of the label file to load.
:arg includeLabel: If the file contains a single line containing a list
component indices, this label will be used for the
components in the list. Defaults to 'Unclassified
noise' for FIX-like files, and 'Movement' for
ICA-AROMA-like files.
:arg excludeLabel: If the file contains a single line containing component
indices, this label will be used for the components
that are not in the list. Defaults to 'Signal' for
FIX-like files, and 'Unknown' for ICA-AROMA-like files.
:returns: A tuple containing the path to the melodic directory
as specified in the label file, and a list of lists, one
list per component, with each list containing the labels for
the corresponding component.
:arg filename: Name of the label file to load.
:arg includeLabel: If the file contains a single line containing a list
component indices, this label will be used for the
components in the list. Defaults to 'Unclassified
noise' for FIX-like files, and 'Movement' for
ICA-AROMA-like files.
:arg excludeLabel: If the file contains a single line containing component
indices, this label will be used for the components
that are not in the list. Defaults to 'Signal' for
FIX-like files, and 'Unknown' for ICA-AROMA-like files.
:arg returnIndices: Defaults to ``False``. If ``True``, a list containing
the noisy component numbers that were listed in the
file is returned.
:returns: A tuple containing:
- The path to the melodic directory as specified in the label
file
- A list of lists, one list per component, with each list
containing the labels for the corresponding component.
- If ``returnIndices is True``, a list of the noisy component
indices (starting from 1) that were specified in the file.
"""
signalLabels = None
......@@ -176,7 +186,7 @@ def loadLabelFile(filename, includeLabel=None, excludeLabel=None):
try:
compIdx = int(tokens[0])
except:
except ValueError:
raise InvalidLabelFileError(
'Invalid FIX classification file - '
'line {}: {}'.format(i + 1, compLine))
......@@ -203,7 +213,6 @@ def loadLabelFile(filename, includeLabel=None, excludeLabel=None):
noise = isNoisyComponent(labels, signalLabels)
if noise and (comp not in noisyComps):
print(signalLabels)
raise InvalidLabelFileError('Noisy component {} has invalid '
'labels: {}'.format(comp, labels))
......@@ -217,7 +226,8 @@ def loadLabelFile(filename, includeLabel=None, excludeLabel=None):
raise InvalidLabelFileError('Noisy component {} is missing '
'a noise label'.format(comp))
return melDir, allLabels
if returnIndices: return melDir, allLabels, noisyComps
else: return melDir, allLabels
def saveLabelFile(allLabels,
......
#!/usr/bin/env python
#
# extract_noise.py - Extract ICA component time courses from a MELODIC
# directory.
#
# Author: Paul McCarthy <pauldmccarthy@gmail.com>
#
"""This module defines the ``extract_noise`` script, for extracting component
time series from a MELODIC ``.ica`` directory.
"""
from __future__ import print_function
import os.path as op
import sys
import argparse
import warnings
import numpy as np
# See atlasq.py for explanation
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=FutureWarning)
import fsl.data.fixlabels as fixlabels
import fsl.data.melodicanalysis as melanalysis
DTYPE = np.float64
name = "extract_noise"
desc = 'Extract component time series from a MELODIC .ica directory'
usage = """
{name}: {desc}
Usage:
{name} <.ica directory> [-o outfile] <fixfile>
{name} <.ica directory> [-o outfile] <component> [<component> ...]
{name} <.ica directory> [-o outfile] [-c conffile] [-c conffile] <fixfile>
{name} <.ica directory> [-o outfile] [-c conffile] [-c conffile] <component> [<component> ...]
""".format(name=name, desc=desc).strip() # noqa
helps = {
'outfile' :
'File to save time series to',
'overwrite' :
'Overwrite output file if it exists',
'icadir' :
'.ica directory to extract time series from.',
'component' :
'Component number or FIX/AROMA file specifying components to extract.',
'confound' :
'Extra files to append to output file.',
}
def parseArgs(args):
"""Parses command line arguments.
:arg args: Sequence of command line arguments.
:returns: An ``argparse.Namespace`` object containing parsed arguments.
"""
if len(args) == 0:
print(usage)
sys.exit(0)
parser = argparse.ArgumentParser(prog=name,
usage=usage,
description=desc)
parser.add_argument('-o', '--outfile',
help=helps['outfile'],
default='confound_timeseries.txt')
parser.add_argument('-ow', '--overwrite',
action='store_true',
help=helps['overwrite'])
parser.add_argument('-c', '--conffile',
action='append',
help=helps['confound'])
parser.add_argument('icadir',
help=helps['icadir'])
parser.add_argument('components',
nargs='+',
help=helps['component'])
args = parser.parse_args(args)
# Error if ica directory does not exist
if not op.exists(args.icadir):
print('ICA directory {} does not exist'.format(args.icadir))
sys.exit(1)
# Error if output exists, but overwrite not specified
if op.exists(args.outfile) and not args.overwrite:
print('Output file {} already exists and --overwrite not '
'specified'.format(args.outfile))
sys.exit(1)
# Convert components into integers,
# or absolute file paths, and error
# if any are not one of these.
for i, c in enumerate(args.components):
if op.exists(c):
args.components[i] = op.abspath(c)
else:
try:
args.components[i] = int(c)
except ValueError:
print('Bad component: {}. Components must either be component '
'indices (starting from 1), or paths to FIX/AROMA '
'files.')
sys.exit(1)
# Convert confound files to absolute
# paths, error if any do not exist.
if args.conffile is None:
args.conffile = []
for i, cf in enumerate(args.conffile):
if not op.exists(cf):
print('Confound file does not exist: {}'.format(cf))
sys.exit(1)
args.conffile[i] = op.abspath(cf)
args.outfile = op.abspath(args.outfile)
args.icadir = op.abspath(args.icadir)
return args
def genComponentIndexList(comps, ncomps):
"""Turns the given sequence of integers and file paths into a list
of 0-based component indices.
:arg comps: Sequence containing 1-based component indices, and/or paths
to FIX/AROMA label text files.
:arg ncomps: Number of components in the input data - indices larger than
this will be ignored.
:returns: List of 0-based component indices.
"""
allcomps = []
for c in comps:
if isinstance(c, int):
ccomps = [c]
else:
ccomps = fixlabels.loadLabelFile(c, returnIndices=True)[2]
allcomps.extend([c - 1 for c in ccomps])
if any([c < 0 or c >= ncomps for c in allcomps]):
raise ValueError('Invalid component indices: {}'.format(allcomps))
return list(sorted(set(allcomps)))
def loadConfoundFiles(conffiles, npts):
"""Loads the given confound files, and copies them all into a single 2D
``(npoints, nconfounds)`` matrix.
:arg conffiles: Sequence of paths to files containing confound time series
(where each row corresponds to a time point, and each
column corresponds to a single confound).
:arg npts: Expected number of time points
:returns: A ``(npoints, nconfounds)`` ``numpy`` matrix.
"""
matrices = []
for cfile in conffiles:
mat = np.loadtxt(cfile, dtype=DTYPE)
if len(mat.shape) == 1:
mat = np.atleast_2d(mat).T
if mat.shape[0] != npts:
raise ValueError('Confound file {} does not have correct number '
'of points (expected {}, has {})'.format(
cfile, npts, mat.shape[0]))
matrices.append(mat)
ncols = sum([m.shape[1] for m in matrices])
confounds = np.zeros((npts, ncols), dtype=DTYPE)
coli = 0
for mat in matrices:
matcols = mat.shape[1]
confounds[:, coli:coli + matcols] = mat
coli = coli + matcols
return confounds
def main(argv=None):
"""Entry point for the ``extract_noise`` script.
Identifies component time series to extract, extracts them, loads extra
confound files, and saves them out to a file.
"""
if argv is None:
argv = sys.argv[1:]
args = parseArgs(argv)
try:
ts = melanalysis.getComponentTimeSeries(args.icadir)
npts, ncomps = ts.shape
confs = loadConfoundFiles(args.conffile, npts)
comps = genComponentIndexList(args.components, ncomps)
ts = ts[:, comps]
except Exception as e:
print(e)
sys.exit(1)
ts = np.hstack((ts, confs))
np.savetxt(args.outfile, ts, fmt='%10.5f')
if __name__ == '__main__':
sys.exit(main())
......@@ -124,11 +124,12 @@ setup(
entry_points={
'console_scripts' : [
'immv = fsl.scripts.immv:main',
'imcp = fsl.scripts.imcp:main',
'imglob = fsl.scripts.imglob:main',
'atlasq = fsl.scripts.atlasq:main',
'atlasquery = fsl.scripts.atlasq:atlasquery_emulation',
'immv = fsl.scripts.immv:main',
'imcp = fsl.scripts.imcp:main',
'imglob = fsl.scripts.imglob:main',
'atlasq = fsl.scripts.atlasq:main',
'atlasquery = fsl.scripts.atlasq:atlasquery_emulation',
'extract_noise = fsl.scripts.extract_noise:main',
]
}
)
#!/usr/bin/env python
#
# test_extract_noise.py -
#
# Author: Paul McCarthy <pauldmccarthy@gmail.com>
#
import sys
import numpy as np
import pytest
import fsl.utils.tempdir as tempdir
import fsl.scripts.extract_noise as extn
def test_genComponentIndexList():
with tempdir.tempdir():
# sequence of 1-indexed integers/file paths
icomps = [1, 5, 28, 12, 42, 54]
fcomps1 = [1, 4, 6, 3, 7]
fcomps2 = [12, 42, 31, 1, 4, 8]
with open('comps1.txt', 'wt') as f:
f.write(','.join([str(l) for l in fcomps1]))
with open('comps2.txt', 'wt') as f:
f.write(','.join([str(l) for l in fcomps2]))
ncomps = 60
comps = icomps + ['comps1.txt', 'comps2.txt']
expcomps = list(sorted(set(icomps + fcomps1 + fcomps2)))
expcomps = [c - 1 for c in expcomps]
assert extn.genComponentIndexList(comps, ncomps) == expcomps
with pytest.raises(ValueError):
extn.genComponentIndexList(comps + [-1], 60)
with pytest.raises(ValueError):
extn.genComponentIndexList(comps, 40)
def test_loadConfoundFiles():
with tempdir.tempdir():
npts = 50
confs = [
np.random.randint(1, 100, (50, 10)),
np.random.randint(1, 100, (50, 1)),
np.random.randint(1, 100, (50, 5))]
badconfs = [
np.random.randint(1, 100, (40, 10)),
np.random.randint(1, 100, (60, 10))]
expected = np.empty((50, 16), dtype=np.float64)
expected[:, :] = np.nan
expected[:, :10] = confs[0]
expected[:, 10:11] = confs[1]
expected[:, 11:16] = confs[2]
conffiles = []
for i, c in enumerate(confs):
fname = 'conf{}.txt'.format(i)
conffiles.append(fname)
np.savetxt(fname, c)
result = extn.loadConfoundFiles(conffiles, npts)
amask = ~np.isnan(expected)
assert np.all(~np.isnan(result) == amask)
assert np.all(result[amask] == expected[amask])
assert np.all(result[amask] == expected[amask])
badconfs = [
np.random.randint(1, 100, (40, 10)),
np.random.randint(1, 100, (60, 10))]
conffiles = []
for i, c in enumerate(badconfs):
fname = 'conf{}.txt'.format(i)
conffiles.append(fname)
np.savetxt(fname, c)
with pytest.raises(ValueError):
extn.loadConfoundFiles(conffiles, npts)
def test_extract_noise():
with tempdir.tempdir() as td:
# (npts, ncomps)
melmix = np.random.randint(1, 100, (100, 20))
np.savetxt('melodic_mix', melmix)
sys.argv = ['extract_noise', td] + '-o out.txt 1 2 3'.split()
extn.main()
assert np.all(np.loadtxt('out.txt') == melmix[:, :3])
with open('labels.txt', 'wt') as f:
f.write('4, 5, 6, 7')
extn.main([td] + '-o out.txt -ow 1 2 3 labels.txt'.split())
assert np.all(np.loadtxt('out.txt') == melmix[:, :7])
conf1 = np.random.randint(1, 100, (100, 1))
conf2 = np.random.randint(1, 100, (100, 5))
np.savetxt('conf1.txt', conf1)
np.savetxt('conf2.txt', conf2)
exp = np.hstack((melmix[:, :3], conf1, conf2))
extn.main([td] + '-o out.txt -c conf1.txt -c conf2.txt -ow 1 2 3'.split())
assert np.all(np.loadtxt('out.txt') == exp)
def test_extract_noise_usage():
with pytest.raises(SystemExit) as e:
extn.main([])
assert e.value.code == 0
def test_extract_noise_badargs():
with pytest.raises(SystemExit) as e:
extn.main(['non-existent.ica', '1', '2', '3'])
assert e.value.code != 0
with tempdir.tempdir() as td:
with pytest.raises(SystemExit) as e:
extn.main([td, 'non-existent.txt', '1', '2', '3'])
assert e.value.code != 0
with open('outfile.txt', 'wt') as f:
f.write('a')
# overwrite not specified
with pytest.raises(SystemExit) as e:
extn.main([td, '-o', 'outfile.txt', '1', '2', '3'])
assert e.value.code != 0
with pytest.raises(SystemExit) as e:
extn.main([td, '-c', 'non-existent.txt', '1', '2', '3'])
assert e.value.code != 0
# bad data
melmix = np.random.randint(1, 100, (100, 5))
np.savetxt('melodic_mix', melmix)
with open('labels.txt', 'wt') as f:
f.write('-1, 0, 1, 2')
with pytest.raises(SystemExit) as e:
extn.main([td, 'labels.txt', '1', '2', '3'])
assert e.value.code != 0
......@@ -35,7 +35,8 @@ filtered_func_data.ica
['Unclassified Noise'],
['Unclassified Noise'],
['Unclassified Noise'],
['Signal']]))
['Signal']],
[2, 5, 6, 7]))
goodfiles.append(("""
......@@ -92,7 +93,8 @@ REST.ica/filtered_func_data.ica
['Unclassified noise'],
['Unclassified noise'],
['Unclassified noise'],
['Unknown']]))
['Unknown']],
[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 21, 22, 23, 24]))
goodfiles.append(("""
[2, 5, 6, 7]
......@@ -104,7 +106,8 @@ None,
['Signal'],
['Unclassified noise'],
['Unclassified noise'],
['Unclassified noise']]))
['Unclassified noise']],
[2, 5, 6, 7]))
goodfiles.append(("""
2, 5, 6, 7
......@@ -116,7 +119,8 @@ None,
['Unknown'],
['Movement'],
['Movement'],
['Movement']]))
['Movement']],
[2, 5, 6, 7]))
goodfiles.append(("""
......@@ -127,11 +131,12 @@ path/to/analysis.ica
""",
'path/to/analysis.ica',
[['Unclassified noise'],
['Signal', 'Blob']]))
['Signal', 'Blob']],
[1]))
def test_loadLabelFile_good():
for filecontents, expMelDir, expLabels in goodfiles:
for filecontents, expMelDir, expLabels, expIdxs in goodfiles:
with tests.testdir() as testdir:
......@@ -143,9 +148,15 @@ def test_loadLabelFile_good():
f.write(filecontents.strip())
resMelDir, resLabels = fixlabels.loadLabelFile(fname)
assert resMelDir == expMelDir
assert len(resLabels) == len(expLabels)
for exp, res in zip(expLabels, resLabels):
assert exp == res
resMelDir, resLabels, resIdxs = fixlabels.loadLabelFile(
fname, returnIndices=True)
assert resMelDir == expMelDir
assert resIdxs == expIdxs
assert len(resLabels) == len(expLabels)
for exp, res in zip(expLabels, resLabels):
assert exp == res
......@@ -309,7 +320,7 @@ def test_loadLabelFile_customLabels():
if i in included:
assert ilbls[0] == incLabel
else:
assert ilbls[0] == excLabel
assert ilbls[0] == excLabel
def test_saveLabelFile():
......@@ -328,7 +339,7 @@ def test_saveLabelFile():
4, Label1, True
5, Unknown, False
""").strip()
with tests.testdir() as testdir:
fname = op.join(testdir, 'fname.txt')
......@@ -344,12 +355,12 @@ def test_saveLabelFile():
exp = '{}\n{}'.format(dirname, expected)
with open(fname, 'rt') as f:
assert f.read().strip() == exp
# dirname=None, listBad=True
fixlabels.saveLabelFile(labels, fname)
exp = '.\n{}\n[1, 3, 4]'.format(expected)
with open(fname, 'rt') as f:
assert f.read().strip() == exp
with open(fname, 'rt') as f:
assert f.read().strip() == exp
# Custom signal labels
sigLabels = ['Label1']
......@@ -364,5 +375,5 @@ def test_saveLabelFile():
""").strip()
fixlabels.saveLabelFile(labels, fname, signalLabels=sigLabels)
with open(fname, 'rt') as f:
assert f.read().strip() == exp
with open(fname, 'rt') as f:
assert f.read().strip() == exp
......@@ -204,6 +204,8 @@ def test_loadVertexData_annot():
loaded = mesh.loadVertexData(vdfile)
ergbal, enames = mesh.getVertexDataColourTable(vdfile)
enames = [n.decode() for n in enames]
assert np.all(np.isclose(loaded, labels.reshape(-1, 1)))
assert list(enames) == list(names)
assert np.all(np.isclose(ergbal[:, :4], rgba))
......
......@@ -30,6 +30,18 @@ from fsl.utils.tempdir import tempdir
from . import make_random_image
from . import make_dummy_file
try:
from unittest import mock
except ImportError:
import mock
try:
import indexed_gzip as igzip
except ImportError:
igzip = mock.MagicMock()
igzip.ZranError = mock.MagicMock()
def make_image(filename=None,
imgtype=1,
......@@ -116,8 +128,8 @@ def test_load():
shouldRaise = [('notexist', fslpath.PathError),
('notexist.nii.gz', fslpath.PathError),
('ambiguous', fslpath.PathError),
('notnifti', ImageFileError),
('notnifti.nii.gz', ImageFileError)]
('notnifti', (ImageFileError, igzip.ZranError)),
('notnifti.nii.gz', (ImageFileError, igzip.ZranError))]
with tempdir() as testdir:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment