FSL / ukbparse · Commit e5df476b

Authored Dec 25, 2018 by Paul McCarthy 🚵

Merge branch 'enh/select_by_column_name' into 'master'

Enh/select by column name

See merge request fsl/ukbparse!98

Parents: d34aad97 3d058619
Pipeline #3194 passed with stages in 8 minutes and 23 seconds
Changes: 19 files
CHANGELOG.rst

@@ -2,7 +2,7 @@
 ======================


-0.13.1 (Thursday 20th Deember 2018)
+0.14.0 (Tuesday 25th December 2018)
 -----------------------------------

@@ -10,6 +10,37 @@ Added
 ^^^^^

+* New ``--column`` option, allowing columns to be selected by name/name
+  pattern.
+* ``ukbparse`` can now be installed from `conda-forge
+  <https://anaconda.org/conda-forge/ukbparse>`_.
+
+
+Changed
+^^^^^^^
+
+
+* The index column in the output file no longer defaults to being named
+  ``'eid'``. It defaults to the name of the index in the input file, but
+  can still be overridden by the ``--output_id_column`` option.
+
+
+Fixed
+^^^^^
+
+
+* Blank lines are now allowed in configuration files (#2)
+* Fix to derived column names for ICD10 variables in default processing rules.
+
+
 0.13.1 (Thursday 20th December 2018)
 ------------------------------------


 Added
 ^^^^^

 * Unit test to make sure that ``ukbparse`` crashes if given bad input
   arguments.
README.rst

@@ -5,6 +5,9 @@
 .. image:: https://img.shields.io/pypi/v/ukbparse.svg
    :target: https://pypi.python.org/pypi/ukbparse/

+.. image:: https://anaconda.org/conda-forge/ukbparse/badges/version.svg
+   :target: https://anaconda.org/conda-forge/ukbparse
+
 .. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.1997626.svg
    :target: https://doi.org/10.5281/zenodo.1997626

@@ -25,6 +28,11 @@ Install ``ukbparse`` via pip::

     pip install ukbparse

+Or from ``conda-forge``::
+
+    conda install -c conda-forge ukbparse
+
 Comprehensive documentation does not yet exist.
...
ukbparse/__init__.py

@@ -6,7 +6,7 @@
 #

-__version__ = '0.13.1'
+__version__ = '0.14.0'

 """The ``ukbparse`` versioning scheme roughly follows Semantic Versioning
 conventions.
 """
...
ukbparse/config.py

@@ -44,7 +44,6 @@ DEFAULT_MERGE_STRATEGY = importing.MERGE_STRATEGY
 DEFAULT_EXPORT_FORMAT      = exporting.EXPORT_FORMAT
 AVAILABLE_MERGE_AXES       = importing.MERGE_AXIS_OPTIONS
 AVAILABLE_MERGE_STRATEGIES = importing.MERGE_STRATEGY_OPTIONS
-DEFAULT_OUTPUT_ID_COLUMN   = exporting.ID_COLUMN
 DEFAULT_COLUMN_PATTERN     = exporting.COLUMN_PATTERN
 DEFAULT_TSV_SEP            = exporting_tsv.TSV_SEP
 DEFAULT_HDF5_KEY           = exporting_hdf5.HDF5_KEY

@@ -79,6 +78,7 @@ CLI_ARGUMENTS = collections.OrderedDict((
         (('r',  'remove_unknown'), {'action' : 'store_true'}),
         (('s',  'subject'),        {'action' : 'append'}),
         (('v',  'variable'),       {'action' : 'append'}),
+        (('co', 'column'),         {'action' : 'append'}),
         (('c',  'category'),       {'action' : 'append'}),
         (('vi', 'visit'),          {'action' : 'append'}),
         (('ex', 'exclude'),        {'action' : 'append'})]),
@@ -124,7 +124,7 @@ CLI_ARGUMENTS = collections.OrderedDict((
((
'rc'
,
'rename_column'
),
{
'action'
:
'append'
,
'nargs'
:
2
,
'metavar'
:
(
'OLD_NAME'
,
'NEW_NAME'
)}),
# noqa
((
'oi'
,
'output_id_column'
),
{
'default'
:
DEFAULT_OUTPUT_ID_COLUMN
}),
# noqa
((
'oi'
,
'output_id_column'
),
{
}),
((
'edf'
,
'date_format'
),
{
'default'
:
'default'
}),
((
'etf'
,
'time_format'
),
{
'default'
:
'default'
}),
((
'nr'
,
'num_rows'
),
{
'type'
:
int
}),
...
...
@@ -256,6 +256,11 @@ CLI_ARGUMENT_HELP = {
'variable IDs, to import. Can be used multiple times. Implies '
'--remove_unknown.'
,
'column'
:
'Name of column to import. Can also be a glob-style wildcard pattern - '
'columns with a name matching the pattern will be imported. Can be used '
'multiple times. Implies --remove_unknown.'
,
'category'
:
'Category ID or label to import. Can be used multiple times. Implies '
'--remove_unknown.'
,
...
...
@@ -330,8 +335,7 @@ CLI_ARGUMENT_HELP = {
'be used multiple times'
,
'output_id_column'
:
'Name of ID column in output file '
'(default:
\'
{}
\'
)'
.
format
(
DEFAULT_OUTPUT_ID_COLUMN
),
'Name of ID column in output file.'
,
'date_format'
:
'Formatter to use for date variables (default: "default").'
,
...
@@ -661,9 +665,12 @@ def parseArgs(argv=None, namespace=None):
         if args.global_clean is None: args.global_clean  = visit
         else:                         args.global_clean += ',' + visit

-    # If variables/categories are explicitly
-    # specified, remove_unknown is implied.
-    if args.variable is not None or args.category is not None:
+    # If variables/categories/columns are
+    # explicitly specified, remove_unknown
+    # is implied.
+    if any((args.variable is not None,
+            args.category is not None,
+            args.column   is not None)):
         args.remove_unknown = True

     # categories can be specified
@@ -725,8 +732,9 @@ def loadConfigFile(cfgfile):
log
.
debug
(
'Loading arguments from configuration file %s'
,
cfgfile
)
with
open
(
cfgfile
,
'rt'
)
as
f
:
lines
=
[
l
.
strip
()
for
l
in
f
.
readlines
()]
lines
=
[
l
for
l
in
lines
if
not
l
.
startswith
(
'#'
)]
lines
=
[
line
.
strip
()
for
line
in
f
.
readlines
()]
lines
=
[
line
for
line
in
lines
if
line
!=
''
]
lines
=
[
line
for
line
in
lines
if
not
line
.
startswith
(
'#'
)]
for
line
in
lines
:
words
=
list
(
shlex
.
split
(
line
))
...
...
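Note: a minimal sketch of the behaviour fixed above, with hypothetical configuration-file contents (not from the commit) — blank lines and '#' comments are now both skipped before the remaining lines are shlex-parsed:

```python
import shlex

# Hypothetical configuration file contents - blank lines and
# '#' comments should both be skipped.
cfgtext = """
# select columns by name/pattern
column  41202*

variable  40001
"""

# Same filtering logic as the new loadConfigFile code above
lines = [line.strip() for line in cfgtext.split('\n')]
lines = [line for line in lines if line != '']
lines = [line for line in lines if not line.startswith('#')]

for line in lines:
    print(list(shlex.split(line)))
# ['column', '41202*']
# ['variable', '40001']
```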
ukbparse/data/processing.tsv

 Variable	Process
-40001	binariseCategorical(acrossInstances=True, acrossVisits=True, nameFormat='{vid}{value}')
-40002	binariseCategorical(acrossInstances=True, acrossVisits=True, nameFormat='{vid}{value}')
-40006	binariseCategorical(acrossInstances=True, acrossVisits=True, nameFormat='{vid}{value}')
-41202	binariseCategorical(acrossInstances=True, nameFormat='{vid}{value}-{visit}')
-41204	binariseCategorical(acrossInstances=True, nameFormat='{vid}{value}-{visit}')
+40001	binariseCategorical(acrossInstances=True, acrossVisits=True, nameFormat='{vid}{value:0.0f}')
+40002	binariseCategorical(acrossInstances=True, acrossVisits=True, nameFormat='{vid}{value:0.0f}')
+40006	binariseCategorical(acrossInstances=True, acrossVisits=True, nameFormat='{vid}{value:0.0f}')
+41202	binariseCategorical(acrossInstances=True, nameFormat='{vid}{value:0.0f}-{visit}')
+41204	binariseCategorical(acrossInstances=True, nameFormat='{vid}{value:0.0f}-{visit}')
 all_independent	removeIfSparse(minpres=51, maxcat=0.99)
 all	removeIfRedundant(0.99, 0.2)
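Note: the ``:0.0f`` format spec fixes the derived column names, presumably because the categorical values arrive as floats. A small illustration with hypothetical values and standard Python string formatting:

```python
# When a categorical value has been parsed as a float, formatting it
# without a format spec leaves a '.0' suffix in the derived name.
vid, value, visit = 41202, 1065.0, 0    # hypothetical values

print('{vid}{value}-{visit}'.format(vid=vid, value=value, visit=visit))
# 412021065.0-0

print('{vid}{value:0.0f}-{visit}'.format(vid=vid, value=value, visit=visit))
# 412021065-0
```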
ukbparse/exporting.py

@@ -37,10 +37,6 @@ Setting this variable to ``None`` has the same effect as setting it to
 """


-ID_COLUMN = 'eid'
-"""Default name for the output ID column. """


 EXPORT_FORMAT = 'tsv'
 """Default export format."""

@@ -120,8 +116,8 @@ def exportData(dtable,
                      to be used as the ``variable`` in ``colpat`` when
                      generating output column names.

-    :arg idcol:      Name to use for ID column. Defaults to :attr:`ID_COLUMN`.
+    :arg idcol:      Name to use for ID column. Defaults to the original
+                     index column name (``pandas.DataFrame.index.name``).

     :arg fileFormat: File format to export to - the name of a ``@exporter``
                      plugin. If not provided, defaults to

@@ -130,7 +126,7 @@ def exportData(dtable,
     if colpat     is None: colpat     = COLUMN_PATTERN
     if fileFormat is None: fileFormat = EXPORT_FORMAT
-    if idcol      is None: idcol      = ID_COLUMN
+    if idcol      is None: idcol      = dtable.index.name
     if subjects   is None: subjects   = dtable.index
ukbparse/exporting_tsv.py

@@ -83,7 +83,7 @@ def exportTSV(dtable,
         cstart  = chunki * numRows
         cend    = cstart + numRows
         csubjs  = subjects[cstart:cend]
-        towrite = pd.DataFrame()
+        towrite = pd.DataFrame(index=csubjs)

         for col in dtable.allColumns:
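Note: a toy sketch (not ukbparse's API) of why the chunk's DataFrame is now created with the subject index up front — Series assigned to it afterwards are aligned by subject ID rather than against an empty default index:

```python
import pandas as pd

# Toy subject IDs for one chunk of rows (hypothetical)
csubjs = [101, 102, 103]

# Creating the chunk with the subject IDs as its index means that any
# Series subsequently assigned to it is aligned by subject ID, and the
# rows for these subjects exist even before any columns are added.
towrite = pd.DataFrame(index=csubjs)
towrite['1-0.0'] = pd.Series([5, 6, 7], index=[103, 101, 102])

print(towrite)
#      1-0.0
# 101      6
# 102      7
# 103      5
```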
ukbparse/fileinfo.py

@@ -359,16 +359,20 @@ def fileinfo(datafiles, indexes=None, sniffers=None):
         for ci, col in enumerate(fcols):

-            # UKB-style - all good
-            if col.vid is not None:
+            # Index columns always get
+            # a variable ID of 0
+            if ci == idxcol:
+                vid = 0
+
+            # UKB-style - we already
+            # have a variable id, visit,
+            # and instance.
+            elif col.vid is not None:
                 continue

             # Non-UKB style file - assign
             # a (vid, visit, instance) to
-            # each column, giving the index
-            # column vid 0
-            if ci == idxcol:
-                vid = 0
+            # each column
             else:
                 vid      = autovid
                 autovid += 1
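Note: a rough standalone sketch of the reordered logic above, using a simplified stand-in for ukbparse's Column type (names and defaults here are hypothetical):

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class Column:                    # simplified stand-in for ukbparse's Column
    name: str
    vid:  Optional[int] = None   # UKB-style columns come with a variable ID

def assign_vids(fcols, idxcol=0, autovid=1000):
    for ci, col in enumerate(fcols):
        if ci == idxcol:             # index columns always get VID 0
            col.vid = 0
        elif col.vid is not None:    # UKB-style - VID already known
            continue
        else:                        # non-UKB style - auto-assign a VID
            col.vid  = autovid
            autovid += 1
    return [c.vid for c in fcols]

cols = [Column('my_id'), Column('1-0.0', vid=1), Column('height')]
print(assign_vids(cols))   # [0, 1, 1000]
```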
ukbparse/importing.py

@@ -13,6 +13,7 @@ import os.path as op
 import itertools             as it
 import functools             as ft
 import multiprocessing.dummy as mpd
+import fnmatch
 import logging
 import warnings
 import collections

@@ -67,6 +68,7 @@ def importData(datafiles,
                proctable,
                cattable,
                variables=None,
+               colnames=None,
                categories=None,
                subjects=None,
                encoding=None,

@@ -102,6 +104,9 @@ def importData(datafiles,
     :arg variables:  List of variable IDs to import

+    :arg colnames:   List of names/glob-style wildcard patterns
+                     specifying columns to import.
+
     :arg categories: List of category names to import

     :arg subjects:   List of subjects to include.
@@ -169,6 +174,7 @@ def importData(datafiles,
     cols, drop = columnsToLoad(datafiles,
                                vartable,
                                variables,
+                               colnames,
                                unknownVars,
                                removeUnknown,
                                indexes=indexes,

@@ -335,6 +341,7 @@ def restrictVariables(cattable, variables, categories):
 def columnsToLoad(datafiles,
                   vartable,
                   variables,
+                  colnames,
                   unknownVars,
                   removeUnknown,
                   indexes=None,

@@ -351,6 +358,10 @@ def columnsToLoad(datafiles,
     :arg variables:     List of variables to load. If provided,
                         ``removeUnknown`` is ignored.

+    :arg colnames:      List of column names/glob-style wildcard patterns,
+                        specifying columns to load. If provided,
+                        ``removeUnknown`` is ignored.
+
     :arg unknownVars:   List of :class:`.Column` objects representing unknown
                         variables

@@ -369,11 +380,8 @@ def columnsToLoad(datafiles,
                 - A dict of ``{ file : [Column] }`` mappings, the
                   :class:`.Column` objects to *load* from each input
-                  file.
-
-                  Note that the columns are not necessarily ordered
-                  in the same way that they are in the input files -
-                  the header column will always be first in each list.
+                  file. The columns (including the index column) are
+                  ordered as they appear in the file.

                 - A list containing the :class:`.Column` objects to
                   *ignore*.
@@ -382,7 +390,10 @@ def columnsToLoad(datafiles,
     if sniffers is None: sniffers = {}
     if indexes  is None: indexes  = {}
+
     if variables is not None: removeUnknown = False
+    if colnames  is not None: removeUnknown = False

     # Turn the unknonwVars list
     # into a list of variable IDs
     unknownVids = list(sorted(set([c.vid for c in unknownVars])))

     if isinstance(datafiles, six.string_types):

@@ -392,24 +403,26 @@ def columnsToLoad(datafiles,
     # omitting the relevant columns.
     loadFuncNames = ['remove', 'keepVisits']

-    # Peek at the columns that are
-    # in the input files. Save a ref
-    # to the first column (assumed
-    # to be the ID column)
+    # Peek at the columns that
+    # are in the input files.
     allcols = fileinfo.fileinfo(datafiles,
                                 indexes=indexes,
                                 sniffers=sniffers)[2]
-    indexes = [indexes.get(f, 0)  for f    in datafiles]
-    hdrcols = [c[i]               for c, i in zip(allcols, indexes)]
-    allcols = [c[:i] + c[i + 1:]  for c, i in zip(allcols, indexes)]
-    allcols = list(it.chain(*allcols))
+    ncols   = len(list(it.chain(*allcols)))

-    # re-organise them - a list of
-    # columns for each variable ID
+    # re-organise the columns - a list of
+    # columns for each variable ID. We do
+    # this because, for a given VID, we
+    # want to pass all columns at once to
+    # the cleaning function(s) below.
     byvid = collections.defaultdict(list)
-    for col in allcols:
+    for col in it.chain(*allcols):
         byvid[col.vid].append(col)

+    # Build a full list of index
+    # columns for each data file.
+    indexes = [indexes.get(f, 0) for f in datafiles]
+
     # retrieve all cleaning steps -
     # we are only going to apply the
     # cleaning steps that will
@@ -422,30 +435,50 @@ def columnsToLoad(datafiles,
     # Loop through all columns in
     # the data, and build a list of
     # the ones we want to load. The
-    # end result will be organised
-    # by the data files.
-    #
-    # We load the ID column for every
-    # file - it will appear first in
-    # the list for each input file.
+    # end result will be an ordered
+    # dict of { file : [column] }
+    # mappings, and a list of columns
+    # to drop.
     drop = []
-    load = collections.OrderedDict([(f, [hc]) for f, hc in zip(datafiles, hdrcols)])
+    load = collections.OrderedDict([(f, []) for f in datafiles])

     for vid, cols in byvid.items():

+        # index column - load it!
+        # (the fileinfo function gives
+        # index columns a variable ID
+        # of 0).
+        if vid == 0:
+            for col in cols:
+                load[col.datafile].append(col)
+            continue
+
         # variable list provided, but this
         # variable is not in it - don't load.
         if variables is not None and vid not in variables:
             drop.extend(cols)
             continue

-        # column is flagged as unknown,
-        # and we have been told to
-        # ignore unknown columns
+        # variable is flagged as unknown,
+        # and we have been told to ignore
+        # unknown variables
         if removeUnknown and vid in unknownVids:
             drop.extend(cols)
             continue

+        # column names/patterns specified -
+        # filter the list of columns based
+        # on whether they match any of the
+        # patterns specified.
+        if colnames is not None:
+            for col in list(cols):
+                hits = [fnmatch.fnmatch(col.name, pat) for pat in colnames]
+                if not any(hits):
+                    cols.remove(col)
+                    drop.append(col)
+
         # cleaning specified for this variable
         if vid in ppvids:
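Note: a self-contained sketch (toy column names, not ukbparse's API) of the glob-style filtering added above:

```python
import fnmatch

colnames = ['41202*', 'height']          # user-supplied --column patterns
cols     = ['eid', '41202-0.0', '41202-0.1', '41270-0.0', 'height']

# Keep a column if it matches any of the patterns, as in the
# columnsToLoad filtering above
keep = [c for c in cols
        if any(fnmatch.fnmatch(c, pat) for pat in colnames)]
print(keep)   # ['41202-0.0', '41202-0.1', 'height']
```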
@@ -470,9 +503,15 @@ def columnsToLoad(datafiles,
         for col in cols:
             load[col.datafile].append(col)

+    # Final step - the column lists for each
+    # file are not necessarily ordered by
+    # their position in the file. Re-order
+    # them so they are.
+    for fname, cols in list(load.items()):
+        load[fname].sort(key=lambda c: c.index)
+
     log.debug('Identified %i / %i columns to be loaded',
               sum([len(c) for c in load.values()]),
-              len(allcols) + len(datafiles))
+              ncols)

     return load, drop

@@ -501,8 +540,7 @@ def loadData(datafiles,
     :arg columns:     Dict of ``{ file : [Column] }`` mappings,
                       defining the columns to load, as returned by
-                      :func:`columnsToLoad`. It is assumed that the
-                      first column in each list is the index column.
+                      :func:`columnsToLoad`.

     :arg nrows:       Number of rows to read at a time. Defaults to
                       :attr:`NUM_ROWS`.
@@ -654,8 +692,8 @@ def loadFile(fname,
                     in the file.

     :arg toload:    Sequence of :class:`.Column` objects describing the columns
-                    that should be loaded. It is assumed that the first column
-                    in this list is the index column.
+                    that should be loaded, as generated by
+                    :func:`columnsToLoad`.

     :arg index:     Column position of index column (starting from 0). Defaults
                     to 0.

@@ -701,6 +739,26 @@ def loadFile(fname,
     def shouldLoad(c):
         return c in toloadnames

+    # The read_csv function requires the
+    # index argument to be specified
+    # relative to the usecols argument:
+    #
+    #  - https://stackoverflow.com/a/45943627
+    #  - https://github.com/pandas-dev/pandas/issues/9098
+    #  - https://github.com/pandas-dev/pandas/issues/2654
+    #
+    # So here we make index relative to
+    # toloadnames.
+    #
+    # We also drop the index column from
+    # the toload list - after the call to
+    # read_csv, we want our Column list
+    # to align with the pandas Series
+    # objects (which won't include the
+    # index).
+    index = [i for i, c in enumerate(toload) if c.index == index][0]
+    toload.pop(index)
+
     # Figure out suitable data types to
     # store the data for each column.
     # Only date/time columns are converted
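Note: a minimal demonstration of the pandas behaviour referenced in the comment block above — when ``usecols`` is given, an integer ``index_col`` is interpreted relative to the loaded columns, not to their positions in the file:

```python
import io
import pandas as pd

csv = io.StringIO('eid,a,b,c\n'
                  '1,10,20,30\n'
                  '2,11,21,31\n')

# 'b' sits at position 2 in the file, but at position 1 within the
# loaded columns - index_col must be given relative to usecols:
df = pd.read_csv(csv, usecols=['eid', 'b'], index_col=1)

print(df.index.name)     # 'b'
print(list(df.columns))  # ['eid']
```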
@@ -718,11 +776,7 @@ def loadFile(fname,
     else:
         header = None

     log.debug('Loading %u columns from %s: %s ...',
-              len(toload), fname, toloadnames[:5])
-
-    # we can discard the index column
-    # from the toload list now
-    toload = [c for c in toload if c.index != index]
+              len(toload) + 1, fname, toloadnames[:5])

     if dialect == 'whitespace': dlargs = {'delim_whitespace' : True}
     else:                       dlargs = {'dialect' : dialect}

@@ -737,6 +791,7 @@ def loadFile(fname,
     with warnings.catch_warnings():
         warnings.filterwarnings('ignore', module='pandas.io.parsers')
+        warnings.filterwarnings('ignore', category=pd.errors.DtypeWarning)
         dfiter = pd.read_csv(fname,
                              header=header,
                              names=allcolnames,
ukbparse/loadtables.py

@@ -415,34 +415,43 @@ def loadVariableTable(datafiles,
     if sniffers is None: sniffers = {}
     if indexes  is None: indexes  = {}

-    if varfile is not None:
-        log.debug('Loading variable table from %s', varfile)
-        vartable = pd.read_csv(varfile, '\t',
-                               index_col=0,
-                               dtype=VARTABLE_DTYPES,
-                               converters=VARTABLE_CONVERTERS)
-    else:
-        vartable = pd.DataFrame(columns=VARTABLE_COLUMNS[1:])
-        vartable.index.name = VARTABLE_COLUMNS[0]
-
-    if dcfile is not None:
-        log.debug('Loading data coding table from %s', dcfile)
-        dctable = pd.read_csv(dcfile, '\t',
-                              index_col=0,
-                              dtype=DCTABLE_DTYPES,
-                              converters=DCTABLE_CONVERTERS)
-    else:
-        dctable = pd.DataFrame(columns=DCTABLE_COLUMNS[1:])
-        dctable.index.name = DCTABLE_COLUMNS[0]
-
-    if typefile is not None:
-        log.debug('Loading type table from %s', typefile)
-        tytable = pd.read_csv(typefile, '\t',
-                              index_col=0,
-                              converters=TYPETABLE_CONVERTERS)
-    else:
-        tytable = pd.DataFrame(columns=TYPETABLE_COLUMNS[1:])
-        tytable.index.name = TYPETABLE_COLUMNS[0]
+    def load_table_file(fname, what, dtypes, converters, columns):
+        with warnings.catch_warnings():
+            warnings.filterwarnings('ignore', category=pd.errors.ParserWarning)
+            if fname is not None:
+                log.debug('Loading %s table from %s', what, varfile)
+                table = pd.read_csv(fname, '\t',
+                                    index_col=0,
+                                    dtype=dtypes,
+                                    converters=converters)
+            else:
+                table = pd.DataFrame(columns=columns[1:])
+                table.index.name = columns[0]
+
+        if list(sorted(table.columns)) != sorted(columns[1:]):
+            raise ValueError('Missing/unrecognised columns in table file {} - '
+                             'should be {}, but file contained {}.'.format(
+                                 fname, columns, table.columns))
+        return table
+
+    vartable = load_table_file(varfile,
+                               'variable',
+                               VARTABLE_DTYPES,
+                               VARTABLE_CONVERTERS,
+                               VARTABLE_COLUMNS)
+    dctable  = load_table_file(dcfile,
+                               'data coding',
+                               DCTABLE_DTYPES,
+                               DCTABLE_CONVERTERS,
+                               DCTABLE_COLUMNS)
+    tytable  = load_table_file(typefile,
+                               'type',
+                               TYPETABLE_DTYPES,
+                               TYPETABLE_CONVERTERS,
+                               TYPETABLE_COLUMNS)

     # Make sure data types are aligned,
     # otherwise we may run into problems
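Note: the new ``load_table_file`` helper also validates the loaded table's columns. A toy sketch of that check in isolation (the expected column list here is hypothetical):

```python
import pandas as pd

EXPECTED = ['ID', 'Type', 'Description']   # hypothetical column list

def check_columns(table, fname, columns):
    # Same check as in load_table_file above: the non-index columns
    # must match the expected columns exactly (order-insensitive).
    if list(sorted(table.columns)) != sorted(columns[1:]):
        raise ValueError('Missing/unrecognised columns in table file {} - '
                         'should be {}, but file contained {}.'.format(
                             fname, columns, table.columns))

good = pd.DataFrame(columns=['Description', 'Type'])
bad  = pd.DataFrame(columns=['Type'])

check_columns(good, 'good.tsv', EXPECTED)   # passes silently
check_columns(bad,  'bad.tsv',  EXPECTED)   # raises ValueError
```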
ukbparse/main.py

@@ -11,13 +11,12 @@ import multiprocessing as mp
 import sys
 import shutil
 import logging
+import fnmatch
 import tempfile
 import warnings
 import datetime
 import calendar
 import pandas   as pd
 import ukbparse
 import ukbparse.util  as util
 import ukbparse.icd10 as icd10
@@ -105,6 +104,7 @@ def main(argv=None):
     try:
         with util.timed(None, log, fmt='Total time: %i minutes, %i seconds (%+iMB)'):
             dtable, unknowns, drop = doImport(args, pool, mgr)

             if args.dry_run:
@@ -175,10 +175,12 @@ def doImport(args, pool, mgr):
     if not args.dry_run and args.import_all:
         variables     = None
         categories    = None
+        columns       = None
         removeUnknown = None
     else:
         variables     = args.variable
         categories    = args.category
+        columns       = args.column
         removeUnknown = args.remove_unknown

     # Import data

@@ -189,6 +191,7 @@ def doImport(args, pool, mgr):
                              proctable=proctable,
                              cattable=cattable,
                              variables=variables,
+                             colnames=columns,
                              categories=categories,
                              subjects=subjects,
                              encoding=args.encoding,
@@ -265,11 +268,13 @@ def finaliseColumns(dtable, args, unknowns):
     vids = importing.restrictVariables(
         dtable.cattable, args.variable, args.category)

-    # args.remove_unknown is only applied if
-    # variables were not already restricted
-    # by args.variable and/or args.category
+    # args.remove_unknown is only applied
+    # if variables/columns were not already
+    # restricted by args.variable,
+    # args.category, and/or args.column
     removeUnknown = all((vids is None,
                          args.remove_unknown,
+                         args.column is None,
                          len(unknowns) > 0))

     # apply removeUnknown

@@ -280,6 +285,15 @@ def finaliseColumns(dtable, args, unknowns):
             if vid in uvids:
                 vids.remove(vid)

+    # apply column patterns
+    if args.column is not None:
+        remove = []
+        for col in list(dtable.allColumns[1:]):
+            hits = [fnmatch.fnmatch(col.name, pat) for pat in args.column]