Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
FSL
funpack
Commits
a460a146
Commit
a460a146
authored
Dec 28, 2021
by
Paul McCarthy
🚵
Browse files
ENH,RF: Implement excludeColnames - only support suffixes to avoid fnmatch
performance hit.
parent
b122004e
Changes
3
Hide whitespace changes
Inline
Side-by-side
funpack/importing/core.py
View file @
a460a146
...
...
@@ -104,8 +104,8 @@ def importData(fileinfo,
:arg colnames: List of names/glob-style wildcard patterns
specifying columns to import.
:arg excludeColnames: List of names/glob-style wildcard patterns
specifying columns to exclude.
:arg excludeColnames: List of column name suffixes specifying columns
to exclude.
:arg categories: List of category names to import
...
...
@@ -157,7 +157,8 @@ def importData(fileinfo,
cols
,
drop
=
filter
.
columnsToLoad
(
fileinfo
,
vartable
,
variables
,
colnames
)
colnames
,
excludeColnames
)
# Load those columns, merging
# multiple input files.
...
...
funpack/importing/filter.py
View file @
a460a146
...
...
@@ -66,20 +66,28 @@ def restrictVariables(cattable, variables, categories):
return
variables
def
columnsToLoad
(
fileinfo
,
vartable
,
variables
,
colnames
):
def
columnsToLoad
(
fileinfo
,
vartable
,
variables
,
colnames
=
None
,
excludeColnames
=
None
):
"""Determines which columns should be loaded from ``datafiles``.
Peeks at the first line of the data file (assumed to contain column names),
then uses the variable table to determine which of them should be loaded.
:arg fileinfo: :class:`.FileInfo` object describing the input file(s).
:arg fileinfo: :class:`.FileInfo` object describing the input
file(s).
:arg vartable: Variable table
:arg vartable:
Variable table
:arg variables: List of variables to load.
:arg variables:
List of variables to load.
:arg colnames: List of column names/glob-style wildcard patterns,
specifying columns to load.
:arg colnames: List of column names/glob-style wildcard patterns,
specifying columns to load.
:arg excludeColnames: List of column name suffixes specifying columns to
exclude. This overrides ``colnames``.
:returns: A tuple containing:
...
...
@@ -92,6 +100,9 @@ def columnsToLoad(fileinfo, vartable, variables, colnames):
*ignore*.
"""
if
excludeColnames
is
None
:
excludeColnames
=
[]
# We apply these cleaning steps by
# omitting the relevant columns.
loadFuncNames
=
[
'remove'
,
'keepVisits'
]
...
...
@@ -139,6 +150,15 @@ def columnsToLoad(fileinfo, vartable, variables, colnames):
load
[
col
.
datafile
].
append
(
col
)
continue
# excludeColnames takes precedence
# over all other column selection
# mechanisms
for
suf
in
excludeColnames
:
for
col
in
list
(
cols
):
if
col
.
name
.
endswith
(
suf
):
cols
.
remove
(
col
)
drop
.
append
(
col
)
# Figure out whether each
# column should be loaded.
# We load all columns which
...
...
@@ -163,7 +183,7 @@ def columnsToLoad(fileinfo, vartable, variables, colnames):
# if there are any glob patterns, do
# an exhaustive search (*very* slow)
if
any
(
[
_ispattern
(
c
)
for
c
in
colnames
]
):
if
any
(
_ispattern
(
c
)
for
c
in
colnames
):
for
i
,
col
in
enumerate
(
cols
):
hits
=
[
fnmatch
.
fnmatch
(
col
.
name
,
pat
)
for
pat
in
colnames
]
loadflags
[
i
]
=
loadflags
[
i
]
or
any
(
hits
)
...
...
funpack/main.py
View file @
a460a146
...
...
@@ -221,7 +221,7 @@ def doImport(args, mgr):
cattable
=
cattable
,
variables
=
variables
,
colnames
=
columns
,
excludeColnames
=
'*'
+
suffix
,
excludeColnames
=
[
suffix
]
,
categories
=
categories
,
subjects
=
subjects
,
subjectExprs
=
exprs
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment