Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
FSL
funpack
Commits
d2418857
Commit
d2418857
authored
Jun 21, 2022
by
Paul McCarthy
🚵
Browse files
ENH: implement --add_aux_vars
parent
6146eb8c
Changes
4
Hide whitespace changes
Inline
Side-by-side
funpack/fileinfo.py
View file @
d2418857
...
...
@@ -9,11 +9,14 @@
"""
import
itertools
as
it
import
io
import
csv
import
logging
import
collections
import
itertools
as
it
import
functools
as
ft
import
io
import
csv
import
logging
import
collections
from
typing
import
Sequence
import
funpack.util
as
util
import
funpack.custom
as
custom
...
...
@@ -344,6 +347,25 @@ class FileInfo:
return
list
(
self
.
__datafiles
)
@property
@ft.lru_cache()
def allColumns(self) -> Sequence[datatable.Column]:
    """Return every column from every data file, as one flat list.

    The per-file column lists returned by :meth:`columns` are joined
    end-to-end, following the order of :attr:`datafiles`.

    The getter is wrapped in ``functools.lru_cache``, so while an entry
    for this instance remains in the cache, repeated accesses return the
    same list object rather than rebuilding it.
    """
    # one column list per data file, produced in datafile order
    perFile = (self.columns(df) for df in self.datafiles)

    # flatten the per-file lists into a single list
    return list(it.chain.from_iterable(perFile))
@property
@ft.lru_cache()
def allVariables(self) -> Sequence[int]:
    """Return the sorted, de-duplicated variable IDs of all data files.

    The IDs are taken from the ``vid`` attribute of each column in
    :attr:`allColumns`.

    The getter is wrapped in ``functools.lru_cache``, so while an entry
    for this instance remains in the cache, repeated accesses return the
    same list object rather than rebuilding it.
    """
    # gather each variable ID once, regardless
    # of how many columns it is spread across
    unique = set()
    for column in self.allColumns:
        unique.add(column.vid)

    # hand the IDs back in ascending order
    ordered = list(unique)
    ordered.sort()
    return ordered
def dialect(self, datafile):
    """Return the CSV dialect type stored for the given ``datafile``.

    :arg datafile: Key identifying the data file of interest.
    :returns:      The entry stored for ``datafile`` in this object's
                   private ``__dialects`` attribute.
    """
    # NOTE(review): __dialects is presumably a dict keyed by data
    # file, populated elsewhere in the class - confirm against the
    # constructor.
    known = self.__dialects
    return known[datafile]
...
...
funpack/importing/filter.py
View file @
d2418857
...
...
@@ -95,6 +95,59 @@ def restrictVariables(
return
variables
,
exclude
def addAuxillaryVariables(
        fileinfo:  finfo.FileInfo,
        proctable: pd.DataFrame,
        variables: Sequence[int] = None,
        exclude:   Sequence[int] = None
) -> Tuple[Optional[Sequence[int]], Optional[Sequence[int]]]:
    """Ensures that auxiliary variables referred to by processing rules
    are going to be loaded.

    Every auxiliary variable reported by a process in ``proctable`` is:

      - removed from ``exclude`` (every occurrence of it), and
      - appended to ``variables``, if ``variables`` is not ``None``, the
        variable is not already in it, and the variable is present in
        the input file(s).

    The ``variables`` and ``exclude`` arguments are never modified - the
    returned sequences are new lists. Any sequence type (e.g. a tuple)
    may therefore be passed in.

    :arg fileinfo:  :class:`.FileInfo` object describing the input
                    file(s).

    :arg proctable: Processing table

    :arg variables: Variables to load, as returned by
                    :func:`restrictVariables`

    :arg exclude:   Variables to exclude, as returned by
                    :func:`restrictVariables`

    :returns:       A tuple containing:

                     - a sequence of variables to load, or ``None`` if
                       all variables should be loaded.

                     - a sequence of variables to exclude, or ``None``
                       if no variables should be excluded.
    """

    # nothing is being restricted, so every
    # auxiliary variable will be loaded anyway
    if variables is None and exclude is None:
        return None, None

    # work on private copies, so the caller's
    # sequences are left untouched, and so that
    # immutable sequences are accepted
    if variables is not None:
        variables = list(variables)
    exclude = [] if exclude is None else list(exclude)

    # each entry in the processing table
    # is an ordered dictionary of {name :
    # Process} mappings
    auxvids = []
    for procs in proctable['Process']:
        for proc in procs.values():
            auxvids.extend(proc.auxillaryVariables())

    # auxiliary variables must never be excluded
    if exclude:
        needed  = set(auxvids)
        exclude = [vid for vid in exclude if vid not in needed]

    # load if not already being loaded,
    # and if present in input file(s).
    # Sets are used for the membership
    # tests; the list preserves order.
    if variables is not None and auxvids:
        available = set(fileinfo.allVariables)
        loading   = set(variables)
        for vid in auxvids:
            if vid in available and vid not in loading:
                variables.append(vid)
                loading.add(vid)

    # an empty exclude list is reported as None
    return variables, (exclude or None)
def
columnsToLoad
(
fileinfo
,
vartable
,
variables
,
...
...
funpack/processing.py
View file @
d2418857
...
...
@@ -571,10 +571,6 @@ class Process:
This property is set via a ``filterMissing`` argument passed to the
processor decorator function. Its default value is ``True``.
Note: This is a hack which is only used by the
:func:`.processing_functions.binariseCategorical` function, and which
is in place because that function used to be parallelised differently.
"""
return
custom
.
args
(
self
.
__ptype
,
self
.
__name
).
get
(
'filterMissing'
,
True
)
...
...
@@ -588,10 +584,6 @@ class Process:
The names of any arguments which contain auxillary variables are
specified via the ``auxvids`` argument to the processor decorator
function.
Note: This is a hack which is only used by the
:func:`.processing_functions.binariseCategorical` function, and which
is in place because that function used to be parallelised differently.
"""
auxargs
=
custom
.
args
(
self
.
__ptype
,
self
.
__name
).
get
(
'auxvids'
,
[])
...
...
funpack/processing_functions.py
View file @
d2418857
...
...
@@ -377,12 +377,6 @@ def _pairwiseRemoveIfRedundant(dtable, data, corrthres, nathres=None):
# are not present in the data from the list
# of vids that are passed in - we do our
# own check here.
#
# Both of the above are ridiculous hacks
# which are in place because this function,
# and FUNPACK, used to parallelise things
# differently, and to preserve backwards
# compatibility.
@
custom
.
processor
(
auxvids
=
[
'take'
],
filterMissing
=
False
)
def
binariseCategorical
(
dtable
,
vids
,
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment