Commit d2418857 authored by Paul McCarthy 🚵
Browse files

ENH: implement --add_aux_vars

parent 6146eb8c
......@@ -9,11 +9,14 @@
"""
import itertools as it
import io
import csv
import logging
import collections
import itertools as it
import functools as ft
import io
import csv
import logging
import collections
from typing import Sequence
import funpack.util as util
import funpack.custom as custom
......@@ -344,6 +347,25 @@ class FileInfo:
return list(self.__datafiles)
@property
@ft.lru_cache()
def allColumns(self) -> Sequence[datatable.Column]:
    """Return every column of every data file as a single flat list.

    The per-file column lists given by :meth:`columns` are joined
    end-to-end, following the order of ``self.datafiles``.

    NOTE(review): ``lru_cache`` keys on ``self``, so the cache keeps a
    reference to this instance, and repeated accesses hand back the same
    cached list object - callers should treat the result as read-only.
    """
    perfile = (self.columns(df) for df in self.datafiles)
    return list(it.chain.from_iterable(perfile))
@property
@ft.lru_cache()
def allVariables(self) -> Sequence[int]:
    """Return the sorted, de-duplicated variable IDs of all columns.

    The ``vid`` attribute of every entry in :meth:`allColumns` is
    collected into a set, and the set is returned as a sorted list.
    """
    vids = set()
    for column in self.allColumns:
        vids.add(column.vid)
    return sorted(vids)
def dialect(self, datafile):
    """Look up the CSV dialect type stored for ``datafile``.

    Any error raised by the underlying lookup is passed on unchanged.
    """
    dialects = self.__dialects
    return dialects[datafile]
......
......@@ -95,6 +95,59 @@ def restrictVariables(
return variables, exclude
def addAuxillaryVariables(fileinfo  : finfo.FileInfo,
                          proctable : pd.DataFrame,
                          variables : Sequence[int] = None,
                          exclude   : Sequence[int] = None
) -> Tuple[Optional[Sequence[int]], Optional[Sequence[int]]]:
    """Checks that auxillary variables referred to by processing rules are to
    be loaded.

    Every auxillary variable reported by a process in the processing table
    is removed from the ``exclude`` list and, if ``variables`` was given, the
    variable is not already in it, and the variable is present in
    ``fileinfo.allVariables``, it is added to the list of variables to load.

    The ``variables`` and ``exclude`` arguments are never modified - the
    returned sequences are new lists.

    :arg fileinfo:  :class:`.FileInfo` object describing the input
                    file(s).
    :arg proctable: Processing table
    :arg variables: Variables to load, as returned by
                    :func:`restrictVariables`
    :arg exclude:   Variables to exclude, as returned by
                    :func:`restrictVariables`

    :returns:       A tuple containing:

                     - a sequence of variables to load, or ``None`` if
                       all variables should be loaded.

                     - a sequence of variables to exclude, or ``None``
                       if no variables should be excluded.
    """

    # nothing is being restricted, so
    # everything will be loaded anyway
    if variables is None and exclude is None:
        return None, None

    # Work on private copies, so that the
    # caller's sequences are left untouched,
    # and so that any sequence type (e.g. a
    # tuple) is accepted.
    if variables is not None:
        variables = list(variables)
    exclude = [] if exclude is None else list(exclude)

    # each entry in the processing table
    # is an ordered dictionary of {name :
    # Process} mappings
    for procs in proctable['Process']:
        for proc in procs.values():
            for vid in proc.auxillaryVariables():

                # auxillary variables must
                # never be excluded
                if vid in exclude:
                    exclude.remove(vid)

                # load if not already being loaded,
                # and if present in input file(s)
                if variables is not None and \
                   vid not in variables  and \
                   vid in fileinfo.allVariables:
                    variables.append(vid)

    # an empty exclude list is
    # reported as "no exclusions"
    if not exclude:
        exclude = None

    return variables, exclude
def columnsToLoad(fileinfo,
vartable,
variables,
......
......@@ -571,10 +571,6 @@ class Process:
This property is set via a ``filterMissing`` argument passed to the
processor decorator function. Its default value is ``True``.
Note: This is a hack which is only used by the
:func:`.processing_functions.binariseCategorical` function, and which
is in place because that function used to be parallelised differently.
"""
return custom.args(self.__ptype, self.__name).get('filterMissing', True)
......@@ -588,10 +584,6 @@ class Process:
The names of any arguments which contain auxillary variables are
specified via the ``auxvids`` argument to the processor decorator
function.
Note: This is a hack which is only used by the
:func:`.processing_functions.binariseCategorical` function, and which
is in place because that function used to be parallelised differently.
"""
auxargs = custom.args(self.__ptype, self.__name).get('auxvids', [])
......
......@@ -377,12 +377,6 @@ def _pairwiseRemoveIfRedundant(dtable, data, corrthres, nathres=None):
# are not present in the data from the list
# of vids that are passed in - we do our
# own check here.
#
# Both of the above are ridiculous hacks
# which are in place because this function,
# and FUNPACK, used to parallelise things
# differently, and to preserve backwards
# compatibility.
@custom.processor(auxvids=['take'], filterMissing=False)
def binariseCategorical(dtable,
vids,
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment