Commit 6146eb8c authored by Paul McCarthy's avatar Paul McCarthy 🚵
Browse files

ENH: infra for new --add_aux_vars option

parent 5204a6f9
......@@ -74,6 +74,7 @@ CLI_ARGUMENTS = collections.OrderedDict((
(('ex', 'exclude'), {'action' : 'append'}),
(('ev', 'exclude_variable'), {'action' : 'append'}),
(('ec', 'exclude_category'), {'action' : 'append'}),
(('aa', 'add_aux_vars'), {'action' : 'store_true'}),
(('iv', 'index_visits'), {'action' : 'store_true'}),
(('tt', 'trust_types'), {'action' : 'store_true'})]),
......@@ -294,6 +295,12 @@ CLI_ARGUMENT_HELP = {
'Category ID or label to exclude. Takes precedence over the --variable '
'and --category options. Can be used multiple times.',
'add_aux_vars' :
'Automatically import auxillary variables which are specified in '
'processing rules if not already selected. Note that this only affects '
'auxillary variables - see the "take" option to the binariseCategorical '
'processing function for an example of an auxillary variable.',
'index_visits' :
'If set, the data is re-arranged so that visits form part of the row '
'indices, rather than being stored in separate columns for each variable. '
......
......@@ -78,6 +78,7 @@ def importData(fileinfo,
mergeStrategy=None,
indexVisits=False,
dropNaRows=False,
addAuxVars=False,
njobs=1,
mgr=None,
dryrun=False):
......@@ -140,6 +141,12 @@ def importData(fileinfo,
:arg dropNaRows: If ``True``, rows which do not contain data for any
columns are not loaded.
:arg addAuxVars: If ``True``, data fields which are referred to in
the processing rules are selected for import if
present in the input file(s) and not already
selected. See the ``take`` argument to
:func:`.binariseCategorical` for an example.
:arg njobs: Number of processes to use for parallelising tasks.
:arg mgr: :class:`multiprocessing.Manager` object for
......@@ -157,9 +164,17 @@ def importData(fileinfo,
loaded from each input file.
"""
# generate a list of variable IDs to include/exclude,
# from variable/category selection/deselection options
variables, excludevars = filter.restrictVariables(
cattable, variables, categories, excludeVariables, excludeCategories)
# make sure auxiliary variables are selected for import
# (e.g. binariseCategorical(take))
if addAuxVars:
variables, excludevars = filter.addAuxillaryVariables(
fileinfo, proctable, variables, excludevars)
# Figure out which columns to load
cols, drop = filter.columnsToLoad(fileinfo,
vartable,
......
......@@ -208,7 +208,6 @@ def doImport(args, mgr):
prependProcess=args.prepend_process,
appendProcess=args.append_process)
subjects, exprs = args.subject
if suffix is None: excludeColnames = []
else: excludeColnames = [suffix]
......@@ -234,6 +233,7 @@ def doImport(args, mgr):
mergeStrategy=args.merge_strategy,
indexVisits=args.index_visits,
dropNaRows=args.drop_na_rows,
addAuxVars=args.add_aux_vars,
njobs=args.num_jobs,
mgr=mgr,
dryrun=args.dry_run)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment