Commit 9a299dbd authored by Paul McCarthy 🚵
Browse files

RF: remove unused/pointless export options

parent 902a8b39
......@@ -38,7 +38,6 @@ DEFAULT_MERGE_STRATEGY = importing.MERGE_STRATEGY
DEFAULT_EXPORT_FORMAT = exporting.EXPORT_FORMAT
AVAILABLE_MERGE_AXES = importing.MERGE_AXIS_OPTIONS
AVAILABLE_MERGE_STRATEGIES = importing.MERGE_STRATEGY_OPTIONS
DEFAULT_COLUMN_PATTERN = exporting.COLUMN_PATTERN
DEFAULT_TSV_SEP = exporting_tsv.TSV_SEP
DEFAULT_HDF5_KEY = exporting_hdf5.HDF5_KEY
DEFAULT_HDF5_STYLE = exporting_hdf5.HDF5_STYLE
......@@ -112,14 +111,9 @@ CLI_ARGUMENTS = collections.OrderedDict((
'metavar' : ('VARS', 'PROCS')})]),
('Export options', [
(('f', 'format'), {'default' : DEFAULT_EXPORT_FORMAT}),
(('cp', 'column_pattern'), {'default' : DEFAULT_COLUMN_PATTERN}),
(('rc', 'rename_column'), {'action' : 'append',
'nargs' : 2,
'metavar' : ('OLD_NAME', 'NEW_NAME')}), # noqa
(('oi', 'output_id_column'), {}),
(('edf', 'date_format'), {'default' : 'default'}),
(('etf', 'time_format'), {'default' : 'default'})]),
(('f', 'format'), {'default' : DEFAULT_EXPORT_FORMAT}),
(('edf', 'date_format'), {'default' : 'default'}),
(('etf', 'time_format'), {'default' : 'default'})]),
('TSV export options', [
(('ts', 'tsv_sep'), {'default' : DEFAULT_TSV_SEP}),
......@@ -324,17 +318,6 @@ CLI_ARGUMENT_HELP = {
'format' :
'Output file format (default: "{}").'.format(DEFAULT_EXPORT_FORMAT),
'column_pattern' :
'Pattern defining output column names (default: "{}").'.format(
DEFAULT_COLUMN_PATTERN),
'rename_column' :
'Rename the given column instead of applying --column_pattern. Can '
'be used multiple times',
'output_id_column' :
'Name of ID column in output file.',
'date_format' :
'Formatter to use for date variables (default: "default").',
......@@ -741,12 +724,6 @@ def parseArgs(argv=None, namespace=None):
try: args.category[i] = int(c)
except ValueError: continue
# convert rename_column from a sequence of
# [(oldname, newname)] pairs into a dict of
# { oldname : newname } mappings.
if args.rename_column is not None:
args.rename_column = dict(args.rename_column)
def numlist(s):
    """Parse a string of comma-separated numbers into a float array.

    :arg s: String containing comma-separated numbers, e.g. ``'1,2.5,3'``.

    :returns: A 1D ``numpy`` array of ``float64`` values.
    """
    # np.float was merely an alias for the builtin float; it was
    # deprecated in NumPy 1.20 and removed in 1.24, so the builtin
    # is used directly (the resulting dtype is still float64).
    return np.fromstring(s, sep=',', dtype=float)
......
......@@ -22,76 +22,10 @@ from . import custom
log = logging.getLogger(__name__)
COLUMN_PATTERN = '{name}'
"""Default output column naming pattern. A python-style formatting string
which may refer to:
- ``'{variable}'``
- ``'{name}'``
- ``'{description}'``
- ``'{visit}'``
- ``'{instance}'``
"""
EXPORT_FORMAT = 'tsv'
"""Default export format."""
def genColumnNames(dtable, colpat=None, colmap=None):
    """Work out the name each data column should have in the output file.

    Every column of every variable other than variable ``0`` (the ID
    column) receives an entry in the result. A column whose current name
    is a key in ``colmap`` takes the mapped name; every other column is
    named by formatting ``colpat`` with the fields ``variable``,
    ``visit``, ``name``, ``description`` and ``instance``.

    :arg dtable: :class:`.DataTable` containing the data to export.

    :arg colpat: Output column name pattern. If not provided, defaults to
                 :attr:`COLUMN_PATTERN`.

    :arg colmap: Dictionary of ``{oldcolumn : newcolumn}`` overrides,
                 looked up by the current column name. Defaults to no
                 overrides.

    :returns:    A dictionary containing ``{incolumn : outcolumn}``
                 mappings.
    """

    pattern = COLUMN_PATTERN if colpat is None else colpat
    overrides = {} if colmap is None else colmap
    renamed = {}

    for vid in dtable.variables:

        # variable 0 is the ID column - it gets no entry
        if vid == 0:
            continue

        description = dtable.vartable.loc[vid, 'Description']
        visits = dtable.visits(vid)
        instances = dtable.instances(vid)

        # a missing description is formatted as an empty string
        if pd.isna(description):
            description = ''

        for visit, instance in it.product(visits, instances):
            for column in dtable.columns(vid, visit, instance):

                # an explicit override takes precedence over the pattern
                outname = overrides.get(column.name)

                if outname is None:
                    outname = pattern.format(variable=vid,
                                             visit=visit,
                                             name=column.name,
                                             description=description,
                                             instance=instance)

                renamed[column.name] = outname

    return renamed
def exportData(dtable,
outfile,
colpat=None,
colmap=None,
idcol=None,
fileFormat=None,
**kwargs):
"""Export the data contained in ``dtable`` to ``outfile`` using the
......@@ -101,16 +35,6 @@ def exportData(dtable,
:arg outfile: File to export data to.
:arg colpat: Output column name pattern - see
:func:`.genColumnNames`.
:arg colmap: Dictionary containing ``{variable : name}`` mappings,
to be used as the ``variable`` in ``colpat`` when
generating output column names.
:arg idcol: Name to use for ID column. Defaults to the original
index column name (``pandas.DataFrame.index.name``).
:arg fileFormat: File format to export to - the name of a ``@exporter``
plugin. If not provided, defaults to
:attr:`EXPORT_FORMAT`
......@@ -122,13 +46,10 @@ def exportData(dtable,
raise RuntimeError('No data to export (rows: {}, columns: '
'{})'.format(*dtable.shape))
if fileFormat is None: fileFormat = EXPORT_FORMAT
if idcol is None: idcol = dtable.index.name
colnames = genColumnNames(dtable, colpat, colmap)
if fileFormat is None:
fileFormat = EXPORT_FORMAT
custom.runExporter(
fileFormat, dtable, outfile, idcol, colnames, **kwargs)
custom.runExporter(fileFormat, dtable, outfile, **kwargs)
@custom.formatter('default')
......
......@@ -38,8 +38,6 @@ HDF5_STYLE = 'pandas'
@custom.exporter('hdf5')
def exportHDF5(dtable,
outfile,
idcol,
colnames,
key=None,
style=None,
**kwargs):
......@@ -54,10 +52,6 @@ def exportHDF5(dtable,
:arg outfile: File to output to
:arg idcol: Name to use for the subject ID column
:arg colnames: Dict containing ``{oldcol : newcol}`` mappings
:arg key: Name to give the HDF5 group. Defaults to :attr:`HDF5_KEY`.
:arg style: HDF5 style to use (see above). Defaults to
......@@ -73,24 +67,18 @@ def exportHDF5(dtable,
if style == 'pandas':
exportPandasStyle(dtable,
outfile,
idcol,
colnames,
key=key,
**kwargs)
elif style == 'funpack':
exportFunpackStyle(dtable,
outfile,
idcol,
colnames,
key=key,
**kwargs)
def exportPandasStyle(dtable,
outfile,
idcol,
colnames,
key,
numRows=None,
**kwargs):
......@@ -105,10 +93,6 @@ def exportPandasStyle(dtable,
:arg outfile: File to output to
:arg idcol: Name to use for the subject ID column
:arg colnames: Dict containing ``{oldcol : newcol}`` mappings
:arg key: Name to give the HDF5 group.
:arg numRows: Number of rows to write at time (only used for
......@@ -132,17 +116,12 @@ def exportPandasStyle(dtable,
cidxs = index[cstart:cend]
towrite = dtable[cidxs, :]
towrite.index.name = idcol
towrite.rename(columns=colnames, inplace=True)
if chunki == 0: s.put( key, towrite, format='table')
else: s.append(key, towrite, format='table')
def exportFunpackStyle(dtable,
outfile,
idcol,
colnames,
key,
dateFormat=None,
timeFormat=None):
......@@ -156,10 +135,6 @@ def exportFunpackStyle(dtable,
:arg outfile: File to output to
:arg idcol: Name to use for the subject ID column
:arg colnames: Dict containing ``{oldcol : newcol}`` mappings
:arg key: Name to give the HDF5 group.
:arg dateFormat: Name of formatter to use for date columns.
......@@ -177,13 +152,13 @@ def exportFunpackStyle(dtable,
with h5py.File(outfile, 'w') as f:
name = '/'.join((key, idcol))
name = '/'.join((key, dtable.index.name))
data = np.asarray(dtable.index)
f.create_dataset(name, data=data)
for col in dtable.dataColumns:
name = '/'.join((key, colnames.get(col.name, col.name)))
name = '/'.join((key, col.name))
series = dtable[:, col.name]
vid = col.basevid
formatter = None
......
......@@ -38,8 +38,6 @@ default value for its ``numRows`` argument.
@custom.exporter('tsv')
def exportTSV(dtable,
outfile,
idcol,
colnames,
sep=None,
missingValues=None,
dateFormat=None,
......@@ -58,10 +56,6 @@ def exportTSV(dtable,
:arg outfile: File to output to
:arg idcol: Name to use for the subject ID column
:arg colnames: Dict containing ``{oldcol : newcol}`` mappings
:arg sep: Separator character to use. Defaults to
:attr:`TSV_SEP`
......@@ -113,8 +107,6 @@ def exportTSV(dtable,
[True] + [False] * (nchunks - 1),
range(nchunks))
func = ft.partial(writeDataFrame,
idcol=idcol,
colnames=colnames,
sep=sep,
missingValues=missingValues,
dateFormat=dateFormat,
......@@ -148,8 +140,6 @@ def writeDataFrame(dtable,
nonNumericFile,
header,
chunki,
idcol,
colnames,
sep,
missingValues,
dateFormat,
......@@ -168,10 +158,6 @@ def writeDataFrame(dtable,
:arg header: If ``True``, write the header row (column names).
:arg idcol: Name to use for the subject ID column
:arg colnames: Dict containing ``{oldcol : newcol}`` mappings
:arg chunki: Chunk index (used for logging)
:arg sep: Separator character to use. Defaults to
......@@ -209,7 +195,7 @@ def writeDataFrame(dtable,
# separate out into numeric
# and non-numeric
for col in columns:
name = colnames.get(col.name, col.name)
name = col.name
series = formatColumn(
col, dtable, dateFormat, timeFormat, formatters, chunki)
......@@ -235,8 +221,8 @@ def writeDataFrame(dtable,
len(nonNumericCols), nonNumericFile,
len(numericCols), outfile)
if not header:
idcol = None
if header: idcol = towrite.index.name
else: idcol = None
numericChunk.to_csv(outfile,
sep=sep,
......
......@@ -278,9 +278,6 @@ def doExport(dtable, args):
args.outfile,
# General export options
colpat=args.column_pattern,
colmap=args.rename_column,
idcol=args.output_id_column,
fileFormat=args.format,
dateFormat=args.date_format,
timeFormat=args.time_format,
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment