Commit 643e9195 authored by Paul McCarthy
Browse files

ENH: Column class now has a "metadata" attribute for storing arbitrary
stuff. Processing functions have the option of returning metadata for newly
created columns. Used by binariseCategorical.
parent 0bb5527d
...@@ -42,7 +42,8 @@ class Column(object): ...@@ -42,7 +42,8 @@ class Column(object):
index, index,
vid=None, vid=None,
visit=0, visit=0,
instance=0): instance=0,
metadata=None):
self.datafile = datafile self.datafile = datafile
self.name = name self.name = name
...@@ -50,6 +51,7 @@ class Column(object): ...@@ -50,6 +51,7 @@ class Column(object):
self.vid = vid self.vid = vid
self.visit = visit self.visit = visit
self.instance = instance self.instance = instance
self.metadata = metadata
def __str__(self): def __str__(self):
...@@ -356,7 +358,7 @@ class DataTable(object): ...@@ -356,7 +358,7 @@ class DataTable(object):
else: self.__varmap.pop(col.vid) else: self.__varmap.pop(col.vid)
def addColumns(self, series, vids=None): def addColumns(self, series, vids=None, meta=None):
"""Adds one or more new columns to the data set. """Adds one or more new columns to the data set.
:arg series: Sequence of ``pandas.Series`` objects containing the :arg series: Sequence of ``pandas.Series`` objects containing the
...@@ -365,10 +367,12 @@ class DataTable(object): ...@@ -365,10 +367,12 @@ class DataTable(object):
:arg vids: Sequence of variables each new column is associated :arg vids: Sequence of variables each new column is associated
with. If ``None`` (the default), variable IDs are with. If ``None`` (the default), variable IDs are
automatically assigned. automatically assigned.
:arg meta: Sequence of metadata associated with each new column.
""" """
if vids is None: if vids is None: vids = [None] * len(series)
vids = [None] * len(series) if meta is None: meta = [None] * len(series)
for s in series: for s in series:
if s.name in self.__data.columns: if s.name in self.__data.columns:
...@@ -390,13 +394,13 @@ class DataTable(object): ...@@ -390,13 +394,13 @@ class DataTable(object):
# a vid for each column starting from here. # a vid for each column starting from here.
startvid = max(max(self.variables) + 1, AUTO_VARIABLE_ID) startvid = max(max(self.variables) + 1, AUTO_VARIABLE_ID)
for s, idx, vid in zip(series, idxs, vids): for s, idx, vid, m in zip(series, idxs, vids, meta):
if vid is None: if vid is None:
vid = startvid vid = startvid
startvid = startvid + 1 startvid = startvid + 1
col = Column(None, s.name, idx, vid, 0, 0) col = Column(None, s.name, idx, vid, 0, 0, m)
self.__data[s.name] = s self.__data[s.name] = s
# new column on existing variable. # new column on existing variable.
......
...@@ -136,10 +136,7 @@ def runProcess(proc, dtable, vids): ...@@ -136,10 +136,7 @@ def runProcess(proc, dtable, vids):
remove = [] remove = []
add = [] add = []
addvids = [] addvids = []
addmeta = []
def genvids(result, vi, si):
if result[vi] is None: return [None] * len(result[si])
else: return result[vi]
for result in results: for result in results:
if result is None: if result is None:
...@@ -152,15 +149,20 @@ def runProcess(proc, dtable, vids): ...@@ -152,15 +149,20 @@ def runProcess(proc, dtable, vids):
# series/vids to add # series/vids to add
if len(result) == 2: if len(result) == 2:
add .extend( result[0]) add .extend(result[0])
addvids.extend(genvids(result, 1, 0)) addvids.extend(result[1])
# columns to remove, and # columns to remove, and
# series/vids to add # series/vids to add
elif len(result) == 3: elif len(result) in (3, 4):
remove .extend( result[0])
add .extend( result[1]) if len(result) == 3: meta = [None] * len(result[1])
addvids.extend(genvids(result, 2, 1)) else: meta = result[3]
remove .extend(result[0])
add .extend(result[1])
addvids.extend(result[2])
addmeta.extend(meta)
else: else:
raise error raise error
...@@ -170,7 +172,7 @@ def runProcess(proc, dtable, vids): ...@@ -170,7 +172,7 @@ def runProcess(proc, dtable, vids):
else: else:
raise error raise error
if len(add) > 0: dtable.addColumns(add, addvids) if len(add) > 0: dtable.addColumns(add, addvids, meta)
if len(remove) > 0: dtable.removeColumns(remove) if len(remove) > 0: dtable.removeColumns(remove)
......
...@@ -43,6 +43,12 @@ Furthermore, all processing functions must return one of the following: ...@@ -43,6 +43,12 @@ Furthermore, all processing functions must return one of the following:
- List of ``Series`` to be added - List of ``Series`` to be added
- List of variable IDs for each new ``Series``. - List of variable IDs for each new ``Series``.
- A ``tuple`` of length 4, containing the above, and:
- List of metadata associated with each of the new ``Series``. This will
be added to the :class:`.Column` objects that represent each of the new
``Series``.
The following processing functions are defined: The following processing functions are defined:
.. autosummary:: .. autosummary::
...@@ -217,6 +223,7 @@ def binariseCategorical(dtable, ...@@ -217,6 +223,7 @@ def binariseCategorical(dtable,
toremove = [] toremove = []
newseries = [] newseries = []
newvids = [] newvids = []
newmeta = []
for vid in vids: for vid in vids:
...@@ -261,13 +268,14 @@ def binariseCategorical(dtable, ...@@ -261,13 +268,14 @@ def binariseCategorical(dtable,
} }
newvids .append(vid) newvids .append(vid)
newmeta .append(val)
newseries.append(pd.Series( newseries.append(pd.Series(
col, col,
index=dtable.index, index=dtable.index,
name=nameFormat.format(**fmtargs))) name=nameFormat.format(**fmtargs)))
if replace: return toremove, newseries, newvids if replace: return toremove, newseries, newvids, newmeta
else: return newseries, newvids else: return [], newseries, newvids, newmeta
@custom.processor() @custom.processor()
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment