Commit 7fd7041b authored by Paul McCarthy 🚵
Browse files

ENH: Speed up binariseCategorical with take by > 1000%

parent 426683af
......@@ -319,13 +319,18 @@ def binariseCategorical(data, minpres=None, take=None):
# every column in it has the same dtype
binshape = (len(data), len(uniq))
if take is None: bindata = np.zeros(binshape, dtype=np.uint8)
else: bindata = np.zeros(binshape, dtype=take.dtypes[0])
else: bindata = np.full( binshape, np.nan,
dtype=take.dtypes[0])
for i, v in enumerate(uniq):
mask = data == v
if take is None: values = mask.any(axis=1)
else: values = take.where(mask.values).apply(first, axis=1)
bindata[:, i] = values
if take is None:
bindata[:, i] = mask.any(axis=1)
else:
rowmask = mask.any(axis=1)
idxs = np.argmax(mask.values, axis=1)
values = take.values[np.arange(take.shape[0]), idxs]
bindata[rowmask, i] = values[rowmask]
return bindata, uniq
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment