Commit d70d6d8f authored by Paul McCarthy 🚵
Browse files

BF: Do not run minstd test on non-numerics. Use pd.unique instead of
np.unique, as the latter can bork with mixed type data (although I couldn't
reproduce the specific crash - perhaps it's specific to the NumPy version).
parent 8f677077
......@@ -21,8 +21,9 @@ import logging
import collections
import itertools as it
import numpy as np
import pandas as pd
import numpy as np
import pandas as pd
import pandas.api.types as pdtypes
import funpack.util as util
......@@ -131,7 +132,8 @@ def isSparse(data,
util.CTYPES.continuous,
util.CTYPES.integer,
util.CTYPES.categorical_single,
util.CTYPES.categorical_multiple)
util.CTYPES.categorical_multiple) and \
pdtypes.is_numeric_dtype(data)
# not enough values
if minpres is not None:
......@@ -153,7 +155,7 @@ def isSparse(data,
# mincat - smallest category is too small
# maxcat - one category is too dominant
uniqvals = np.unique(present)
uniqvals = pd.unique(present)
uniqcounts = [sum(present == u) for u in uniqvals]
nmincat = min(uniqcounts)
nmaxcat = max(uniqcounts)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment