Commit ed5f35ce authored by Paul McCarthy
Browse files

RF: Adjust maxcat if it's bigger than the data size

parent ce677115
......@@ -103,7 +103,7 @@ def isSparse(data,
ntotal = len(data)
npresent = len(present)
def fixabs(val, isabs):
def fixabs(val, isabs, repl=npresent):
# Turn proportion into
# an absolute count
......@@ -112,10 +112,8 @@ def isSparse(data,
# ignore absolute thresholds if
# total data length is less than it
elif len(data) < val:
return npresent
else:
return val
elif len(data) < val: return repl
else: return val
iscategorical = ctype in (util.CTYPES.integer,
util.CTYPES.categorical_single,
......@@ -137,7 +135,7 @@ def isSparse(data,
# for categorical types
if iscategorical and ((maxcat is not None) or (mincat is not None)):
if maxcat is not None: maxcat = fixabs(maxcat, abscat)
if maxcat is not None: maxcat = fixabs(maxcat, abscat, npresent + 1)
if mincat is not None: mincat = fixabs(mincat, abscat)
# mincat - smallest category is too small
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment