Commit ce677115 authored by Paul McCarthy 🚵
Browse files

TEST: New mincat test, adjust others

parent 5a5cf7d5
......@@ -15,7 +15,7 @@ import ukbparse.main as main
from . import tempdir, patch_logging
#@patch_logging
@patch_logging
def test_binariseCategorical_outputs_go_through_sparsity_check():
data = tw.dedent("""
......@@ -66,7 +66,7 @@ def test_binariseCategorical_outputs_go_through_sparsity_check():
'-vf variables.tsv '
'-apr 1 binariseCategorical(nameFormat="{vid}-{value}") '
'-apr 2 binariseCategorical(nameFormat="{vid}-{value}") '
'-apr all removeIfSparse(maxcat=0.8) '
'-apr all removeIfSparse(maxcat=0.8,abscat=False) '
'out.tsv data.tsv'.split())
with open('out.tsv', 'rt') as f:
......
......@@ -35,23 +35,17 @@ def test_isSparse_minpres():
absres = core.isSparse(
data, util.CTYPES.continuous, minpres=threshold * size)
propres = core.isSparse(
data, util.CTYPES.continuous, minpres=threshold, absolute=False)
data, util.CTYPES.continuous, minpres=threshold, abspres=False)
if expected:
expcause = 'minpres'
expabsval = size - len(missing)
exppropval = (size - len(missing)) / size
expcause = 'minpres'
expval = size - len(missing)
else:
expcause = None
expabsval = None
exppropval = None
assert absres == (expected, expcause, expabsval)
assert propres[:2] == (expected, expcause)
if exppropval is not None:
assert np.isclose(propres[2], exppropval)
else:
assert propres[2] is None
expcause = None
expval = None
assert absres == (expected, expcause, expval)
assert propres == (expected, expcause, expval)
# minpres should be ignored if
# number of points in data is
......@@ -104,14 +98,17 @@ def test_isSparse_minstd():
def test_isSparse_maxcat():
size = 20
actualmaxcats = np.linspace(1.0 / size, 1, size)[::2]
maxcats = np.linspace(1.0 / size, 1, size)[::2]
actualmaxcats = np.arange(1, 21, 2)
maxcats = np.arange(1, 21, 2)
dtypes = [util.CTYPES.integer,
util.CTYPES.categorical_single,
util.CTYPES.categorical_multiple]
for actualmaxcat, maxcat in it.product(actualmaxcats, maxcats):
iamc = int(round(actualmaxcat * size))
data = np.arange(size)
data[:iamc] = size + 1
data[:actualmaxcat] = size + 1
data = pd.Series(data)
......@@ -127,29 +124,55 @@ def test_isSparse_maxcat():
maxcat=maxcat)
assert result == (False, None, None)
result = core.isSparse(data, util.CTYPES.integer,
maxcat=maxcat)
assert result[:2] == expected[:2]
if expected[2] is None:
assert result[2] is None
else:
assert np.isclose(result[2], expected[2])
for dt in dtypes:
result = core.isSparse(data, util.CTYPES.categorical_single,
maxcat=maxcat)
assert result[:2] == expected[:2]
if expected[2] is None:
assert result[2] is None
else:
assert np.isclose(result[2], expected[2])
maxcatprop = maxcat / len(data)
result = core.isSparse(data, util.CTYPES.categorical_multiple,
maxcat=maxcat)
assert result[:2] == expected[:2]
if expected[2] is None:
assert result[2] is None
resultabs = core.isSparse(data, dt, maxcat=maxcat)
resultprop = core.isSparse(data, dt, maxcat=maxcatprop,
abscat=False)
assert resultabs == expected
assert resultprop == expected
def test_isSparse_mincat():
    """Exercise the ``mincat`` criterion of ``core.isSparse``.

    For every pairing of an actual smallest-category size and a ``mincat``
    threshold, a 100-element series is built with ``actualmincat`` entries
    set to one value and the remainder set to zero, so the smaller of the
    two categories contains ``actualmincat`` elements.

    The test asserts that:

    - a ``continuous`` column is never reported as sparse because of
      ``mincat``;
    - for integer/categorical columns the result is
      ``(True, 'mincat', actualmincat)`` when ``actualmincat < mincat``,
      and ``(False, None, None)`` otherwise;
    - passing the threshold as a proportion (``abscat=False``) gives the
      same result as passing it as an absolute count.
    """

    size = 100
    actualmincats = np.arange(1, 21, 2)
    mincats = np.arange(1, 21, 2)
    dtypes = [util.CTYPES.integer,
              util.CTYPES.categorical_single,
              util.CTYPES.categorical_multiple]

    for actualmincat, mincat in it.product(actualmincats, mincats):

        # Two categories: ``actualmincat`` entries of (size + 1), rest zeros.
        # actualmincat <= 19, so it is always the smaller category.
        data = np.zeros(size)
        data[:actualmincat] = size + 1
        data = pd.Series(data)

        if actualmincat < mincat:
            expected = (True, 'mincat', actualmincat)
        else:
            expected = (False, None, None)

        # test should only be applied for integer/categoricals
        result = core.isSparse(data, util.CTYPES.continuous,
                               mincat=mincat)
        assert result == (False, None, None)

        # Same threshold expressed as a proportion of the data length;
        # it does not depend on the column type, so compute it once.
        mincatprop = mincat / len(data)

        for dt in dtypes:
            resultabs = core.isSparse(data, dt, mincat=mincat)
            resultprop = core.isSparse(data, dt, mincat=mincatprop,
                                       abscat=False)
            assert resultabs == expected
            assert resultprop == expected
def test_redundantColumns():
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment