Commit beecb5ad authored by Paul McCarthy's avatar Paul McCarthy 🚵
Browse files

TEST: tests for --remove_duplicates

parent a460a146
......@@ -216,7 +216,7 @@ fileinfo_tests = [
7, 8, 9
""",
False,
[( '0-0.0' .format(0), 0, 0, 0),
[( '0-0.0', 0, 0, 0),
( '{}-0.0'.format(AVID + 2), AVID + 2, 0, 0),
( '{}-0.0'.format(AVID + 3), AVID + 3, 0, 0)]),
("""col1\tcol2\tcol3\tcol4
......@@ -433,3 +433,14 @@ def test_renameDuplicateColumns():
assert [c.name for c in cols] == expnames
assert [c.origname for c in cols] == names
def test_renameDuplicateColumns_suffix():
    """renameDuplicateColumns honours a custom ``suffix`` argument.

    Each duplicate occurrence after the first is renamed to
    ``<name>.<count><suffix>``; ``origname`` is left untouched.
    """
    orignames = ['A', 'B', 'C', 'A', 'B', 'D', 'A']
    expected  = ['A', 'B', 'C', 'A.1.SUF', 'B.1.SUF', 'D', 'A.2.SUF']

    columns = [datatable.Column(None, name, idx)
               for idx, name in enumerate(orignames)]

    fileinfo.renameDuplicateColumns(columns, suffix='.SUF')

    assert [col.name     for col in columns] == expected
    assert [col.origname for col in columns] == orignames
......@@ -574,7 +574,7 @@ def test_importData_dropNaRows():
assert list(loaded.index) == [1, 3, 4, 5, 6, 8, 10]
def test_importData_duplicate_columns():
def test_importData_duplicate_columns_rename():
data = np.random.randint(1, 100, (10, 3))
data[:, 0] = np.arange(1, 10 + 1)
......@@ -600,9 +600,8 @@ def test_importData_duplicate_columns():
assert (dtable[:, :] == data[:, 1:]).all().all()
def test_importData_duplicate_columns_multiple_files():
"""
"""
def test_importData_duplicate_columns_multiple_files_rename():
data = np.random.randint(1, 100, (10, 5))
data[:, 0] = np.arange(1, 10 + 1)
colnames1 = ['eid'] + ['1-0.0', '2-0.0']
......@@ -633,3 +632,74 @@ def test_importData_duplicate_columns_multiple_files():
assert names == ['1-0.0', '2-0.0', '2-0.0.1', '3-0.0']
assert orignames == ['1-0.0', '2-0.0', '2-0.0', '3-0.0']
assert (dtable[:, names] == data[:, 1:]).all().all()
def test_importData_duplicate_columns_remove():
    """Duplicate columns tagged with a sentinel rename suffix can be
    dropped at import time via ``excludeColnames``.
    """
    nsubjs = 10
    data = np.random.randint(1, 100, (nsubjs, 6))
    data[:, 0] = np.arange(1, nsubjs + 1)

    colnames = ['eid'] + ['1-0.0', '1-0.0', '2-0.0', '3-0.0', '3-0.0']

    vartable, proctable, cattable = gen_tables([1])[:3]
    custom.registerBuiltIns()

    with tempdir():
        with open('data.txt', 'wt') as f:
            f.write('\t'.join(colnames) + '\n')
            np.savetxt(f, data, fmt='%i', delimiter='\t')

        # Duplicates get '.REMOVE' appended to their names, and
        # excludeColnames then filters those columns out entirely.
        finfo = fileinfo.FileInfo('data.txt',
                                  renameDuplicates=True,
                                  renameSuffix='.REMOVE')
        dtable, _ = importing.importData(finfo,
                                         vartable,
                                         proctable,
                                         cattable,
                                         excludeColnames=['.REMOVE'])

        cols     = dtable.dataColumns
        expnames = ['1-0.0', '2-0.0', '3-0.0']

        assert dtable.variables == [0, 1, 2, 3]
        assert [c.name     for c in cols] == expnames
        assert [c.origname for c in cols] == expnames
        # only the first occurrence of each duplicated column survives
        assert (dtable[:, :] == data[:, [1, 3, 4]]).all().all()
def test_importData_duplicate_columns_multiple_files_remove():
    """Duplicate-column removal via ``excludeColnames`` works when the
    duplicates are spread across more than one input file.
    """
    nsubjs = 10
    data = np.random.randint(1, 100, (nsubjs, 8))
    data[:, 0] = np.arange(1, nsubjs + 1)

    colnames1 = ['eid'] + ['1-0.0', '2-0.0', '2-0.0']
    colnames2 = ['eid'] + ['2-0.0', '3-0.0', '3-0.0', '4-0.0']

    vartable, proctable, cattable = gen_tables([1, 2, 3])[:3]
    custom.registerBuiltIns()

    with tempdir():
        # split the data across two files which share the '2-0.0' column
        for fname, names, idxs in [
                ('data1.txt', colnames1, [0, 1, 2, 3]),
                ('data2.txt', colnames2, [0, 4, 5, 6, 7])]:
            with open(fname, 'wt') as f:
                f.write('\t'.join(names) + '\n')
                np.savetxt(f, data[:, idxs], fmt='%i', delimiter='\t')

        finfo = fileinfo.FileInfo(['data1.txt', 'data2.txt'],
                                  renameDuplicates=True,
                                  renameSuffix='.REMOVE')
        dtable, _ = importing.importData(finfo,
                                         vartable,
                                         proctable,
                                         cattable,
                                         excludeColnames=['.REMOVE'])

        cols      = dtable.dataColumns
        names     = sorted(c.name     for c in cols)
        orignames = sorted(c.origname for c in cols)
        expnames  = ['1-0.0', '2-0.0', '3-0.0', '4-0.0']

        assert dtable.variables == [0, 1, 2, 3, 4]
        assert names            == expnames
        assert orignames        == expnames
        # first occurrence of each duplicate is kept, later ones dropped
        assert (dtable[:, names] == data[:, [1, 2, 5, 7]]).all().all()
......@@ -1560,7 +1560,7 @@ def test_ids_only():
@patch_logging
def test_dupe_columns():
def test_dupe_columns_rename():
data = np.random.randint(1, 100, (10, 3))
data[:, 0] = np.arange(1, 10 + 1)
colnames = ['eid', '1-0.0', '1-0.0']
......@@ -1577,7 +1577,7 @@ def test_dupe_columns():
@patch_logging
def test_dupe_columns_multiple_files():
def test_dupe_columns_multiple_files_rename():
data1 = np.random.randint(1, 100, (10, 3))
data1[:, 0] = np.arange(1, 10 + 1)
colnames1 = ['eid', '1-0.0', '2-0.0']
......@@ -1599,3 +1599,45 @@ def test_dupe_columns_multiple_files():
['eid', '1-0.0', '2-0.0', '2-0.0.1', '3-0.0']
exp = np.hstack((data1, data2[:, 1:]))
assert np.all(df.to_numpy() == exp)
@patch_logging
def test_dupe_columns_remove():
    """The ``-rm`` command-line flag drops duplicate columns from the
    output file, keeping only the first occurrence of each name.
    """
    nsubjs = 10
    data = np.random.randint(1, 100, (nsubjs, 6))
    data[:, 0] = np.arange(1, nsubjs + 1)

    colnames = ['eid', '1-0.0', '1-0.0', '2-0.0', '3-0.0', '3-0.0']

    with tempdir():
        with open('data.txt', 'wt') as f:
            f.write('\t'.join(colnames) + '\n')
            np.savetxt(f, data, fmt='%i', delimiter='\t')

        main.main('-rm out.csv data.txt'.split())

        df = pd.read_csv('out.csv')

        assert list(df.columns) == ['eid', '1-0.0', '2-0.0', '3-0.0']
        assert np.all(df.to_numpy() == data[:, [0, 1, 3, 4]])
@patch_logging
def test_dupe_columns_multiple_files_remove():
    """``-rm`` removes duplicate columns both within a single file and
    across multiple input files (here '2-0.0'/'3-0.0' recur in file 2).
    """
    nsubjs = 10

    data1 = np.random.randint(1, 100, (nsubjs, 5))
    data1[:, 0] = np.arange(1, nsubjs + 1)
    colnames1 = ['eid', '1-0.0', '1-0.0', '2-0.0', '3-0.0']

    data2 = np.random.randint(1, 100, (nsubjs, 6))
    data2[:, 0] = np.arange(1, nsubjs + 1)
    colnames2 = ['eid', '2-0.0', '3-0.0', '4-0.0', '4-0.0', '5-0.0']

    with tempdir():
        for fname, names, filedata in [('data1.txt', colnames1, data1),
                                       ('data2.txt', colnames2, data2)]:
            with open(fname, 'wt') as f:
                f.write('\t'.join(names) + '\n')
                np.savetxt(f, filedata, fmt='%i', delimiter='\t')

        main.main('-rm out.csv data1.txt data2.txt'.split())

        df = pd.read_csv('out.csv')

        assert list(df.columns) == \
            ['eid', '1-0.0', '2-0.0', '3-0.0', '4-0.0', '5-0.0']

        # expect: eid + first '1-0.0', '2-0.0', '3-0.0' from file 1,
        # plus the first '4-0.0' and '5-0.0' from file 2
        exp = np.hstack((data1[:, [0, 1, 3, 4]], data2[:, [3, 5]]))
        assert np.all(df.to_numpy() == exp)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment