Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
FSL
funpack
Commits
beecb5ad
Commit
beecb5ad
authored
Dec 28, 2021
by
Paul McCarthy
🚵
Browse files
TEST: tests for --remove_duplicates
parent
a460a146
Changes
3
Hide whitespace changes
Inline
Side-by-side
funpack/tests/test_fileinfo.py
View file @
beecb5ad
...
...
@@ -216,7 +216,7 @@ fileinfo_tests = [
7, 8, 9
"""
,
False
,
[(
'0-0.0'
.
format
(
0
),
0
,
0
,
0
),
[(
'0-0.0'
,
0
,
0
,
0
),
(
'{}-0.0'
.
format
(
AVID
+
2
),
AVID
+
2
,
0
,
0
),
(
'{}-0.0'
.
format
(
AVID
+
3
),
AVID
+
3
,
0
,
0
)]),
(
"""col1
\t
col2
\t
col3
\t
col4
...
...
@@ -433,3 +433,14 @@ def test_renameDuplicateColumns():
assert
[
c
.
name
for
c
in
cols
]
==
expnames
assert
[
c
.
origname
for
c
in
cols
]
==
names
def test_renameDuplicateColumns_suffix():
    """Check ``fileinfo.renameDuplicateColumns`` when a ``suffix`` is given.

    The test expects every repeated column name to be renamed to
    ``<name>.<occurrence><suffix>`` (e.g. the second ``'A'`` becomes
    ``'A.1.SUF'``), while the first occurrence keeps its name and the
    ``origname`` of every column is left untouched.
    """
    original = ['A', 'B', 'C', 'A', 'B', 'D', 'A']
    expected = ['A', 'B', 'C', 'A.1.SUF', 'B.1.SUF', 'D', 'A.2.SUF']

    # One Column per name; the third argument is the column's position.
    columns = []
    for position, colname in enumerate(original):
        columns.append(datatable.Column(None, colname, position))

    # The columns are inspected afterwards, so the call is expected to
    # rename them in place.
    fileinfo.renameDuplicateColumns(columns, suffix='.SUF')

    renamed = [col.name for col in columns]
    assert renamed == expected

    untouched = [col.origname for col in columns]
    assert untouched == original
funpack/tests/test_importing.py
View file @
beecb5ad
...
...
@@ -574,7 +574,7 @@ def test_importData_dropNaRows():
assert
list
(
loaded
.
index
)
==
[
1
,
3
,
4
,
5
,
6
,
8
,
10
]
def
test_importData_duplicate_columns
():
def
test_importData_duplicate_columns
_rename
():
data
=
np
.
random
.
randint
(
1
,
100
,
(
10
,
3
))
data
[:,
0
]
=
np
.
arange
(
1
,
10
+
1
)
...
...
@@ -600,9 +600,8 @@ def test_importData_duplicate_columns():
assert
(
dtable
[:,
:]
==
data
[:,
1
:]).
all
().
all
()
def
test_importData_duplicate_columns_multiple_files
():
"""
"""
def
test_importData_duplicate_columns_multiple_files_rename
():
data
=
np
.
random
.
randint
(
1
,
100
,
(
10
,
5
))
data
[:,
0
]
=
np
.
arange
(
1
,
10
+
1
)
colnames1
=
[
'eid'
]
+
[
'1-0.0'
,
'2-0.0'
]
...
...
@@ -633,3 +632,74 @@ def test_importData_duplicate_columns_multiple_files():
assert
names
==
[
'1-0.0'
,
'2-0.0'
,
'2-0.0.1'
,
'3-0.0'
]
assert
orignames
==
[
'1-0.0'
,
'2-0.0'
,
'2-0.0'
,
'3-0.0'
]
assert
(
dtable
[:,
names
]
==
data
[:,
1
:]).
all
().
all
()
def test_importData_duplicate_columns_remove():
    """Import one file with duplicated columns, dropping the duplicates.

    Duplicate columns are tagged with the ``'.REMOVE'`` suffix (via
    ``renameDuplicates``/``renameSuffix``) and that suffix is then passed
    to ``importData`` in ``excludeColnames``. The test expects only the
    first occurrence of each column name to be loaded.
    """
    nrows = 10
    header = ['eid', '1-0.0', '1-0.0', '2-0.0', '3-0.0', '3-0.0']

    data = np.random.randint(1, 100, (nrows, 6))
    # First column holds the row identifiers 1..10.
    data[:, 0] = np.arange(1, nrows + 1)

    tables = gen_tables([1])[:3]
    vartable, proctable, cattable = tables

    custom.registerBuiltIns()

    with tempdir():

        # Tab-separated file: one header line followed by integer rows.
        with open('data.txt', 'wt') as f:
            f.write('\t'.join(header) + '\n')
            np.savetxt(f, data, fmt='%i', delimiter='\t')

        finfo = fileinfo.FileInfo('data.txt',
                                  renameDuplicates=True,
                                  renameSuffix='.REMOVE')

        dtable, _ = importing.importData(finfo,
                                         vartable,
                                         proctable,
                                         cattable,
                                         excludeColnames=['.REMOVE'])

        loaded = dtable.dataColumns
        kept = ['1-0.0', '2-0.0', '3-0.0']

        assert dtable.variables == [0, 1, 2, 3]
        assert [col.name for col in loaded] == kept
        assert [col.origname for col in loaded] == kept

        # Source columns 1, 3 and 4 are the first occurrences of each name.
        assert (dtable[:, :] == data[:, [1, 3, 4]]).all().all()
def test_importData_duplicate_columns_multiple_files_remove():
    """Import two files with duplicated columns, dropping the duplicates.

    Column names are repeated both within each file and across the two
    files. Duplicates are tagged with the ``'.REMOVE'`` suffix and then
    excluded through ``excludeColnames``. The test expects exactly one
    column per distinct name to be loaded.
    """
    nrows = 10

    data = np.random.randint(1, 100, (nrows, 8))
    # First column holds the row identifiers 1..10, shared by both files.
    data[:, 0] = np.arange(1, nrows + 1)

    # (file name, header, columns of ``data`` stored in that file)
    inputs = [
        ('data1.txt',
         ['eid', '1-0.0', '2-0.0', '2-0.0'],
         [0, 1, 2, 3]),
        ('data2.txt',
         ['eid', '2-0.0', '3-0.0', '3-0.0', '4-0.0'],
         [0, 4, 5, 6, 7]),
    ]

    tables = gen_tables([1, 2, 3])[:3]
    vartable, proctable, cattable = tables

    custom.registerBuiltIns()

    with tempdir():

        # Tab-separated files: one header line followed by integer rows.
        for fname, header, colidxs in inputs:
            with open(fname, 'wt') as f:
                f.write('\t'.join(header) + '\n')
                np.savetxt(f, data[:, colidxs], fmt='%i', delimiter='\t')

        finfo = fileinfo.FileInfo(['data1.txt', 'data2.txt'],
                                  renameDuplicates=True,
                                  renameSuffix='.REMOVE')

        dtable, _ = importing.importData(finfo,
                                         vartable,
                                         proctable,
                                         cattable,
                                         excludeColnames=['.REMOVE'])

        loaded = dtable.dataColumns

        # Sorted, so the comparison does not depend on column order.
        names = sorted(col.name for col in loaded)
        orignames = sorted(col.origname for col in loaded)
        kept = ['1-0.0', '2-0.0', '3-0.0', '4-0.0']

        assert dtable.variables == [0, 1, 2, 3, 4]
        assert names == kept
        assert orignames == kept

        # Source columns 1, 2, 5 and 7 are the first occurrences of each name.
        assert (dtable[:, names] == data[:, [1, 2, 5, 7]]).all().all()
funpack/tests/test_main.py
View file @
beecb5ad
...
...
@@ -1560,7 +1560,7 @@ def test_ids_only():
@
patch_logging
def
test_dupe_columns
():
def
test_dupe_columns
_rename
():
data
=
np
.
random
.
randint
(
1
,
100
,
(
10
,
3
))
data
[:,
0
]
=
np
.
arange
(
1
,
10
+
1
)
colnames
=
[
'eid'
,
'1-0.0'
,
'1-0.0'
]
...
...
@@ -1577,7 +1577,7 @@ def test_dupe_columns():
@
patch_logging
def
test_dupe_columns_multiple_files
():
def
test_dupe_columns_multiple_files
_rename
():
data1
=
np
.
random
.
randint
(
1
,
100
,
(
10
,
3
))
data1
[:,
0
]
=
np
.
arange
(
1
,
10
+
1
)
colnames1
=
[
'eid'
,
'1-0.0'
,
'2-0.0'
]
...
...
@@ -1599,3 +1599,45 @@ def test_dupe_columns_multiple_files():
[
'eid'
,
'1-0.0'
,
'2-0.0'
,
'2-0.0.1'
,
'3-0.0'
]
exp
=
np
.
hstack
((
data1
,
data2
[:,
1
:]))
assert
np
.
all
(
df
.
to_numpy
()
==
exp
)
@patch_logging
def test_dupe_columns_remove():
    """Run ``main`` with ``-rm`` on one file containing duplicated columns.

    NOTE(review): ``-rm`` is presumably the remove-duplicates option, going
    by the test name - confirm against the command-line definition. The
    test expects the output to contain only the first occurrence of each
    column name.
    """
    nrows = 10
    header = ['eid', '1-0.0', '1-0.0', '2-0.0', '3-0.0', '3-0.0']

    data = np.random.randint(1, 100, (nrows, 6))
    # First column holds the row identifiers 1..10.
    data[:, 0] = np.arange(1, nrows + 1)

    with tempdir():

        # Tab-separated file: one header line followed by integer rows.
        with open('data.txt', 'wt') as f:
            f.write('\t'.join(header) + '\n')
            np.savetxt(f, data, fmt='%i', delimiter='\t')

        main.main(['-rm', 'out.csv', 'data.txt'])

        result = pd.read_csv('out.csv')

        assert list(result.columns) == ['eid', '1-0.0', '2-0.0', '3-0.0']

        # Identifier column plus the first occurrence of each name.
        expected = data[:, [0, 1, 3, 4]]
        assert np.all(result.to_numpy() == expected)
@patch_logging
def test_dupe_columns_multiple_files_remove():
    """Run ``main`` with ``-rm`` on two files containing duplicated columns.

    Column names are repeated both within each file and across the two
    files. NOTE(review): ``-rm`` is presumably the remove-duplicates
    option, going by the test name - confirm against the command-line
    definition. The test expects one output column per distinct name,
    taken from its first occurrence.
    """
    nrows = 10

    data1 = np.random.randint(1, 100, (nrows, 5))
    data1[:, 0] = np.arange(1, nrows + 1)
    header1 = ['eid', '1-0.0', '1-0.0', '2-0.0', '3-0.0']

    data2 = np.random.randint(1, 100, (nrows, 6))
    data2[:, 0] = np.arange(1, nrows + 1)
    header2 = ['eid', '2-0.0', '3-0.0', '4-0.0', '4-0.0', '5-0.0']

    inputs = [('data1.txt', header1, data1),
              ('data2.txt', header2, data2)]

    with tempdir():

        # Tab-separated files: one header line followed by integer rows.
        for fname, header, contents in inputs:
            with open(fname, 'wt') as f:
                f.write('\t'.join(header) + '\n')
                np.savetxt(f, contents, fmt='%i', delimiter='\t')

        main.main(['-rm', 'out.csv', 'data1.txt', 'data2.txt'])

        result = pd.read_csv('out.csv')

        expcols = ['eid', '1-0.0', '2-0.0', '3-0.0', '4-0.0', '5-0.0']
        assert list(result.columns) == expcols

        # From the first file: identifiers and the first occurrences of
        # 1-0.0, 2-0.0 and 3-0.0. From the second file: only the names not
        # already seen (first 4-0.0, and 5-0.0).
        from_first = data1[:, [0, 1, 3, 4]]
        from_second = data2[:, [3, 5]]
        expected = np.hstack((from_first, from_second))

        assert np.all(result.to_numpy() == expected)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment