Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
FSL
funpack
Commits
44ac91ab
Commit
44ac91ab
authored
Dec 20, 2018
by
Paul McCarthy
🚵
Browse files
RF,DOC: Clarify index column position in loadData/loadFile. Small clean-up in fileinfo.
parent
4282448c
Changes
2
Hide whitespace changes
Inline
Side-by-side
ukbparse/fileinfo.py
View file @
44ac91ab
...
...
@@ -78,7 +78,7 @@ def has_header(sample,
# match, we take the length of
# the value, in the hope that the
# column will have a different
# length to the
# length to the header row value
else
:
return
len
(
val
)
...
...
@@ -102,8 +102,9 @@ def has_header(sample,
for
i
,
col
in
enumerate
(
row
):
ct
=
inferType
(
col
)
# missing values are
# not considered
# missing values are treated
# like any other value - they
# are given a type of "None"
coltypes
[
i
].
append
(
ct
)
# we build a score based on the
...
...
@@ -180,7 +181,7 @@ def sniff(datafile):
sample
=
'
\n
'
.
join
(
lines
)
try
:
dialect
=
sniffer
.
sniff
(
sample
,
' .,
\t
:;|/\~!@#$~%^&*'
)
dialect
=
sniffer
.
sniff
(
sample
,
' .,
\t
:;|/
\
\
~!@#$~%^&*'
)
except
csv
.
Error
:
dialect
=
None
...
...
@@ -223,22 +224,17 @@ def sniff(datafile):
raise
ValueError
(
'Could not determine file format: '
'{}'
.
format
(
datafile
))
# if whitespace-delimited, we re-generate
# the sample into a format that will be
# recognised by the sniffer, purely so we
# can use its has_header method. We take
# a copy of the first row, so we can
# extract column names if possible.
if
dialect
==
'whitespace'
:
firstRow
=
lines
[
0
].
split
()
hasHeader
=
has_header
(
'
\n
'
.
join
(
lines
),
dialect
)
# Use the has_header function to
# figure out if we have column names
hasHeader
=
has_header
(
sample
,
dialect
)
# Otherwise we pass the unmodified sample,
# and read in the first row.
# And take a copy of the first row,
# in case we do have column names.
if
dialect
==
'whitespace'
:
firstRow
=
lines
[
0
].
split
()
else
:
hasHeader
=
has_header
(
sample
,
dialect
)
reader
=
csv
.
reader
(
io
.
StringIO
(
sample
),
dialect
)
firstRow
=
next
(
reader
)
reader
=
csv
.
reader
(
io
.
StringIO
(
sample
),
dialect
)
firstRow
=
next
(
reader
)
log
.
debug
(
'Detected dialect for input file %s: (header: %s, '
'delimiter: %s)'
,
...
...
ukbparse/importing.py
View file @
44ac91ab
...
...
@@ -369,8 +369,11 @@ def columnsToLoad(datafiles,
- A dict of ``{ file : [Column] }`` mappings, the
:class:`.Column` objects to *load* from each input
file. The columns are not necessarily ordered in the
same way that they are in the input files.
file.
Note that the columns are not necessarily ordered
in the same way that they are in the input files -
the header column will always be first in each list.
- A list containing the :class:`.Column` objects to
*ignore*.
...
...
@@ -545,8 +548,7 @@ def loadData(datafiles,
:class:`.HDFStoreCollection`, containing the data,
or ``None`` if ``dryrun is True``.
- A list of :class:`.Column` objects representing the
columns that were loaded. The index column is placed
at the beginning of this list.
columns that were loaded.
"""
if
mergeStrategy
is
None
:
mergeStrategy
=
MERGE_STRATEGY
...
...
@@ -651,8 +653,8 @@ def loadFile(fname,
in the file.
:arg toload: Sequence of :class:`.Column` objects describing the columns
that should be loaded.
This list is not assumed to be ordered.
that should be loaded. It is assumed that the first column
in this list is the index column.
:arg index: Column position of index column (starting from 0). Defaults
to 0.
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment