Commit 44ac91ab authored by Paul McCarthy 🚵

RF,DOC: Clarify index column position in loadData/loadFile. Small clean up in
fileinfo.
parent 4282448c
@@ -78,7 +78,7 @@ def has_header(sample,
         # match, we take the length of
         # the value, in the hope that the
         # column will have a different
-        # length to the
+        # length to the header row value
         else:
             return len(val)
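
The comment fixed above belongs to the type-inference fallback used by has_header: if a value cannot be parsed as any numeric type, its string length is used as its "type", in the hope that a textual header value will look different to the data values below it. A minimal, hypothetical sketch of that heuristic (not the actual funpack code):

def infer_type(val):
    # hypothetical sketch only - try the numeric types first ...
    for t in (int, float, complex):
        try:
            t(val)
            return t
        except ValueError:
            pass
    # ... and if none of the types match, take the length of
    # the value, hoping that data rows will have a different
    # length to the header row value
    return len(val)

print(infer_type('42'))     # <class 'int'>
print(infer_type('eid'))    # 3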
@@ -102,8 +102,9 @@ def has_header(sample,
         for i, col in enumerate(row):
             ct = inferType(col)
 
-            # missing values are
-            # not considered
+            # missing values are treated
+            # like any other value - they
+            # are given a type of "None"
             coltypes[i].append(ct)
 
     # we build a score based on the
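
The reworded comment clarifies that an empty cell is not skipped - it simply contributes a "type" of None for its column. A rough, hypothetical illustration of how per-column types could then be scored against the first row to guess whether it is a header (the real scoring in fileinfo.py may differ):

import collections

def looks_like_header(rows):
    # hypothetical scoring sketch, not the funpack implementation
    def infer(val):
        if val == '':
            return None        # missing values get a type of None
        for t in (int, float):
            try:
                t(val)
                return t
            except ValueError:
                pass
        return len(val)        # fall back to string length

    first, rest = rows[0], rows[1:]
    score       = 0
    for i, val in enumerate(first):
        coltypes = collections.Counter(infer(r[i]) for r in rest)
        # if the first-row value "looks different" from the rest
        # of the column, that is evidence of a header row
        if infer(val) != coltypes.most_common(1)[0][0]: score += 1
        else:                                           score -= 1
    return score > 0

print(looks_like_header([['eid', 'age'], ['1', '42'], ['2', '37']]))   # True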
@@ -180,7 +181,7 @@ def sniff(datafile):
     sample = '\n'.join(lines)
 
     try:
-        dialect = sniffer.sniff(sample, ' .,\t:;|/\~!@#$~%^&*')
+        dialect = sniffer.sniff(sample, ' .,\t:;|/\\~!@#$~%^&*')
     except csv.Error:
         dialect = None
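
The one-character change above escapes the backslash in the candidate-delimiter string, so it contains a literal backslash rather than the unrecognised escape sequence \~, which recent Python versions warn about. For reference, this is how csv.Sniffer accepts a restricted set of candidate delimiters (standalone example, not funpack code):

import csv

sample  = 'eid|age|height\n1|42|172.1\n2|37|169.8\n'
sniffer = csv.Sniffer()

# the second argument limits which characters the sniffer
# will consider as delimiters; note the escaped backslash
dialect = sniffer.sniff(sample, ' .,\t:;|/\\~!@#$~%^&*')

print(dialect.delimiter)    # expected: '|'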
@@ -223,22 +224,17 @@ def sniff(datafile):
         raise ValueError('Could not determine file format: '
                          '{}'.format(datafile))
 
-    # if whitespace-delimited, we re-generate
-    # the sample into a format that will be
-    # recognised by the sniffer, purely so we
-    # can use its has_header method. We take
-    # a copy of the first row, so we can
-    # extract column names if possible.
-    if dialect == 'whitespace':
-        firstRow  = lines[0].split()
-        hasHeader = has_header('\n'.join(lines), dialect)
+    # Use the has_header function to
+    # figure out if we have column names
+    hasHeader = has_header(sample, dialect)
 
-    # Otherwise we pass the unmodified sample,
-    # and read in the first row.
+    # And take a copy of the first row,
+    # in case we do have column names.
+    if dialect == 'whitespace':
+        firstRow = lines[0].split()
     else:
-        hasHeader = has_header(sample, dialect)
-        reader    = csv.reader(io.StringIO(sample), dialect)
-        firstRow  = next(reader)
+        reader   = csv.reader(io.StringIO(sample), dialect)
+        firstRow = next(reader)
 
     log.debug('Detected dialect for input file %s: (header: %s, '
               'delimiter: %s)',
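
After this restructuring, header detection happens once, up front, and the first row is then copied in a dialect-specific way: split on whitespace for the internal 'whitespace' dialect, or via csv.reader for a dialect returned by the sniffer. A small standalone approximation of that branch (the function and its arguments are illustrative, not the funpack API):

import csv
import io

def first_row(sample, dialect):
    # illustrative helper: `dialect` is either the string
    # 'whitespace' or a csv dialect usable by csv.reader
    if dialect == 'whitespace':
        return sample.split('\n')[0].split()
    else:
        reader = csv.reader(io.StringIO(sample), dialect)
        return next(reader)

print(first_row('eid age\n1 42\n', 'whitespace'))   # ['eid', 'age']
print(first_row('eid,age\n1,42\n', csv.excel))      # ['eid', 'age']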
......
@@ -369,8 +369,11 @@ def columnsToLoad(datafiles,
               - A dict of ``{ file : [Column] }`` mappings, the
                 :class:`.Column` objects to *load* from each input
-                file. The columns are not necessarily ordered in the
-                same way that they are in the input files.
+                file.
+
+                Note that the columns are not necessarily ordered
+                in the same way that they are in the input files -
+                the header column will always be first in each list.
 
               - A list containing the :class:`.Column` objects to
                 *ignore*.
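
With the clarified docstring, callers of columnsToLoad can rely on the index/header column being first in each file's list, while making no assumption about the order of the remaining columns. An illustrative way to consume the returned mapping (faked with plain strings here; the names are not part of the commit):

# `cols` stands in for the { file : [Column] } mapping
# returned by columnsToLoad
cols = {'data.tsv': ['eid', 'age', 'height']}

for fname, filecols in cols.items():
    indexcol = filecols[0]     # the header/index column is always first
    datacols = filecols[1:]    # remaining columns, not necessarily in file order
    print(fname, indexcol, datacols)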
@@ -545,8 +548,7 @@ def loadData(datafiles,
                :class:`.HDFStoreCollection`, containing the data,
                or ``None`` if ``dryrun is True``.
              - A list of :class:`.Column` objects representing the
-               columns that were loaded. The index column is placed
-               at the beginning of this list.
+               columns that were loaded.
     """
 
     if mergeStrategy is None: mergeStrategy = MERGE_STRATEGY
@@ -651,8 +653,8 @@ def loadFile(fname,
                      in the file.
 
    :arg toload:      Sequence of :class:`.Column` objects describing the columns
-                     that should be loaded. This list is not assumed to be
-                     ordered.
+                     that should be loaded. It is assumed that the first column
+                     in this list is the index column.
 
    :arg index:       Column position of index column (starting from 0). Defaults
                      to 0.
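
The updated docstring turns an implicit convention into an explicit contract: the first Column in toload is the index column, and the index argument gives its position within the file. A hedged sketch of preparing such a list, using a stand-in column type (the 'position' field and the helper below are assumptions, not the funpack API):

import collections

# stand-in for funpack's Column type, with only the fields this
# sketch needs; 'position' is an assumed attribute name
Col = collections.namedtuple('Col', ('name', 'position'))

def index_first(columns, index=0):
    # move the column at file position `index` to the front,
    # leaving the relative order of the others unchanged
    idxcol = next(c for c in columns if c.position == index)
    return [idxcol] + [c for c in columns if c is not idxcol]

cols = [Col('age', 1), Col('eid', 0), Col('height', 2)]
print(index_first(cols))    # eid first, then age, height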
......