Commit 03033acb authored by Paul McCarthy 🚵
Browse files

BF: Was testing column type frequency the wrong way around

parent 99f38091
......@@ -113,9 +113,9 @@ def has_header(sample,
# if more than two thirds of rows
# have a different type to the first
# row, let's say we have a header.
threshold = collections.defaultdict(lambda : 0.66)
threshold = collections.defaultdict(lambda : 0.34)
threshold[1] = 1.0
threshold[2] = 0.49
threshold[2] = 0.51
for col, ctypes in coltypes.items():
......@@ -123,8 +123,8 @@ def has_header(sample,
hist = collections.Counter(ctypes)
thres = threshold[len(ctypes)]
if hist[t0] / len(ctypes) > thres: colcount += 1
else: colcount -= 1
if (hist[t0] / len(ctypes)) < thres: colcount += 1
else: colcount -= 1
return colcount > 0
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment