Skip to content

Commit

Permalink
Buffer input files. wireservice/csvkit#707.
Browse files: browse the repository at this point in the history
  • Loading branch information
onyxfish committed Dec 25, 2016
1 parent 705d558 commit 4d4c342
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 8 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
1.5.3
-----


* :meth:`.Table.from_csv` now buffers the input file in memory before sniffing and parsing, so that non-seekable inputs such as STDIN can be used.

1.5.2 - December 24, 2016
-------------------------
Expand Down
10 changes: 4 additions & 6 deletions agate/table/from_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,19 +62,17 @@ def from_csv(cls, path, column_names=None, column_types=None, row_names=None, sk
else:
raise ValueError('skip_lines argument must be an int')

start = f.tell()
contents = six.StringIO(f.read())

if sniff_limit is None:
kwargs['dialect'] = csv.Sniffer().sniff(f.read())
kwargs['dialect'] = csv.Sniffer().sniff(contents.getvalue())
elif sniff_limit > 0:
kwargs['dialect'] = csv.Sniffer().sniff(f.read(sniff_limit))
kwargs['dialect'] = csv.Sniffer().sniff(contents.getvalue()[:sniff_limit])

if six.PY2:
kwargs['encoding'] = encoding

f.seek(start)

reader = csv.reader(f, header=header, **kwargs)
reader = csv.reader(contents, header=header, **kwargs)

if header:
if column_names is None:
Expand Down
18 changes: 17 additions & 1 deletion tests/test_table/test_from_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,23 @@ def test_from_csv_no_header_columns(self):
self.assertColumnNames(table, self.column_names)
self.assertColumnTypes(table, [Number, Text, Boolean, Date, DateTime, TimeDelta])

def test_from_csv_sniff_limit(self):
def test_from_csv_sniff_limit_0(self):
table1 = Table(self.rows, self.column_names, self.column_types)
table2 = Table.from_csv('examples/test_csv_sniff.csv', sniff_limit=0)

self.assertColumnNames(table2, ['number|text|boolean|date|datetime|timedelta'])
self.assertColumnTypes(table2, [Text])

def test_from_csv_sniff_limit_200(self):
table1 = Table(self.rows, self.column_names, self.column_types)
table2 = Table.from_csv('examples/test_csv_sniff.csv', sniff_limit=200)

self.assertColumnNames(table2, table1.column_names)
self.assertColumnTypes(table2, [Number, Text, Boolean, Date, DateTime, TimeDelta])

self.assertRows(table2, table1.rows)

def test_from_csv_sniff_limit_none(self):
table1 = Table(self.rows, self.column_names, self.column_types)
table2 = Table.from_csv('examples/test_csv_sniff.csv', sniff_limit=None)

Expand Down

0 comments on commit 4d4c342

Please sign in to comment.