Skip to content

Commit

Permalink
Fix unicode bug under Py2.
Browse files Browse the repository at this point in the history
  • Loading branch information
onyxfish committed Dec 24, 2016
1 parent 65181c0 commit 3c75157
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 9 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
1.5.2
-----


* Improved handling of non-ascii encoded CSV files under Python 2.

1.5.1 - December 23, 2016
-------------------------
Expand Down
18 changes: 12 additions & 6 deletions agate/table/from_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,17 @@ def from_csv(cls, path, column_names=None, column_types=None, row_names=None, sk
:param path:
Filepath or file-like object from which to read CSV data. If a file-like
object is specified, it must be seekable.
object is specified, it must be seekable. If using Python 2, the file
should be opened in binary mode (:code:`rb`).
:param column_names:
See :meth:`.Table.__init__`.
:param column_types:
See :meth:`.Table.__init__`.
:param row_names:
See :meth:`.Table.__init__`.
:param skip_lines:
The number of lines to skip from the top of the file.
The number of lines to skip from the top of the file.
:param header:
If :code:`True`, the first row of the CSV is assumed to contain column
names. If :code:`header` and :code:`column_names` are both specified
Expand All @@ -46,7 +48,11 @@ def from_csv(cls, path, column_names=None, column_types=None, row_names=None, sk
if hasattr(path, 'read'):
f = path
else:
f = io.open(path, encoding=encoding)
if six.PY2:
f = open(path, 'Urb')
else:
f = io.open(path, encoding=encoding)

close = True

if isinstance(skip_lines, int):
Expand All @@ -63,10 +69,10 @@ def from_csv(cls, path, column_names=None, column_types=None, row_names=None, sk
elif sniff_limit > 0:
kwargs['dialect'] = csv.Sniffer().sniff(f.read(sniff_limit))

f.seek(start)

if six.PY2:
f = six.StringIO(f.read().encode('utf-8'))
kwargs['encoding'] = encoding

f.seek(start)

reader = csv.reader(f, header=header, **kwargs)

Expand Down
11 changes: 9 additions & 2 deletions tests/test_table/test_from_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

import io

import six

from agate import Table
from agate.testcase import AgateTestCase
from agate.data_types import *
Expand Down Expand Up @@ -55,8 +57,13 @@ def test_from_csv_cr(self):
def test_from_csv_file_like_object(self):
table1 = Table(self.rows, self.column_names, self.column_types)

with io.open('examples/test.csv', encoding='utf-8') as f:
table2 = Table.from_csv(f)
if six.PY2:
f = open('examples/test.csv', 'rb')
else:
f = io.open('examples/test.csv', encoding='utf-8')

table2 = Table.from_csv(f)
f.close()

self.assertColumnNames(table2, table1.column_names)
self.assertColumnTypes(table2, [Number, Text, Boolean, Date, DateTime, TimeDelta])
Expand Down

0 comments on commit 3c75157

Please sign in to comment.