Skip to content

Commit

Permalink
Unicode, man. Closes #138.
Browse files Browse the repository at this point in the history
  • Loading branch information
onyxfish committed Sep 7, 2015
1 parent 8d547a0 commit 663759f
Show file tree
Hide file tree
Showing 12 changed files with 59 additions and 21 deletions.
1 change: 1 addition & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
0.8.0
-----

* Add tests for unicode support. (#138)
* Fix computations.ZScores calculation. (#123)
* Differentiate sample and population variance and stdev. (#208)
* Support for overriding column inference with "force".
Expand Down
4 changes: 2 additions & 2 deletions agate/column_types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from collections import OrderedDict
except ImportError: # pragma: no cover
from ordereddict import OrderedDict

from agate.column_types.base import *
from agate.column_types.boolean import *
from agate.column_types.date_time import *
Expand Down Expand Up @@ -91,4 +91,4 @@ def run(self, rows, column_names):
column_types.append(t)
break

return zip(column_names, column_types)
return tuple(zip(column_names, column_types))
3 changes: 3 additions & 0 deletions agate/columns/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ def __unicode__(self):
return '<agate.columns.%s: %s>' % (self.__class__.__name__, sample)

def __str__(self):
    """
    Return the native ``str`` rendering of this object's ``__unicode__`` text.

    On Python 2 the unicode text is encoded to UTF-8 bytes before being
    passed to ``str``; on Python 3 it is passed to ``str`` unchanged.
    """
    text = self.__unicode__()

    if six.PY2:
        # Python 2's ``str`` is a byte string, so encode explicitly.
        text = text.encode('utf8')

    return str(text)

def __getitem__(self, j):
Expand Down
3 changes: 3 additions & 0 deletions agate/rows.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ def __unicode__(self):
return '<agate.rows.Row: %s>' % sample

def __str__(self):
    """
    Return the native ``str`` rendering of this row's ``__unicode__`` text.

    The unicode text is UTF-8 encoded first when running on Python 2 and
    handed to ``str`` as-is otherwise.
    """
    rendered = self.__unicode__()

    if not six.PY2:
        return str(rendered)

    # Python 2: produce a UTF-8 byte string rather than a unicode object.
    return str(rendered.encode('utf8'))

def __repr__(self):
Expand Down
2 changes: 1 addition & 1 deletion agate/tableset.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def from_csv(cls, dir_path, column_info, header=True, **kwargs):
table = Table.from_csv(path, column_info, header=header, **kwargs)

if use_inference and not has_inferred_columns:
column_info = zip(table.get_column_names(), table.get_column_types())
column_info = tuple(zip(table.get_column_names(), table.get_column_types()))
has_inferred_columns = True

tables[name] = table
Expand Down
2 changes: 1 addition & 1 deletion examples/test.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
one,two,three
1,4,a
2,3,b
,2,c
,2,👍
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ python-dateutil==2.2
sphinx_rtd_theme>=0.1.6
wheel>=0.24.0
pytimeparse>=1.1.5
csvkit>=0.9.1
16 changes: 12 additions & 4 deletions tests/test_column_types.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf8 -*-

try:
import unittest2 as unittest
Expand Down Expand Up @@ -85,22 +86,29 @@ def test_force_type(self):
self.assertIsInstance(inferred[0][1], TextType)

def test_table_from_csv(self):
table = Table.from_csv('examples/test.csv', self.tester)
import csvkit
from agate import table
table.csv = csvkit

if six.PY2:
table = Table.from_csv('examples/test.csv', self.tester, encoding='utf8')
else:
table = Table.from_csv('examples/test.csv', self.tester)

self.assertSequenceEqual(table.get_column_names(), ['one', 'two', 'three'])
self.assertSequenceEqual(map(type, table.get_column_types()), [NumberType, NumberType, TextType])
self.assertSequenceEqual(tuple(map(type, table.get_column_types())), [NumberType, NumberType, TextType])

self.assertEqual(len(table.columns), 3)

self.assertSequenceEqual(table.rows[0], [1, 4, 'a'])
self.assertSequenceEqual(table.rows[1], [2, 3, 'b'])
self.assertSequenceEqual(table.rows[2], [None, 2, 'c'])
self.assertSequenceEqual(table.rows[2], [None, 2, u'👍'])

def test_tableset_from_csv(self):
tableset = TableSet.from_csv('examples/tableset', self.tester)

self.assertSequenceEqual(tableset.get_column_names(), ['letter', 'number'])
self.assertSequenceEqual(map(type, tableset.get_column_types()), [TextType, NumberType])
self.assertSequenceEqual(tuple(map(type, tableset.get_column_types())), [TextType, NumberType])

self.assertEqual(len(tableset['table1'].columns), 2)

Expand Down
5 changes: 3 additions & 2 deletions tests/test_columns.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf8 -*-

import datetime

Expand All @@ -22,9 +23,9 @@ def test_text(self):
self.assertIsInstance(TextType().create_column(None, 1), TextColumn)

def test_text_cast(self):
values = ('a', 1, None, Decimal('2.7'), 'n/a')
values = ('a', 1, None, Decimal('2.7'), 'n/a', u'👍')
casted = tuple(TextType().cast(v) for v in values)
self.assertSequenceEqual(casted, ('a', '1', None, '2.7', None))
self.assertSequenceEqual(casted, ('a', '1', None, '2.7', None, u'👍'))

def test_boolean(self):
self.assertIsInstance(BooleanType().create_column(None, 1), BooleanColumn)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_rows.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def setUp(self):
self.table = Table(self.rows, self.columns)

def test_stringify(self):
self.assertEqual(str(self.table.rows[0]), "<agate.rows.Row: (1, 2, a)>")
self.assertEqual(str(self.table.rows[0]), '<agate.rows.Row: (1, 2, a)>')

def test_stringify_long(self):
rows = (
Expand Down
40 changes: 30 additions & 10 deletions tests/test_table.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf8 -*-

try:
from cdecimal import Decimal
Expand All @@ -12,6 +13,8 @@
except ImportError:
import unittest

import six

from agate import Table, TableSet
from agate.column_types import TextType, NumberType
from agate.computations import Formula
Expand All @@ -22,7 +25,7 @@ def setUp(self):
self.rows = (
(1, 4, 'a'),
(2, 3, 'b'),
(None, 2, 'c')
(None, 2, u'👍')
)

self.number_type = NumberType()
Expand All @@ -41,7 +44,7 @@ def test_create_table(self):

self.assertSequenceEqual(table.rows[0], (1, 4, 'a'))
self.assertSequenceEqual(table.rows[1], (2, 3, 'b'))
self.assertSequenceEqual(table.rows[2], (None, 2, 'c'))
self.assertSequenceEqual(table.rows[2], (None, 2, u'👍'))

def test_create_duplicate_column_names(self):
columns = (
Expand All @@ -53,7 +56,24 @@ def test_create_duplicate_column_names(self):
with self.assertRaises(ValueError):
Table(self.rows, columns)

def test_from_csv(self):
def test_from_csv_builtin(self):
    """
    Load ``examples/test.csv`` with the builtin ``csv`` module installed
    on ``agate.table``.

    On Python 2 the load is expected to raise ``UnicodeDecodeError``; on
    Python 3 it is expected to succeed and produce three columns.
    """
    import csv
    # Aliased so the module is not shadowed by the loaded Table below.
    from agate import table as table_module

    # Point agate.table's module-level ``csv`` name at the builtin module
    # (presumably the reader ``Table.from_csv`` uses -- confirm).
    table_module.csv = csv

    if six.PY2:
        with self.assertRaises(UnicodeDecodeError):
            Table.from_csv('examples/test.csv', self.columns)

        # Nothing was loaded on this path, so there are no columns to check.
        return

    loaded = Table.from_csv('examples/test.csv', self.columns)

    self.assertEqual(len(loaded.columns), 3)

def test_from_csv_csvkit(self):
import csvkit
from agate import table
table.csv = csvkit

table1 = Table(self.rows, self.columns)
table2 = Table.from_csv('examples/test.csv', self.columns)

Expand Down Expand Up @@ -102,12 +122,12 @@ def test_select(self):
self.assertEqual(len(new_table.rows), 3)
self.assertSequenceEqual(new_table.rows[0], ('a',))
self.assertSequenceEqual(new_table.rows[1], ('b',))
self.assertSequenceEqual(new_table.rows[2], ('c',))
self.assertSequenceEqual(new_table.rows[2], (u'👍',))

self.assertEqual(len(new_table.columns), 1)
self.assertSequenceEqual(new_table._column_types, (self.text_type,))
self.assertSequenceEqual(new_table._column_names, ('three',))
self.assertSequenceEqual(new_table.columns['three'], ('a', 'b', 'c'))
self.assertSequenceEqual(new_table.columns['three'], ('a', 'b', u'👍'))

def test_where(self):
table = Table(self.rows, self.columns)
Expand Down Expand Up @@ -219,14 +239,14 @@ def test_order_by(self):

self.assertIsNot(new_table, table)
self.assertEqual(len(new_table.rows), 3)
self.assertSequenceEqual(new_table.rows[0], (None, 2, 'c'))
self.assertSequenceEqual(new_table.rows[0], (None, 2, u'👍'))
self.assertSequenceEqual(new_table.rows[1], (2, 3, 'b'))
self.assertSequenceEqual(new_table.rows[2], (1, 4, 'a'))

# Verify old table not changed
self.assertSequenceEqual(table.rows[0], (1, 4, 'a'))
self.assertSequenceEqual(table.rows[1], (2, 3, 'b'))
self.assertSequenceEqual(table.rows[2], (None, 2, 'c'))
self.assertSequenceEqual(table.rows[2], (None, 2, u'👍'))

def test_order_by_func(self):
rows = (
Expand All @@ -253,7 +273,7 @@ def test_order_by_reverse(self):
self.assertEqual(len(new_table.rows), 3)
self.assertSequenceEqual(new_table.rows[0], (1, 4, 'a'))
self.assertSequenceEqual(new_table.rows[1], (2, 3, 'b'))
self.assertSequenceEqual(new_table.rows[2], (None, 2, 'c'))
self.assertSequenceEqual(new_table.rows[2], (None, 2, u'👍'))

def test_order_by_nulls(self):
rows = (
Expand Down Expand Up @@ -291,7 +311,7 @@ def test_limit_slice(self):
self.assertIsNot(new_table, table)
self.assertEqual(len(new_table.rows), 2)
self.assertSequenceEqual(new_table.rows[0], (1, 4, 'a'))
self.assertSequenceEqual(new_table.rows[1], (None, 2, 'c'))
self.assertSequenceEqual(new_table.rows[1], (None, 2, u'👍'))
self.assertSequenceEqual(new_table.columns['one'], (1, None))

def test_limit_slice_negative(self):
Expand All @@ -313,7 +333,7 @@ def test_limit_step_only(self):
self.assertIsNot(new_table, table)
self.assertEqual(len(new_table.rows), 2)
self.assertSequenceEqual(new_table.rows[0], (1, 4, 'a'))
self.assertSequenceEqual(new_table.rows[1], (None, 2, 'c'))
self.assertSequenceEqual(new_table.rows[1], (None, 2, u'👍'))
self.assertSequenceEqual(new_table.columns['one'], (1, None))

def test_distinct_column(self):
Expand Down
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ deps=
nose>=1.1.2
six>=1.6.1
python-dateutil>=2.2
csvkit>=0.9.1
commands=nosetests

[testenv:py27]
Expand Down

0 comments on commit 663759f

Please sign in to comment.