Skip to content

Commit

Permalink
Merge fe2b092 into 0c42d1c
Browse files: browse the repository at this point in the history
  • Loading branch information
James McKinney committed Jan 30, 2016
2 parents 0c42d1c + fe2b092 commit 3e5f5a2
Show file tree
Hide file tree
Showing 5 changed files with 2 additions and 321 deletions.
19 changes: 0 additions & 19 deletions csvkit/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,6 @@ def length(self):
return len(self.row)


class CSVJSONException(CustomException):
    """
    Signals that a problem occurred while converting data to CSV.
    """


class InvalidValueForTypeException(CustomException):
"""
Exception raised when a value can not be normalized to a specified type.
Expand All @@ -69,18 +62,6 @@ def __init__(self, index, value, normal_type):
super(InvalidValueForTypeException, self).__init__(msg)


class InvalidValueForTypeListException(CustomException):
    """
    Aggregate error raised once one or more InvalidValueForTypeException
    instances have been collected while accumulating errors.
    """

    def __init__(self, errors):
        # Keep the collected errors reachable for whoever catches this.
        self.errors = errors
        super(InvalidValueForTypeListException, self).__init__(
            'Encountered errors converting values in %i columns' % len(errors)
        )


class RequiredHeaderError(CustomException):
"""
Exception raised when an operation requires a CSV file to have a header row.
Expand Down
88 changes: 0 additions & 88 deletions csvkit/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,68 +104,6 @@ def __init__(self, columns=[], name='new_table'):
list.__init__(self, columns)
self.name = name

def __str__(self):
    """Return the result of ``__unicode__`` converted with ``str``."""
    description = self.__unicode__()
    return str(description)

def __unicode__(self):
    """
    Build a text description of the table: the text form of every
    column, one per line.
    """
    descriptions = []
    for column in self:
        descriptions.append(six.text_type(column))
    return '\n'.join(descriptions)

def _reindex_columns(self):
    """
    Renumber the ``order`` attribute of every column so that it matches
    the column's current position in the table.
    """
    position = 0
    for column in self:
        column.order = position
        position += 1

def _deduplicate_column_name(self, column):
    """
    Rename ``column`` in place until its name no longer collides with a
    header already present in this table.

    A name ending in ``_<integer>`` has that counter incremented; any other
    name gets ``_2`` appended.

    Returns the (possibly updated) column name.
    """
    while column.name in self.headers():
        try:
            i = column.name.rindex('_')
            counter = int(column.name[i + 1:])
            column.name = '%s_%i' % (column.name[:i], counter + 1)
        except ValueError:
            # rindex() found no underscore, or the suffix after the last
            # underscore is not an integer: start a fresh counter.
            column.name += '_2'

    return column.name

def append(self, column):
    """
    Implements list append.

    The column's name is deduplicated against the existing headers before
    it is added to the end of the table.
    """
    self._deduplicate_column_name(column)

    list.append(self, column)

    position = len(self) - 1
    # insert/extend/remove keep ``order`` in sync through
    # _reindex_columns(); do the same here so it is not left stale.
    column.order = position
    # ``index`` was the attribute historically set by append; it is kept
    # for any caller that reads it.
    column.index = position

def insert(self, i, column):
    """
    Implements list insert: place ``column`` at position ``i``.
    """
    # Resolve header collisions before the column joins the table.
    self._deduplicate_column_name(column)
    list.insert(self, i, column)

    # Every column at or after ``i`` has shifted; refresh all positions.
    self._reindex_columns()

def extend(self, columns):
    """
    Implements list extend.

    Columns are added one at a time, so each incoming column's name is
    deduplicated against the existing headers and against the columns added
    earlier in the same call. Iterating only once also means a one-shot
    iterable (such as a generator) is not exhausted before its columns are
    added.
    """
    for column in columns:
        self._deduplicate_column_name(column)
        list.append(self, column)

    self._reindex_columns()

def remove(self, column):
    """
    Implements list remove: drop ``column`` from the table.
    """
    list.remove(self, column)

    # Columns after the removed one moved up; refresh all positions.
    self._reindex_columns()

def sort(self):
    """
    Forbids list sort: always raises NotImplementedError.
    """
    raise NotImplementedError

def reverse(self):
    """
    Forbids list reverse: always raises NotImplementedError.
    """
    raise NotImplementedError

def headers(self):
    """Return the ``name`` of every column, in table order."""
    names = []
    for column in self:
        names.append(column.name)
    return names

Expand All @@ -177,20 +115,6 @@ def count_rows(self):

return 0

def row(self, i):
    """
    Return the values found at row position ``i``, one per column.

    Raises IndexError when ``i`` is negative or is not smaller than the
    value reported by ``count_rows()``.
    """
    if i < 0:
        raise IndexError('Negative row numbers are not valid.')

    if i >= self.count_rows():
        raise IndexError('Row number exceeds the number of rows in the table.')

    values = []
    for column in self:
        values.append(column[i])
    return values

@classmethod
def from_csv(cls, f, name='from_csv_table', snifflimit=None, column_ids=None, blanks_as_nulls=True, zero_based=False, infer_types=True, no_header_row=False, **kwargs):
"""
Expand Down Expand Up @@ -281,15 +205,3 @@ def to_rows(self, serialize_dates=False):
return list(zip(*out_columns))
else:
return list(zip(*self))

def to_csv(self, output, **kwargs):
    """
    Write the table as CSV to ``output``, any file-like object.

    Extra keyword arguments are passed through to ``agate.writer``.
    """
    data_rows = self.to_rows(serialize_dates=True)

    # The header row goes first, followed by the data rows.
    all_rows = [self.headers()]
    all_rows.extend(data_rows)

    agate.writer(output, **kwargs).writerows(all_rows)
48 changes: 1 addition & 47 deletions csvkit/typeinference.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from dateutil.parser import parse
import six

from csvkit.exceptions import InvalidValueForTypeException, InvalidValueForTypeListException
from csvkit.exceptions import InvalidValueForTypeException

NoneType = type(None)

Expand Down Expand Up @@ -201,49 +201,3 @@ def normalize_column_type(l, normal_type=None, blanks_as_nulls=True):
return six.text_type, [x if x != '' else None for x in l]
else:
return six.text_type, l


def normalize_table(rows, normal_types=None, accumulate_errors=False, blanks_as_nulls=True):
    """
    Given a sequence of sequences, normalize the lot.

    The rows are first transposed into columns. Rows may be ragged: a row
    longer than any seen so far creates new columns (back-filled with None
    for the earlier rows), and a row shorter than the current column count
    is padded with None so that every value stays aligned with its row.

    Optionally accepts a normal_types parameter which is a list of
    types that the columns must normalize to.

    If accumulate_errors is true, an InvalidValueForTypeException raised for
    a column is stored under that column's index and normalization continues;
    once all columns have been tried, an InvalidValueForTypeListException
    carrying those errors is raised. Otherwise the first
    InvalidValueForTypeException propagates immediately.

    Returns a tuple ``(types, columns)`` of two parallel lists.
    """
    data_columns = []
    row_count = 0

    for row in rows:
        # Grow the table when this row is wider than anything seen so far.
        while len(data_columns) < len(row):
            data_columns.append([None] * row_count)

        for i, value in enumerate(row):
            data_columns[i].append(value)

        # Pad the columns this (shorter) row did not reach; without this,
        # values from later rows would shift up into this row's slot.
        for column in data_columns[len(row):]:
            column.append(None)

        row_count += 1

    new_normal_types = []
    new_normal_columns = []
    errors = {}

    for i, column in enumerate(data_columns):
        try:
            if normal_types:
                t, c = normalize_column_type(column, normal_types[i], blanks_as_nulls=blanks_as_nulls)
            else:
                t, c = normalize_column_type(column, blanks_as_nulls=blanks_as_nulls)

            new_normal_types.append(t)
            new_normal_columns.append(c)
        except InvalidValueForTypeException as e:
            if not accumulate_errors:
                raise

            errors[i] = e

    if errors:
        raise InvalidValueForTypeListException(errors)

    return new_normal_types, new_normal_columns
101 changes: 0 additions & 101 deletions tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,82 +99,6 @@ def test_from_csv_dev_null(self):
with open('/dev/null', 'r') as f:
table.Table.from_csv(f)

def test_to_csv(self):
    """Round-trip a CSV fixture through Table (currently skipped)."""
    # The unconditional skip means nothing below this line executes.
    raise SkipTest
    with open('examples/testfixed_converted.csv', 'r') as f:
        contents = f.read()
        f.seek(0)
        buf = six.StringIO()
        loaded = table.Table.from_csv(f)
        loaded.to_csv(buf)
        conversion = buf.getvalue()
        buf.close()

    self.assertEqual(contents, conversion)

def test_table_append(self):
    """Appending to an empty table stores the column at position 0."""
    column = table.Column(0, u'test', [u'test', u'column', u''])
    tbl = table.Table()
    tbl.append(column)

    self.assertEqual(len(tbl), 1)
    self.assertEqual(tbl[0], column)

def test_table_append_duplicate_name(self):
    """Appending same-named columns yields numbered, unique names."""
    tbl = table.Table()
    for _ in range(3):
        tbl.append(table.Column(0, u'test', [u'test', u'column', u'']))

    self.assertEqual(tbl[0].name, 'test')
    self.assertEqual(tbl[1].name, 'test_2')
    self.assertEqual(tbl[2].name, 'test_3')

def test_table_insert(self):
    """Inserting at the front shifts the existing column and reorders."""
    first, second = [
        table.Column(0, u'test', [u'test', u'column', u''])
        for _ in range(2)
    ]
    tbl = table.Table([first])
    tbl.insert(0, second)

    self.assertEqual(len(tbl), 2)
    self.assertEqual(tbl[0], second)
    self.assertEqual(tbl[1], first)
    self.assertEqual(tbl[0].order, 0)
    self.assertEqual(tbl[1].order, 1)

def test_table_extend(self):
    """Extending keeps column sequence and assigns consecutive orders."""
    c, c2, c3 = [
        table.Column(0, u'test', [u'test', u'column', u''])
        for _ in range(3)
    ]
    tbl = table.Table([c])
    tbl.extend([c2, c3])

    self.assertEqual(len(tbl), 3)
    self.assertEqual(tbl[0], c)
    self.assertEqual(tbl[1], c2)
    self.assertEqual(tbl[2], c3)
    self.assertEqual(tbl[0].order, 0)
    self.assertEqual(tbl[1].order, 1)
    self.assertEqual(tbl[2].order, 2)

def test_table_remove(self):
    """Removing the middle column closes the gap and reorders the rest."""
    c, c2, c3 = [
        table.Column(0, u'test', [u'test', u'column', u''])
        for _ in range(3)
    ]
    tbl = table.Table([c, c2, c3])
    tbl.remove(c2)

    self.assertEqual(len(tbl), 2)
    self.assertEqual(tbl[0], c)
    self.assertEqual(tbl[1], c3)
    self.assertEqual(tbl[0].order, 0)
    self.assertEqual(tbl[1].order, 1)

def test_table_sort(self):
    """Sorting a table is forbidden."""
    empty = table.Table()
    self.assertRaises(NotImplementedError, empty.sort)

def test_table_reverse(self):
    """Reversing a table is forbidden."""
    empty = table.Table()
    self.assertRaises(NotImplementedError, empty.reverse)

def test_table_count_rows(self):
c = table.Column(0, u'test', [u'test', u'column', u''])
c_short = table.Column(0, u'test', [u'test'])
Expand All @@ -187,28 +111,3 @@ def test_table_count_rows(self):
self.assertEqual(t.count_rows(), 3)
t.append(c_long)
self.assertEqual(t.count_rows(), 4)

def test_table_row(self):
    """row(1) returns the second value of each of the three columns."""
    columns = [
        table.Column(0, u'test', [u'test', u'column', u''])
        for _ in range(3)
    ]
    tbl = table.Table(columns)

    self.assertEqual(tbl.row(1), [u'column', u'column', u'column'])

def test_table_row_out_of_bounds(self):
    """Row numbers below zero or past the last row raise IndexError."""
    columns = [
        table.Column(0, u'test', [u'test', u'column', u''])
        for _ in range(3)
    ]
    tbl = table.Table(columns)

    for bad_index in (-1, 3):
        self.assertRaises(IndexError, tbl.row, bad_index)

def test_table_uneven_columns(self):
    """Rows read from columns of differing lengths are filled with None."""
    c = table.Column(0, u'test', [u'test', u'column', u''])
    c_short = table.Column(0, u'test', [u'test'])
    c_long = table.Column(0, u'test', [u'', u'', u'', u'way out here'])
    tbl = table.Table([c, c_short, c_long])

    expected_rows = [
        [u'test', u'test', None],
        [u'column', None, None],
        [None, None, None],
        [None, None, u'way out here'],
    ]
    for number, expected in enumerate(expected_rows):
        self.assertEqual(tbl.row(number), expected)
67 changes: 1 addition & 66 deletions tests/test_typeinference.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from csvkit import typeinference

from csvkit.exceptions import InvalidValueForTypeException, InvalidValueForTypeListException
from csvkit.exceptions import InvalidValueForTypeException

NoneType = type(None)

Expand Down Expand Up @@ -187,68 +187,3 @@ def test_jeremy_singer_vine_datetimes(self):
This obscure test named after Jeremy Singer-Vine, who discovered it.
"""
self.assertEqual((six.text_type, [u'P', u'H', u'H']), typeinference.normalize_column_type([u'P', u'H', u'H']))

def test_normalize_table(self):
    """Types are inferred per column for a table with one short row."""
    expected_types = [six.text_type, int, float, NoneType]
    data = [
        [u'a', u'1', u'2.1', u''],
        [u'b', u'5', u'4.1'],
        [u'c', u'100', u'100.9999', u''],
        [u'd', u'2', u'5.3', u''],
    ]
    types, columns = typeinference.normalize_table(data)

    self.assertEqual(4, len(types))
    self.assertEqual(4, len(columns))

    for i, (c, t, et) in enumerate(zip(columns, types, expected_types)):
        self.assertEqual(et, t)
        for row, normalized in zip(data, c):
            if t is not NoneType:
                self.assertEqual(t(row[i]), normalized)
            else:
                self.assertTrue(normalized is None)

def test_normalize_table_known_types(self):
    """Columns normalize to the explicitly requested types."""
    normal_types = [six.text_type, int, float, NoneType]
    data = [
        [u'a', u'1', u'2.1', u''],
        [u'b', u'5', u'4.1'],
        [u'c', u'100', u'100.9999', u''],
        [u'd', u'2', u'5.3', u''],
    ]
    types, columns = typeinference.normalize_table(data, normal_types)

    self.assertEqual(4, len(types))
    self.assertEqual(4, len(columns))

    for i, (c, t, et) in enumerate(zip(columns, types, normal_types)):
        self.assertEqual(et, t)
        for row, normalized in zip(data, c):
            if t is not NoneType:
                self.assertEqual(t(row[i]), normalized)
            else:
                self.assertTrue(normalized is None)

def test_normalize_table_known_types_invalid(self):
    """With accumulate_errors, per-column failures are reported together."""
    normal_types = [bool, int, int, NoneType]
    data = [
        [u'a', u'1', u'2.1', u''],
        [u'b', u'5', u'4.1'],
        [u'c', u'100', u'100.9999', u''],
        [u'd', u'2', u'5.3', u''],
    ]

    try:
        typeinference.normalize_table(data, normal_types, accumulate_errors=True)
    except InvalidValueForTypeListException as e:
        self.assertEqual(len(e.errors), 2)

        # Errors are looked up by column number: columns 0 and 2 fail.
        first = e.errors[0]
        self.assertEqual(first.index, 0)
        self.assertEqual(first.value, 'a')
        self.assertEqual(first.normal_type, bool)

        third = e.errors[2]
        self.assertEqual(third.index, 0)
        self.assertEqual(third.value, '2.1')
        self.assertEqual(third.normal_type, int)
    else:
        # Reaching this branch means no exception was raised: force a failure.
        self.assertEqual(True, False)

0 comments on commit 3e5f5a2

Please sign in to comment.