Skip to content

Commit

Permalink
Merge pull request #543 from onyxfish/table
Browse files — browse the repository at this point in the history
Trim down csvkit.table.Table
  • Loading branch information
James McKinney committed Jan 30, 2016
2 parents 0c42d1c + 732e8a4 commit 523f2d2
Show file tree
Hide file tree
Showing 11 changed files with 31 additions and 357 deletions.
19 changes: 0 additions & 19 deletions csvkit/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,6 @@ def length(self):
return len(self.row)


class CSVJSONException(CustomException):
    """
    Raised when a problem occurs while converting data to CSV.
    """


class InvalidValueForTypeException(CustomException):
"""
Exception raised when a value can not be normalized to a specified type.
Expand All @@ -69,18 +62,6 @@ def __init__(self, index, value, normal_type):
super(InvalidValueForTypeException, self).__init__(msg)


class InvalidValueForTypeListException(CustomException):
    """
    Aggregate error raised after one or more InvalidValueForTypeException
    instances were collected while accumulating errors.

    The collected errors are kept on the ``errors`` attribute.
    """

    def __init__(self, errors):
        self.errors = errors
        failed_column_count = len(errors)
        super(InvalidValueForTypeListException, self).__init__(
            'Encountered errors converting values in %i columns' % failed_column_count
        )


class RequiredHeaderError(CustomException):
"""
Exception raised when an operation requires a CSV file to have a header row.
Expand Down
8 changes: 0 additions & 8 deletions csvkit/headers.py

This file was deleted.

18 changes: 0 additions & 18 deletions csvkit/sniffer.py

This file was deleted.

117 changes: 25 additions & 92 deletions csvkit/table.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,36 @@
#!/usr/bin/env python

import csv
import datetime
import itertools

import agate
import six

from csvkit import sniffer
from csvkit import typeinference
from csvkit.cli import parse_column_identifiers
from csvkit.headers import make_default_headers

POSSIBLE_DELIMITERS = [',', '\t', ';', ' ', ':', '|']


def make_default_headers(n):
    """
    Build ``n`` placeholder header names (``column1`` .. ``column<n>``)
    for files that have no header row.
    """
    default_headers = []

    for position in range(1, n + 1):
        default_headers.append('column%i' % position)

    return default_headers


def sniff_dialect(sample):
    """
    A functional version of ``csv.Sniffer().sniff``, that extends the
    list of possible delimiters to include some seen in the wild.

    Returns the sniffed dialect, or ``None`` when sniffing fails.
    """
    try:
        return csv.Sniffer().sniff(sample, POSSIBLE_DELIMITERS)
    except Exception:
        # Sniffing is best-effort, so any ordinary failure yields None.
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        return None


class InvalidType(object):
Expand Down Expand Up @@ -104,68 +125,6 @@ def __init__(self, columns=[], name='new_table'):
list.__init__(self, columns)
self.name = name

def __str__(self):
    """
    Return the ``__unicode__`` description of this table passed through ``str``.
    """
    description = self.__unicode__()
    return str(description)

def __unicode__(self):
    """
    Describe every column in this table, one column per line.
    """
    column_descriptions = [six.text_type(column) for column in self]
    return '\n'.join(column_descriptions)

def _reindex_columns(self):
    """
    Rewrite the ``order`` attribute of every column so that it matches the
    column's current position in the table.
    """
    position = 0

    for column in self:
        column.order = position
        position += 1

def _deduplicate_column_name(self, column):
    """
    Rename ``column`` until its name no longer collides with a header
    already present in this table, then return the resulting name.

    A name ending in ``_<integer>`` has that integer incremented; any
    other colliding name gets ``_2`` appended.
    """
    while column.name in self.headers():
        try:
            i = column.name.rindex('_')
            counter = int(column.name[i + 1:])
        except ValueError:
            # Either there is no underscore (rindex) or the text after the
            # last underscore is not an integer (int): start a new counter.
            column.name += '_2'
        else:
            column.name = '%s_%i' % (column.name[:i], counter + 1)

    return column.name

def append(self, column):
    """Implements list append.

    The column is renamed first if its name collides with an existing
    header, then stored at the end of the table and told its position.
    """
    self._deduplicate_column_name(column)

    list.append(self, column)

    position = len(self) - 1
    # insert/extend/remove record positions in ``order`` (through
    # _reindex_columns); set it here too so an appended column agrees
    # with columns added by the other mutators.
    column.order = position
    # Kept for backward compatibility with code that reads ``index``.
    column.index = position

def insert(self, i, column):
    """
    List-style insert: deduplicate the column's name, place the column at
    position ``i``, then refresh the ``order`` of every column.
    """
    self._deduplicate_column_name(column)
    list.insert(self, i, column)
    self._reindex_columns()

def extend(self, columns):
    """Implements list extend.

    Each incoming column is renamed if its name collides with a header
    already in the table, including columns added earlier in the same
    call, and the ``order`` of every column is refreshed afterwards.
    """
    # Materialize first: a one-shot iterable (e.g. a generator) would
    # otherwise be consumed by the deduplication pass and add nothing.
    for column in list(columns):
        self._deduplicate_column_name(column)
        # Store immediately so later columns in this batch are checked
        # against this one as well as against the pre-existing headers.
        list.append(self, column)

    self._reindex_columns()

def remove(self, column):
    """
    List-style remove: drop ``column`` from the table, then refresh the
    ``order`` of the remaining columns.
    """
    list.remove(self, column)
    self._reindex_columns()

def sort(self):
    """
    Sorting is not supported; always raises NotImplementedError.
    """
    raise NotImplementedError

def reverse(self):
    """
    Reversing is not supported; always raises NotImplementedError.
    """
    raise NotImplementedError

def headers(self):
    """
    Return the ``name`` of every column, in table order.
    """
    names = []

    for column in self:
        names.append(column.name)

    return names

Expand All @@ -177,20 +136,6 @@ def count_rows(self):

return 0

def row(self, i):
    """
    Return the values at row ``i``, one per column, in column order.

    Raises IndexError when ``i`` is negative or is not smaller than the
    value returned by ``count_rows()``.
    """
    if i < 0:
        raise IndexError('Negative row numbers are not valid.')

    total_rows = self.count_rows()

    if i >= total_rows:
        raise IndexError('Row number exceeds the number of rows in the table.')

    return [column[i] for column in self]

@classmethod
def from_csv(cls, f, name='from_csv_table', snifflimit=None, column_ids=None, blanks_as_nulls=True, zero_based=False, infer_types=True, no_header_row=False, **kwargs):
"""
Expand All @@ -207,9 +152,9 @@ def from_csv(cls, f, name='from_csv_table', snifflimit=None, column_ids=None, bl

# snifflimit == 0 means do not sniff
if snifflimit is None:
kwargs['dialect'] = sniffer.sniff_dialect(contents)
kwargs['dialect'] = sniff_dialect(contents)
elif snifflimit > 0:
kwargs['dialect'] = sniffer.sniff_dialect(contents[:snifflimit])
kwargs['dialect'] = sniff_dialect(contents[:snifflimit])

f = six.StringIO(contents)
rows = agate.reader(f, **kwargs)
Expand Down Expand Up @@ -281,15 +226,3 @@ def to_rows(self, serialize_dates=False):
return list(zip(*out_columns))
else:
return list(zip(*self))

def to_csv(self, output, **kwargs):
    """
    Write the table as CSV to ``output``: the header row first, followed by
    the rows from ``to_rows(serialize_dates=True)``.

    Extra keyword arguments are forwarded to ``agate.writer``.
    """
    data_rows = self.to_rows(serialize_dates=True)
    header_row = self.headers()

    writer = agate.writer(output, **kwargs)
    writer.writerows([header_row] + data_rows)
48 changes: 1 addition & 47 deletions csvkit/typeinference.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from dateutil.parser import parse
import six

from csvkit.exceptions import InvalidValueForTypeException, InvalidValueForTypeListException
from csvkit.exceptions import InvalidValueForTypeException

NoneType = type(None)

Expand Down Expand Up @@ -201,49 +201,3 @@ def normalize_column_type(l, normal_type=None, blanks_as_nulls=True):
return six.text_type, [x if x != '' else None for x in l]
else:
return six.text_type, l


def normalize_table(rows, normal_types=None, accumulate_errors=False, blanks_as_nulls=True):
    """
    Given a sequence of sequences, normalize the lot.

    The rows are first pivoted into columns, then each column is passed to
    ``normalize_column_type``.

    :param rows: Iterable of row sequences; rows may differ in length.
    :param normal_types: Optional list, indexed by column position, of the
        types the columns must normalize to.
    :param accumulate_errors: When false, the first
        InvalidValueForTypeException propagates immediately. When true,
        the errors are collected by column index and raised together as an
        InvalidValueForTypeListException after every column was tried.
    :param blanks_as_nulls: Forwarded to ``normalize_column_type``.
    :returns: A ``(types, columns)`` tuple of parallel lists.
    """
    data_columns = []
    row_count = 0

    for row in rows:
        # A row wider than any seen so far opens new columns, back-filled
        # with None for all of the rows already processed.
        while len(data_columns) < len(row):
            data_columns.append([None] * row_count)

        for i, value in enumerate(row):
            data_columns[i].append(value)

        # A row narrower than the table must still contribute a cell to
        # every column, otherwise later values shift up into this row.
        for column in data_columns[len(row):]:
            column.append(None)

        row_count += 1

    new_normal_types = []
    new_normal_columns = []
    errors = {}

    for i, column in enumerate(data_columns):
        try:
            if normal_types:
                t, c = normalize_column_type(column, normal_types[i], blanks_as_nulls=blanks_as_nulls)
            else:
                t, c = normalize_column_type(column, blanks_as_nulls=blanks_as_nulls)
        except InvalidValueForTypeException as e:
            if not accumulate_errors:
                raise

            errors[i] = e
        else:
            new_normal_types.append(t)
            new_normal_columns.append(c)

    if errors:
        raise InvalidValueForTypeListException(errors)

    return new_normal_types, new_normal_columns
2 changes: 1 addition & 1 deletion csvkit/utilities/csvcut.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import agate

from csvkit.cli import CSVKitUtility, parse_column_identifiers
from csvkit.headers import make_default_headers
from csvkit.table import make_default_headers


class CSVCut(CSVKitUtility):
Expand Down
2 changes: 1 addition & 1 deletion csvkit/utilities/csvgrep.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from csvkit.cli import CSVKitUtility, parse_column_identifiers
from csvkit.grep import FilteringCSVReader
from csvkit.headers import make_default_headers
from csvkit.table import make_default_headers


class CSVGrep(CSVKitUtility):
Expand Down
2 changes: 1 addition & 1 deletion csvkit/utilities/csvlook.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import six

from csvkit.cli import CSVKitUtility
from csvkit.headers import make_default_headers
from csvkit.table import make_default_headers


class CSVLook(CSVKitUtility):
Expand Down
2 changes: 1 addition & 1 deletion csvkit/utilities/csvstack.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import agate

from csvkit.cli import CSVKitUtility
from csvkit.headers import make_default_headers
from csvkit.table import make_default_headers


class CSVStack(CSVKitUtility):
Expand Down

0 comments on commit 523f2d2

Please sign in to comment.