Merge pull request #543 from onyxfish/table

Trim down csvkit.table.Table
wireservice · Jan 30, 2016 · 523f2d2
2 parents 0c42d1c + 732e8a4
commit 523f2d2
Show file tree

Hide file tree

Showing 11 changed files with 31 additions and 357 deletions.
diff --git a/csvkit/exceptions.py b/csvkit/exceptions.py
@@ -49,13 +49,6 @@ def length(self):
         return len(self.row)
 
 
-class CSVJSONException(CustomException):
-    """
-    Exception raised when there is a problem converting data to CSV.
-    """
-    pass
-
-
 class InvalidValueForTypeException(CustomException):
     """
     Exception raised when a value can not be normalized to a specified type.
@@ -69,18 +62,6 @@ def __init__(self, index, value, normal_type):
         super(InvalidValueForTypeException, self).__init__(msg)
 
 
-class InvalidValueForTypeListException(CustomException):
-    """
-    Exception raised when one or more InvalidValueForTypeException
-    has been raised while accumulating errors.
-    """
-
-    def __init__(self, errors):
-        self.errors = errors
-        msg = 'Encountered errors converting values in %i columns' % len(errors)
-        super(InvalidValueForTypeListException, self).__init__(msg)
-
-
 class RequiredHeaderError(CustomException):
     """
     Exception raised when an operation requires a CSV file to have a header row.

diff --git a/csvkit/headers.py b/csvkit/headers.py
diff --git a/csvkit/sniffer.py b/csvkit/sniffer.py
diff --git a/csvkit/table.py b/csvkit/table.py
@@ -1,15 +1,36 @@
 #!/usr/bin/env python
 
+import csv
 import datetime
 import itertools
 
 import agate
 import six
 
-from csvkit import sniffer
 from csvkit import typeinference
 from csvkit.cli import parse_column_identifiers
-from csvkit.headers import make_default_headers
+
+POSSIBLE_DELIMITERS = [',', '\t', ';', ' ', ':', '|']
+
+
+def make_default_headers(n):
+    """
+    Build placeholder names ``column1`` .. ``columnN`` for a headerless file.
+    """
+    return ['column%i' % number for number in range(1, n + 1)]
+
+
+def sniff_dialect(sample):
+    """
+    A functional version of ``csv.Sniffer().sniff`` that tries the
+    delimiters in ``POSSIBLE_DELIMITERS``, including some seen in the wild.
+
+    Returns the sniffed dialect, or ``None`` if sniffing fails.
+    """
+    try:
+        return csv.Sniffer().sniff(sample, POSSIBLE_DELIMITERS)
+    except Exception:  # not bare: keep Ctrl-C and SystemExit working
+        return None
 
 
 class InvalidType(object):
@@ -104,68 +125,6 @@ def __init__(self, columns=[], name='new_table'):
         list.__init__(self, columns)
         self.name = name
 
-    def __str__(self):
-        return str(self.__unicode__())
-
-    def __unicode__(self):
-        """
-        Stringify a description of all columns in this table.
-        """
-        return '\n'.join([six.text_type(c) for c in self])
-
-    def _reindex_columns(self):
-        """
-        Update order properties of all columns in table.
-        """
-        for i, c in enumerate(self):
-            c.order = i
-
-    def _deduplicate_column_name(self, column):
-        while column.name in self.headers():
-            try:
-                i = column.name.rindex('_')
-                counter = int(column.name[i + 1:])
-                column.name = '%s_%i' % (column.name[:i], counter + 1)
-            except:
-                column.name += '_2'
-
-        return column.name
-
-    def append(self, column):
-        """Implements list append."""
-        self._deduplicate_column_name(column)
-
-        list.append(self, column)
-        column.index = len(self) - 1
-
-    def insert(self, i, column):
-        """Implements list insert."""
-        self._deduplicate_column_name(column)
-
-        list.insert(self, i, column)
-        self._reindex_columns()
-
-    def extend(self, columns):
-        """Implements list extend."""
-        for c in columns:
-            self._deduplicate_column_name(c)
-
-        list.extend(self, columns)
-        self._reindex_columns()
-
-    def remove(self, column):
-        """Implements list remove."""
-        list.remove(self, column)
-        self._reindex_columns()
-
-    def sort(self):
-        """Forbids list sort."""
-        raise NotImplementedError()
-
-    def reverse(self):
-        """Forbids list reverse."""
-        raise NotImplementedError()
-
     def headers(self):
         return [c.name for c in self]
 
@@ -177,20 +136,6 @@ def count_rows(self):
 
         return 0
 
-    def row(self, i):
-        """
-        Fetch a row of data from this table.
-        """
-        if i < 0:
-            raise IndexError('Negative row numbers are not valid.')
-
-        if i >= self.count_rows():
-            raise IndexError('Row number exceeds the number of rows in the table.')
-
-        row_data = [c[i] for c in self]
-
-        return row_data
-
     @classmethod
     def from_csv(cls, f, name='from_csv_table', snifflimit=None, column_ids=None, blanks_as_nulls=True, zero_based=False, infer_types=True, no_header_row=False, **kwargs):
         """
@@ -207,9 +152,9 @@ def from_csv(cls, f, name='from_csv_table', snifflimit=None, column_ids=None, bl
 
         # snifflimit == 0 means do not sniff
         if snifflimit is None:
-            kwargs['dialect'] = sniffer.sniff_dialect(contents)
+            kwargs['dialect'] = sniff_dialect(contents)
         elif snifflimit > 0:
-            kwargs['dialect'] = sniffer.sniff_dialect(contents[:snifflimit])
+            kwargs['dialect'] = sniff_dialect(contents[:snifflimit])
 
         f = six.StringIO(contents)
         rows = agate.reader(f, **kwargs)
@@ -281,15 +226,3 @@ def to_rows(self, serialize_dates=False):
             return list(zip(*out_columns))
         else:
             return list(zip(*self))
-
-    def to_csv(self, output, **kwargs):
-        """
-        Serializes the table to CSV and writes it to any file-like object.
-        """
-        rows = self.to_rows(serialize_dates=True)
-
-        # Insert header row
-        rows.insert(0, self.headers())
-
-        csv_writer = agate.writer(output, **kwargs)
-        csv_writer.writerows(rows)
diff --git a/csvkit/typeinference.py b/csvkit/typeinference.py
@@ -5,7 +5,7 @@
 from dateutil.parser import parse
 import six
 
-from csvkit.exceptions import InvalidValueForTypeException, InvalidValueForTypeListException
+from csvkit.exceptions import InvalidValueForTypeException
 
 NoneType = type(None)
 
@@ -201,49 +201,3 @@ def normalize_column_type(l, normal_type=None, blanks_as_nulls=True):
         return six.text_type, [x if x != '' else None for x in l]
     else:
         return six.text_type, l
-
-
-def normalize_table(rows, normal_types=None, accumulate_errors=False, blanks_as_nulls=True):
-    """
-    Given a sequence of sequences, normalize the lot.
-
-    Optionally accepts a normal_types parameter which is a list of
-    types that the columns must normalize to.
-    """
-    data_columns = []
-    column_count = 0
-    row_count = 0
-
-    for row in rows:
-        while column_count < len(row):
-            data_columns.append([None] * row_count)
-            column_count += 1
-
-        for i, value in enumerate(row):
-            data_columns[i].append(value)
-
-        row_count += 1
-
-    new_normal_types = []
-    new_normal_columns = []
-    errors = {}
-
-    for i, column in enumerate(data_columns):
-        try:
-            if normal_types:
-                t, c = normalize_column_type(column, normal_types[i], blanks_as_nulls=blanks_as_nulls)
-            else:
-                t, c = normalize_column_type(column, blanks_as_nulls=blanks_as_nulls)
-
-            new_normal_types.append(t)
-            new_normal_columns.append(c)
-        except InvalidValueForTypeException as e:
-            if not accumulate_errors:
-                raise
-
-            errors[i] = e
-
-    if errors:
-        raise InvalidValueForTypeListException(errors)
-
-    return new_normal_types, new_normal_columns
diff --git a/csvkit/utilities/csvcut.py b/csvkit/utilities/csvcut.py
@@ -14,7 +14,7 @@
 import agate
 
 from csvkit.cli import CSVKitUtility, parse_column_identifiers
-from csvkit.headers import make_default_headers
+from csvkit.table import make_default_headers
 
 
 class CSVCut(CSVKitUtility):

diff --git a/csvkit/utilities/csvgrep.py b/csvkit/utilities/csvgrep.py
@@ -8,7 +8,7 @@
 
 from csvkit.cli import CSVKitUtility, parse_column_identifiers
 from csvkit.grep import FilteringCSVReader
-from csvkit.headers import make_default_headers
+from csvkit.table import make_default_headers
 
 
 class CSVGrep(CSVKitUtility):

diff --git a/csvkit/utilities/csvlook.py b/csvkit/utilities/csvlook.py
@@ -6,7 +6,7 @@
 import six
 
 from csvkit.cli import CSVKitUtility
-from csvkit.headers import make_default_headers
+from csvkit.table import make_default_headers
 
 
 class CSVLook(CSVKitUtility):

diff --git a/csvkit/utilities/csvstack.py b/csvkit/utilities/csvstack.py
@@ -5,7 +5,7 @@
 import agate
 
 from csvkit.cli import CSVKitUtility
-from csvkit.headers import make_default_headers
+from csvkit.table import make_default_headers
 
 
 class CSVStack(CSVKitUtility):