Implement CSV read and write. Closes #168. Closes #169.

wireservice · Aug 29, 2015 · c2a5b8f · c2a5b8f
1 parent 07ff7ed
commit c2a5b8f
Show file tree

Hide file tree

Showing 6 changed files with 103 additions and 61 deletions.
diff --git a/CHANGELOG b/CHANGELOG
@@ -1,6 +1,8 @@
 0.6.0
 -----
 
+* Table.to_csv implemented. (#169)
+* Table.from_csv implemented. (#168)
 * Added Table.format method for pretty-printing tables. (#191)
 * Analysis class now implements a caching workflow. (#171)
 

diff --git a/agate/table.py b/agate/table.py
@@ -13,6 +13,11 @@
 except ImportError: # pragma: no cover
     from ordereddict import OrderedDict
 
+try:
+    import csvkit as csv
+except ImportError:
+    import csv
+
 import six
 
 from agate.aggregators import Sum, Mean, Median, StDev, MAD
@@ -107,6 +112,53 @@ def _fork(self, rows, column_info=None):
 
         return Table(rows, column_info)
 
+    @classmethod
+    def from_csv(cls, path, column_info, header=True, **kwargs):
+        """
+        Create a new table from a CSV. Will use csvkit if it is available,
+        otherwise will use Python's builtin csv module. ``kwargs`` will be
+        passed through to :meth:`csv.reader`.
+
+        Note: if using Python 2 and not using csvkit, this method is not
+        unicode-safe.
+
+        :param path: Path to the CSV file to read from.
+        :param column_info: See :class:`.Table` constructor.
+        :param header: If True, the first row is removed as column names.
+        :raises ValueError: If the header and column_info lengths differ.
+        """
+        with open(path) as f:
+            rows = list(csv.reader(f, **kwargs))
+
+        if header:
+            # Names exist only in this branch (NameError if checked outside).
+            if len(rows.pop(0)) != len(column_info):
+                raise ValueError('CSV header does not match column_info.')
+
+        return Table(rows, column_info)
+
+    def to_csv(self, path, **kwargs):
+        """
+        Write table to a CSV. Will use csvkit if it is available, otherwise
+        will use Python's builtin csv module. ``kwargs`` will be passed
+        through to :meth:`csv.writer`.
+
+        Note: if using Python 2 and not using csvkit, this method is not
+        unicode-safe.
+
+        :param path: Path to the CSV file to write to.
+        """
+        if 'lineterminator' not in kwargs:
+            kwargs['lineterminator'] = '\n'
+
+        with open(path, 'w') as f:
+            writer = csv.writer(f, **kwargs)
+            # Column names go out first, then one line per data row.
+            writer.writerow(self._column_names)
+
+            for row in self._data:
+                writer.writerow(row)
+
     def get_column_types(self):
         """
         Get an ordered list of this table's column types.

diff --git a/docs/cookbook/basics.rst b/docs/cookbook/basics.rst
@@ -2,12 +2,16 @@
 The basics
 ==========
 
+You can always use Python's builtin :mod:`csv` to read and write CSV files, but agate also includes shortcuts to save time.
+
+.. note::
+
+    If you have `csvkit <http://csvkit.rtfd.org/>`_ installed, agate will use it instead of Python's builtin :mod:`csv`. The builtin module is not unicode-safe for Python 2, so it is strongly suggested that you do install csvkit.
+
 Loading a table from a CSV
 ==========================
 
-You can use Python's builtin :mod:`csv` to read CSV files.
-
-If your file does not have headers:
+Assuming your file has a single row of headers:
 
 .. code-block:: python
 
@@ -22,58 +26,17 @@ If your file does not have headers:
         ('population', number_type)
     )
 
-    with open('population.csv') as f:
-        rows = list(csv.reader(f)
-
-    table = Table(rows, columns)
-
-If your file does have headers (and you want to use them):
-
-.. code-block:: python
-
-    with open('population.csv') as f:
-        rows = list(csv.reader(f))
-
-    column_names = rows.pop(0)
-    column_types = (text_type, number_type, number_type)
-
-    table = Table(rows, zip(column_names, column_types))
+    table = Table.from_csv('population.csv', columns)
 
-Loading a table from a CSV w/ csvkit
-====================================
-
-Of course, cool kids use `csvkit <http://csvkit.rtfd.org/>`_. (Hint: it supports unicode!)
+If your file does not have headers:
 
 .. code-block:: python
 
-    import csvkit
-
-    with open('population.csv') as f:
-        rows = list(csvkit.reader(f))
-
-    column_names = rows.pop(0)
-    column_types = (text_type, number_type, number_type)
-
-    table = Table(rows, zip(column_names, column_types))
+    table = Table.from_csv('population.csv', columns, header=False)
 
 Writing a table to a CSV
 ========================
 
 .. code-block:: python
 
-    with open('output.csv') as f:
-        writer = csv.writer(f)
-
-        writer.writerow(table.get_column_names())
-        writer.writerows(table.rows)
-
-Writing a table to a CSV w/ csvkit
-==================================
-
-.. code-block:: python
-
-    with open('output.csv') as f:
-        writer = csvkit.writer(f)
-
-        writer.writerow(table.get_column_names())
-        writer.writerows(table.rows)
+    table.to_csv('output.csv')
diff --git a/docs/tutorial.rst b/docs/tutorial.rst
@@ -99,25 +99,15 @@ You'll notice here that we define the names and types as pairs (tuples), which i
 Loading data from a CSV
 =======================
 
-Now let's read the data in the CSV file and use it to create the table.
+The :class:`.Table` is the basic class in agate. A time-saving method is included to load table data from CSV:
 
 .. code-block:: python
 
-    with open('exonerations-20150828.csv') as f:
-        # Create a CSV reader
-        reader = csv.reader(f)
-
-        # Skip header
-        next(reader)
-
-        # Create the table
-        exonerations = agate.Table(reader, COLUMNS)
-
-:class:`.Table` will accept any array (iterable) of rows (iterables) as its first argument. In this case we're using a CSV reader.
+    exonerations = agate.Table.from_csv('exonerations-20150828.csv', COLUMNS)
 
 .. note::
 
-    The data is copied when the table is constructed so it safe to close the file handle immediately.
+    If you have data that you've generated in another way, you can always pass it to the :class:`.Table` constructor directly.
 
 Aggregating column data
 =======================

diff --git a/examples/test.csv b/examples/test.csv
@@ -0,0 +1,4 @@
+one,two,three
+1,4,a
+2,3,b
+,2,c
diff --git a/tests/test_table.py b/tests/test_table.py
@@ -5,6 +5,8 @@
 except ImportError: #pragma: no cover
     from decimal import Decimal
 
+import os
+
 try:
     import unittest2 as unittest
 except ImportError:
@@ -51,6 +53,35 @@ def test_create_duplicate_column_names(self):
         with self.assertRaises(ValueError):
             Table(self.rows, columns)
 
+    def test_from_csv(self):
+        table1 = Table(self.rows, self.columns)
+        table2 = Table.from_csv('examples/test.csv', self.columns)
+        # Schema: column names and types must match the in-memory table.
+        self.assertSequenceEqual(table1.get_column_names(), table2.get_column_names())
+        self.assertSequenceEqual(table1.get_column_types(), table2.get_column_types())
+        # Shape: same number of columns and rows.
+        self.assertEqual(len(table1.columns), len(table2.columns))
+        self.assertEqual(len(table1.rows), len(table2.rows))
+        # Data: the first three rows compare equal.
+        self.assertSequenceEqual(table1.rows[0], table2.rows[0])
+        self.assertSequenceEqual(table1.rows[1], table2.rows[1])
+        self.assertSequenceEqual(table1.rows[2], table2.rows[2])
+
+    def test_to_csv(self):
+        table = Table(self.rows, self.columns)
+
+        table.to_csv('.test.csv')
+
+        # Remove the scratch file even if the comparison below fails.
+        try:
+            with open('.test.csv') as f:
+                contents1 = f.read()
+            with open('examples/test.csv') as f:
+                contents2 = f.read()
+            self.assertEqual(contents1, contents2)
+        finally:
+            os.remove('.test.csv')
+
     def test_get_column_types(self):
         table = Table(self.rows, self.columns)