TableSet.to_csv and TableSet.from_csv. Closes #194. Closes #195.

wireservice · Aug 29, 2015 · 0f5b307 · 0f5b307
1 parent c2a5b8f
commit 0f5b307
Show file tree

Hide file tree

Showing 8 changed files with 117 additions and 16 deletions.
diff --git a/CHANGELOG b/CHANGELOG
@@ -1,6 +1,8 @@
 0.6.0
 -----
 
+* TableSet.to_csv implemented. (#195)
+* TableSet.from_csv implemented. (#194)
 * Table.to_csv implemented (#169)
 * Table.from_csv implemented. (#168)
 * Added Table.format method for pretty-printing tables. (#191)

diff --git a/agate/table.py b/agate/table.py
@@ -115,15 +115,18 @@ def _fork(self, rows, column_info=None):
     @classmethod
     def from_csv(cls, path, column_info, header=True, **kwargs):
         """
-        Create a new table for a CSV. Will use csvkit if it is available,
-        otherwise will use Python's builtin csv module. ``args`` and ``kwargs``
-        will be passed through to :meth:`csv.reader`.
+        Create a new table for a CSV. This method will use csvkit if it is
+        available, otherwise it will use Python's builtin csv module.
 
-        Note: if using Python 2 and not using csvkit, this method is not
+        ``kwargs`` will be passed through to :meth:`csv.reader`.
+
+        If you are using Python 2 and not using csvkit, this method is not
         unicode-safe.
 
         :param path: Path to the CSV file to read from.
         :param column_info: See :class:`.Table` constructor.
+        :param header: If `True`, the first row of the CSV is assumed to contain
+            headers and will be skipped.
         """
         with open(path) as f:
             rows = list(csv.reader(f, **kwargs))
@@ -139,11 +142,12 @@ def from_csv(cls, path, column_info, header=True, **kwargs):
 
     def to_csv(self, path, **kwargs):
         """
-        Write table to a CSV. Will use csvkit if it is available, otherwise
-        will use Python's builtin csv module. ``args`` and ``kwargs``
-        will be passed through to :meth:`csv.writer`.
+        Write this table to a CSV. This method will use csvkit if it is
+        available, otherwise it will use Python's builtin csv module.
+
+        ``kwargs`` will be passed through to :meth:`csv.writer`.
 
-        Note: if using Python 2 and not using csvkit, this method is not
+        If you are using Python 2 and not using csvkit, this method is not
         unicode-safe.
 
         :param path: Path to the CSV file to read from.

diff --git a/agate/tableset.py b/agate/tableset.py
@@ -7,6 +7,8 @@
 
 from collections import Mapping
 from copy import copy
+from glob import glob
+import os
 
 try:
     from collections import OrderedDict
@@ -86,6 +88,60 @@ def __iter__(self):
     def __len__(self):
         return self._tables.__len__()
 
+    @classmethod
+    def from_csv(cls, dir_path, column_info, header=True, **kwargs):
+        """
+        Create a new :class:`TableSet` from a directory of CSVs. This method
+        will use csvkit if it is available, otherwise it will use Python's
+        builtin csv module.
+
+        ``kwargs`` will be passed through to :meth:`csv.reader`.
+
+        If you are using Python 2 and not using csvkit, this method is not
+        unicode-safe.
+
+        :param dir_path: Path to a directory of CSV files. Each ``*.csv`` file
+            is loaded as a table named after the file, minus its extension.
+        :param column_info: See :class:`.Table` constructor.
+        :param header: If `True`, the first row of the CSV is assumed to contain
+            headers and will be skipped.
+        """
+        from agate.table import Table
+
+        if not os.path.isdir(dir_path):
+            raise IOError('Specified path doesn\'t exist or isn\'t a directory.')
+
+        tables = OrderedDict()
+        # Sorted so table order doesn't depend on directory listing order.
+        for path in sorted(glob(os.path.join(dir_path, '*.csv'))):
+            # splitext drops only the extension; strip('.csv') ate name chars.
+            name = os.path.splitext(os.path.basename(path))[0]
+            table = Table.from_csv(path, column_info, header=header, **kwargs)
+            tables[name] = table
+
+        return TableSet(tables)
+
+    def to_csv(self, dir_path, **kwargs):
+        """
+        Write each table in this set to a separate CSV in a given
+        directory. This method will use csvkit if it is available, otherwise
+        it will use Python's builtin csv module.
+
+        ``kwargs`` will be passed through to :meth:`csv.writer`.
+
+        If you are using Python 2 and not using csvkit, this method is not
+        unicode-safe.
+
+        :param dir_path: Path to the directory to write the CSV files to.
+        """
+        if not os.path.exists(dir_path):  # create the directory (and parents) if missing
+            os.makedirs(dir_path)
+
+        for name, table in self._tables.items():
+            path = os.path.join(dir_path, '%s.csv' % name)  # one file per table, named after its key
+
+            table.to_csv(path, **kwargs)
+
     def get_column_types(self):
         """
         Get an ordered list of this :class:`.TableSet`'s column types.

diff --git a/examples/tableset/table1.csv b/examples/tableset/table1.csv
@@ -0,0 +1,4 @@
+letter,number
+a,1
+a,3
+b,2
diff --git a/examples/tableset/table2.csv b/examples/tableset/table2.csv
@@ -0,0 +1,4 @@
+letter,number
+b,0
+a,2
+c,5
diff --git a/examples/tableset/table3.csv b/examples/tableset/table3.csv
@@ -0,0 +1,4 @@
+letter,number
+a,1
+a,2
+c,3
diff --git a/tests/test_analysis.py b/tests/test_analysis.py
@@ -16,14 +16,6 @@
 
 TEST_CACHE = '.agate-test'
 
-def wait_for_create(path):
-    while not os.path.exists(path):
-        sleep(1)
-
-def wait_for_delete(path):
-    while os.path.exists(path):
-        sleep(1)
-
 class TestAnalysis(unittest.TestCase):
     def setUp(self):
         self.executed_stage1 = 0

diff --git a/tests/test_tableset.py b/tests/test_tableset.py
@@ -10,6 +10,8 @@
 except ImportError: #pragma: no cover
     from decimal import Decimal
 
+import shutil
+
 try:
     import unittest2 as unittest
 except ImportError:
@@ -60,6 +62,39 @@ def test_create_tableset(self):
 
         self.assertEqual(len(tableset), 3)
 
+    def test_from_csv(self):
+        tableset1 = TableSet(self.tables)  # expected set, built from self.tables
+        tableset2 = TableSet.from_csv('examples/tableset', self.columns)  # relative path: presumably run from the repo root
+
+        self.assertSequenceEqual(tableset1.get_column_names(), tableset2.get_column_names())
+        self.assertSequenceEqual(tableset1.get_column_types(), tableset2.get_column_types())
+
+        self.assertEqual(len(tableset1), len(tableset2))
+
+        for name in ['table1', 'table2', 'table3']:  # one entry per CSV in examples/tableset
+            self.assertEqual(len(tableset1[name].columns), len(tableset2[name].columns))
+            self.assertEqual(len(tableset1[name].rows), len(tableset2[name].rows))
+
+            self.assertSequenceEqual(tableset1[name].rows[0], tableset2[name].rows[0])  # each example CSV has three data rows
+            self.assertSequenceEqual(tableset1[name].rows[1], tableset2[name].rows[1])
+            self.assertSequenceEqual(tableset1[name].rows[2], tableset2[name].rows[2])
+
+    def test_to_csv(self):
+        tableset = TableSet(self.tables)
+
+        tableset.to_csv('.test-tableset')
+        # Registered now so cleanup still runs if an assertion below fails.
+        self.addCleanup(shutil.rmtree, '.test-tableset')
+
+        for name in ['table1', 'table2', 'table3']:
+            with open('.test-tableset/%s.csv' % name) as f:
+                contents1 = f.read()
+
+            with open('examples/tableset/%s.csv' % name) as f:
+                contents2 = f.read()
+
+            self.assertEqual(contents1, contents2)
+
     def test_get_column_types(self):
         tableset = TableSet(self.tables)