Implement Table.format. Closes #191.

wireservice · Aug 29, 2015 · a5c4567 · a5c4567
1 parent 00ea72a
commit a5c4567
Show file tree

Hide file tree

Showing 5 changed files with 150 additions and 18 deletions.
diff --git a/CHANGELOG b/CHANGELOG
@@ -1,6 +1,7 @@
 0.6.0
 -----
 
+* Added Table.format method for pretty-printing tables. (#191)
 * Analysis class now implements a caching workflow. (#171)
 
 0.5.0

diff --git a/agate/table.py b/agate/table.py
@@ -6,6 +6,7 @@
 """
 
 from copy import copy
+from itertools import chain
 
 try:
     from collections import OrderedDict
@@ -71,7 +72,7 @@ def __init__(self, rows, column_info):
 
     def _get_column(self, i):
         """
-        Get a Column of data, caching a copy for next request.
+        Get a :class:`.Column` of data, caching a copy for next request.
         """
         if i not in self._cached_columns:
             column_type = self._column_types[i]
@@ -82,7 +83,7 @@ def _get_column(self, i):
 
     def _get_row(self, i):
         """
-        Get a Row of data, caching a copy for the next request.
+        Get a :class:`.Row` of data, caching a copy for the next request.
         """
         if i not in self._cached_rows:
             # If rows are from a fork, they are safe to access directly
@@ -512,3 +513,98 @@ def compute(self, computations):
             new_rows.append(tuple(row) + new_columns)
 
         return self._fork(new_rows, zip(column_names, column_types))
+
+    def format(self, max_rows=None, max_columns=None):
+        """
+        Formats a text preview of this table.
+
+        :param max_rows: The maximum number of rows to display before
+            truncating the data.
+        :param max_columns: The maximum number of columns to display before
+            truncating the data.
+
+        :returns: A unicode representation of this table suitable for printing
+            to the console.
+        """
+        if max_rows is None:
+            max_rows = len(self._data)
+
+        if max_columns is None:
+            max_columns = len(self._column_names)
+
+        widths = []
+        rows_truncated = False
+        columns_truncated = False
+
+        for i, row in enumerate(chain([self._column_names], self._data)):
+            if i >= max_rows + 1:
+                rows_truncated = True
+
+                break
+
+            for j, v in enumerate(row):
+                if j >= max_columns:
+                    columns_truncated = True
+
+                    try:
+                        widths[j] = 3
+                    except IndexError:
+                        widths.append(3)
+
+                    break
+
+                v = six.text_type(v)
+
+                try:
+                    if len(v) > widths[j]:
+                        widths[j] = len(v)
+                except IndexError:
+                    widths.append(len(v))
+
+        def _format_row(row):
+            """
+            Helper function that formats individual rows.
+            """
+            row_output = []
+
+            for j, d in enumerate(row):
+                if j >= max_columns:
+                    break
+
+                if d is None:
+                    d = ''
+                row_output.append(' %s ' % six.text_type(d).ljust(widths[j]))
+
+            if columns_truncated:
+                row_output.append(' %s ' % six.text_type('...').ljust(widths[j]))
+
+            return '| %s |' % ('|'.join(row_output))
+
+        # Dashes span each width with '+' character at intersection of
+        # horizontal and vertical dividers.
+        divider = '|--' + '-+-'.join('-' * w for w in widths) + '--|'
+
+        output = []
+
+        # Initial divider
+        output.append('%s' % divider)
+
+        # Rows
+        for i, row in enumerate(chain([self._column_names], self._data)):
+            if i >= max_rows + 1:
+                break
+
+            output.append(_format_row(row))
+
+            # Divider under headers
+            if (i == 0):
+                output.append('%s' % divider)
+
+        # Row indicating data was truncated
+        if rows_truncated:
+            output.append(_format_row(['...' for n in self._column_names]))
+
+        # Final divider
+        output.append('%s' % divider)
+
+        return '\n'.join(output)
diff --git a/docs/tutorial.rst b/docs/tutorial.rst
@@ -291,28 +291,35 @@ We can then use :meth:`.Table.limit` get only the first ten rows of the data.
 
     youngest_ten = sorted_by_age.limit(10)
 
-Now let's print some information about the resulting rows:
+Now let's use :meth:`.Table.format` to pretty-print the results in a way we can easily review:
 
 .. code-block:: python
 
-    for row in youngest_ten.rows:
-        print('%(first_name)s %(last_name)s (%(age)i) %(crime)s' % row)
+    print(youngest_ten.format(max_columns=7))
 
 ::
 
-    Lacresha Murray (11) Murder
-    Johnathan Adams (12) Murder
-    Anthony Harris (12) Murder
-    Tyler Edmonds (13) Murder
-    Zachary Handley (13) Arson
-    Thaddeus Jimenez (13) Murder
-    Jerry Pacek (13) Murder
-    Jonathan Barr (14) Murder
-    Dominique Brim (14) Assault
-    Timothy Brown (14) Murder
+    |------------+------------+-----+-----------+-------+---------+---------+------|
+    |  last_name | first_name | age | race      | state | tags    | crime   | ...  |
+    |------------+------------+-----+-----------+-------+---------+---------+------|
+    |  Murray    | Lacresha   | 11  | Black     | TX    | CV, F   | Murder  | ...  |
+    |  Adams     | Johnathan  | 12  | Caucasian | GA    | CV, P   | Murder  | ...  |
+    |  Harris    | Anthony    | 12  | Black     | OH    | CV      | Murder  | ...  |
+    |  Edmonds   | Tyler      | 13  | Caucasian | MS    |         | Murder  | ...  |
+    |  Handley   | Zachary    | 13  | Caucasian | PA    | A, CV   | Arson   | ...  |
+    |  Jimenez   | Thaddeus   | 13  | Hispanic  | IL    |         | Murder  | ...  |
+    |  Pacek     | Jerry      | 13  | Caucasian | PA    |         | Murder  | ...  |
+    |  Barr      | Jonathan   | 14  | Black     | IL    | CDC, CV | Murder  | ...  |
+    |  Brim      | Dominique  | 14  | Black     | MI    | F       | Assault | ...  |
+    |  Brown     | Timothy    | 14  | Black     | FL    |         | Murder  | ...  |
+    |------------+------------+-----+-----------+-------+---------+---------+------|
 
 If you find it impossible to believe that an eleven year-old was convicted of murder, I encourage you to read the Registry's `description of the case <http://www.law.umich.edu/special/exoneration/Pages/casedetail.aspx?caseid=3499>`_.
 
+.. note::
+
+    In the previous example we could have omitted the call to :meth:`.Table.limit` and passed ``max_rows=10`` to :meth:`.Table.format` instead.
+
 Grouping and aggregating
 ========================
 

diff --git a/exonerations.py b/exonerations.py
@@ -40,6 +40,8 @@ def load_data(data):
         # Create the table
         data['exonerations'] = agate.Table(reader, columns)
 
+    print(data['exonerations'].format(3, 3))
+
 def confessions(data):
     num_false_confessions = data['exonerations'].columns['false_confession'].aggregate(agate.Count(True))
 
@@ -61,8 +63,7 @@ def youth(data):
     sorted_by_age = data['exonerations'].order_by('age')
     youngest_ten = sorted_by_age.limit(10)
 
-    for row in youngest_ten.rows:
-        print('%(first_name)s %(last_name)s (%(age)i) %(crime)s' % row)
+    print(youngest_ten.format(max_columns=7))
 
 def states(data):
     state_totals = data['with_years_in_prison'].group_by('state')
@@ -85,4 +86,4 @@ def states(data):
 years_analysis = analysis.then(years_in_prison)
 years_analysis.then(states)
 
-analysis.run()
+analysis.run(refresh=True)
diff --git a/tests/test_table.py b/tests/test_table.py
@@ -354,6 +354,33 @@ def test_chain_select_where(self):
         self.assertEqual(new_table._column_names, ('one', 'two'))
         self.assertSequenceEqual(new_table.columns['one'], (2,))
 
+    def test_format(self):
+        table = Table(self.rows, self.columns)
+
+        output = table.format()
+        lines = output.split('\n')
+
+        self.assertEqual(len(lines), 7)
+        self.assertEqual(len(lines[0]), 24)
+
+    def test_format_max_rows(self):
+        table = Table(self.rows, self.columns)
+
+        output = table.format(max_rows=2)
+        lines = output.split('\n')
+
+        self.assertEqual(len(lines), 7)
+        self.assertEqual(len(lines[0]), 23)
+
+    def test_format_max_columns(self):
+        table = Table(self.rows, self.columns)
+
+        output = table.format(max_columns=2)
+        lines = output.split('\n')
+
+        self.assertEqual(len(lines), 7)
+        self.assertEqual(len(lines[0]), 22)
+
 class TestTableGrouping(unittest.TestCase):
     def setUp(self):
         self.rows = (