Skip to content

Commit

Permalink
Implement Table.fuse. wireservice/csvkit#711.
Browse files Browse the repository at this point in the history
  • Loading branch information
onyxfish committed Dec 26, 2016
1 parent 28912b4 commit e02221c
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
1.5.3
-----

* Added :meth:`.Table.fuse` to join tables sequentially without filtering.
* :meth:`.Table.join` now supports full outer joins via the ``full_outer`` keyword.
* :meth:`.Table.join` can now accept column indices instead of column names.
* :meth:`.Table.from_csv` now buffers input files to prevent issues with using STDIN as an input.
Expand Down
2 changes: 2 additions & 0 deletions agate/table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,7 @@ def print_json(self, **kwargs):
from agate.table.from_fixed import from_fixed
from agate.table.from_json import from_json
from agate.table.from_object import from_object
from agate.table.fuse import fuse
from agate.table.group_by import group_by
from agate.table.homogenize import homogenize
from agate.table.join import join
Expand Down Expand Up @@ -363,6 +364,7 @@ def print_json(self, **kwargs):
Table.from_fixed = from_fixed
Table.from_json = from_json
Table.from_object = from_object
Table.fuse = fuse
Table.group_by = group_by
Table.homogenize = homogenize
Table.join = join
Expand Down
39 changes: 39 additions & 0 deletions agate/table/fuse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/usr/bin/env python
# pylint: disable=W0212

from agate.rows import Row
from agate import utils


def fuse(self, right_table):
    """
    Combine this table with another by placing their columns side by side,
    pairing rows purely by position ("join by row number"). No rows are
    filtered out: when one table runs out of rows before the other, the
    cells on the shorter side are filled with ``None``.

    :param right_table:
        The "right" table, whose columns are appended after this table's.
    :returns:
        A new :class:`.Table`.
    """
    left_width = len(self._columns)
    right_width = len(right_table._columns)

    fused_names = self._column_names + right_table._column_names
    fused_types = self._column_types + right_table._column_types

    # Unique marker returned by next() once the right table has no rows left.
    exhausted = object()
    right_iter = iter(right_table._rows)

    fused_rows = []

    # Walk the left table, consuming one right-hand row per left-hand row.
    for left_row in self._rows:
        left_values = list(left_row)
        right_row = next(right_iter, exhausted)

        if right_row is exhausted:
            # Right table is shorter: pad the right-hand cells.
            right_values = [None] * right_width
        else:
            right_values = list(right_row)

        fused_rows.append(Row(left_values + right_values, fused_names))

    # Whatever remains on the right has no left-hand partner: pad the left.
    for right_row in right_iter:
        padded = ([None] * left_width) + list(right_row)
        fused_rows.append(Row(padded, fused_names))

    return self._fork(fused_rows, fused_names, fused_types)
6 changes: 6 additions & 0 deletions agate/tableset/proxy_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ def find(self, *args, **kwargs):
"""
return self._proxy('find', *args, **kwargs)

def fuse(self, *args, **kwargs):
    """
    Proxy for :meth:`.Table.fuse`, applied to each table in the TableSet.

    All positional and keyword arguments are forwarded unchanged through
    ``_proxy`` under the method name ``'fuse'``.
    """
    method_name = 'fuse'
    return self._proxy(method_name, *args, **kwargs)

def group_by(self, *args, **kwargs):
"""
Calls :meth:`.Table.group_by` on each table in the TableSet.
Expand Down
81 changes: 81 additions & 0 deletions tests/test_table/test_fuse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#!/usr/bin/env python
# -*- coding: utf8 -*-

from agate import Table
from agate.data_types import *
from agate.testcase import AgateTestCase


class TestFuse(AgateTestCase):
    """
    Tests for ``Table.fuse``: equal-length tables, and each side being the
    shorter one (the shorter side's cells are expected to be ``None``).
    """
    def setUp(self):
        # Both tables start with the same three rows; individual tests append
        # an extra row to one side to make the other side "short".
        self.left_rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (None, 2, 'c')
        )

        self.right_rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (None, 2, 'c')
        )

        self.number_type = Number()
        self.text_type = Text()

        self.left_column_names = ['one', 'two', 'three']
        self.right_column_names = ['four', 'five', 'six']
        self.column_types = [self.number_type, self.number_type, self.text_type]

        self.left = Table(self.left_rows, self.left_column_names, self.column_types)
        self.right = Table(self.right_rows, self.right_column_names, self.column_types)

    def test_fuse(self):
        """Tables with the same number of rows are fused row for row."""
        new_table = self.left.fuse(self.right)

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 4, 'a'),
            (2, 3, 'b', 2, 3, 'b'),
            (None, 2, 'c', None, 2, 'c')
        ])

    def test_fuse_short_right(self):
        """The right table has fewer rows, so its cells are padded with None."""
        left_rows = self.left_rows + ((7, 9, 'z'),)
        left = Table(left_rows, self.left_column_names, self.column_types)

        new_table = left.fuse(self.right)

        self.assertIsNot(new_table, left)
        self.assertIsNot(new_table, self.right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 4, 'a'),
            (2, 3, 'b', 2, 3, 'b'),
            (None, 2, 'c', None, 2, 'c'),
            (7, 9, 'z', None, None, None)
        ])

    def test_fuse_short_left(self):
        """The left table has fewer rows, so its cells are padded with None."""
        right_rows = self.right_rows + ((7, 9, 'z'),)
        right = Table(right_rows, self.right_column_names, self.column_types)

        new_table = self.left.fuse(right)

        self.assertIsNot(new_table, self.left)
        self.assertIsNot(new_table, right)
        self.assertColumnNames(new_table, ['one', 'two', 'three', 'four', 'five', 'six'])
        self.assertColumnTypes(new_table, [Number, Number, Text, Number, Number, Text])
        self.assertRows(new_table, [
            (1, 4, 'a', 1, 4, 'a'),
            (2, 3, 'b', 2, 3, 'b'),
            (None, 2, 'c', None, 2, 'c'),
            (None, None, None, 7, 9, 'z')
        ])

0 comments on commit e02221c

Please sign in to comment.