Skip to content

Commit

Permalink
Move 'fuse' function into join. wireservice/csvkit#711
Browse files Browse the repository at this point in the history
  • Loading branch information
onyxfish committed Dec 26, 2016
1 parent d061020 commit 0ee6857
Show file tree
Hide file tree
Showing 7 changed files with 93 additions and 144 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
-----

* Fixed a bug where empty :class:`.Table` column names would be initialized as a list instead of a tuple.
* Added :meth:`.Table.fuse` to join tables sequentially without filtering.
* :meth:`.Table.join` can now join by row numbers—a sequential join.
* :meth:`.Table.join` now supports full outer joins via the ``full_outer`` keyword.
* :meth:`.Table.join` can now accept column indices instead of column names.
* :meth:`.Table.from_csv` now buffers input files to prevent issues with using STDIN as an input.
Expand Down
2 changes: 0 additions & 2 deletions agate/table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,6 @@ def print_json(self, **kwargs):
from agate.table.from_fixed import from_fixed
from agate.table.from_json import from_json
from agate.table.from_object import from_object
from agate.table.fuse import fuse
from agate.table.group_by import group_by
from agate.table.homogenize import homogenize
from agate.table.join import join
Expand Down Expand Up @@ -364,7 +363,6 @@ def print_json(self, **kwargs):
Table.from_fixed = from_fixed
Table.from_json = from_json
Table.from_object = from_object
Table.fuse = fuse
Table.group_by = group_by
Table.homogenize = homogenize
Table.join = join
Expand Down
39 changes: 0 additions & 39 deletions agate/table/fuse.py

This file was deleted.

60 changes: 45 additions & 15 deletions agate/table/join.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,35 +5,59 @@
from agate import utils


def join(self, right_table, left_key, right_key=None, inner=False, full_outer=False, require_match=False, columns=None):
def join(self, right_table, left_key=None, right_key=None, inner=False, full_outer=False, require_match=False, columns=None):
"""
Create a new table by joining two table's on common values.
Create a new table by joining two tables on common values. This method
implements most varieties of SQL join, in addition to some unique features.
This method performs the equivalent of SQL's "left outer join", combining
columns from this table and from :code:`right_table` anywhere that the
:code:`left_key` and :code:`right_key` are equivalent.
If :code:`left_key` and :code:`right_key` are both :code:`None` then this
method will perform a "sequential join", which is to say it will join on row
number. The :code:`inner` and :code:`full_outer` arguments will determine
whether dangling left-hand and right-hand rows are included, respectively.
Where there is no match for :code:`left_key` the left columns will
be included with the right columns set to :code:`None` unless
the :code:`inner` argument is specified.
If :code:`left_key` is specified, then a "left outer join" will be
performed. This will combine columns from the :code:`right_table` anywhere
that :code:`left_key` and :code:`right_key` are equal. Unmatched rows from
the left table will be included with the right-hand columns set to
:code:`None`.
If :code:`left_key` and :code:`right_key` are column names, only
the left columns will be included in the output table.
If :code:`inner` is :code:`True` then an "inner join" will be performed.
Unmatched rows from either table will be left out.
If :code:`full_outer` is :code:`True` then a "full outer join" will be
performed. Unmatched rows from both tables will be included, with the
columns in the other table set to :code:`None`.
In all cases, if :code:`right_key` is :code:`None` then :code:`left_key`
will be used for both tables.
If :code:`left_key` and :code:`right_key` are column names, the right-hand
identifier column will not be included in the output table.
If :code:`require_match` is :code:`True` unmatched rows will raise an
exception. This is like an "inner join" except any row that doesn't have a
match will raise an exception instead of being dropped. This is useful for
enforcing expectations about datasets that should match.
Column names from the right table which also exist in this table will
be suffixed "2" in the new table.
A subset of columns from the right-hand table can be included in the joined
table using the :code:`columns` argument.
:param right_table:
The "right" table to join to.
:param left_key:
Either the name of a column from this table to join on, the index
of a column, a sequence of such column identifiers, or a
:class:`function` that takes a row and returns a value to join on.
of a column, a sequence of such column identifiers, a
:class:`function` that takes a row and returns a value to join on, or
:code:`None` in which case the tables will be joined on row number.
:param right_key:
Either the name of a column from :code:`right_table` to join on, the index of
a column, a sequence of such column identifiers, or a :class:`function`
that takes a row and returns a value to join on. If :code:`None` then
:code:`left_key` will be used for both.
:code:`left_key` will be used for both. If :code:`left_key` is
:code:`None` then this value is ignored.
:param inner:
Perform a SQL-style "inner join" instead of a left outer join. Rows
which have no match for :code:`left_key` will not be included in
Expand Down Expand Up @@ -63,8 +87,11 @@ def join(self, right_table, left_key, right_key=None, inner=False, full_outer=Fa
left_key_is_func = hasattr(left_key, '__call__')
left_key_is_sequence = utils.issequence(left_key)

# Left key is None
if left_key is None:
left_data = tuple(range(len(self._rows)))
# Left key is a function
if left_key_is_func:
elif left_key_is_func:
left_data = [left_key(row) for row in self._rows]
# Left key is a sequence
elif left_key_is_sequence:
Expand All @@ -77,8 +104,11 @@ def join(self, right_table, left_key, right_key=None, inner=False, full_outer=Fa
right_key_is_func = hasattr(right_key, '__call__')
right_key_is_sequence = utils.issequence(right_key)

# Sequential join
if left_key is None:
right_data = tuple(range(len(right_table._rows)))
# Right key is a function
if right_key_is_func:
elif right_key_is_func:
right_data = [right_key(row) for row in right_table._rows]
# Right key is a sequence
elif right_key_is_sequence:
Expand Down
6 changes: 0 additions & 6 deletions agate/tableset/proxy_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,6 @@ def find(self, *args, **kwargs):
"""
return self._proxy('find', *args, **kwargs)

def fuse(self, *args, **kwargs):
"""
Calls :meth:`.Table.fuse` on each table in the TableSet.
"""
return self._proxy('fuse', *args, **kwargs)

def group_by(self, *args, **kwargs):
"""
Calls :meth:`.Table.group_by` on each table in the TableSet.
Expand Down
81 changes: 0 additions & 81 deletions tests/test_table/test_fuse.py

This file was deleted.

47 changes: 47 additions & 0 deletions tests/test_table/test_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,3 +279,50 @@ def test_full_outer(self):
(3, 2, 'c', None, None, None),
(None, None, None, 4, 2, 'c')
])

def test_join_by_row_number(self):
    """
    Join the two fixture tables without passing any key, combined with
    ``full_outer=True``.

    The expected result is a brand-new table carrying the left columns
    followed by the right columns, with each left row paired with the
    right row at the same position.
    """
    joined = self.left.join(self.right, full_outer=True)

    # The join must hand back a new table, never one of its inputs.
    for source in (self.left, self.right):
        self.assertIsNot(joined, source)

    expected_names = ['one', 'two', 'three', 'four', 'five', 'six']
    self.assertColumnNames(joined, expected_names)

    expected_types = [Number, Number, Text, Number, Number, Text]
    self.assertColumnTypes(joined, expected_types)

    expected_rows = [
        (1, 4, 'a', 1, 4, 'a'),
        (2, 3, 'b', 2, 3, 'b'),
        (None, 2, 'c', None, 2, 'c'),
    ]
    self.assertRows(joined, expected_rows)

def test_join_by_row_number_short_right(self):
    """
    Join by position with ``full_outer=True`` when the right-hand table
    has been given one extra row, ``(7, 9, 'z')``.

    The expected result keeps that extra right-hand row as a final row
    whose left-hand columns are all ``None``.
    """
    extended_rows = self.right_rows + ((7, 9, 'z'),)
    longer_right = Table(extended_rows, self.right_column_names, self.column_types)

    joined = self.left.join(longer_right, full_outer=True)

    # The join must hand back a new table, never one of its inputs.
    for source in (self.left, longer_right):
        self.assertIsNot(joined, source)

    expected_names = ['one', 'two', 'three', 'four', 'five', 'six']
    self.assertColumnNames(joined, expected_names)

    expected_types = [Number, Number, Text, Number, Number, Text]
    self.assertColumnTypes(joined, expected_types)

    expected_rows = [
        (1, 4, 'a', 1, 4, 'a'),
        (2, 3, 'b', 2, 3, 'b'),
        (None, 2, 'c', None, 2, 'c'),
        (None, None, None, 7, 9, 'z'),
    ]
    self.assertRows(joined, expected_rows)

def test_join_by_row_number_short_left(self):
    """
    Join by position with ``full_outer=True`` when the left-hand table
    has been given one extra row, ``(7, 9, 'z')``.

    The expected result keeps that extra left-hand row as a final row
    whose right-hand columns are all ``None``.
    """
    extended_rows = self.left_rows + ((7, 9, 'z'),)
    longer_left = Table(extended_rows, self.left_column_names, self.column_types)

    joined = longer_left.join(self.right, full_outer=True)

    # The join must hand back a new table, never one of its inputs.
    for source in (longer_left, self.right):
        self.assertIsNot(joined, source)

    expected_names = ['one', 'two', 'three', 'four', 'five', 'six']
    self.assertColumnNames(joined, expected_names)

    expected_types = [Number, Number, Text, Number, Number, Text]
    self.assertColumnTypes(joined, expected_types)

    expected_rows = [
        (1, 4, 'a', 1, 4, 'a'),
        (2, 3, 'b', 2, 3, 'b'),
        (None, 2, 'c', None, 2, 'c'),
        (7, 9, 'z', None, None, None),
    ]
    self.assertRows(joined, expected_rows)

0 comments on commit 0ee6857

Please sign in to comment.