Skip to content

Commit

Permalink
Ensure necessary Column methods are public. Closes #183.
Browse files Browse the repository at this point in the history
  • Loading branch information
onyxfish committed Aug 30, 2015
1 parent 295474f commit c973f8f
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 57 deletions.
1 change: 1 addition & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
0.6.0
-----

* Some private Column methods made public. (#183)
* Rename agate.aggregators to agate.aggregations.
* TableSet.to_csv implemented. (#195)
* TableSet.from_csv implemented. (#194)
Expand Down
32 changes: 16 additions & 16 deletions agate/aggregations.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
the :attr:`.Table.columns` attribute.
Aggregations can also be applied to instances of :class:`.TableSet` using the
:meth:`.TableSet.aggregate` method, in which case the result will be a new
:meth:`.TableSet.aggregate` method, in which case the result will be a new
:class:`.Table` with a column for each aggregation and a row for each table in
the set.
"""
Expand Down Expand Up @@ -46,7 +46,7 @@ class NonNullAggregation(Aggregation):
being applied to contains null values.
"""
def run(self, column):
if column._has_nulls():
if column.has_nulls():
raise NullComputationError

class HasNulls(Aggregation):
Expand All @@ -60,7 +60,7 @@ def run(self, column):
"""
:returns: :class:`bool`
"""
return column._has_nulls()
return column.has_nulls()

class Any(Aggregation):
"""
Expand All @@ -80,7 +80,7 @@ def run(self, column):
"""
:returns: :class:`bool`
"""
data = column._data()
data = column.get_data()

if isinstance(column, BooleanColumn):
return any(data)
Expand All @@ -107,7 +107,7 @@ def run(self, column):
"""
:returns: :class:`bool`
"""
data = column._data()
data = column.get_data()

if isinstance(column, BooleanColumn):
return all(data)
Expand All @@ -132,7 +132,7 @@ def run(self, column):
"""
:returns: :class:`int`
"""
return column._data().count(self._value)
return column.get_data().count(self._value)

class Min(Aggregation):
"""
Expand All @@ -156,7 +156,7 @@ def run(self, column):
if not any(isinstance(column, t) for t in supported_columns):
raise UnsupportedAggregationError(self, column)

return min(column._data_without_nulls())
return min(column.get_data_without_nulls())

class Max(Aggregation):
"""
Expand All @@ -180,7 +180,7 @@ def run(self, column):
if not any(isinstance(column, t) for t in supported_columns):
raise UnsupportedAggregationError(self, column)

return max(column._data_without_nulls())
return max(column.get_data_without_nulls())

class Sum(Aggregation):
"""
Expand All @@ -196,7 +196,7 @@ def run(self, column):
if not isinstance(column, NumberColumn):
raise UnsupportedAggregationError(self, column)

return column._sum()
return column.sum()

class Mean(NonNullAggregation):
"""
Expand All @@ -214,7 +214,7 @@ def run(self, column):
if not isinstance(column, NumberColumn):
raise UnsupportedAggregationError(self, column)

return column._mean()
return column.mean()

class Median(NonNullAggregation):
"""
Expand All @@ -235,7 +235,7 @@ def run(self, column):
if not isinstance(column, NumberColumn):
raise UnsupportedAggregationError(self, column)

return column._median()
return column.median()

class Mode(NonNullAggregation):
"""
Expand All @@ -253,7 +253,7 @@ def run(self, column):
if not isinstance(column, NumberColumn):
raise UnsupportedAggregationError(self, column)

data = column._data()
data = column.get_data()
state = defaultdict(int)

for n in data:
Expand Down Expand Up @@ -297,7 +297,7 @@ def run(self, column):
if not isinstance(column, NumberColumn):
raise UnsupportedAggregationError(self, column)

return column._variance()
return column.variance()

class StDev(NonNullAggregation):
"""
Expand All @@ -315,7 +315,7 @@ def run(self, column):
if not isinstance(column, NumberColumn):
raise UnsupportedAggregationError(self, column)

return column._variance().sqrt()
return column.variance().sqrt()

class MAD(NonNullAggregation):
"""
Expand Down Expand Up @@ -346,7 +346,7 @@ def run(self, column):
if not isinstance(column, NumberColumn):
raise UnsupportedAggregationError(self, column)

data = column._data_sorted()
data = column.get_data_sorted()
m = column.percentiles()[50]

return self._median(tuple(abs(n - m) for n in data))
Expand All @@ -365,4 +365,4 @@ def run(self, column):
if not isinstance(column, TextColumn):
raise UnsupportedAggregationError(self, column)

return max([len(d) for d in column._data_without_nulls()])
return max([len(d) for d in column.get_data_without_nulls()])
52 changes: 32 additions & 20 deletions agate/columns/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def __init__(self, table, index):
self._index = index

def __unicode__(self):
data = self._data()
data = self.get_data()

sample = ', '.join(six.text_type(d) for d in data[:5])

Expand All @@ -86,41 +86,53 @@ def __unicode__(self):
def __str__(self):
return str(self.__unicode__())

@memoize
def _data(self):
return tuple(r[self._index] for r in self._table._data)

@memoize
def _data_without_nulls(self):
return tuple(d for d in self._data() if d is not None)

@memoize
def _data_sorted(self):
return sorted(self._data())

@memoize
def _has_nulls(self):
return None in self._data()

def __getitem__(self, j):
return self._data()[j]
return self.get_data()[j]

@memoize
def __len__(self):
return len(self._data())
return len(self.get_data())

def __eq__(self, other):
"""
Ensure equality test with lists works.
"""
return self._data() == other
return self.get_data() == other

def __ne__(self, other):
"""
Ensure inequality test with lists works.
"""
return not self.__eq__(other)

@memoize
def get_data(self):
"""
Get the data contained in this column as a :class:`tuple`.
"""
return tuple(r[self._index] for r in self._table._data)

@memoize
def get_data_without_nulls(self):
"""
Get the data contained in this column with any null values removed.
"""
return tuple(d for d in self.get_data() if d is not None)

@memoize
def get_data_sorted(self):
"""
Get the data contained in this column sorted.
"""
return sorted(self.get_data())

@memoize
def has_nulls(self):
"""
Returns `True` if this column contains null values.
"""
return None in self.get_data()

def aggregate(self, aggregation):
"""
Apply a :class:`.Aggregation` to this column and return the result.
Expand Down
20 changes: 10 additions & 10 deletions agate/columns/number.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,25 +19,25 @@ def __init__(self, *args, **kwargs):
super(NumberColumn, self).__init__(*args, **kwargs)

@memoize
def _sum(self):
def sum(self):
"""
Compute the sum of the values in this column.
Should be invoked via the :class:`.Sum` aggregation.
"""
return sum(self._data_without_nulls())
return sum(self.get_data_without_nulls())

@memoize
def _mean(self):
def mean(self):
"""
Compute the mean of the values in this column.
Should be invoked via the :class:`.Mean` aggregation.
"""
return self._sum() / len(self)
return self.sum() / len(self)

@memoize
def _median(self):
def median(self):
"""
Compute the median of the values in this column.
Expand All @@ -46,14 +46,14 @@ def _median(self):
return self.percentiles()[50]

@memoize
def _variance(self):
def variance(self):
"""
Compute the variance of the values in this column.
Should be invoked via the :class:`.Variance` aggregation.
"""
data = self._data()
mean = self._mean()
data = self.get_data()
mean = self.mean()

return sum((n - mean) ** 2 for n in data) / len(self)

Expand All @@ -65,7 +65,7 @@ def percentiles(self):
:returns: :class:`Percentiles`.
:raises: :exc:`.NullComputationError`
"""
if self._has_nulls():
if self.has_nulls():
raise NullComputationError

return Percentiles(self)
Expand Down Expand Up @@ -155,7 +155,7 @@ class Percentiles(Quantiles):
def __init__(self, column):
super(Percentiles, self).__init__()

data = column._data_sorted()
data = column.get_data_sorted()

if len(data) == 0:
raise ValueError('Column does not contain data.')
Expand Down
2 changes: 1 addition & 1 deletion agate/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ def pearson_correlation(self, column_one, column_two):
x = self.columns[column_one]
y = self.columns[column_two]

if x._has_nulls() or y._has_nulls():
if x.has_nulls() or y.has_nulls():
raise NullComputationError

n = len(x)
Expand Down
18 changes: 9 additions & 9 deletions tests/test_aggregations.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,24 +60,24 @@ def test_count(self):
class TestBooleanAggregation(unittest.TestCase):
def test_any(self):
column = BooleanColumn(None, 'one')
column._data = lambda: (True, False, None)
column.get_data = lambda: (True, False, None)
self.assertEqual(column.aggregate(Any()), True)

column._data = lambda: (False, False, None)
column.get_data = lambda: (False, False, None)
self.assertEqual(column.aggregate(Any()), False)

def test_all(self):
column = BooleanColumn(None, 'one')
column._data = lambda: (True, True, None)
column.get_data = lambda: (True, True, None)
self.assertEqual(column.aggregate(All()), False)

column._data = lambda: (True, True, True)
column.get_data = lambda: (True, True, True)
self.assertEqual(column.aggregate(All()), True)

class TestDateAggregation(unittest.TestCase):
def test_min(self):
column = DateColumn(None, 'one')
column._data_without_nulls = lambda: (
column.get_data_without_nulls = lambda: (
datetime.date(1994, 3, 1),
datetime.date(1011, 2, 17),
datetime.date(1984, 1, 5)
Expand All @@ -87,7 +87,7 @@ def test_min(self):

def test_max(self):
column = DateColumn(None, 'one')
column._data_without_nulls = lambda: (
column.get_data_without_nulls = lambda: (
datetime.date(1994, 3, 1),
datetime.date(1011, 2, 17),
datetime.date(1984, 1, 5)
Expand All @@ -98,7 +98,7 @@ def test_max(self):
class TestDateTimeAggregation(unittest.TestCase):
def test_min(self):
column = DateTimeColumn(None, 'one')
column._data_without_nulls = lambda: (
column.get_data_without_nulls = lambda: (
datetime.datetime(1994, 3, 3, 6, 31),
datetime.datetime(1994, 3, 3, 6, 30, 30),
datetime.datetime(1994, 3, 3, 6, 30)
Expand All @@ -108,7 +108,7 @@ def test_min(self):

def test_max(self):
column = DateTimeColumn(None, 'one')
column._data_without_nulls = lambda: (
column.get_data_without_nulls = lambda: (
datetime.datetime(1994, 3, 3, 6, 31),
datetime.datetime(1994, 3, 3, 6, 30, 30),
datetime.datetime(1994, 3, 3, 6, 30)
Expand Down Expand Up @@ -193,5 +193,5 @@ def test_mad(self):
class TestTextAggregation(unittest.TestCase):
def test_max_length(self):
column = TextColumn(None, 'one')
column._data = lambda: ('a', 'gobble', 'wow')
column.get_data = lambda: ('a', 'gobble', 'wow')
self.assertEqual(column.aggregate(MaxLength()), 6)
2 changes: 1 addition & 1 deletion tests/test_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def test_length(self):
self.assertEqual(len(self.table.columns), 3)

def test_get_column_data(self):
self.assertSequenceEqual(self.table.columns['one']._data(), (1, 2, None))
self.assertSequenceEqual(self.table.columns['one'].get_data(), (1, 2, None))

def test_get_column(self):
self.assertSequenceEqual(self.table.columns['one'], (1, 2, None))
Expand Down

0 comments on commit c973f8f

Please sign in to comment.