Skip to content

Commit

Permalink
Differentiate sample and population variance/stdev. Closes #208. #123.
Browse files Browse the repository at this point in the history
  • Loading branch information
onyxfish committed Sep 6, 2015
1 parent 950b92a commit 9aa319b
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 20 deletions.
45 changes: 28 additions & 17 deletions agate/aggregations.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,9 +215,6 @@ def run(self, column):
if not isinstance(column, NumberColumn):
raise UnsupportedAggregationError(self, column)

if column.has_nulls():
raise NullCalculationError

return column.mean()

class Median(Aggregation):
Expand All @@ -237,9 +234,6 @@ def run(self, column):
if not isinstance(column, NumberColumn):
raise UnsupportedAggregationError(self, column)

if column.has_nulls():
raise NullCalculationError

return column.median()

class Mode(Aggregation):
Expand Down Expand Up @@ -281,16 +275,13 @@ def run(self, column):
if not isinstance(column, NumberColumn):
raise UnsupportedAggregationError(self, column)

if column.has_nulls():
raise NullCalculationError

percentiles = column.percentiles()

return percentiles[75] - percentiles[25]

class Variance(Aggregation):
"""
Compute the variance of a column.
Compute the sample variance of a column.
"""
def get_aggregate_column_type(self, column):
return NumberType()
Expand All @@ -302,14 +293,24 @@ def run(self, column):
if not isinstance(column, NumberColumn):
raise UnsupportedAggregationError(self, column)

if column.has_nulls():
raise NullCalculationError

return column.variance()

class PopulationVariance(Variance):
    """
    Aggregation yielding the population variance of a column.

    The calculation itself is delegated to the column's
    ``population_variance`` method.
    """
    def run(self, column):
        """
        :returns: :class:`decimal.Decimal`.
        :raises UnsupportedAggregationError: if ``column`` is not a
            :class:`NumberColumn`.
        """
        if isinstance(column, NumberColumn):
            return column.population_variance()

        raise UnsupportedAggregationError(self, column)

class StDev(Aggregation):
"""
Compute the standard of deviation of a column.
Compute the sample standard deviation of a column.
"""
def get_aggregate_column_type(self, column):
return NumberType()
Expand All @@ -321,11 +322,21 @@ def run(self, column):
if not isinstance(column, NumberColumn):
raise UnsupportedAggregationError(self, column)

if column.has_nulls():
raise NullCalculationError

return column.variance().sqrt()

class PopulationStDev(StDev):
    """
    Aggregation yielding the population standard deviation of a column.

    Computed as the square root of the column's population variance.
    """
    def run(self, column):
        """
        :returns: :class:`decimal.Decimal`.
        :raises UnsupportedAggregationError: if ``column`` is not a
            :class:`NumberColumn`.
        """
        if isinstance(column, NumberColumn):
            population_variance = column.population_variance()
            return population_variance.sqrt()

        raise UnsupportedAggregationError(self, column)

class MAD(Aggregation):
"""
Compute the `median absolute deviation <http://en.wikipedia.org/wiki/Median_absolute_deviation>`_
Expand Down
26 changes: 25 additions & 1 deletion agate/columns/number.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ def mean(self):
Should be invoked via the :class:`.Mean` aggregation.
"""
if self.has_nulls():
raise NullCalculationError

return self.sum() / len(self)

@memoize
Expand All @@ -40,15 +43,36 @@ def median(self):
Should be invoked via the :class:`.Median` aggregation.
"""
if self.has_nulls():
raise NullCalculationError

return self.percentiles()[50]

@memoize
def variance(self):
    """
    Compute the sample variance of the values in this column.

    The sum of squared deviations from the mean is divided by
    ``len(self) - 1`` rather than ``len(self)``.

    Should be invoked via the :class:`.Variance` aggregation.

    :raises NullCalculationError: if the column contains nulls.
    """
    if self.has_nulls():
        raise NullCalculationError

    values = self.get_data()
    center = self.mean()
    squared_deviations = ((value - center) ** 2 for value in values)

    # One less than the number of values: this is what makes it the
    # sample rather than the population variance.
    degrees_of_freedom = len(self) - 1

    return sum(squared_deviations) / degrees_of_freedom

@memoize
def population_variance(self):
"""
Compute the population variance of the values in this column.
Should be invoked via the :class:`.PopulationVariance` aggregation.
"""
if self.has_nulls():
raise NullCalculationError

data = self.get_data()
mean = self.mean()

Expand Down
16 changes: 14 additions & 2 deletions tests/test_aggregations.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,13 +169,25 @@ def test_variance(self):
with self.assertRaises(NullCalculationError):
self.table.columns['one'].aggregate(Variance())

self.assertEqual(self.table.columns['two'].aggregate(Variance()).quantize(Decimal('0.01')), Decimal('0.47'))
self.assertEqual(self.table.columns['two'].aggregate(Variance()).quantize(Decimal('0.0001')), Decimal('0.6332'))

def test_population_variance(self):
    """
    PopulationVariance must raise NullCalculationError for column 'one'
    and give 0.4749 (to four decimal places) for column 'two'.
    """
    with self.assertRaises(NullCalculationError):
        rejected_column = self.table.columns['one']
        rejected_column.aggregate(PopulationVariance())

    result = self.table.columns['two'].aggregate(PopulationVariance())
    rounded = result.quantize(Decimal('0.0001'))

    self.assertEqual(rounded, Decimal('0.4749'))

def test_stdev(self):
with self.assertRaises(NullCalculationError):
self.table.columns['one'].aggregate(StDev())

self.assertAlmostEqual(self.table.columns['two'].aggregate(StDev()).quantize(Decimal('0.01')), Decimal('0.69'))
self.assertAlmostEqual(self.table.columns['two'].aggregate(StDev()).quantize(Decimal('0.0001')), Decimal('0.7958'))

def test_population_stdev(self):
    """
    PopulationStDev must raise NullCalculationError for column 'one'
    and give 0.6891 (to four decimal places) for column 'two'.
    """
    with self.assertRaises(NullCalculationError):
        rejected_column = self.table.columns['one']
        rejected_column.aggregate(PopulationStDev())

    result = self.table.columns['two'].aggregate(PopulationStDev())
    rounded = result.quantize(Decimal('0.0001'))

    self.assertAlmostEqual(rounded, Decimal('0.6891'))

def test_mad(self):
with self.assertRaises(NullCalculationError):
Expand Down

0 comments on commit 9aa319b

Please sign in to comment.