Skip to content

Commit

Permalink
Implement competition ranking. Closes #125.
Browse files Browse the repository at this point in the history
  • Loading branch information
onyxfish committed Sep 5, 2015
1 parent 2f82123 commit 144b10d
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 40 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
0.8.0
-----


* Competition ranking implemented as default. (#125)

0.7.0
-----
Expand Down
13 changes: 11 additions & 2 deletions agate/columns/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import six

from agate.exceptions import ColumnDoesNotExistError
from agate.utils import memoize
from agate.utils import NullOrder, memoize

class ColumnMapping(Mapping):
"""
Expand Down Expand Up @@ -119,12 +119,21 @@ def get_data_without_nulls(self):
"""
return tuple(d for d in self.get_data() if d is not None)

def _null_handler(self, k):
    """
    Sort key that lets null values be ordered alongside real values.

    A :code:`None` input is swapped for a fresh :class:`NullOrder` instance;
    any other value is handed back unchanged.
    """
    return NullOrder() if k is None else k

@memoize
def get_data_sorted(self):
    """
    Get the data contained in this column sorted.

    Sorting uses :meth:`_null_handler` as the key function so that
    :code:`None` values are compared through :class:`NullOrder` instead of
    being compared directly against real values (which fails on Python 3).

    :returns: A new list holding every value from :meth:`get_data`.
    """
    # A single, keyed sort: an un-keyed sorted() ahead of this line would
    # return first and leave the null-aware ordering unreachable.
    return sorted(self.get_data(), key=self._null_handler)

@memoize
def has_nulls(self):
Expand Down
24 changes: 14 additions & 10 deletions agate/computations.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,27 +156,31 @@ class Rank(Computation):
"""
Computes rank order of the values in a column.
NOTE: Earlier versions of this rank algorithm were overly simplistic and
did not handle ties.
Uses the "competition" ranking method: if there are four values and the
middle two are tied, then the output will be :code:`[1, 2, 2, 4]`.
Null values will always be ranked last.
"""
def __init__(self, column_name):
    """
    :param column_name: Name of the column whose values will be ranked.
        Stored for later use by :meth:`prepare` and :meth:`run`.
    """
    self._column_name = column_name

def get_computed_column_type(self, table):
    """
    Report the type of the computed rank column.

    Always returns a new :class:`NumberType`; the :code:`table` argument is
    not consulted.
    """
    return NumberType()

def prepare(self, table):
    """
    Build the value-to-rank lookup consumed by :meth:`run`.

    Walks the target column's sorted data once. The first time a value is
    seen it is assigned its 1-based position in the sorted sequence; later
    duplicates keep that rank. This is "competition" ranking: sorted values
    :code:`[a, b, b, c]` receive ranks :code:`[1, 2, 2, 4]`.

    Ordering (including where nulls fall) is whatever the column's
    :code:`get_data_sorted` produces; no sorting is done here.

    :param table: The table being computed on; must expose
        :code:`columns[<column_name>].get_data_sorted()`.
    """
    sorted_values = table.columns[self._column_name].get_data_sorted()
    ranks = {}

    for position, value in enumerate(sorted_values, start=1):
        # setdefault only stores the first position seen for a value, so
        # tied values all share the lowest rank of their group.
        ranks.setdefault(value, position)

    self._ranks = ranks

def run(self, row):
    """
    Return the rank of this row's value in the ranked column.

    Looks the value up in the mapping built by :meth:`prepare`, so
    :meth:`prepare` must have been called first.

    :param row: A row supporting lookup by column name.
    :returns: The 1-based competition rank stored for the row's value.
    :raises KeyError: If the row's value was not present when
        :meth:`prepare` built the mapping.
    """
    # One dictionary lookup per row; the mapping already encodes ties.
    return self._ranks[row[self._column_name]]

class PercentileRank(Rank):
"""
Expand Down
27 changes: 0 additions & 27 deletions tests/test_computations.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,12 +104,6 @@ def test_rank_number(self):

self.assertEqual(len(new_table.rows), 4)
self.assertEqual(len(new_table.columns), 5)

self.assertSequenceEqual(new_table.rows[0], ('a', 2, 3, 4, 1))
self.assertSequenceEqual(new_table.rows[1], (None, 3, 5, None, 3))
self.assertSequenceEqual(new_table.rows[2], ('a', 2, 4, None, 1))
self.assertSequenceEqual(new_table.rows[3], ('b', 3, 4, None, 3))

self.assertSequenceEqual(new_table.columns['rank'], (1, 3, 1, 3))

def test_rank_text(self):
Expand All @@ -119,29 +113,8 @@ def test_rank_text(self):

self.assertEqual(len(new_table.rows), 4)
self.assertEqual(len(new_table.columns), 5)

self.assertSequenceEqual(new_table.rows[0], ('a', 2, 3, 4, 1))
self.assertSequenceEqual(new_table.rows[1], (None, 3, 5, None, 4))
self.assertSequenceEqual(new_table.rows[2], ('a', 2, 4, None, 1))
self.assertSequenceEqual(new_table.rows[3], ('b', 3, 4, None, 3))

self.assertSequenceEqual(new_table.columns['rank'], (1, 4, 1, 3))

def test_rank_column_name(self):
    """
    Ranking by the column named 'two' appends a 'rank' column and leaves
    the existing row values in place.
    """
    ranked = self.table.compute([
        ('rank', Rank('two'))
    ])

    self.assertEqual(len(ranked.rows), 4)
    self.assertEqual(len(ranked.columns), 5)

    # Full expected rows, in table order; the final element is the rank.
    expected_rows = (
        ('a', 2, 3, 4, 1),
        (None, 3, 5, None, 3),
        ('a', 2, 4, None, 1),
        ('b', 3, 4, None, 3),
    )

    for index, expected in enumerate(expected_rows):
        self.assertSequenceEqual(ranked.rows[index], expected)

    self.assertSequenceEqual(ranked.columns['rank'], (1, 3, 1, 3))

def test_percentile_rank(self):
rows = [(n,) for n in range(1, 1001)]

Expand Down

0 comments on commit 144b10d

Please sign in to comment.