Skip to content

Commit

Permalink
ENH: can pass values and custom aggregation function to crosstab
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Jan 18, 2012
1 parent 66410b5 commit f2b7c68
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 8 deletions.
29 changes: 21 additions & 8 deletions pandas/tools/pivot.py
Expand Up @@ -172,16 +172,23 @@ def _convert_by(by):
by = list(by)
return by

def crosstab(rows, cols, rownames=None, colnames=None, margins=False):
def crosstab(rows, cols, values=None, rownames=None, colnames=None,
aggfunc=None, margins=False):
"""
Compute a simple cross-tabulation of two (or more) factors
Compute a simple cross-tabulation of two (or more) factors. By default
computes a frequency table of the factors unless an array of values and an
aggregation function are passed
Parameters
----------
rows : array-like, Series, or list of arrays/Series
Values to group by in the rows
cols : array-like, Series, or list of arrays/Series
Values to group by in the columns
values : array-like, optional
Array of values to aggregate according to the factors
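aggfunc : function, optional
Function used to aggregate `values` within each combination of row and
column factors. Ignored when no values array is passed, in which case a
frequency table is computed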
rownames : sequence, default None
If passed, must match number of row arrays passed
colnames : sequence, default None
Expand Down Expand Up @@ -226,13 +233,19 @@ def crosstab(rows, cols, rownames=None, colnames=None, margins=False):
data = {}
data.update(zip(rownames, rows))
data.update(zip(colnames, cols))
df = DataFrame(data)
df['__dummy__'] = 0

table = df.pivot_table('__dummy__', rows=rownames, cols=colnames,
aggfunc=len, margins=margins)

return table.fillna(0).astype(np.int64)
if values is None:
df = DataFrame(data)
df['__dummy__'] = 0
table = df.pivot_table('__dummy__', rows=rownames, cols=colnames,
aggfunc=len, margins=margins)
return table.fillna(0).astype(np.int64)
else:
data['__dummy__'] = values
df = DataFrame(data)
table = df.pivot_table('__dummy__', rows=rownames, cols=colnames,
aggfunc=aggfunc, margins=margins)
return table

def _get_names(arrs, names, prefix='row'):
if names is None:
Expand Down
15 changes: 15 additions & 0 deletions pandas/tools/tests/test_pivot.py
Expand Up @@ -210,6 +210,21 @@ def test_crosstab_margins(self):
exp_rows = exp_rows.fillna(0).astype(np.int64)
tm.assert_series_equal(all_rows, exp_rows)

def test_crosstab_pass_values(self):
    """crosstab given values and an aggfunc should match pivot_table.

    Builds two row factors, one column factor and a float array, then
    checks that aggregating through ``crosstab`` yields the same frame as
    calling ``DataFrame.pivot_table`` on the equivalent named columns.
    """
    n_obs = 100

    # Factors are drawn first and the values last.
    foo = np.random.randint(0, 7, size=n_obs)
    bar = np.random.randint(0, 3, size=n_obs)
    baz = np.random.randint(0, 5, size=n_obs)
    payload = np.random.randn(n_obs)

    result = crosstab([foo, bar], baz, payload, aggfunc=np.sum,
                      rownames=['foo', 'bar'], colnames=['baz'])

    # Reference result: the same data as named columns, pivoted directly.
    frame = DataFrame({'foo': foo, 'bar': bar, 'baz': baz,
                       'values': payload})
    expected = frame.pivot_table('values', rows=['foo', 'bar'], cols='baz',
                                 aggfunc=np.sum)

    tm.assert_frame_equal(result, expected)

if __name__ == '__main__':
import nose
nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
Expand Down

0 comments on commit f2b7c68

Please sign in to comment.