diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index 1c15f7c48423b..c7c2f3a0d15e1 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -172,9 +172,12 @@ def _convert_by(by): by = list(by) return by -def crosstab(rows, cols, rownames=None, colnames=None, margins=False): +def crosstab(rows, cols, values=None, rownames=None, colnames=None, + aggfunc=None, margins=False): """ - Compute a simple cross-tabulation of two (or more) factors + Compute a simple cross-tabulation of two (or more) factors. By default + computes a frequency table of the factors unless an array of values and an + aggregation function are passed. Parameters ---------- @@ -182,6 +185,10 @@ def crosstab(rows, cols, rownames=None, colnames=None, margins=False): Values to group by in the rows cols : array-like, Series, or list of arrays/Series Values to group by in the columns + values : array-like, optional + Array of values to aggregate according to the factors + aggfunc : function, optional + Aggregates values; with no values array, a frequency table is computed rownames : sequence, default None If passed, must match number of row arrays passed colnames : sequence, default None @@ -226,13 +233,19 @@ def crosstab(rows, cols, rownames=None, colnames=None, margins=False): data = {} data.update(zip(rownames, rows)) data.update(zip(colnames, cols)) - df = DataFrame(data) - df['__dummy__'] = 0 - table = df.pivot_table('__dummy__', rows=rownames, cols=colnames, - aggfunc=len, margins=margins) - - return table.fillna(0).astype(np.int64) + if values is None: + df = DataFrame(data) + df['__dummy__'] = 0 + table = df.pivot_table('__dummy__', rows=rownames, cols=colnames, + aggfunc=len, margins=margins) + return table.fillna(0).astype(np.int64) + else: + data['__dummy__'] = values + df = DataFrame(data) + table = df.pivot_table('__dummy__', rows=rownames, cols=colnames, + aggfunc=aggfunc, margins=margins) + return table def _get_names(arrs, names, prefix='row'): if names is None: diff --git 
a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index eff5772225613..12164d01ef700 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -210,6 +210,21 @@ def test_crosstab_margins(self): exp_rows = exp_rows.fillna(0).astype(np.int64) tm.assert_series_equal(all_rows, exp_rows) + def test_crosstab_pass_values(self): + a = np.random.randint(0, 7, size=100) + b = np.random.randint(0, 3, size=100) + c = np.random.randint(0, 5, size=100) + values = np.random.randn(100) + + table = crosstab([a, b], c, values, aggfunc=np.sum, + rownames=['foo', 'bar'], colnames=['baz']) + + df = DataFrame({'foo': a, 'bar': b, 'baz': c, 'values' : values}) + + expected = df.pivot_table('values', rows=['foo', 'bar'], cols='baz', + aggfunc=np.sum) + tm.assert_frame_equal(table, expected) + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],