## pivot_table

In [None]:
import pandas as pd
import numpy as np
# Sample data for the pivot_table examples, written one record per row:
# A, B and C are string keys, D and E are the integer measures.
df = pd.DataFrame(
    [
        ("aaa", "xxx", "small", 1, 2),
        ("aaa", "xxx", "LARGE", 2, 4),
        ("aaa", "xxx", "LARGE", 2, 5),
        ("aaa", "yyy", "small", 3, 5),
        ("aaa", "yyy", "small", 3, 6),
        ("ccc", "xxx", "LARGE", 4, 6),
        ("ccc", "xxx", "small", 5, 8),
        ("ccc", "yyy", "small", 6, 9),
        ("ccc", "yyy", "LARGE", 7, 9),
    ],
    columns=["A", "B", "C", "D", "E"],
)

In [21]:
# Show the full sample frame (9 rows) that the pivot_table calls below aggregate.
df

Unnamed: 0,A,B,C,D,E
0,aaa,xxx,small,1,2
1,aaa,xxx,LARGE,2,4
2,aaa,xxx,LARGE,2,5
3,aaa,yyy,small,3,5
4,aaa,yyy,small,3,6
5,ccc,xxx,LARGE,4,6
6,ccc,xxx,small,5,8
7,ccc,yyy,small,6,9
8,ccc,yyy,LARGE,7,9


In [22]:
# Sum D for every (A, B) row group, with one output column per value of C.
# Groups that have no rows for a given C show up as NaN.
# The string alias 'sum' is used instead of np.sum: newer pandas releases emit
# a FutureWarning when a NumPy callable is passed as aggfunc, and the alias
# produces the same table.
pd.pivot_table(df, values='D', index=['A', 'B'],
               columns=['C'], aggfunc='sum')

Unnamed: 0_level_0,C,LARGE,small
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
aaa,xxx,4.0,1.0
aaa,yyy,,6.0
ccc,xxx,4.0,5.0
ccc,yyy,7.0,6.0


In [23]:
# Same aggregation as above, but empty (A, B, C) combinations are filled with 0
# instead of NaN, which also keeps the result integer-typed.
# The string alias 'sum' is used instead of np.sum: newer pandas releases emit
# a FutureWarning when a NumPy callable is passed as aggfunc.
pd.pivot_table(
    df,
    values='D',
    index=['A', 'B'],
    columns=['C'],
    aggfunc='sum',
    fill_value=0,
)

Unnamed: 0_level_0,C,LARGE,small
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
aaa,xxx,4,1
aaa,yyy,0,6
ccc,xxx,4,5
ccc,yyy,7,6


In [35]:
# Per-column aggregation spec: for each (A, C) group report the mean and
# minimum of D and the mean and maximum of E.
# NOTE(review): newer pandas warns about NumPy callables in aggfunc; switching
# to string aliases would likely relabel the result columns, so the callables
# are kept here -- confirm before changing.
pd.pivot_table(
    data=df,
    index=['A', 'C'],
    values=['D', 'E'],
    aggfunc={
        'D': [np.mean, np.min],
        'E': [np.mean, np.max],
    },
)

Unnamed: 0_level_0,Unnamed: 1_level_0,D,D,E,E
Unnamed: 0_level_1,Unnamed: 1_level_1,amin,mean,amax,mean
A,C,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
aaa,LARGE,2.0,2.0,5.0,4.5
aaa,small,1.0,2.333333,6.0,4.333333
ccc,LARGE,4.0,5.5,9.0,7.5
ccc,small,5.0,5.5,9.0,8.5


In [33]:
# Mixed aggregation spec: a single function for D, a list of functions for E.
# In the recorded output E's sub-columns are ordered by label, not in the
# order the functions are listed here.
pd.pivot_table(
    data=df,
    index=['A', 'C'],
    values=['D', 'E'],
    aggfunc={
        'D': np.mean,
        'E': [np.min, np.max, np.mean],
    },
)

Unnamed: 0_level_0,Unnamed: 1_level_0,D,E,E,E
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,amax,amin,mean
A,C,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
aaa,LARGE,2.0,5.0,4.0,4.5
aaa,small,2.333333,6.0,2.0,4.333333
ccc,LARGE,5.5,9.0,6.0,7.5
ccc,small,5.5,9.0,8.0,8.5


## pivot

In [37]:
# Long-format sample for the pivot examples, one record per row.
# Every (foo, bar) pair occurs exactly once, so pivot() can reshape it.
df = pd.DataFrame(
    [
        ('one', 'A', 1, 'x'),
        ('one', 'B', 2, 'y'),
        ('one', 'C', 3, 'z'),
        ('two', 'A', 4, 'q'),
        ('two', 'B', 5, 'w'),
        ('two', 'C', 6, 't'),
    ],
    columns=['foo', 'bar', 'baz', 'zoo'],
)
df

Unnamed: 0,foo,bar,baz,zoo
0,one,A,1,x
1,one,B,2,y
2,one,C,3,z
3,two,A,4,q
4,two,B,5,w
5,two,C,6,t


In [38]:
# Reshape long -> wide: one row per foo, one column per bar, cells taken from baz.
df.pivot(
    index='foo',
    columns='bar',
    values='baz',
)

bar,A,B,C
foo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,1,2,3
two,4,5,6


In [39]:
# Without `values`, every remaining column (baz and zoo) is pivoted, so the
# result has two-level columns: (value column, bar).
df.pivot(
    index='foo',
    columns='bar',
)

Unnamed: 0_level_0,baz,baz,baz,zoo,zoo,zoo
bar,A,B,C,A,B,C
foo,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
one,1,2,3,x,y,z
two,4,5,6,q,w,t


In [40]:
# Pivot all value columns, then keep only the 'baz' block of the two-level
# columns; the table shown matches the one from pivoting with values='baz'.
df.pivot(columns='bar', index='foo')['baz']

bar,A,B,C
foo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,1,2,3
two,4,5,6


In [41]:
# pivot() raises an error when an (index, column) pair is duplicated.
# Here ('one', 'A') occurs twice.
df = pd.DataFrame(
    [
        ('one', 'A', 1),
        ('one', 'A', 2),
        ('two', 'B', 3),
        ('two', 'C', 4),
    ],
    columns=["foo", "bar", "baz"],
)
df

Unnamed: 0,foo,bar,baz
0,one,A,1
1,one,A,2
2,two,B,3
3,two,C,4


In [42]:
# pivot() cannot reshape when an (index, column) pair occurs more than once,
# as ('one', 'A') does in this frame. The failure is the point of the example,
# so catch the expected ValueError and print it; an uncaught exception would
# stop "Restart & Run All" at this cell.
try:
    df.pivot(index='foo', columns='bar', values='baz')
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: Index contains duplicate entries, cannot reshape

## stack & unstack

In [43]:
# Two columns of standard-normal noise on a two-level row index
# (letter, number). Not every letter has every number, which is what makes
# the unstack examples produce NaN.
# A seeded local generator replaces the unseeded np.random.randn call so the
# values are identical on every "Restart & Run All".
rng = np.random.default_rng(42)
data = pd.DataFrame(rng.standard_normal((9, 2)), columns=['num1', 'num2'],
                    index=[['a', 'a', 'a', 'b', 'b', 'c', 'c', 'd', 'd'],
                           [1, 2, 3, 1, 3, 1, 2, 2, 3]])
data

Unnamed: 0,Unnamed: 1,num1,num2
a,1,-0.20542,0.380133
a,2,0.441859,-0.442451
a,3,-1.035527,1.824973
b,1,1.066176,0.067806
b,3,-0.842479,0.969839
c,1,0.03259,0.356842
c,2,-1.217016,0.328401
d,2,-0.062957,1.879494
d,3,0.547132,-0.992702


In [44]:
# Move the inner index level (the numbers 1/2/3) into the columns.
# level=-1 is unstack's default, spelled out here for clarity.
# (letter, number) combinations missing from the data become NaN.
data.unstack(level=-1)

Unnamed: 0_level_0,num1,num1,num1,num2,num2,num2
Unnamed: 0_level_1,1,2,3,1,2,3
a,-0.20542,0.441859,-1.035527,0.380133,-0.442451,1.824973
b,1.066176,,-0.842479,0.067806,,0.969839
c,0.03259,-1.217016,,0.356842,0.328401,
d,,-0.062957,0.547132,,1.879494,-0.992702


In [45]:
# Move the outer index level (the letters) into the columns, then stack that
# innermost column level back into the rows. The result holds the same nine
# observations with the row levels swapped to (number, letter).
(
    data
    .unstack(level=0)
    .stack()
)

Unnamed: 0,Unnamed: 1,num1,num2
1,a,-0.20542,0.380133
1,b,1.066176,0.067806
1,c,0.03259,0.356842
2,a,0.441859,-0.442451
2,c,-1.217016,0.328401
2,d,-0.062957,1.879494
3,a,-1.035527,1.824973
3,b,-0.842479,0.969839
3,d,0.547132,-0.992702


In [92]:
# 4x3 integer frame (values 0..11) with a two-level index on both axes;
# the levels are built explicitly and left unnamed.
frame = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'], [1, 2, 1, 2]]),
    columns=pd.MultiIndex.from_arrays([['Ohio', 'Ohio', 'Colorado'],
                                       ['Green', 'Red', 'Green']]),
)
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [93]:
# Name the levels on both axes (modifies `frame` in place) so the reshaped
# results below carry readable level labels.
frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [94]:
# Move the inner row level (key2) into the columns, where it becomes the
# innermost column level. level=-1 is unstack's default, spelled out here.
frame.unstack(level=-1)

state,Ohio,Ohio,Ohio,Ohio,Colorado,Colorado
color,Green,Green,Red,Red,Green,Green
key2,1,2,1,2,1,2
key1,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3
a,0,3,1,4,2,5
b,6,9,7,10,8,11


In [95]:
# Unstacking twice moves both row levels (key2, then key1) into the columns.
# With no row level left, the result is a Series indexed by
# (state, color, key2, key1).
(
    frame
    .unstack()
    .unstack()
)

state     color  key2  key1
Ohio      Green  1     a        0
                       b        6
                 2     a        3
                       b        9
          Red    1     a        1
                       b        7
                 2     a        4
                       b       10
Colorado  Green  1     a        2
                       b        8
                 2     a        5
                       b       11
dtype: int64

In [96]:
# Round trip: push key2 into the columns, then stack it back into the rows.
# The values are unchanged, but in the recorded output the columns come back
# in sorted order (Colorado before Ohio).
(
    frame
    .unstack()
    .stack()
)

Unnamed: 0_level_0,state,Colorado,Ohio,Ohio
Unnamed: 0_level_1,color,Green,Green,Red
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,2,0,1
a,2,5,3,4
b,1,8,6,7
b,2,11,9,10


In [97]:
# Remove the level names again (modifies `frame` in place), so the examples
# below show how unnamed levels are handled.
frame.index.names = [None,None]
frame.columns.names = [None,None]
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [103]:
# Move column level 1 (Green/Red) into the rows as the innermost row level.
# There is no (Colorado, Red) column, so those cells are NaN.
frame.stack(level=1)

Unnamed: 0,Unnamed: 1,Unnamed: 2,Colorado,Ohio
a,1,Green,2.0,0
a,1,Red,,1
a,2,Green,5.0,3
a,2,Red,,4
b,1,Green,8.0,6
b,1,Red,,7
b,2,Green,11.0,9
b,2,Red,,10


In [105]:
# Stack the remaining column level too: the result is a Series with four row
# levels. The NaN entries for (Colorado, Red) are absent from the result.
frame.stack(level=1).stack()

a  1  Green  Colorado     2.0
             Ohio         0.0
      Red    Ohio         1.0
   2  Green  Colorado     5.0
             Ohio         3.0
      Red    Ohio         4.0
b  1  Green  Colorado     8.0
             Ohio         6.0
      Red    Ohio         7.0
   2  Green  Colorado    11.0
             Ohio         9.0
      Red    Ohio        10.0
dtype: float64

In [99]:
# Flatten the stacked frame into ordinary columns. The index levels are
# unnamed at this point, so reset_index() labels them level_0, level_1 and
# level_2.
frame1 = (
    frame
    .stack(level=1)
    .reset_index()
)
frame1

Unnamed: 0,level_0,level_1,level_2,Colorado,Ohio
0,a,1,Green,2.0,0
1,a,1,Red,,1
2,a,2,Green,5.0,3
3,a,2,Red,,4
4,b,1,Green,8.0,6
5,b,1,Red,,7
6,b,2,Green,11.0,9
7,b,2,Red,,10


In [101]:
# Rebuild the wide layout from the flat frame: (level_0, level_1) form the
# rows and level_2 spreads across the columns. `values` and `aggfunc` are
# omitted, so all remaining columns (Colorado, Ohio) are aggregated with
# pivot_table's default aggregation (mean).
pd.pivot_table(
    frame1,
    index=['level_0', 'level_1'],
    columns=['level_2'],
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Colorado,Ohio,Ohio
Unnamed: 0_level_1,level_2,Green,Green,Red
level_0,level_1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,2.0,0,1
a,2,5.0,3,4
b,1,8.0,6,7
b,2,11.0,9,10


In [102]:
# Use all three key columns as row levels and none as columns. Each group
# holds a single row, so the result reproduces the stacked frame.
pd.pivot_table(
    frame1,
    index=['level_0', 'level_1', 'level_2'],
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Colorado,Ohio
level_0,level_1,level_2,Unnamed: 3_level_1,Unnamed: 4_level_1
a,1,Green,2.0,0
a,1,Red,,1
a,2,Green,5.0,3
a,2,Red,,4
b,1,Green,8.0,6
b,1,Red,,7
b,2,Green,11.0,9
b,2,Red,,10
