In [1]:
import pandas as pd
import numpy as np
from IPython.core.interactiveshell import InteractiveShell 
InteractiveShell.ast_node_interactivity = 'all' #默认为'last'

在许多应用中，数据可能分散在许多文件或数据库中，存储的形式也不利于分 析。本章关注可以聚合、合并、重塑数据的方法。

## 8.1层次化索引：使得一个轴上有两个以上的索引

### Series

In [3]:
data = pd.Series(np.random.randn(9),index=[['a','a','a','b','b','c','c','d','d'],[1,2,3,1,3,1,2,2,3]])
data
data.index

a  1    1.398389
   2   -0.261426
   3   -0.465281
b  1    0.058355
   3   -1.639430
c  1   -1.185353
   2    0.957911
d  2   -0.121199
   3    0.493093
dtype: float64

MultiIndex(levels=[['a', 'b', 'c', 'd'], [1, 2, 3]],
           labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 2, 0, 2, 0, 1, 1, 2]])

In [10]:
# 使用部分索引
data['b']
data['b':'d']
data.loc[['b','d']]
# 从‘内层’选取
data.loc[:,[2,3]]

1    0.058355
3   -1.639430
dtype: float64

b  1    0.058355
   3   -1.639430
c  1   -1.185353
   2    0.957911
d  2   -0.121199
   3    0.493093
dtype: float64

b  1    0.058355
   3   -1.639430
d  2   -0.121199
   3    0.493093
dtype: float64

a  2   -0.261426
   3   -0.465281
b  3   -1.639430
c  2    0.957911
d  2   -0.121199
   3    0.493093
dtype: float64

In [14]:
# unstack实现数据重塑：把带两个索引的Series转为一个DataFrame
#ps:stack(一捆一摞)
data.unstack()
data.unstack().stack()

Unnamed: 0,1,2,3
a,1.398389,-0.261426,-0.465281
b,0.058355,,-1.63943
c,-1.185353,0.957911,
d,,-0.121199,0.493093


a  1    1.398389
   2   -0.261426
   3   -0.465281
b  1    0.058355
   3   -1.639430
c  1   -1.185353
   2    0.957911
d  2   -0.121199
   3    0.493093
dtype: float64

### DataFrame

In [15]:
frame = pd.DataFrame(np.arange(12).reshape(4,3),index=[['a','a','b','b'],[1,2,1,2]],columns=[['Ohio','Ohio','Colorado'],['Green','Red','Green']])
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [16]:
frame.index.names=['key1','key2']
frame.columns.names=['state','color']
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [25]:
frame['Red',axis=1]???

SyntaxError: invalid syntax (<ipython-input-25-3d0a3dddbe1c>, line 1)

### 重排与分级排序

In [26]:
#swaplevel接受两个级别编号或名称，并返回一个互换了级别的新 对象（但数据不会发生变化）
frame.swaplevel('key1','key2')

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
2,a,3,4,5
1,b,6,7,8
2,b,9,10,11


In [28]:
# sort_index 对单个级别中的值对数据进行排序
frame.sort_index(level=1)
# swaplevel & sort_index 一起使用
frame.swaplevel('key1','key2').sort_index(level=0)

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
b,1,6,7,8
a,2,3,4,5
b,2,9,10,11


Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
1,b,6,7,8
2,a,3,4,5
2,b,9,10,11


### 根据级别汇总统计

In [29]:
frame.sum(level='key2')

state,Ohio,Ohio,Colorado
color,Green,Red,Green
key2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1,6,8,10
2,12,14,16


In [32]:
frame.sum(axis=1,level='color')

Unnamed: 0_level_0,color,Green,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,2,1
a,2,8,4
b,1,14,7
b,2,20,10


### 使用 DataFrame 的列进行索引

In [33]:
frame = pd.DataFrame({'a':range(7),'b':range(7,0,-1),'c':['one','one','one','two','two','two','two'],'d':[0,1,2,0,1,2,3]})
frame

Unnamed: 0,a,b,c,d
0,0,7,one,0
1,1,6,one,1
2,2,5,one,2
3,3,4,two,0
4,4,3,two,1
5,5,2,two,2
6,6,1,two,3


#### set_index函数将一列或者多列转换为行索引（默认那些列被drop）

In [35]:
frame2 = frame.set_index(['c','d'])
frame2
#reset_index功能相反
frame2.reset_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b
c,d,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0,0,7
one,1,1,6
one,2,2,5
two,0,3,4
two,1,4,3
two,2,5,2
two,3,6,1


Unnamed: 0,c,d,a,b
0,one,0,0,7
1,one,1,1,6
2,one,2,2,5
3,two,0,3,4
4,two,1,4,3
5,two,2,5,2
6,two,3,6,1


## 8.2合并数据集

pandas 对象中的数据可以通过一些方式进行合并：
* pandas.merge 可根据一个或多个键将不同 DataFrame 中的行连接起来。类似join
* pandas.concat 可以沿着一条轴将多个对象堆叠到一起。 
* 实例方法 combine_first 可以将重复数据编接在一起，用一个对象中的值填充另一个 对象中的缺失值。

### 数据库风格的DataFrame合并

#### 多对一的合并

In [39]:
#列名相同的merge
df1 = pd.DataFrame({'key':['b','b','a','c','a','a','b'],'data1':range(7)})
df2 = pd.DataFrame({'key':['a','b','d'],'data2':range(3)})
df1
df2
pd.merge(df1,df2,on='key')


Unnamed: 0,key,data1
0,b,0
1,b,1
2,a,2
3,c,3
4,a,4
5,a,5
6,b,6


Unnamed: 0,key,data2
0,a,0
1,b,1
2,d,2


Unnamed: 0,key,data1,data2
0,b,0,1
1,b,1,1
2,b,6,1
3,a,2,0
4,a,4,0
5,a,5,0


In [42]:
#inner连接两个表都有的键  
pd.merge(df1,df2,on='key',how='inner')
#outer连所有键
pd.merge(df1,df2,on='key',how='outer')
#left连左表所有键 
pd.merge(df1,df2,on='key',how='left')
#right连右表所有键 
pd.merge(df1,df2,on='key',how='right')

Unnamed: 0,key,data1,data2
0,b,0,1
1,b,1,1
2,b,6,1
3,a,2,0
4,a,4,0
5,a,5,0


Unnamed: 0,key,data1,data2
0,b,0.0,1.0
1,b,1.0,1.0
2,b,6.0,1.0
3,a,2.0,0.0
4,a,4.0,0.0
5,a,5.0,0.0
6,c,3.0,
7,d,,2.0


Unnamed: 0,key,data1,data2
0,b,0,1.0
1,b,1,1.0
2,a,2,0.0
3,c,3,
4,a,4,0.0
5,a,5,0.0
6,b,6,1.0


Unnamed: 0,key,data1,data2
0,b,0.0,1
1,b,1.0,1
2,b,6.0,1
3,a,2.0,0
4,a,4.0,0
5,a,5.0,0
6,d,,2


In [38]:
#列名不同的merge
df3 = pd.DataFrame({'lkey':['b','b','a','c','a','a','b'],'data1':range(7)})
df4 = pd.DataFrame({'rkey':['a','b','d'],'data2':range(3)})
df3
df4
pd.merge(df3,df4,left_on='lkey',right_on='rkey')

Unnamed: 0,key,data1,data2
0,b,0,1
1,b,1,1
2,b,6,1
3,a,2,0
4,a,4,0
5,a,5,0


#### 多对多的合并：笛卡尔积

In [45]:
df1 = pd.DataFrame({'key':['b', 'b', 'a', 'c', 'a', 'b'],'data1':range(6)})
df2 = pd.DataFrame({'key':['a', 'b', 'a', 'b', 'd'],'data2':range(5)})
df1
df2
pd.merge(df1,df2,on='key',how='left')
#多对多连接产生的是行的笛卡尔积。由于左边的 DataFrame 有 3 个"b"行，右边 的有 2 个，所以最终结果中就有 6 个"b"行。

Unnamed: 0,key,data1
0,b,0
1,b,1
2,a,2
3,c,3
4,a,4
5,b,5


Unnamed: 0,key,data2
0,a,0
1,b,1
2,a,2
3,b,3
4,d,4


Unnamed: 0,key,data1,data2
0,b,0,1.0
1,b,0,3.0
2,b,1,1.0
3,b,1,3.0
4,a,2,0.0
5,a,2,2.0
6,c,3,
7,a,4,0.0
8,a,4,2.0
9,b,5,1.0


#### 多个键进行合并

In [47]:
left = pd.DataFrame({'key1':['foo','foo','bar'],'key2': ['one', 'two', 'one'],'lval': [1, 2, 3]})
right = pd.DataFrame({'key1': ['foo', 'foo', 'bar', 'bar'],'key2': ['one', 'one', 'one', 'two'],'rval': [4, 5, 6, 7]})
left
right
pd.merge(left,right,on=['key1','key2'],how='outer')

Unnamed: 0,key1,key2,lval
0,foo,one,1
1,foo,two,2
2,bar,one,3


Unnamed: 0,key1,key2,rval
0,foo,one,4
1,foo,one,5
2,bar,one,6
3,bar,two,7


Unnamed: 0,key1,key2,lval,rval
0,foo,one,1.0,4.0
1,foo,one,1.0,5.0
2,foo,two,2.0,
3,bar,one,3.0,6.0
4,bar,two,,7.0


#### suffixes 实现重复列名的重命名

In [48]:
pd.merge(left,right,on='key1',how='outer',suffixes=('_left','_right'))

Unnamed: 0,key1,key2_left,lval,key2_right,rval
0,foo,one,1,one,4
1,foo,one,1,one,5
2,foo,two,2,one,4
3,foo,two,2,one,5
4,bar,one,3,one,6
5,bar,one,3,two,7


### 索引上的合并

In [49]:
left1 = pd.DataFrame({'key': ['a', 'b', 'a', 'a', 'b', 'c'],'value': range(6)})
right1 = pd.DataFrame({'group_val': [3.5, 7]}, index=['a', 'b'])
left1
right1

Unnamed: 0,key,value
0,a,0
1,b,1
2,a,2
3,a,3
4,b,4
5,c,5


Unnamed: 0,group_val
a,3.5
b,7.0


#### 连接键位于索引时：left_index=True 或right_index=True（或两个都传）

In [50]:
pd.merge(left1,right1,left_on='key',right_index=True,how='outer')

Unnamed: 0,key,value,group_val
0,a,0,3.5
2,a,2,3.5
3,a,3,3.5
1,b,1,7.0
4,b,4,7.0
5,c,5,


#### 层次化索引的合并

In [51]:
lefth = pd.DataFrame({'key1':['Ohio', 'Ohio', 'Ohio','Nevada', 'Nevada'],'key2':[2000, 2001, 2002, 2001,2002],'data': np.arange(5.)})
righth = pd.DataFrame(np.arange(12).reshape(6,2),index=[['Nevada','Nevada','Ohio','Ohio','Ohio','Ohio'],[2001,2000,2000,2000,2001,2002]],
                      columns=['event1','event2'])
lefth
righth

Unnamed: 0,key1,key2,data
0,Ohio,2000,0.0
1,Ohio,2001,1.0
2,Ohio,2002,2.0
3,Nevada,2001,3.0
4,Nevada,2002,4.0


Unnamed: 0,Unnamed: 1,event1,event2
Nevada,2001,0,1
Nevada,2000,2,3
Ohio,2000,4,5
Ohio,2000,6,7
Ohio,2001,8,9
Ohio,2002,10,11


In [52]:
#必须用列表的形式知名用合并键的多个列
pd.merge(lefth,righth,left_on=['key1','key2'],right_index=True,how='outer')

Unnamed: 0,key1,key2,data,event1,event2
0,Ohio,2000,0.0,4.0,5.0
0,Ohio,2000,0.0,6.0,7.0
1,Ohio,2001,1.0,8.0,9.0
2,Ohio,2002,2.0,10.0,11.0
3,Nevada,2001,3.0,0.0,1.0
4,Nevada,2002,4.0,,
4,Nevada,2000,,2.0,3.0


In [54]:
left2 = pd.DataFrame([[1., 2.], [3., 4.], [5., 6.]],index=['a', 'c', 'e'], columns=['Ohio', 'Nevada'])
right2 = pd.DataFrame([[7., 8.], [9., 10.], [11., 12.], [13, 14]],index=['b', 'c', 'd', 'e'],columns=['Missouri', 'Alabama'])
left2
right2
#左右都索引也可以
pd.merge(left2,right2,left_index=True,right_index=True,how='outer')

Unnamed: 0,Ohio,Nevada
a,1.0,2.0
c,3.0,4.0
e,5.0,6.0


Unnamed: 0,Missouri,Alabama
b,7.0,8.0
c,9.0,10.0
d,11.0,12.0
e,13.0,14.0


Unnamed: 0,Ohio,Nevada,Missouri,Alabama
a,1.0,2.0,,
b,,,7.0,8.0
c,3.0,4.0,9.0,10.0
d,,,11.0,12.0
e,5.0,6.0,13.0,14.0


In [55]:
#join可以按索引连接
left2.join(right2,how='outer')

Unnamed: 0,Ohio,Nevada,Missouri,Alabama
a,1.0,2.0,,
b,,,7.0,8.0
c,3.0,4.0,9.0,10.0
d,,,11.0,12.0
e,5.0,6.0,13.0,14.0


In [57]:
left1
right1
#join连接DataFrame中的列
left1.join(right1,on='key')

Unnamed: 0,key,value
0,a,0
1,b,1
2,a,2
3,a,3
4,b,4
5,c,5


Unnamed: 0,group_val
a,3.5
b,7.0


Unnamed: 0,key,value,group_val
0,a,0,3.5
1,b,1,7.0
2,a,2,3.5
3,a,3,3.5
4,b,4,7.0
5,c,5,


In [61]:
#向join传入一组DataFrame
another = pd.DataFrame([[7., 8.], [9., 10.], [11., 12.], [16., 17.]],index=['a', 'c', 'e', 'f'], columns=['New York','Oregon'])
another
left2
right2
left2.join([right2,another],how='outer')

Unnamed: 0,New York,Oregon
a,7.0,8.0
c,9.0,10.0
e,11.0,12.0
f,16.0,17.0


Unnamed: 0,Ohio,Nevada
a,1.0,2.0
c,3.0,4.0
e,5.0,6.0


Unnamed: 0,Missouri,Alabama
b,7.0,8.0
c,9.0,10.0
d,11.0,12.0
e,13.0,14.0


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  verify_integrity=True)


Unnamed: 0,Ohio,Nevada,Missouri,Alabama,New York,Oregon
a,1.0,2.0,,,7.0,8.0
b,,,7.0,8.0,,
c,3.0,4.0,9.0,10.0,9.0,10.0
d,,,11.0,12.0,,
e,5.0,6.0,13.0,14.0,11.0,12.0
f,,,,,16.0,17.0


### 轴向连接

对于 pandas 对象（如 Series 和DataFrame），带有标签的轴使你能够进一步 推广数组的连接运算。具体点说，你还需要考虑以下这些东西：
* 如果对象在其它轴上的索引不同，我们应该合并这些轴的不同元素还是只使用交集？
* 连接的数据集是否需要在结果对象中可识别？ 
* 连接轴中保存的数据是否需要保留？许多情况下，DataFrame 默认的整数标签最好 在连接时删掉。

#### Series

In [63]:
s1 = pd.Series([0, 1], index=['a', 'b'])
s2 = pd.Series([2, 3, 4], index=['c', 'd', 'e'])
s3 = pd.Series([5, 6], index=['f', 'g'])
pd.concat([s1,s2,s3])
pd.concat([s1,s2,s3],axis=1)

a    0
b    1
c    2
d    3
e    4
f    5
g    6
dtype: int64

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """


Unnamed: 0,0,1,2
a,0.0,,
b,1.0,,
c,,2.0,
d,,3.0,
e,,4.0,
f,,,5.0
g,,,6.0


In [65]:
s4 = pd.concat([s1,s3])
s4

a    0
b    1
f    5
g    6
dtype: int64

In [70]:
pd.concat([s1,s4],axis=1)
pd.concat([s1,s4],axis=1,join='inner')
pd.concat([s1,s4],axis=1,join_axes=[['a','c','b','e']])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


Unnamed: 0,0,1
a,0.0,0
b,1.0,1
f,,5
g,,6


Unnamed: 0,0,1
a,0,0
b,1,1


Unnamed: 0,0,1
a,0.0,0.0
c,,
b,1.0,1.0
e,,


In [74]:
#concat的keys参数实现建立连接轴层次化索引的功能
result = pd.concat([s1,s1,s3],keys=['one','two','three'])
result
result.unstack()

one    a    0
       b    1
two    a    0
       b    1
three  f    5
       g    6
dtype: int64

Unnamed: 0,a,b,f,g
one,0.0,1.0,,
two,0.0,1.0,,
three,,,5.0,6.0


In [75]:
#keys在axis=1时成为DataFrame的列头
pd.concat([s1,s2,s3],axis=1,keys=['one','two','three'])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  


Unnamed: 0,one,two,three
a,0.0,,
b,1.0,,
c,,2.0,
d,,3.0,
e,,4.0,
f,,,5.0
g,,,6.0


#### DataFrame

In [76]:
df1 = pd.DataFrame(np.arange(6).reshape(3,2),index=['a','b','c'],columns=['one','two'])
df2 = pd.DataFrame(5+np.arange(4).reshape(2,2),index=['a','c'],columns=['three','four'])
df1
df2

Unnamed: 0,one,two
a,0,1
b,2,3
c,4,5


Unnamed: 0,three,four
a,5,6
c,7,8


In [81]:
pd.concat([df1,df2],keys=['level1','level2'])
pd.concat([df1,df2],keys=['level1','level2'],axis=1)
#用 names 参数命名创建的轴级别
pd.concat([df1,df2],keys=['level1','level2'],axis=1,names=['upper','lower'])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


Unnamed: 0,Unnamed: 1,four,one,three,two
level1,a,,0.0,,1.0
level1,b,,2.0,,3.0
level1,c,,4.0,,5.0
level2,a,6.0,,5.0,
level2,c,8.0,,7.0,


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  


Unnamed: 0_level_0,level1,level1,level2,level2
Unnamed: 0_level_1,one,two,three,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  after removing the cwd from sys.path.


upper,level1,level1,level2,level2
lower,one,two,three,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


In [80]:
# 如果传入的不是列表而是一个字典，则字典的键就会被当做 keys 选项的值：
pd.concat({'level1':df1,'level2':df2},axis=1)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  


Unnamed: 0_level_0,level1,level1,level2,level2
Unnamed: 0_level_1,one,two,three,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


In [82]:
# DataFrame的行索引不包含任何相关数 据：ignore_index=True
df1 = pd.DataFrame(np.random.randn(3, 4), columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.random.randn(2, 3), columns=['b', 'd', 'a'])
pd.concat([df1, df2])
pd.concat([df1, df2], ignore_index=True)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,a,b,c,d
0,0.226726,1.785547,-0.808327,-0.617091
1,-1.458822,1.395598,0.308512,2.041397
2,1.201359,1.574097,0.193051,-0.428071
0,-1.290914,-1.103668,,-1.062818
1,-0.46206,-0.166715,,0.711024


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  after removing the cwd from sys.path.


Unnamed: 0,a,b,c,d
0,0.226726,1.785547,-0.808327,-0.617091
1,-1.458822,1.395598,0.308512,2.041397
2,1.201359,1.574097,0.193051,-0.428071
3,-1.290914,-1.103668,,-1.062818
4,-0.46206,-0.166715,,0.711024


### 合并重叠数据

In [83]:
a = pd.Series([np.nan, 2.5, np.nan, 3.5, 4.5, np.nan],index=['f', 'e', 'd', 'c', 'b', 'a'])
b = pd.Series(np.arange(len(a), dtype=np.float64),index=['f', 'e', 'd', 'c', 'b', 'a'])
b[-1] = np.nan
a
b
np.where(pd.isnull(a),b,a)

f    NaN
e    2.5
d    NaN
c    3.5
b    4.5
a    NaN
dtype: float64

f    0.0
e    1.0
d    2.0
c    3.0
b    4.0
a    NaN
dtype: float64

array([0. , 2.5, 2. , 3.5, 4.5, nan])

In [86]:
a[2:]
b[:-2]
b[:-2].combine_first(a[2:])
#留下的是b的值

d    NaN
c    3.5
b    4.5
a    NaN
dtype: float64

f    0.0
e    1.0
d    2.0
c    3.0
dtype: float64

a    NaN
b    4.5
c    3.0
d    2.0
e    1.0
f    0.0
dtype: float64

In [87]:
df1 = pd.DataFrame({'a': [1., np.nan, 5., np.nan],'b': [np.nan, 2., np.nan, 6.],'c': range(2, 18, 4)})
df2 = pd.DataFrame({'a': [5., 4., np.nan, 3., 7.],'b': [np.nan, 3., 4., 6., 8.]})
df1
df2

Unnamed: 0,a,b,c
0,1.0,,2
1,,2.0,6
2,5.0,,10
3,,6.0,14


Unnamed: 0,a,b
0,5.0,
1,4.0,3.0
2,,4.0
3,3.0,6.0
4,7.0,8.0


In [88]:
df1.combine_first(df2)
#留的是df1的数据，df1没有的才填充的df2

Unnamed: 0,a,b,c
0,1.0,,2.0
1,4.0,2.0,6.0
2,5.0,4.0,10.0
3,3.0,6.0,14.0
4,7.0,8.0,


## 8.3重塑和轴向旋转

In [89]:
data = pd.DataFrame(np.arange(6).reshape((2, 3)),index=pd.Index(['Ohio','Colorado'],ame='state'),columns=pd.Index(['one', 'two','three'],
name='number'))
data

number,one,two,three
Ohio,0,1,2
Colorado,3,4,5


In [90]:
result = data.stack()
result

          number
Ohio      one       0
          two       1
          three     2
Colorado  one       3
          two       4
          three     5
dtype: int32

In [93]:
result.unstack()
result.unstack(0)
result.unstack('state')

number,one,two,three
Ohio,0,1,2
Colorado,3,4,5


Unnamed: 0_level_0,Ohio,Colorado
number,Unnamed: 1_level_1,Unnamed: 2_level_1
one,0,3
two,1,4
three,2,5


KeyError: 'Level state not found'