## 8.1 分层索引

In [1]:
import pandas as pd
import numpy as np

# Build a 9-element Series with a two-level (hierarchical) index: the first
# list supplies the outer labels, the second list the inner labels.
# The values come from a seeded generator so they are identical on every
# Restart & Run All; the unseeded np.random.randn produced new numbers on
# each run, so the displayed outputs could never be reproduced.
data = pd.Series(
    np.random.default_rng(0).standard_normal(9),
    index=[['a', 'a', 'a', 'b', 'b', 'c', 'c', 'd', 'd'],
           [1, 2, 3, 1, 3, 1, 2, 2, 3]],
)
data

a  1   -0.633714
   2   -1.882732
   3    0.199148
b  1   -0.317119
   3   -0.580563
c  1    0.341339
   2    2.057472
d  2   -0.323642
   3   -1.255576
dtype: float64

In [4]:
# Inspect the index: the two nested label lists produce a MultiIndex whose
# entries are (outer, inner) tuples.
data.index

MultiIndex([('a', 1),
            ('a', 2),
            ('a', 3),
            ('b', 1),
            ('b', 3),
            ('c', 1),
            ('c', 2),
            ('d', 2),
            ('d', 3)],
           )

In [5]:
# Partial indexing on a hierarchically indexed object: giving only the outer
# label concisely selects that subset, indexed by the remaining inner level.
data['b']

1   -0.317119
3   -0.580563
dtype: float64

In [6]:
# Label slice on the outer level; both endpoints ('b' and 'c') are included.
data['b':'c']

b  1   -0.317119
   3   -0.580563
c  1    0.341339
   2    2.057472
dtype: float64

In [7]:
# Pass a list to .loc to pick several outer-level labels at once.
data.loc[['b','d']]

b  1   -0.317119
   3   -0.580563
d  2   -0.323642
   3   -1.255576
dtype: float64

In [8]:
# Select on the inner level: every entry whose second-level label is 2.
data.loc[:,2]

a   -1.882732
c    2.057472
d   -0.323642
dtype: float64

In [9]:
# unstack pivots the inner index level into columns, turning the Series into
# a DataFrame; (outer, inner) pairs that do not exist become NaN.
data.unstack()

Unnamed: 0,1,2,3
a,-0.633714,-1.882732,0.199148
b,-0.317119,,-0.580563
c,0.341339,2.057472,
d,,-0.323642,-1.255576


In [10]:
# stack is the inverse operation: the columns go back into the inner index
# level, and the NaN cells introduced by unstack are not kept in the result.
data.unstack().stack()

a  1   -0.633714
   2   -1.882732
   3    0.199148
b  1   -0.317119
   3   -0.580563
c  1    0.341339
   2    2.057472
d  2   -0.323642
   3   -1.255576
dtype: float64

### 8.1.3 使用DataFrame的列进行索引

In [11]:
# Seven-row demo frame: 'a' counts up, 'b' counts down, and 'c'/'d' together
# form a (group, position) pair that is used as an index in the next cells.
frame_columns = {
    'a': range(7),
    'b': range(7, 0, -1),
    'c': ['one'] * 3 + ['two'] * 4,
    'd': [0, 1, 2, 0, 1, 2, 3],
}
frame = pd.DataFrame(frame_columns)
frame

Unnamed: 0,a,b,c,d
0,0,7,one,0
1,1,6,one,1
2,2,5,one,2
3,3,4,two,0
4,4,3,two,1
5,5,2,two,2
6,6,1,two,3


In [12]:
# Turn columns 'c' and 'd' into a two-level row index. By default the columns
# used for the index are removed from the frame, leaving only 'a' and 'b'.
frame2=frame.set_index(['c','d'])
frame2

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b
c,d,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0,0,7
one,1,1,6
one,2,2,5
two,0,3,4
two,1,4,3
two,2,5,2
two,3,6,1


In [14]:
# drop=False keeps the columns used for the index as regular columns as well.
frame.set_index(['c','d'],drop=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c,d
c,d,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,0,0,7,one,0
one,1,1,6,one,1
one,2,2,5,one,2
two,0,3,4,two,0
two,1,4,3,two,1
two,2,5,2,two,2
two,3,6,1,two,3


In [15]:
# reset_index moves the hierarchical index levels back into the columns and
# restores a plain 0..n-1 integer index.
frame2.reset_index()

Unnamed: 0,c,d,a,b
0,one,0,0,7
1,one,1,1,6
2,one,2,2,5
3,two,0,3,4
4,two,1,4,3
5,two,2,5,2
6,two,3,6,1


## 8.2 联合与合并数据集

In [16]:
# Two small frames sharing a 'key' column: df1 repeats its keys, while df2
# lists each of its keys exactly once.
df1 = pd.DataFrame({'key': ['b', 'b', 'a', 'c', 'a', 'a', 'b'],
                    'data1': range(7)})
df2 = pd.DataFrame({'key': ['a', 'b', 'd'],
                    'data2': range(3)})
# Print the frames separated by a blank line. With print's default
# single-space separator, df2's header was appended to the last row of df1,
# which made the output hard to read.
print(df1, df2, sep='\n\n')

  key  data1
0   b      0
1   b      1
2   a      2
3   c      3
4   a      4
5   a      5
6   b      6   key  data2
0   a      0
1   b      1
2   d      2


In [17]:
# Many-to-one join: df1 has repeated keys and df2 has one row per key.
# No join column is given, so merge uses the column name the two frames
# share ('key'). Keys present in only one frame ('c', 'd') are absent from
# the result.
pd.merge(df1,df2)

Unnamed: 0,key,data1,data2
0,b,0,1
1,b,1,1
2,b,6,1
3,a,2,0
4,a,4,0
5,a,5,0


In [18]:
# Name the join key explicitly instead of relying on the shared column name.
pd.merge(df1,df2,on='key')

Unnamed: 0,key,data1,data2
0,b,0,1
1,b,1,1
2,b,6,1
3,a,2,0
4,a,4,0
5,a,5,0


### 8.2.2 根据索引合并

In [19]:
# left1 carries the join key as an ordinary column; right1 carries it as its
# row index. left_on / right_index=True tells merge to match the two.
left1 = pd.DataFrame(
    {
        'key': ['a', 'b', 'b', 'a', 'a', 'c'],
        'value': range(6),
    }
)
right1 = pd.DataFrame(
    {'group_val': [3.5, 7]},
    index=['a', 'b'],
)
pd.merge(left1, right1, left_on='key', right_index=True)

Unnamed: 0,key,value,group_val
0,a,0,3.5
3,a,3,3.5
4,a,4,3.5
1,b,1,7.0
2,b,2,7.0


In [20]:
# merge defaults to the intersection of the join keys; how='outer' performs
# an outer join instead, so the unmatched key 'c' is kept with a missing
# group_val.
pd.merge(left1,right1,left_on='key',right_index=True,how='outer')

Unnamed: 0,key,value,group_val
0,a,0,3.5
3,a,3,3.5
4,a,4,3.5
1,b,1,7.0
2,b,2,7.0
5,c,5,


### 8.2.3 沿轴向连接

In [21]:
# 3x4 integer grid holding 0..11 in row-major order, used to demonstrate
# concatenation along each axis.
arr = np.arange(12).reshape(3, 4)
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [22]:
# axis=1 joins the two 3x4 arrays side by side, giving a 3x8 result.
np.concatenate([arr,arr],axis=1)

array([[ 0,  1,  2,  3,  0,  1,  2,  3],
       [ 4,  5,  6,  7,  4,  5,  6,  7],
       [ 8,  9, 10, 11,  8,  9, 10, 11]])

In [23]:
# With no axis argument the arrays are joined along the rows (axis 0),
# stacking one 3x4 block under the other to give a 6x4 result.
np.concatenate([arr,arr])

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [None]:
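# Note: pd.concat's join_axes=[...] argument specified the index to use on the other axes. It was deprecated in pandas 0.25 and removed in 1.0; reindex the inputs or the result instead.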