In [1]:
import pandas as pd

In [2]:
import numpy as np

In [3]:
def make_df(cols, ind):
    """Build a small demo DataFrame with predictable cell contents.

    Every cell holds the string form of its column label followed by the
    string form of its row label (e.g. column 'A', row 1 -> 'A1').

    Parameters
    ----------
    cols : iterable
        Column labels.
    ind : iterable
        Row labels; also passed to ``pd.DataFrame`` as the index.

    Returns
    -------
    pandas.DataFrame
        One column per entry of ``cols``, one row per entry of ``ind``.
    """
    columns = {}
    for col in cols:
        columns[col] = [str(col) + str(row) for row in ind]
    return pd.DataFrame(columns, ind)

### 多级索引

In [12]:
# Row labels as plain (label, year) tuples; one population value per tuple.
index = [
    ('a', 2000),
    ('b', 2001),
    ('c', 2000),
    ('a', 2002),
]
populations = [131_515, 15_316_211, 1_513_151, 1_315_135]

In [13]:
# Promote the list of tuples to a two-level MultiIndex (rebinds `index`).
index = pd.MultiIndex.from_tuples(index)
index

MultiIndex(levels=[['a', 'b', 'c'], [2000, 2001, 2002]],
           codes=[[0, 1, 2, 0], [0, 1, 0, 2]])

##### 不可取的笨办法：直接用元组列表作为索引（注意：上一个单元格已把 `index` 转换为 MultiIndex，所以下面创建的其实已经是多级索引的 Series）

In [14]:
# Population values labelled by `index`. `index` is already a MultiIndex at
# this point, so the Series is displayed with hierarchical labels.
pop = pd.Series(data=populations, index=index)
pop

a  2000      131515
b  2001    15316211
c  2000     1513151
a  2002     1315135
dtype: int64

##### 应该这样做：用 MultiIndex 对 Series 重新索引

In [15]:
# Re-label the Series with the MultiIndex. `pop` already carries this index,
# so the values and their order are unchanged.
pop = pop.reindex(index)
pop

a  2000      131515
b  2001    15316211
c  2000     1513151
a  2002     1315135
dtype: int64

In [18]:
# Partial selection: every first-level label, second level == 2000
# (explicit label-based form of `pop[:, 2000]`).
pop.loc[:, 2000]

a     131515
c    1513151
dtype: int64

In [20]:
# Pivot the innermost index level (the years) into columns; label/year
# combinations absent from `pop` show up as NaN.
pop_df = pop.unstack(level=-1)
pop_df

Unnamed: 0,2000,2001,2002
a,131515.0,,1315135.0
b,,15316211.0,
c,1513151.0,,


In [21]:
# Inverse of unstack: fold the year columns back into an inner index level.
pop_df.stack(level=-1)

a  2000      131515.0
   2002     1315135.0
b  2001    15316211.0
c  2000     1513151.0
dtype: float64

In [3]:
# Build the MultiIndex from its raw parts: `levels` lists the unique labels of
# each level, `codes` gives, per row, the position of its label in that level.
index = pd.MultiIndex(
    levels=[['a', 'b'], [1, 2]],
    codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
)

In [6]:
# Four values, one per (letter, number) row of the MultiIndex.
pop = pd.Series(data=[12, 14, 52, 69], index=index)
pop

a  1    12
   2    14
b  1    52
   2    69
dtype: int64

In [8]:
# Select by first-level label only; the result is indexed by the remaining
# level (explicit label-based form of `pop['a']`).
pop.loc['a']

1    12
2    14
dtype: int64

In [7]:
# Supplying a label for every level returns a single scalar
# (explicit label-based form of `pop['a', 2]`).
pop.loc['a', 2]

14

In [9]:
# Boolean-mask selection: keep only the rows whose value exceeds 20.
pop.loc[pop > 20]

b  1    52
   2    69
dtype: int64

### 索引顺序

In [3]:
# Cartesian product of the two label lists. The first level is given in
# non-sorted order ('a', 'c', 'b'); level names are set at construction time
# rather than by mutating `data.index.names` afterwards.
index = pd.MultiIndex.from_product(
    [['a', 'c', 'b'], [1, 2]],
    names=['char', 'int'],
)
# Seeded, local generator: the six uniform [0, 1) samples are identical on
# every "Restart & Run All", and NumPy's global random state is left untouched.
data = pd.Series(np.random.RandomState(0).rand(6), index=index)
data

char  int
a     1      0.577469
      2      0.747314
c     1      0.696739
      2      0.309403
b     1      0.338231
      2      0.868221
dtype: float64

此时索引未按字典序排序，无法进行切片（例如 `data['a':'b']` 会抛出 `UnsortedIndexError`），需要先调用 `sort_index()` 排序。

In [4]:
# Sort the index lexicographically (rebinds `data` to the sorted copy).
data = data.sort_index()
data

char  int
a     1      0.577469
      2      0.747314
b     1      0.338231
      2      0.868221
c     1      0.696739
      2      0.309403
dtype: float64

In [6]:
# Label slice on the first level, inclusive of both endpoints; relies on the
# index having been sorted by `sort_index()` in an earlier cell
# (explicit label-based form of `data['a':'b']`).
data.loc['a':'b']

char  int
a     1      0.577469
      2      0.747314
b     1      0.338231
      2      0.868221
dtype: float64

In [8]:
# Flatten to a DataFrame: each index level becomes an ordinary column and the
# Series values go into a column called 'rand'.
data.reset_index(
    name='rand',
)

Unnamed: 0,char,int,rand
0,a,1,0.577469
1,a,2,0.747314
2,b,1,0.338231
3,b,2,0.868221
4,c,1,0.696739
5,c,2,0.309403


In [10]:
# Round trip: flatten the index into columns, then rebuild the two-level
# index from the 'char' and 'int' columns.
(
    data
    .reset_index(name='rand')
    .set_index(['char', 'int'])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,rand
char,int,Unnamed: 2_level_1
a,1,0.577469
a,2,0.747314
b,1,0.338231
b,2,0.868221
c,1,0.696739
c,2,0.309403
