## Hierarchical Indexing

In [1]:
import pandas as pd
import numpy as np

In [44]:
data = pd.Series(np.random.randn(9),
    index=[['a', 'a', 'a', 'b', 'b', 'c', 'c', 'd', 'd'],
    [1, 2, 3, 1, 3, 1, 2, 2, 3]])
data 

a  1   -0.857196
   2   -1.588545
   3   -0.633209
b  1   -0.227143
   3    1.154892
c  1   -0.345913
   2   -0.185574
d  2    1.065660
   3    0.749403
dtype: float64

In [51]:
data = pd.Series(np.random.randn(9),
    index = [['a','a','b','b','b','c','d','d','d'],
            [1,2,1,2,3,3,1,2,3]])
data

a  1   -0.492444
   2   -0.232660
b  1   -2.050239
   2   -0.425832
   3   -0.518747
c  3   -1.498495
d  1    1.524683
   2   -0.829012
   3   -0.084867
dtype: float64

In [52]:
data.index 

MultiIndex(levels=[['a', 'b', 'c', 'd'], [1, 2, 3]],
           labels=[[0, 0, 1, 1, 1, 2, 3, 3, 3], [0, 1, 0, 1, 2, 2, 0, 1, 2]])

In [53]:
'''With a hierarchically indexed object, so-called partial indexing is possible, 
enabling you to concisely select subsets of the data:'''

data['c'] 

3   -1.498495
dtype: float64

In [56]:
data['b':'d'] 

b  1   -2.050239
   2   -0.425832
   3   -0.518747
c  3   -1.498495
d  1    1.524683
   2   -0.829012
   3   -0.084867
dtype: float64

In [55]:
data.loc[['b', 'd']] 

b  1   -2.050239
   2   -0.425832
   3   -0.518747
d  1    1.524683
   2   -0.829012
   3   -0.084867
dtype: float64

In [59]:
data.loc[:, 1] 

a   -0.492444
b   -2.050239
d    1.524683
dtype: float64

In [60]:
#rearrange the data into a DataFrame using its unstack method
data.unstack()  #stack概念


Unnamed: 0,1,2,3
a,-0.492444,-0.23266,
b,-2.050239,-0.425832,-0.518747
c,,,-1.498495
d,1.524683,-0.829012,-0.084867


In [61]:
data.unstack().stack() 

a  1   -0.492444
   2   -0.232660
b  1   -2.050239
   2   -0.425832
   3   -0.518747
c  3   -1.498495
d  1    1.524683
   2   -0.829012
   3   -0.084867
dtype: float64

In [4]:
frame = pd.DataFrame(np.arange(12).reshape(4, 3),
                     index=[['CD', 'CD', 'MP', 'MP'], [1, 2, 1, 2]],
                     columns=[['Ohio', 'Ohio', 'Colorado'],
                              ['Green', 'Red', 'Green']])
frame 

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
CD,1,0,1,2
CD,2,3,4,5
MP,1,6,7,8
MP,2,9,10,11


In [70]:
frame.index.names = ['Candy', 'Class']
frame.columns.names = ['state', 'color']
frame 

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
Candy,Class,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
CD,1,0,1,2
CD,2,3,4,5
MP,1,6,7,8
MP,2,9,10,11


In [71]:
frame['Ohio']['Red']

Candy  Class
CD     1         1
       2         4
MP     1         7
       2        10
Name: Red, dtype: int32

## Reordering and Sorting Levels

In [73]:
frame.swaplevel('Class', 'Candy') 

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
Class,Candy,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,CD,0,1,2
2,CD,3,4,5
1,MP,6,7,8
2,MP,9,10,11


In [80]:
frame.sort_index(level=0) #by key1 or key2

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
Candy,Class,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
CD,1,0,1,2
CD,2,3,4,5
MP,1,6,7,8
MP,2,9,10,11


In [81]:
frame.swaplevel(0, 1)

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
Class,Candy,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,CD,0,1,2
2,CD,3,4,5
1,MP,6,7,8
2,MP,9,10,11


## Summary Statistics by Level

In [82]:
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
Candy,Class,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
CD,1,0,1,2
CD,2,3,4,5
MP,1,6,7,8
MP,2,9,10,11


In [83]:
frame.sum(level='Candy')

state,Ohio,Ohio,Colorado
color,Green,Red,Green
Candy,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
CD,3,5,7
MP,15,17,19


In [88]:
frame.sum(level='color', axis=1)
# frame.sum(level='Candy', axis=0) 

Unnamed: 0_level_0,color,Green,Red
Candy,Class,Unnamed: 2_level_1,Unnamed: 3_level_1
CD,1,2,1
CD,2,8,4
MP,1,14,7
MP,2,20,10


## Indexing with a DataFrame’s columns

In [38]:
frame = pd.DataFrame({'a': range(7), 'b': range(7, 0, -1),
                      'c': ['one', 'one', 'one', 'two', 'two','two', 'two'],
                      'd': [0, 1, 2, 0, 1, 2, 3]})
frame

Unnamed: 0,a,b,c,d
0,0,7,one,0
1,1,6,one,1
2,2,5,one,2
3,3,4,two,0
4,4,3,two,1
5,5,2,two,2
6,6,1,two,3


In [39]:
'''
.set_index：
will create a new DataFrame using one or more of its columns as the index
'''
frame2 = frame.set_index(['c', 'd'])
frame2

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b
c,d,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0,0,7
one,1,1,6
one,2,2,5
two,0,3,4
two,1,4,3
two,2,5,2
two,3,6,1


In [40]:
#for rows
frame.set_index(['c', 'd'], drop=False) 

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c,d
c,d,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,0,0,7,one,0
one,1,1,6,one,1
one,2,2,5,one,2
two,0,3,4,two,0
two,1,4,3,two,1
two,2,5,2,two,2
two,3,6,1,two,3


In [22]:
frame

Unnamed: 0,a,b,c,d
0,0,7,one,0
1,1,6,one,1
2,2,5,one,2
3,3,4,two,0
4,4,3,two,1
5,5,2,two,2
6,6,1,two,3


In [23]:
frame2.reset_index()

Unnamed: 0,c,d,a,b
0,one,0,0,7
1,one,1,1,6
2,one,2,2,5
3,two,0,3,4
4,two,1,4,3
5,two,2,5,2
6,two,3,6,1
