In [1]:
import numpy as np
from pandas import Series,DataFrame
import pandas as pd

from numpy.random import randn

In [3]:
# Index hierarchy: pandas lets one axis carry several index levels.
# Passing a list of label lists builds a two-level index
# (outer labels 1/2, inner labels a/b/c), as the example shows.
ser = Series(
    randn(6),
    index=[[1, 1, 1, 2, 2, 2],
           ['a', 'b', 'c', 'a', 'b', 'c']],
)

In [4]:
#Show the Series; each outer label (1, 2) is printed once for its group of inner labels
ser

1  a   -0.420368
   b    1.704632
   c   -1.181242
2  a   -0.938952
   b    0.075418
   c    1.477464
dtype: float64

In [6]:
 # Inspect the index object: it is a MultiIndex holding one level per label list
ser.index

MultiIndex(levels=[[1, 2], [u'a', u'b', u'c']],
           labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])

In [7]:
#Now we can select specific subsets: everything under outer label 1
ser.loc[1]

a   -0.420368
b    1.704632
c   -1.181242
dtype: float64

In [8]:
# We can also select from an internal index level:
# keep every outer label, pick inner label 'a'
ser.loc[:, 'a']

1   -0.420368
2   -0.938952
dtype: float64

In [9]:
# A Series with multiple levels can be pivoted into a DataFrame:
# the innermost level (a/b/c) becomes the columns
dframe = ser.unstack(level=-1)

#Show
dframe

Unnamed: 0,a,b,c
1,-0.420368,1.704632,-1.181242
2,-0.938952,0.075418,1.477464


In [10]:
#unstack() on the DataFrame turns it back into a two-level Series, but with the
#levels swapped (a/b/c is now the outer level), so it is not identical to `ser`
#NOTE(review): dframe.stack() should restore the original 1/2-outer ordering - confirm
dframe.unstack()

a  1   -0.420368
   2   -0.938952
b  1    1.704632
   2    0.075418
c  1   -1.181242
   2    1.477464
dtype: float64

In [11]:
# Hierarchical indexing works on both axes of a DataFrame:
# rows get (letter, number) labels, columns get (city, temperature) labels
dframe2 = DataFrame(
    data=np.arange(16).reshape((4, 4)),
    columns=[['NY', 'NY', 'LA', 'SF'],
             ['cold', 'hot', 'hot', 'cold']],
    index=[['a', 'a', 'b', 'b'],
           [1, 2, 1, 2]],
)
dframe2

Unnamed: 0_level_0,Unnamed: 1_level_0,NY,NY,LA,SF
Unnamed: 0_level_1,Unnamed: 1_level_1,cold,hot,hot,cold
a,1,0,1,2,3
a,2,4,5,6,7
b,1,8,9,10,11
b,2,12,13,14,15


In [13]:
#The levels on either axis can be given names

#name the column levels
dframe2.columns.names = ['Cities', 'Temp']

#name the index levels
dframe2.index.names = ['Index1', 'Index2']

#Show
dframe2

Unnamed: 0_level_0,Cities,NY,NY,LA,SF
Unnamed: 0_level_1,Temp,cold,hot,hot,cold
Index1,Index2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,1,0,1,2,3
a,2,4,5,6,7
b,1,8,9,10,11
b,2,12,13,14,15


In [15]:
#Level order can be interchanged too; axis='columns' targets the column levels
dframe2.swaplevel(i='Cities', j='Temp', axis='columns')

Unnamed: 0_level_0,Temp,cold,hot,hot,cold
Unnamed: 0_level_1,Cities,NY,NY,LA,SF
Index1,Index2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,1,0,1,2,3
a,2,4,5,6,7
b,1,8,9,10,11
b,2,12,13,14,15


In [23]:
#Rows can be sorted by a chosen index level; here the second one, referred to by name
dframe2.sort_index(level='Index2')

Unnamed: 0_level_0,Cities,NY,NY,LA,SF
Unnamed: 0_level_1,Temp,cold,hot,hot,cold
Index1,Index2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,1,0,1,2,3
b,1,8,9,10,11
a,2,4,5,6,7
b,2,12,13,14,15


In [24]:
#Note the change in sorting: the DataFrame index is now sorted by the second index level (Index2)


In [25]:
#We can also perform operations on particular levels
#(`DataFrame.sum(level=...)` was removed in pandas 2.0, so group on the level instead)
#Transpose so the column levels become row levels, sum within each 'Temp' group,
#then transpose back so the rows are indexed by Index1/Index2 again
dframe2.T.groupby(level='Temp').sum().T

Unnamed: 0_level_0,Temp,cold,hot
Index1,Index2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,3,3
a,2,11,11
b,1,19,19
b,2,27,27
