In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Two parallel label lists: position i of each list together forms the i-th
# (first, second) key of the hierarchical index.
first_level = ['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux']
second_level = ['one', 'two'] * 4
arrays = [first_level, second_level]
tuples = list(zip(first_level, second_level))
index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])

# Use the first six keys on both axes, so rows and columns are each
# labelled by a two-level index; the cell values are random normal draws.
six_keys = index[:6]
df = pd.DataFrame(np.random.randn(6, 6), index=six_keys, columns=six_keys)

df
# To experiment: df.stack() moves column labels into the row index.

Unnamed: 0_level_0,first,bar,bar,baz,baz,foo,foo
Unnamed: 0_level_1,second,one,two,one,two,one,two
first,second,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
bar,one,-0.771934,0.756456,0.56541,-0.432835,0.463277,-1.066387
bar,two,1.333448,0.687361,1.529741,0.642463,-1.018757,-1.31575
baz,one,0.91194,-1.303877,0.061235,-0.75944,0.473684,-1.564714
baz,two,1.720486,0.469485,0.265047,2.349704,-1.316525,1.677491
foo,one,0.10569,-0.547427,1.857891,-1.783535,0.614654,-0.331959
foo,two,0.504453,1.015445,1.428449,0.383355,0.542489,0.176649


In [3]:
# Temporarily disable sparsified MultiIndex display so repeated outer labels
# ('bar', 'bar', ...) are printed on every row/column instead of being blanked.
# The option is restored automatically when the block exits.
dense_labels = pd.option_context('display.multi_sparse', False)
with dense_labels:
    print(df)

first              bar       bar       baz       baz       foo       foo
second             one       two       one       two       one       two
first second                                                            
bar   one    -0.771934  0.756456  0.565410 -0.432835  0.463277 -1.066387
bar   two     1.333448  0.687361  1.529741  0.642463 -1.018757 -1.315750
baz   one     0.911940 -1.303877  0.061235 -0.759440  0.473684 -1.564714
baz   two     1.720486  0.469485  0.265047  2.349704 -1.316525  1.677491
foo   one     0.105690 -0.547427  1.857891 -1.783535  0.614654 -0.331959
foo   two     0.504453  1.015445  1.428449  0.383355  0.542489  0.176649


In [4]:
# A level can be addressed either by position (0) or by name ('second');
# each call returns one label per entry of the MultiIndex.
labels0 = index.get_level_values(0)
labels1 = index.get_level_values('second')

# Full label sequences, repeats included.
labels0_line = 'labels0 -> {}'.format(labels0)
labels1_line = 'labels1 -> {}\n'.format(labels1)
print(labels0_line)
print(labels1_line)

# Distinct labels only: as an Index for level 0, as a plain array for level 1.
distinct0 = labels0.unique()
distinct1_values = labels1.unique().values
print('labels0.unique() -> {}'.format(distinct0))
print('labels1.unique().values -> {}\n'.format(distinct1_values))

labels0 -> Index(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], dtype='object', name='first')
labels1 -> Index(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'], dtype='object', name='second')

labels0.unique() -> Index(['bar', 'baz', 'foo', 'qux'], dtype='object', name='first')
labels1.unique().values -> ['one' 'two']



## Advanced indexing with hierarchical index

In [5]:
# Build a 3 x 8 frame with the hierarchical index on the columns, then
# transpose so the MultiIndex labels the rows and 'A', 'B', 'C' the columns.
# Note: this rebinds `df`, replacing the frame created earlier.
df = pd.DataFrame(
    np.random.randn(3, 8),
    index=['A', 'B', 'C'],
    columns=index,
).T
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,1.649118,-1.249283,-0.249469
bar,two,0.008635,1.290942,-0.004653
baz,one,0.175744,-0.741571,0.078247
baz,two,-0.679255,-0.337499,-0.49093
foo,one,0.540226,-0.851214,-0.696536
foo,two,-1.102737,0.55389,-1.211879
qux,one,-1.188888,1.291136,0.493444
qux,two,-0.945215,0.412778,-0.299432


In [6]:
# A full (first, second) tuple identifies exactly one row; combined with a
# column label, .loc returns a single scalar value.
row_key = ('bar', 'two')
df.loc[row_key, 'A']

0.008634922683611728

#### MultiIndex slicers

In [8]:
def mklbl(prefix,n):
    """Return the list of labels ``<prefix>0`` ... ``<prefix><n-1>``.

    prefix: value rendered (via ``%s``) in front of each counter.
    n: number of labels to generate; counters start at 0.
    """
    labels = []
    for counter in range(n):
        labels.append("%s%s" % (prefix, counter))
    return labels

# Row index: cartesian product of the four label sets (4 * 2 * 4 * 2 = 64 rows).
row_label_sets = [mklbl('A',4), mklbl('B',2), mklbl('C',4), mklbl('D',2)]
miindex = pd.MultiIndex.from_product(row_label_sets, names=['rl1', 'rl2', 'rl3', 'rl4'])

# Column index: four explicit (lvl0, lvl1) pairs.
column_keys = [('a','foo'),('a','bar'),('b','foo'),('b','bah')]
micolumns = pd.MultiIndex.from_tuples(column_keys, names=['lvl0', 'lvl1'])

# Fill the frame with consecutive integers, row by row, so every value
# identifies the position it had before sorting.
n_rows = len(miindex)
n_cols = len(micolumns)
cell_values = np.arange(n_rows * n_cols).reshape((n_rows, n_cols))

# Sort both axes by label; the slicing examples that follow operate on the
# sorted frame.
dfmi = (
    pd.DataFrame(cell_values, index=miindex, columns=micolumns)
    .sort_index()
    .sort_index(axis=1)
)

dfmi

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
rl1,rl2,rl3,rl4,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D0,17,16,19,18
A0,B0,C2,D1,21,20,23,22
A0,B0,C3,D0,25,24,27,26
A0,B0,C3,D1,29,28,31,30
A0,B1,C0,D0,33,32,35,34
A0,B1,C0,D1,37,36,39,38


In [15]:
# One selector per row level, in order: rl1 from 'A1' through 'A3',
# every rl2 value, and rl3 restricted to 'C1' or 'C3'. rl4 is left
# unspecified. The trailing ':' keeps all columns.
row_selector = (slice('A1', 'A3'), slice(None), ['C1', 'C3'])
dfmi.loc[row_selector, :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
rl1,rl2,rl3,rl4,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
A1,B0,C1,D0,73,72,75,74
A1,B0,C1,D1,77,76,79,78
A1,B0,C3,D0,89,88,91,90
A1,B0,C3,D1,93,92,95,94
A1,B1,C1,D0,105,104,107,106
A1,B1,C1,D1,109,108,111,110
A1,B1,C3,D0,121,120,123,122
A1,B1,C3,D1,125,124,127,126
A2,B0,C1,D0,137,136,139,138
A2,B0,C1,D1,141,140,143,142


#### `pandas.IndexSlice`, a.k.a. `idx`

You can use `pandas.IndexSlice` to get a more natural slicing syntax with `:` instead of writing `slice(None)` for every level.

In [17]:
# IndexSlice lets slice syntax (':') be written inside [] to build selectors.
idx = pd.IndexSlice
# Rows: any rl1, any rl2, rl3 in {'C1', 'C3'}.
rows_c1_c3 = idx[:, :, ['C1', 'C3']]
# Columns: any lvl0, lvl1 == 'foo'.
foo_columns = idx[:, 'foo']
dfmi.loc[rows_c1_c3, foo_columns]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,foo
rl1,rl2,rl3,rl4,Unnamed: 4_level_2,Unnamed: 5_level_2
A0,B0,C1,D0,8,10
A0,B0,C1,D1,12,14
A0,B0,C3,D0,24,26
A0,B0,C3,D1,28,30
A0,B1,C1,D0,40,42
A0,B1,C1,D1,44,46
A0,B1,C3,D0,56,58
A0,B1,C3,D1,60,62
A1,B0,C1,D0,72,74
A1,B0,C1,D1,76,78


#### boolean mask + idx

In [29]:
# Row-wise boolean mask: True where the ('a', 'foo') column exceeds 200.
a_foo = dfmi[('a', 'foo')]
mask = a_foo > 200
# The mask is combined with the rl3 label filter; only 'foo' columns are kept.
dfmi.loc[idx[mask, :, ['C1', 'C3']], idx[:, 'foo']]
# The mask may occupy another level slot; these give the same result:
# dfmi.loc[idx[:, mask, ['C1', 'C3']], idx[:, 'foo']]
# dfmi.loc[idx[:, :, ['C1', 'C3'], mask], idx[:, 'foo']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,foo
rl1,rl2,rl3,rl4,Unnamed: 4_level_2,Unnamed: 5_level_2
A3,B0,C1,D1,204,206
A3,B0,C3,D0,216,218
A3,B0,C3,D1,220,222
A3,B1,C1,D0,232,234
A3,B1,C1,D1,236,238
A3,B1,C3,D0,248,250
A3,B1,C3,D1,252,254


In [31]:
# Keep rows whose ('a', 'foo') value lies outside the closed range [100, 200].
a_foo = dfmi[('a', 'foo')]
mask = (a_foo < 100) | (a_foo > 200)
# Mask placed in the fourth slot, combined with the rl3 label filter.
dfmi.loc[idx[:, :, ['C1', 'C3'], mask], idx[:, 'foo']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,foo
rl1,rl2,rl3,rl4,Unnamed: 4_level_2,Unnamed: 5_level_2
A0,B0,C1,D0,8,10
A0,B0,C1,D1,12,14
A0,B0,C3,D0,24,26
A0,B0,C3,D1,28,30
A0,B1,C1,D0,40,42
A0,B1,C1,D1,44,46
A0,B1,C3,D0,56,58
A0,B1,C3,D1,60,62
A1,B0,C1,D0,72,74
A1,B0,C1,D1,76,78


In [52]:
ind = dfmi.index
rl1_values = ind.get_level_values('rl1')
print(rl1_values)

# Position of the first row whose 'rl1' label is 'A1'.
first_a1 = list(rl1_values).index('A1')
print(first_a1)

# MultiIndex.labels was renamed to MultiIndex.codes in pandas 0.24 and removed
# in pandas 1.0. Prefer .codes and fall back to .labels only on old installs,
# so the cell runs on either side of the rename.
level_codes = ind.codes if hasattr(ind, 'codes') else ind.labels
print(level_codes)

# Integer code of each level at that row, one entry per level (rl1..rl4).
# Uses the computed position instead of a hard-coded row number.
tuple(codes[first_a1] for codes in level_codes)

Index(['A0', 'A0', 'A0', 'A0', 'A0', 'A0', 'A0', 'A0', 'A0', 'A0', 'A0', 'A0',
       'A0', 'A0', 'A0', 'A0', 'A1', 'A1', 'A1', 'A1', 'A1', 'A1', 'A1', 'A1',
       'A1', 'A1', 'A1', 'A1', 'A1', 'A1', 'A1', 'A1', 'A2', 'A2', 'A2', 'A2',
       'A2', 'A2', 'A2', 'A2', 'A2', 'A2', 'A2', 'A2', 'A2', 'A2', 'A2', 'A2',
       'A3', 'A3', 'A3', 'A3', 'A3', 'A3', 'A3', 'A3', 'A3', 'A3', 'A3', 'A3',
       'A3', 'A3', 'A3', 'A3'],
      dtype='object', name='rl1')
16
[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3], [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], [0, 0, 1, 1, 2, 2, 3, 3, 0, 0, 1, 1, 2, 2, 3, 3, 0, 0, 1, 1, 2, 2, 3, 3, 0, 0, 1, 1, 2, 2, 3, 3, 0, 0, 1, 1, 2, 2, 3, 3, 0, 0, 1, 1, 2, 2, 3, 3, 0,

(1, 0, 0, 0)

#### swapping levels

In [58]:
# Exchange the outermost (position 0) and innermost (position 3) row levels,
# then re-sort the rows by all four levels in their new order.
swapped = dfmi.swaplevel(0, 3)
swapped.sort_index(level=[0, 1, 2, 3])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
rl4,rl2,rl3,rl1,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
D0,B0,C0,A0,1,0,3,2
D0,B0,C0,A1,65,64,67,66
D0,B0,C0,A2,129,128,131,130
D0,B0,C0,A3,193,192,195,194
D0,B0,C1,A0,9,8,11,10
D0,B0,C1,A1,73,72,75,74
D0,B0,C1,A2,137,136,139,138
D0,B0,C1,A3,201,200,203,202
D0,B0,C2,A0,17,16,19,18
D0,B0,C2,A1,81,80,83,82
