In [1]:
import pandas as pd
import numpy as np

# 1 Hierarchical indexing (MultiIndex)

## 1.1 Creating a MultiIndex object

In [2]:
arrays = [
    ['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
    ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']
]
arrays

[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
 ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]

In [3]:
tuples = list(zip(*arrays)); tuples

[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]

In [4]:
index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
index

MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
           labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
           names=['first', 'second'])

In [5]:
s = pd.Series(np.random.randn(8), index=index)
s

first  second
bar    one       1.661198
       two      -0.189759
baz    one      -0.437132
       two      -0.109275
foo    one       1.852422
       two       0.646600
qux    one       0.936533
       two       0.362061
dtype: float64

In [6]:
iterables = [['bar', 'baz', 'foo', 'qux'], ['one', 'two']]

In [7]:
pd.MultiIndex.from_product(iterables, names=['first', 'second'])

MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
           labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
           names=['first', 'second'])

In [8]:
s = pd.Series(np.random.randn(8), index=arrays)
s

bar  one   -0.243480
     two   -0.252371
baz  one    2.066949
     two   -1.450699
foo  one    0.364512
     two    0.004851
qux  one    0.359941
     two    0.424922
dtype: float64

In [9]:
arrays = [
    np.array(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux']),
    np.array(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'])
]

In [10]:
s = pd.Series(np.random.randn(8), index=arrays)
s

bar  one   -1.257294
     two   -0.229304
baz  one    0.056317
     two    1.761722
foo  one    0.084193
     two   -0.045295
qux  one    0.019855
     two    0.840311
dtype: float64

In [11]:
df = pd.DataFrame(np.random.randn(8, 4), index=arrays)
df

Unnamed: 0,Unnamed: 1,0,1,2,3
bar,one,0.534755,0.862152,-1.856978,-1.59962
bar,two,0.721928,0.634421,-0.068895,0.462318
baz,one,0.752075,-1.137665,-1.520126,0.157731
baz,two,0.993903,-0.38225,1.072863,0.87453
foo,one,-1.194706,0.75501,0.331135,0.805441
foo,two,-0.762158,0.157262,-0.887823,-1.070862
qux,one,0.543013,-1.627763,-1.734227,-0.155116
qux,two,-1.513871,-1.558163,2.180366,-0.806075


In [12]:
df.index.names

FrozenList([None, None])

In [13]:
index

MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
           labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
           names=['first', 'second'])

In [14]:
df = pd.DataFrame(np.random.randn(3, 8), index=['A', 'B', 'C'], columns=index)
df

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,0.716672,-0.584356,-0.00696,-0.180276,1.232329,0.310096,0.626289,-0.617187
B,-0.248329,0.439353,-0.537444,0.336864,0.216987,0.843178,-0.944052,0.839479
C,-0.50488,1.355695,-0.649914,-1.184475,-0.666082,-0.098478,0.430738,0.347153


In [15]:
pd.DataFrame(np.random.randn(6,6), index=index[:6], columns=index[:6])

Unnamed: 0_level_0,first,bar,bar,baz,baz,foo,foo
Unnamed: 0_level_1,second,one,two,one,two,one,two
first,second,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
bar,one,0.361393,0.01007,0.726394,0.404799,0.479856,-0.380783
bar,two,0.87978,0.071757,0.552168,2.020111,-0.33255,-0.548968
baz,one,1.94646,0.896479,-0.224418,-0.667932,-0.750195,0.447053
baz,two,0.012223,-0.670436,-0.051103,1.737144,-1.044526,1.784837
foo,one,-0.870993,-0.056488,-0.467795,0.444885,0.425277,1.766682
foo,two,-0.439388,1.074512,0.030803,-1.317823,1.634811,0.96224


In [16]:
tuples

[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]

In [17]:
pd.Series(np.random.randn(8), index=tuples)

(bar, one)    0.616991
(bar, two)    1.605279
(baz, one)    0.832294
(baz, two)    0.227029
(foo, one)    0.487696
(foo, two)    1.772634
(qux, one)   -0.388832
(qux, two)   -1.037342
dtype: float64

In [18]:
index

MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
           labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
           names=['first', 'second'])

## 1.2 Reconstructing the level labels

In [19]:
index.get_level_values(0)

Index(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], dtype='object', name='first')

In [20]:
index.get_level_values(1)

Index(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'], dtype='object', name='second')

In [21]:
index.get_level_values('second') # mesmo que o anterior

Index(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'], dtype='object', name='second')

## 1.3 Basic indexing on axis with MultiIndex

In [22]:
df['bar']

second,one,two
A,0.716672,-0.584356
B,-0.248329,0.439353
C,-0.50488,1.355695


In [23]:
df['bar', 'one']

A    0.716672
B   -0.248329
C   -0.504880
Name: (bar, one), dtype: float64

In [24]:
df['bar']['one']

A    0.716672
B   -0.248329
C   -0.504880
Name: one, dtype: float64

In [25]:
s

bar  one   -1.257294
     two   -0.229304
baz  one    0.056317
     two    1.761722
foo  one    0.084193
     two   -0.045295
qux  one    0.019855
     two    0.840311
dtype: float64

In [26]:
s['qux']

one    0.019855
two    0.840311
dtype: float64

## 1.4 Defined Levels

In [27]:
df.columns

MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
           labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
           names=['first', 'second'])

In [28]:
df[['foo', 'qux']].columns

MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
           labels=[[2, 2, 3, 3], [0, 1, 0, 1]],
           names=['first', 'second'])

In [29]:
df[['foo', 'qux']].columns.values

array([('foo', 'one'), ('foo', 'two'), ('qux', 'one'), ('qux', 'two')], dtype=object)

In [30]:
df[['foo', 'qux']].columns.get_level_values(0)

Index(['foo', 'foo', 'qux', 'qux'], dtype='object', name='first')

In [31]:
df[['foo', 'qux']].columns.remove_unused_levels()

MultiIndex(levels=[['foo', 'qux'], ['one', 'two']],
           labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
           names=['first', 'second'])

## 1.5 Data alignment and using 

In [32]:
s

bar  one   -1.257294
     two   -0.229304
baz  one    0.056317
     two    1.761722
foo  one    0.084193
     two   -0.045295
qux  one    0.019855
     two    0.840311
dtype: float64

In [33]:
s + s[:-2] # soma os valores menos as duas ultimas linhas

bar  one   -2.514587
     two   -0.458608
baz  one    0.112634
     two    3.523445
foo  one    0.168385
     two   -0.090590
qux  one         NaN
     two         NaN
dtype: float64

In [34]:
s + s[::2] # soma os valores de duas em duas linhas

bar  one   -2.514587
     two         NaN
baz  one    0.112634
     two         NaN
foo  one    0.168385
     two         NaN
qux  one    0.039710
     two         NaN
dtype: float64

In [35]:
index

MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
           labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
           names=['first', 'second'])

In [36]:
s

bar  one   -1.257294
     two   -0.229304
baz  one    0.056317
     two    1.761722
foo  one    0.084193
     two   -0.045295
qux  one    0.019855
     two    0.840311
dtype: float64

In [37]:
index[:3]

MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
           labels=[[0, 0, 1], [0, 1, 0]],
           names=['first', 'second'])

In [38]:
s.reindex(index[:3])

first  second
bar    one      -1.257294
       two      -0.229304
baz    one       0.056317
dtype: float64

In [39]:
s.reindex([('foo', 'two'), ('bar', 'one'), ('qux', 'one'), ('baz', 'one')])

foo  two   -0.045295
bar  one   -1.257294
qux  one    0.019855
baz  one    0.056317
dtype: float64

## 1.6 Advanced indexing with hierarchical index

In [40]:
df

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,0.716672,-0.584356,-0.00696,-0.180276,1.232329,0.310096,0.626289,-0.617187
B,-0.248329,0.439353,-0.537444,0.336864,0.216987,0.843178,-0.944052,0.839479
C,-0.50488,1.355695,-0.649914,-1.184475,-0.666082,-0.098478,0.430738,0.347153


In [41]:
df.T

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,0.716672,-0.248329,-0.50488
bar,two,-0.584356,0.439353,1.355695
baz,one,-0.00696,-0.537444,-0.649914
baz,two,-0.180276,0.336864,-1.184475
foo,one,1.232329,0.216987,-0.666082
foo,two,0.310096,0.843178,-0.098478
qux,one,0.626289,-0.944052,0.430738
qux,two,-0.617187,0.839479,0.347153


In [42]:
df = df.T; df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,0.716672,-0.248329,-0.50488
bar,two,-0.584356,0.439353,1.355695
baz,one,-0.00696,-0.537444,-0.649914
baz,two,-0.180276,0.336864,-1.184475
foo,one,1.232329,0.216987,-0.666082
foo,two,0.310096,0.843178,-0.098478
qux,one,0.626289,-0.944052,0.430738
qux,two,-0.617187,0.839479,0.347153


In [43]:
df.loc['bar']

Unnamed: 0_level_0,A,B,C
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0.716672,-0.248329,-0.50488
two,-0.584356,0.439353,1.355695


In [44]:
df.loc['bar', 'two']

A   -0.584356
B    0.439353
C    1.355695
Name: (bar, two), dtype: float64

In [45]:
df.loc['bar':'foo'] # localiza em um intervalo de index

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,0.716672,-0.248329,-0.50488
bar,two,-0.584356,0.439353,1.355695
baz,one,-0.00696,-0.537444,-0.649914
baz,two,-0.180276,0.336864,-1.184475
foo,one,1.232329,0.216987,-0.666082
foo,two,0.310096,0.843178,-0.098478


In [46]:
df.loc[('baz', 'two'): ('qux','one')]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
baz,two,-0.180276,0.336864,-1.184475
foo,one,1.232329,0.216987,-0.666082
foo,two,0.310096,0.843178,-0.098478
qux,one,0.626289,-0.944052,0.430738


In [47]:
df.loc[('baz', 'two'): 'foo'] 

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
baz,two,-0.180276,0.336864,-1.184475
foo,one,1.232329,0.216987,-0.666082
foo,two,0.310096,0.843178,-0.098478


## 1.7 Using slicers

In [48]:
def mklbl(prefix, n):
    return ["%s%s" % (prefix, i) for i in range(n)]

In [49]:
miindex = pd.MultiIndex.from_product([
    mklbl('A', 4),
    mklbl('B', 2),
    mklbl('C', 4),
    mklbl('D', 2)
])

In [50]:
micolumns = pd.MultiIndex.from_tuples([
    ('a', 'foo'), ('a', 'bar'), ('b', 'foo'), ('b', 'bah')
], names=['lv10', 'lvl1'])

In [51]:
dfmi = (pd.DataFrame(
            np.arange(len(miindex) * len(micolumns))
              .reshape((len(miindex), len(micolumns))),
            index=miindex,
            columns=micolumns)
        .sort_index(axis=1))
dfmi

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lv10,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D0,17,16,19,18
A0,B0,C2,D1,21,20,23,22
A0,B0,C3,D0,25,24,27,26
A0,B0,C3,D1,29,28,31,30
A0,B1,C0,D0,33,32,35,34
A0,B1,C0,D1,37,36,39,38


In [52]:
dfmi.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lv10,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A1,B0,C1,D0,73,72,75,74
A1,B0,C1,D1,77,76,79,78
A1,B0,C3,D0,89,88,91,90
A1,B0,C3,D1,93,92,95,94
A1,B1,C1,D0,105,104,107,106
A1,B1,C1,D1,109,108,111,110
A1,B1,C3,D0,121,120,123,122
A1,B1,C3,D1,125,124,127,126
A2,B0,C1,D0,137,136,139,138
A2,B0,C1,D1,141,140,143,142


In [53]:
idx = pd.IndexSlice

In [54]:
dfmi.loc[idx[:, :, ['C1', 'C3']], idx[:, 'foo']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lv10,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,foo
A0,B0,C1,D0,8,10
A0,B0,C1,D1,12,14
A0,B0,C3,D0,24,26
A0,B0,C3,D1,28,30
A0,B1,C1,D0,40,42
A0,B1,C1,D1,44,46
A0,B1,C3,D0,56,58
A0,B1,C3,D1,60,62
A1,B0,C1,D0,72,74
A1,B0,C1,D1,76,78


In [55]:
dfmi.loc['A1', (slice(None), 'foo')]

Unnamed: 0_level_0,Unnamed: 1_level_0,lv10,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,lvl1,foo,foo
B0,C0,D0,64,66
B0,C0,D1,68,70
B0,C1,D0,72,74
B0,C1,D1,76,78
B0,C2,D0,80,82
B0,C2,D1,84,86
B0,C3,D0,88,90
B0,C3,D1,92,94
B1,C0,D0,96,98
B1,C0,D1,100,102


In [56]:
 dfmi.loc[idx[:, :, ['C1', 'C3']], idx[:, 'foo']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lv10,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,foo
A0,B0,C1,D0,8,10
A0,B0,C1,D1,12,14
A0,B0,C3,D0,24,26
A0,B0,C3,D1,28,30
A0,B1,C1,D0,40,42
A0,B1,C1,D1,44,46
A0,B1,C3,D0,56,58
A0,B1,C3,D1,60,62
A1,B0,C1,D0,72,74
A1,B0,C1,D1,76,78


In [57]:
mask = dfmi[('a', 'foo')] > 200

In [58]:
mask

A0  B0  C0  D0    False
            D1    False
        C1  D0    False
            D1    False
        C2  D0    False
            D1    False
        C3  D0    False
            D1    False
    B1  C0  D0    False
            D1    False
        C1  D0    False
            D1    False
        C2  D0    False
            D1    False
        C3  D0    False
            D1    False
A1  B0  C0  D0    False
            D1    False
        C1  D0    False
            D1    False
        C2  D0    False
            D1    False
        C3  D0    False
            D1    False
    B1  C0  D0    False
            D1    False
        C1  D0    False
            D1    False
        C2  D0    False
            D1    False
                  ...  
A2  B0  C1  D0    False
            D1    False
        C2  D0    False
            D1    False
        C3  D0    False
            D1    False
    B1  C0  D0    False
            D1    False
        C1  D0    False
            D1    False
        C2  D0  

In [59]:
dfmi.loc[idx[mask, :, ['C1', 'C3']], idx[:, 'foo']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lv10,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,foo
A3,B0,C1,D1,204,206
A3,B0,C3,D0,216,218
A3,B0,C3,D1,220,222
A3,B1,C1,D0,232,234
A3,B1,C1,D1,236,238
A3,B1,C3,D0,248,250
A3,B1,C3,D1,252,254


## 1.8 Coress-section

In [60]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,0.716672,-0.248329,-0.50488
bar,two,-0.584356,0.439353,1.355695
baz,one,-0.00696,-0.537444,-0.649914
baz,two,-0.180276,0.336864,-1.184475
foo,one,1.232329,0.216987,-0.666082
foo,two,0.310096,0.843178,-0.098478
qux,one,0.626289,-0.944052,0.430738
qux,two,-0.617187,0.839479,0.347153


In [61]:
df.xs('one', level='second')

Unnamed: 0_level_0,A,B,C
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,0.716672,-0.248329,-0.50488
baz,-0.00696,-0.537444,-0.649914
foo,1.232329,0.216987,-0.666082
qux,0.626289,-0.944052,0.430738


In [62]:
df.loc[(slice(None), 'one'),:]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,0.716672,-0.248329,-0.50488
baz,one,-0.00696,-0.537444,-0.649914
foo,one,1.232329,0.216987,-0.666082
qux,one,0.626289,-0.944052,0.430738


In [63]:
df = df.T

In [64]:
df

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,0.716672,-0.584356,-0.00696,-0.180276,1.232329,0.310096,0.626289,-0.617187
B,-0.248329,0.439353,-0.537444,0.336864,0.216987,0.843178,-0.944052,0.839479
C,-0.50488,1.355695,-0.649914,-1.184475,-0.666082,-0.098478,0.430738,0.347153


In [65]:
df.xs('one', level='second', axis=1)

first,bar,baz,foo,qux
A,0.716672,-0.00696,1.232329,0.626289
B,-0.248329,-0.537444,0.216987,-0.944052
C,-0.50488,-0.649914,-0.666082,0.430738


In [66]:
df.loc[:, (slice(None), 'one')]

first,bar,baz,foo,qux
second,one,one,one,one
A,0.716672,-0.00696,1.232329,0.626289
B,-0.248329,-0.537444,0.216987,-0.944052
C,-0.50488,-0.649914,-0.666082,0.430738


In [67]:
df.xs(('one', 'bar'), level=('second', 'first'), axis=1)

first,bar
second,one
A,0.716672
B,-0.248329
C,-0.50488


In [68]:
df.xs('one', level='second', axis=1, drop_level=True)

first,bar,baz,foo,qux
A,0.716672,-0.00696,1.232329,0.626289
B,-0.248329,-0.537444,0.216987,-0.944052
C,-0.50488,-0.649914,-0.666082,0.430738


## 1.9 Advanced reindexing and alignment

In [69]:
midx = pd.MultiIndex(levels=[['zero', 'one'], ['x', 'x']],
                    labels=[[1, 1, 0, 0], [1, 0, 1, 0]])
midx

MultiIndex(levels=[['zero', 'one'], ['x', 'x']],
           labels=[[1, 1, 0, 0], [1, 0, 1, 0]])

In [70]:
df = pd.DataFrame(np.random.randn(4,2), index=midx)
df

Unnamed: 0,Unnamed: 1,0,1
one,x,-0.787529,0.777548
one,x,0.274582,0.447482
zero,x,0.603486,-0.148635
zero,x,-0.049966,-0.662502


In [71]:
df2 = df.mean(level=0); df2

Unnamed: 0,0,1
one,-0.256474,0.612515
zero,0.27676,-0.405568


In [72]:
df2.reindex(df.index, level=0)

Unnamed: 0,Unnamed: 1,0,1
one,x,-0.256474,0.612515
one,x,-0.256474,0.612515
zero,x,0.27676,-0.405568
zero,x,0.27676,-0.405568


In [73]:
df_aligned, df2_aligned = df.align(df2, level=0)

In [74]:
df_aligned

Unnamed: 0,Unnamed: 1,0,1
one,x,-0.787529,0.777548
one,x,0.274582,0.447482
zero,x,0.603486,-0.148635
zero,x,-0.049966,-0.662502


In [75]:
df2_aligned

Unnamed: 0,Unnamed: 1,0,1
one,x,-0.256474,0.612515
one,x,-0.256474,0.612515
zero,x,0.27676,-0.405568
zero,x,0.27676,-0.405568


## 1.10 Swapping levels with swaplevel()

In [76]:
df[:5]

Unnamed: 0,Unnamed: 1,0,1
one,x,-0.787529,0.777548
one,x,0.274582,0.447482
zero,x,0.603486,-0.148635
zero,x,-0.049966,-0.662502


In [77]:
df[:5].swaplevel(0, 1, axis=0) # troca o nível

Unnamed: 0,Unnamed: 1,0,1
x,one,-0.787529,0.777548
x,one,0.274582,0.447482
x,zero,0.603486,-0.148635
x,zero,-0.049966,-0.662502


## 1.11 Reordering levels with reorder_levels()

In [78]:
df[:5]

Unnamed: 0,Unnamed: 1,0,1
one,x,-0.787529,0.777548
one,x,0.274582,0.447482
zero,x,0.603486,-0.148635
zero,x,-0.049966,-0.662502


In [79]:
df[:5].reorder_levels([1, 0], axis=0)

Unnamed: 0,Unnamed: 1,0,1
x,one,-0.787529,0.777548
x,one,0.274582,0.447482
x,zero,0.603486,-0.148635
x,zero,-0.049966,-0.662502


## 1.12 Sorting a MultiIndex

In [88]:
import random

In [89]:
random.shuffle(tuples)

In [90]:
s = pd.Series(np.random.randn(8), index=pd.MultiIndex.from_tuples(tuples))
s

qux  one   -0.381796
bar  one   -0.536179
qux  two    0.648748
foo  one    1.548490
bar  two    1.108795
foo  two   -0.314873
baz  two    1.091798
     one   -0.528110
dtype: float64

In [91]:
s.sort_index(level=0)

bar  one   -0.536179
     two    1.108795
baz  one   -0.528110
     two    1.091798
foo  one    1.548490
     two   -0.314873
qux  one   -0.381796
     two    0.648748
dtype: float64

In [92]:
s.sort_index(level=1)

bar  one   -0.536179
baz  one   -0.528110
foo  one    1.548490
qux  one   -0.381796
bar  two    1.108795
baz  two    1.091798
foo  two   -0.314873
qux  two    0.648748
dtype: float64

In [93]:
s.index.set_names(['L1', 'L2'], inplace=True)

In [94]:
s.sort_index(level='L1')

L1   L2 
bar  one   -0.536179
     two    1.108795
baz  one   -0.528110
     two    1.091798
foo  one    1.548490
     two   -0.314873
qux  one   -0.381796
     two    0.648748
dtype: float64

In [97]:
s.sort_index(level='L2')

L1   L2 
bar  one   -0.536179
baz  one   -0.528110
foo  one    1.548490
qux  one   -0.381796
bar  two    1.108795
baz  two    1.091798
foo  two   -0.314873
qux  two    0.648748
dtype: float64

In [98]:
df

Unnamed: 0,Unnamed: 1,0,1
one,x,-0.787529,0.777548
one,x,0.274582,0.447482
zero,x,0.603486,-0.148635
zero,x,-0.049966,-0.662502


In [99]:
df.T.sort_index(level=1, axis=1)

Unnamed: 0_level_0,zero,one,zero,one
Unnamed: 0_level_1,x,x,x.1,x.1
0,-0.049966,0.274582,0.603486,-0.787529
1,-0.662502,0.447482,-0.148635,0.777548


In [100]:
dfm = pd.DataFrame({
    'jim': [0, 0, 1, 1],
    'joe': ['x', 'x', 'z', 'y'],
    'jolie': np.random.rand(4)
})
dfm

Unnamed: 0,jim,joe,jolie
0,0,x,0.733704
1,0,x,0.82213
2,1,z,0.107833
3,1,y,0.65018


In [101]:
dfm = dfm.set_index(['jim', 'joe']); dfm

Unnamed: 0_level_0,Unnamed: 1_level_0,jolie
jim,joe,Unnamed: 2_level_1
0,x,0.733704
0,x,0.82213
1,z,0.107833
1,y,0.65018


In [102]:
dfm.loc[1, 'z']

  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,Unnamed: 1_level_0,jolie
jim,joe,Unnamed: 2_level_1
1,z,0.107833


In [103]:
dfm.loc[(0, 'y'):(1, 'z')]

UnsortedIndexError: 'Key length (2) was greater than MultiIndex lexsort depth (1)'

In [104]:
dfm.index.is_lexsorted()

False

In [105]:
dfm.index.lexsort_depth

1

In [106]:
dfm = dfm.sort_index(); dfm

Unnamed: 0_level_0,Unnamed: 1_level_0,jolie
jim,joe,Unnamed: 2_level_1
0,x,0.733704
0,x,0.82213
1,y,0.65018
1,z,0.107833


In [107]:
dfm.index.is_lexsorted()

True

In [108]:
dfm.index.lexsort_depth

2

In [109]:
 dfm.loc[(0,'y'):(1, 'z')]

Unnamed: 0_level_0,Unnamed: 1_level_0,jolie
jim,joe,Unnamed: 2_level_1
1,y,0.65018
1,z,0.107833


## 1.13 Take Methods

In [112]:
index = pd.Index(np.random.randint(0, 1000, 10))
index

Int64Index([116, 651, 691, 773, 861, 368, 762, 997, 103, 2], dtype='int64')

In [113]:
positions = [0, 9, 3]

In [114]:
index[positions]

Int64Index([116, 2, 773], dtype='int64')

In [115]:
index.take(positions)

Int64Index([116, 2, 773], dtype='int64')

In [116]:
ser = pd.Series(np.random.randn(10))

In [117]:
ser

0    0.893937
1    0.309469
2   -1.269666
3    1.721294
4    0.877875
5   -0.387215
6    0.564095
7   -1.938894
8   -0.648470
9   -0.724051
dtype: float64

In [118]:
ser.iloc[positions]

0    0.893937
9   -0.724051
3    1.721294
dtype: float64

In [119]:
ser.take(positions)

0    0.893937
9   -0.724051
3    1.721294
dtype: float64

In [121]:
frm = pd.DataFrame(np.random.randn(5, 3))
frm

Unnamed: 0,0,1,2
0,-0.705508,1.16045,-0.08164
1,-1.079504,-1.081768,0.436331
2,-0.535784,-1.595242,0.21854
3,-0.744278,-0.825634,1.295145
4,0.508435,0.013689,0.213475


In [122]:
frm.take([1, 4, 3])

Unnamed: 0,0,1,2
1,-1.079504,-1.081768,0.436331
4,0.508435,0.013689,0.213475
3,-0.744278,-0.825634,1.295145


In [123]:
frm.take([0, 2], axis=1)

Unnamed: 0,0,2
0,-0.705508,-0.08164
1,-1.079504,0.436331
2,-0.535784,0.21854
3,-0.744278,1.295145
4,0.508435,0.213475


In [124]:
arr = np.random.randn(10)

In [126]:
arr.take([False, False, True, True])

array([-0.45759144, -0.45759144,  0.44613521,  0.44613521])

In [127]:
arr[[0,1]]

array([-0.45759144,  0.44613521])

In [128]:
ser = pd.Series(np.random.randn(10))

In [130]:
ser.take([False, False, True, True])

0   -1.042134
0   -1.042134
1   -1.069794
1   -1.069794
dtype: float64

In [133]:
ser.iloc[[0, 1]]

0   -1.042134
1   -1.069794
dtype: float64