# A Multiply Indexed Series

In [2]:
import numpy as np
import pandas as pd

In [3]:
index = [('Ha Noi', 2000), ('Ha Noi', 2010),
         ('Da Nang', 2000), ('Da Nang',2010),
         ('Hue',2000), ('Hue',2010)]
populations = [7658765,8053663,
               987687,1134310,
               897065,1128620]
pop = pd.Series(populations, index=index)
pop

(Ha Noi, 2000)     7658765
(Ha Noi, 2010)     8053663
(Da Nang, 2000)     987687
(Da Nang, 2010)    1134310
(Hue, 2000)         897065
(Hue, 2010)        1128620
dtype: int64

In [4]:
pop[('Ha Noi',2000) : ('Hue',2000)]

(Ha Noi, 2000)     7658765
(Ha Noi, 2010)     8053663
(Da Nang, 2000)     987687
(Da Nang, 2010)    1134310
(Hue, 2000)         897065
dtype: int64

In [5]:
pop[[i for i in pop.index if i[1]==2010]]

(Ha Noi, 2010)     8053663
(Da Nang, 2010)    1134310
(Hue, 2010)        1128620
dtype: int64

In [6]:
index = pd.MultiIndex.from_tuples(index)
index

MultiIndex([( 'Ha Noi', 2000),
            ( 'Ha Noi', 2010),
            ('Da Nang', 2000),
            ('Da Nang', 2010),
            (    'Hue', 2000),
            (    'Hue', 2010)],
           )

In [7]:
pop = pop.reindex(index)
pop

Ha Noi   2000    7658765
         2010    8053663
Da Nang  2000     987687
         2010    1134310
Hue      2000     897065
         2010    1128620
dtype: int64

In [8]:
pop[:,2010]

Ha Noi     8053663
Da Nang    1134310
Hue        1128620
dtype: int64

In [9]:
pop_df = pop.unstack()
pop_df

Unnamed: 0,2000,2010
Da Nang,987687,1134310
Ha Noi,7658765,8053663
Hue,897065,1128620


In [10]:
pop_df.stack()

Da Nang  2000     987687
         2010    1134310
Ha Noi   2000    7658765
         2010    8053663
Hue      2000     897065
         2010    1128620
dtype: int64

In [11]:
pop_df = pd.DataFrame({'total':pop,
                      'under18':[1879556,2054678,
                                245645, 345765,
                                266576, 298767]})
pop_df

Unnamed: 0,Unnamed: 1,total,under18
Ha Noi,2000,7658765,1879556
Ha Noi,2010,8053663,2054678
Da Nang,2000,987687,245645
Da Nang,2010,1134310,345765
Hue,2000,897065,266576
Hue,2010,1128620,298767


In [12]:
f_u18 = pop_df['under18'] / pop_df['total']
f_u18.unstack()

Unnamed: 0,2000,2010
Da Nang,0.248707,0.304824
Ha Noi,0.245412,0.255123
Hue,0.297165,0.264719


# Methods of MultiIndex Creation

In [13]:
df = pd.DataFrame(np.random.randint(0,10,(4,2)),
                 index=[['a','a','b','b'],[1,2,1,2]],
                 columns=['data1','data2'])
df

Unnamed: 0,Unnamed: 1,data1,data2
a,1,3,1
a,2,1,4
b,1,6,7
b,2,8,7


In [14]:
data = {('Ha Noi',2000): 7658765,
        ('Ha Noi',2010): 8053663,
        ('Da Nang',2000): 987687,
        ('Da Nang',2010): 1134310,
        ('Hue',2000): 987065,
        ('Hue',2010): 1128620}
pd.Series(data)

Ha Noi   2000    7658765
         2010    8053663
Da Nang  2000     987687
         2010    1134310
Hue      2000     987065
         2010    1128620
dtype: int64

In [15]:
pd.MultiIndex.from_arrays([['a','a','b','b'],[1,2,1,2]])

MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2)],
           )

In [16]:
pd.MultiIndex.from_tuples([('a',1),('a',2),('b',1),('b',2)])

MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2)],
           )

In [17]:
pd.MultiIndex.from_product([['a','b'],[1,2]])

MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2)],
           )

In [18]:
pd.MultiIndex(levels=[['a','b'],[1,2]],
             labels=[[0,0,1,1],[0,1,0,1]])

TypeError: __new__() got an unexpected keyword argument 'labels'

In [19]:
pop

Ha Noi   2000    7658765
         2010    8053663
Da Nang  2000     987687
         2010    1134310
Hue      2000     897065
         2010    1128620
dtype: int64

In [20]:
pop.index.names = ['cites','year']

In [21]:
pop

cites    year
Ha Noi   2000    7658765
         2010    8053663
Da Nang  2000     987687
         2010    1134310
Hue      2000     897065
         2010    1128620
dtype: int64

# Indexing and Slicing a MultiIndex

In [22]:
pop

cites    year
Ha Noi   2000    7658765
         2010    8053663
Da Nang  2000     987687
         2010    1134310
Hue      2000     897065
         2010    1128620
dtype: int64

In [23]:
pop['Ha Noi',2010]

8053663

In [24]:
pop['Ha Noi']

year
2000    7658765
2010    8053663
dtype: int64

In [26]:
pop.loc['Da Nang':'Hue']

UnsortedIndexError: 'Key length (1) was greater than MultiIndex lexsort depth (0)'

In [27]:
pop[:,2000]

cites
Ha Noi     7658765
Da Nang     987687
Hue         897065
dtype: int64

In [28]:
pop[pop>1000000]

cites    year
Ha Noi   2000    7658765
         2010    8053663
Da Nang  2010    1134310
Hue      2010    1128620
dtype: int64

In [29]:
pop[['Ha Noi','Hue']]

cites   year
Ha Noi  2000    7658765
        2010    8053663
Hue     2000     897065
        2010    1128620
dtype: int64

In [30]:
index = pd.MultiIndex.from_product([[2013, 2014], [1, 2]],
names=['year', 'visit'])
columns = pd.MultiIndex.from_product([['Bob', 'Guido', 'Sue'], ['HR', 'Temp']],
names=['subject', 'type'])
data = np.round(np.random.randn(4, 6), 1)
data[:, ::2] *= 10
data += 37

In [31]:
health_data = pd.DataFrame(data, index=index, columns=columns)
health_data

Unnamed: 0_level_0,subject,Bob,Bob,Guido,Guido,Sue,Sue
Unnamed: 0_level_1,type,HR,Temp,HR,Temp,HR,Temp
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2013,1,44.0,35.1,42.0,36.2,42.0,36.5
2013,2,58.0,38.6,45.0,37.1,42.0,37.0
2014,1,36.0,36.9,37.0,35.3,34.0,38.1
2014,2,40.0,37.9,29.0,34.9,55.0,36.8


In [32]:
health_data['Guido', 'HR']

year  visit
2013  1        42.0
      2        45.0
2014  1        37.0
      2        29.0
Name: (Guido, HR), dtype: float64

In [33]:
health_data.iloc[:2,:2]

Unnamed: 0_level_0,subject,Bob,Bob
Unnamed: 0_level_1,type,HR,Temp
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2
2013,1,44.0,35.1
2013,2,58.0,38.6


In [34]:
health_data.loc[:,('Bob','HR')]

year  visit
2013  1        44.0
      2        58.0
2014  1        36.0
      2        40.0
Name: (Bob, HR), dtype: float64

# Rearranging Multi-Indices

In [40]:
index = pd.MultiIndex.from_product([['a', 'c', 'b'], [1, 2]])
data = pd.Series(np.random.rand(6), index=index)
data.index.names = ['char', 'int']
data

char  int
a     1      0.861963
      2      0.612948
c     1      0.195125
      2      0.443881
b     1      0.896779
      2      0.579484
dtype: float64

In [42]:
try:
    data['a':'b']
except KeyError as e:    
    print(type(e))
    print(e)

<class 'pandas.errors.UnsortedIndexError'>
'Key length (1) was greater than MultiIndex lexsort depth (0)'


In [43]:
data = data.sort_index()
data

char  int
a     1      0.861963
      2      0.612948
b     1      0.896779
      2      0.579484
c     1      0.195125
      2      0.443881
dtype: float64

In [44]:
pop

cites    year
Ha Noi   2000    7658765
         2010    8053663
Da Nang  2000     987687
         2010    1134310
Hue      2000     897065
         2010    1128620
dtype: int64

In [45]:
pop.unstack(level=0)

cites,Da Nang,Ha Noi,Hue
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000,987687,7658765,897065
2010,1134310,8053663,1128620


In [46]:
pop.unstack(level=1)

year,2000,2010
cites,Unnamed: 1_level_1,Unnamed: 2_level_1
Da Nang,987687,1134310
Ha Noi,7658765,8053663
Hue,897065,1128620


In [47]:
pop.unstack().stack()

cites    year
Da Nang  2000     987687
         2010    1134310
Ha Noi   2000    7658765
         2010    8053663
Hue      2000     897065
         2010    1128620
dtype: int64

In [48]:
pop_flat = pop.reset_index(name='population')
pop_flat

Unnamed: 0,cites,year,population
0,Ha Noi,2000,7658765
1,Ha Noi,2010,8053663
2,Da Nang,2000,987687
3,Da Nang,2010,1134310
4,Hue,2000,897065
5,Hue,2010,1128620


In [51]:
pop_flat.set_index(['cites','year'])

Unnamed: 0_level_0,Unnamed: 1_level_0,population
cites,year,Unnamed: 2_level_1
Ha Noi,2000,7658765
Ha Noi,2010,8053663
Da Nang,2000,987687
Da Nang,2010,1134310
Hue,2000,897065
Hue,2010,1128620


# Data Aggregations on Multi-Indices

In [52]:
health_data

Unnamed: 0_level_0,subject,Bob,Bob,Guido,Guido,Sue,Sue
Unnamed: 0_level_1,type,HR,Temp,HR,Temp,HR,Temp
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2013,1,44.0,35.1,42.0,36.2,42.0,36.5
2013,2,58.0,38.6,45.0,37.1,42.0,37.0
2014,1,36.0,36.9,37.0,35.3,34.0,38.1
2014,2,40.0,37.9,29.0,34.9,55.0,36.8


In [53]:
data_mean = health_data.mean(level='year')
data_mean

  data_mean = health_data.mean(level='year')


subject,Bob,Bob,Guido,Guido,Sue,Sue
type,HR,Temp,HR,Temp,HR,Temp
year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2013,51.0,36.85,43.5,36.65,42.0,36.75
2014,38.0,37.4,33.0,35.1,44.5,37.45
