In [123]:
import numpy as np
import pandas as pd

In [127]:
index = [('California', 2010), ('California', 2020), ('New York', 2010), ('New York', 2020), ('Texas', 2010), ('Texas', 2020)]
populations = [3373462, 63424324,
               19912301, 681923132, 
               29123023, 998515251]

pop = pd.Series(populations, index = index)
pop

(California, 2010)      3373462
(California, 2020)     63424324
(New York, 2010)       19912301
(New York, 2020)      681923132
(Texas, 2010)          29123023
(Texas, 2020)         998515251
dtype: int64

In [128]:
index

[('California', 2010),
 ('California', 2020),
 ('New York', 2010),
 ('New York', 2020),
 ('Texas', 2010),
 ('Texas', 2020)]

In [129]:
index = pd.MultiIndex.from_tuples(index)

In [130]:
pop = pop.reindex(index)
pop

California  2010      3373462
            2020     63424324
New York    2010     19912301
            2020    681923132
Texas       2010     29123023
            2020    998515251
dtype: int64

In [131]:
pop[:2020]

California  2010      3373462
            2020     63424324
New York    2010     19912301
            2020    681923132
Texas       2010     29123023
            2020    998515251
dtype: int64

In [132]:
pop_df = pop.unstack()
pop_df

Unnamed: 0,2010,2020
California,3373462,63424324
New York,19912301,681923132
Texas,29123023,998515251


In [133]:
pop_df.stack(future_stack=True)

California  2010      3373462
            2020     63424324
New York    2010     19912301
            2020    681923132
Texas       2010     29123023
            2020    998515251
dtype: int64

In [134]:
df = pd.DataFrame(np.random.rand(4,2),
                  index=[['a','c', 'b','b'], [1,2,1,2]],
                  columns = ['data1', 'data2'])

In [135]:
df

Unnamed: 0,Unnamed: 1,data1,data2
a,1,0.384233,0.873006
c,2,0.350764,0.541736
b,1,0.011203,0.893129
b,2,0.25529,0.078245


In [136]:
data = [('California', 2010), ('California', 2020), ('New York', 2010), ('New York', 2020), ('Texas', 2010), ('Texas', 2020)]
pd.Series(data)

0    (California, 2010)
1    (California, 2020)
2      (New York, 2010)
3      (New York, 2020)
4         (Texas, 2010)
5         (Texas, 2020)
dtype: object

In [137]:
pd.MultiIndex.from_arrays([['a','a','b','b'], [1,2,1,2]])


MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2)],
           )

In [138]:
pd.MultiIndex.from_tuples([['a','a','b','b'], [1,2,1,2]])


MultiIndex([('a', 'a', 'b', 'b'),
            (  1,   2,   1,   2)],
           )

In [139]:
pd.MultiIndex.from_product([['a','a','b','b'], [1,2,1,2]])


MultiIndex([('a', 1),
            ('a', 2),
            ('a', 1),
            ('a', 2),
            ('a', 1),
            ('a', 2),
            ('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2),
            ('b', 1),
            ('b', 2),
            ('b', 1),
            ('b', 2),
            ('b', 1),
            ('b', 2)],
           )

In [141]:
# Corrected MultiIndex with unique levels
multi_index = pd.MultiIndex(levels=[['a', 'b'], [1, 2]],
                            codes=[[0, 0, 1, 1], [0, 1, 0, 1]])

print(multi_index)

MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2)],
           )


In [140]:
population_df = pd.DataFrame({
    'state': ['California', 'New York', 'Texas'],
    'year': [2020, 2021, 2022],
    'population': [39538223, 20201249, 29145505]
})
population_df = population_df.set_index(['state', 'year'])
population_df.index.names = ['state', 'year']
pop


California  2010      3373462
            2020     63424324
New York    2010     19912301
            2020    681923132
Texas       2010     29123023
            2020    998515251
dtype: int64

In [142]:
import pandas as pd

# Create the MultiIndex for the index
index = pd.MultiIndex.from_product([[2013, 2014], [1, 2]],
                                   names=['year', 'visit'])

# Create the MultiIndex for the columns
columns = pd.MultiIndex.from_product([['Bob', 'Guido', 'Sue'],
                                      ['HR', 'Temp']],
                                     names=['subject', 'type'])

data = np.round(np.random.randn(4,6),1)
data[:, ::2] *= 10
data += 37

health_data = pd.DataFrame(data, index = index, columns=columns)
health_data

Unnamed: 0_level_0,subject,Bob,Bob,Guido,Guido,Sue,Sue
Unnamed: 0_level_1,type,HR,Temp,HR,Temp,HR,Temp
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2013,1,44.0,36.8,43.0,37.3,39.0,35.6
2013,2,50.0,38.4,30.0,35.6,33.0,37.1
2014,1,45.0,35.9,47.0,37.2,42.0,35.5
2014,2,50.0,36.4,38.0,35.0,36.0,36.6


In [143]:
health_data['Guido']

Unnamed: 0_level_0,type,HR,Temp
year,visit,Unnamed: 2_level_1,Unnamed: 3_level_1
2013,1,43.0,37.3
2013,2,30.0,35.6
2014,1,47.0,37.2
2014,2,38.0,35.0


In [144]:
health_data['Guido']

Unnamed: 0_level_0,type,HR,Temp
year,visit,Unnamed: 2_level_1,Unnamed: 3_level_1
2013,1,43.0,37.3
2013,2,30.0,35.6
2014,1,47.0,37.2
2014,2,38.0,35.0


In [146]:
pop

California  2010      3373462
            2020     63424324
New York    2010     19912301
            2020    681923132
Texas       2010     29123023
            2020    998515251
dtype: int64

In [147]:
pop['California', 2010]

3373462

In [148]:
pop['California']

2010     3373462
2020    63424324
dtype: int64

In [154]:


poploc['california':'new york']

california    1000
new york      2000
dtype: int64

In [155]:
pop[:,2010]

California     3373462
New York      19912301
Texas         29123023
dtype: int64

In [156]:
pop[pop>12312300]

California  2020     63424324
New York    2010     19912301
            2020    681923132
Texas       2010     29123023
            2020    998515251
dtype: int64

In [158]:
pop[['California', 'Texas']]

California  2010      3373462
            2020     63424324
Texas       2010     29123023
            2020    998515251
dtype: int64

In [159]:
health_data

Unnamed: 0_level_0,subject,Bob,Bob,Guido,Guido,Sue,Sue
Unnamed: 0_level_1,type,HR,Temp,HR,Temp,HR,Temp
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2013,1,44.0,36.8,43.0,37.3,39.0,35.6
2013,2,50.0,38.4,30.0,35.6,33.0,37.1
2014,1,45.0,35.9,47.0,37.2,42.0,35.5
2014,2,50.0,36.4,38.0,35.0,36.0,36.6


In [160]:
health_data['Guido','HR']

year  visit
2013  1        43.0
      2        30.0
2014  1        47.0
      2        38.0
Name: (Guido, HR), dtype: float64

In [161]:
health_data.iloc[:2, :2]

Unnamed: 0_level_0,subject,Bob,Bob
Unnamed: 0_level_1,type,HR,Temp
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2
2013,1,44.0,36.8
2013,2,50.0,38.4


In [173]:
health_data.loc[:,('Bob', 'HR')]

year  visit
2013  1        44.0
      2        50.0
2014  1        45.0
      2        50.0
Name: (Bob, HR), dtype: float64

In [181]:
idx=pd.IndexSlice
health_data.loc[idx[:, 1], idx[:, 'HR']]

Unnamed: 0_level_0,subject,Bob,Guido,Sue
Unnamed: 0_level_1,type,HR,HR,HR
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2013,1,44.0,43.0,39.0
2014,1,45.0,47.0,42.0


In [184]:
index = pd.MultiIndex.from_product([['a','c','b'],[1,2]])
data = pd.Series(np.random.rand(6), index=index)
data.index.names = ['char', 'int']
data

char  int
a     1      0.517255
      2      0.789592
c     1      0.261500
      2      0.870195
b     1      0.408972
      2      0.626575
dtype: float64

In [188]:
try:
    data['a','b']
except KeyError as e:
    print("KeyError",e)
# KeyError 'Key length (1) was greater than MultiIndex lexsort depth (0)'

KeyError ('a', 'b')


In [189]:
data = data.sort_index()
data

char  int
a     1      0.517255
      2      0.789592
b     1      0.408972
      2      0.626575
c     1      0.261500
      2      0.870195
dtype: float64

In [192]:
data['a':'b']

char  int
a     1      0.517255
      2      0.789592
b     1      0.408972
      2      0.626575
dtype: float64

In [193]:
pop.unstack(level=0)

Unnamed: 0,California,New York,Texas
2010,3373462,19912301,29123023
2020,63424324,681923132,998515251


In [194]:
pop.unstack(level=1)

Unnamed: 0,2010,2020
California,3373462,63424324
New York,19912301,681923132
Texas,29123023,998515251


In [195]:
pop.unstack().stack()

California  2010      3373462
            2020     63424324
New York    2010     19912301
            2020    681923132
Texas       2010     29123023
            2020    998515251
dtype: int64

In [197]:
pop_flat = pop.reset_index(name='population')
pop_flat

Unnamed: 0,level_0,level_1,population
0,California,2010,3373462
1,California,2020,63424324
2,New York,2010,19912301
3,New York,2020,681923132
4,Texas,2010,29123023
5,Texas,2020,998515251


In [200]:
pop_flat.columns = pop_flat.columns.str.strip().str.lower()

In [201]:
print(pop_flat.head())


      level_0  level_1  population
0  California     2010     3373462
1  California     2020    63424324
2    New York     2010    19912301
3    New York     2020   681923132
4       Texas     2010    29123023


In [206]:
pop_flat.set_index(['level_0', 'level_1'])

#level_0 -- state 
#level_1 -- state 

Unnamed: 0_level_0,Unnamed: 1_level_0,population
level_0,level_1,Unnamed: 2_level_1
California,2010,3373462
California,2020,63424324
New York,2010,19912301
New York,2020,681923132
Texas,2010,29123023
Texas,2020,998515251
