In [5]:
import pandas as pd

# Label pairs for the two-level index: outer label first, inner label second
index_tuples = [
    ('USA', 'New York'),
    ('USA', 'Los Angeles'),
    ('India', 'Delhi'),
    ('India', 'Mumbai'),
    ('UK', 'London'),
    ('UK', 'Manchester'),
]
index = pd.MultiIndex.from_tuples(index_tuples, names=['Country', 'City'])

# One float per (Country, City) pair, listed in the same order as the tuples above
population_values = [8.4, 4.0, 18.9, 20.4, 9.0, 2.8]
population = pd.Series(data=population_values, index=index)

print(population)


Country  City       
USA      New York        8.4
         Los Angeles     4.0
India    Delhi          18.9
         Mumbai         20.4
UK       London          9.0
         Manchester      2.8
dtype: float64


In [6]:
# Row labels for the frame: (Country, City) pairs forming a two-level index
row_labels = [
    ('USA', 'New York'),
    ('USA', 'Los Angeles'),
    ('India', 'Delhi'),
    ('India', 'Mumbai'),
    ('UK', 'London'),
    ('UK', 'Manchester'),
]
multi_index = pd.MultiIndex.from_tuples(row_labels, names=['Country', 'City'])

# Column name -> values, one value per row label in the order given above
column_values = {
    'Population (millions)': [8.4, 4.0, 18.9, 20.4, 9.0, 2.8],
    'GDP (Billion $)': [1700, 1000, 500, 400, 800, 300],
}
data = pd.DataFrame(column_values, index=multi_index)

print(data)


                     Population (millions)  GDP (Billion $)
Country City                                               
USA     New York                       8.4             1700
        Los Angeles                    4.0             1000
India   Delhi                         18.9              500
        Mumbai                        20.4              400
UK      London                         9.0              800
        Manchester                     2.8              300


In [3]:
# Select every row for one country by its outer-level ('Country') label.
# The result keeps only the remaining 'City' level as its index.
usa_rows = data.loc['USA']
print(usa_rows)

             Population (millions)  GDP (Billion $)
City                                               
New York                       8.4             1700
Los Angeles                    4.0             1000


In [10]:
# Look up a single row by its full (Country, City) key.
# The row is returned as a Series; the int and float columns are shown together as float64.
delhi_key = ('India', 'Delhi')
print(data.loc[delhi_key])

Population (millions)     18.9
GDP (Billion $)          500.0
Name: (India, Delhi), dtype: float64


In [None]:
# Cross-section on the outer level: all rows (cities) whose Country label is 'India'
india_cities = data.xs(key='India')
print(india_cities)

        Population (millions)  GDP (Billion $)
City                                          
Delhi                    18.9              500
Mumbai                   20.4              400


In [12]:
# Cross-section on the inner 'City' level: rows labelled 'New York' under any country.
# The matched 'City' level is dropped, so the result is indexed by 'Country'.
new_york_rows = data.xs(key='New York', level='City')
print(new_york_rows)

         Population (millions)  GDP (Billion $)
Country                                        
USA                        8.4             1700


In [13]:
# Exchange the two index levels so 'City' becomes the outer level and 'Country' the inner.
# Only the level order changes; the rows stay in their original order.
swapped = data.swaplevel(i='Country', j='City')
print(swapped)

                     Population (millions)  GDP (Billion $)
City        Country                                        
New York    USA                        8.4             1700
Los Angeles USA                        4.0             1000
Delhi       India                     18.9              500
Mumbai      India                     20.4              400
London      UK                         9.0              800
Manchester  UK                         2.8              300


In [14]:
# Sort the rows by their index labels: by 'Country' first, then by 'City' within a country
sorted_data = data.sort_index(ascending=True)
print(sorted_data)

                     Population (millions)  GDP (Billion $)
Country City                                               
India   Delhi                         18.9              500
        Mumbai                        20.4              400
UK      London                         9.0              800
        Manchester                     2.8              300
USA     Los Angeles                    4.0             1000
        New York                       8.4             1700


In [15]:
# Flatten the MultiIndex: 'Country' and 'City' become ordinary columns and
# the rows receive a default integer index starting at 0.
df_reset = data.reset_index(drop=False)
print(df_reset)

  Country         City  Population (millions)  GDP (Billion $)
0     USA     New York                    8.4             1700
1     USA  Los Angeles                    4.0             1000
2   India        Delhi                   18.9              500
3   India       Mumbai                   20.4              400
4      UK       London                    9.0              800
5      UK   Manchester                    2.8              300


In [16]:
# Rebuild the two-level (Country, City) index from the flattened frame's columns
index_columns = ['Country', 'City']
df_multi = df_reset.set_index(keys=index_columns)
print(df_multi)

                     Population (millions)  GDP (Billion $)
Country City                                               
USA     New York                       8.4             1700
        Los Angeles                    4.0             1000
India   Delhi                         18.9              500
        Mumbai                        20.4              400
UK      London                         9.0              800
        Manchester                     2.8              300


In [17]:
# Average each column within every country, grouping on the 'Country' index level
country_means = data.groupby(level='Country').mean()
print(country_means)

         Population (millions)  GDP (Billion $)
Country                                        
India                    19.65            450.0
UK                        5.90            550.0
USA                       6.20           1350.0
