# Lesson 21a: MultiIndex

## Import libraries and load data

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math as math

# Load the incident table from a CSV file (the relative path is resolved
# against the current working directory), then preview its first five rows.
incidents = pd.read_csv(filepath_or_buffer="Canadian Railway Crossing Incidents.csv")
incidents.head(n=5)

Unnamed: 0,Region,EventType,Public automated,Public passive,Private,Farm
0,Newfoundland,Accidents,0,0,0,0
1,Newfoundland,Fatalities,0,0,0,0
2,Newfoundland,Serious injuries,0,0,0,0
3,Nova Scotia,Accidents,1,0,0,0
4,Nova Scotia,Fatalities,0,0,0,0


In [2]:
# A single column is not always enough to identify a row uniquely; when several
# columns are needed together, pandas offers a MultiIndex.

# Reminder: set_index() replaces the automatic integer index with a column.
# Without inplace=True it returns a new DataFrame, so `incidents` is unchanged.

incidents.set_index(keys="Region").head(n=10)

Unnamed: 0_level_0,EventType,Public automated,Public passive,Private,Farm
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Newfoundland,Accidents,0,0,0,0
Newfoundland,Fatalities,0,0,0,0
Newfoundland,Serious injuries,0,0,0,0
Nova Scotia,Accidents,1,0,0,0
Nova Scotia,Fatalities,0,0,0,0
Nova Scotia,Serious injuries,0,0,0,0
New Brunswick,Accidents,0,0,0,0
New Brunswick,Fatalities,0,0,0,0
New Brunswick,Serious injuries,0,0,0,0
Quebec,Accidents,4,0,1,0


In [4]:
# "Region" alone does not identify a row uniquely: each region is repeated for
# several event types. Passing a list of column names as the `keys` parameter
# of set_index() builds a MultiIndex from those columns instead:

incidents.set_index(keys=["Region", "EventType"]).head(n=10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Public automated,Public passive,Private,Farm
Region,EventType,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Newfoundland,Accidents,0,0,0,0
Newfoundland,Fatalities,0,0,0,0
Newfoundland,Serious injuries,0,0,0,0
Nova Scotia,Accidents,1,0,0,0
Nova Scotia,Fatalities,0,0,0,0
Nova Scotia,Serious injuries,0,0,0,0
New Brunswick,Accidents,0,0,0,0
New Brunswick,Fatalities,0,0,0,0
New Brunswick,Serious injuries,0,0,0,0
Quebec,Accidents,4,0,1,0


In [5]:
# Make (EventType, Region) the MultiIndex of `incidents` for the rest of the
# lesson. The result is assigned back instead of using inplace=True, and the
# guard keeps this cell safe to re-run: calling set_index() a second time would
# raise a KeyError because both columns have already moved into the index.
index_levels = ["EventType", "Region"]
if list(incidents.index.names) != index_levels:
    incidents = incidents.set_index(keys=index_levels)

In [6]:
# Preview the first ten rows of `incidents`.
incidents.head(n=10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Public automated,Public passive,Private,Farm
EventType,Region,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Accidents,Newfoundland,0,0,0,0
Fatalities,Newfoundland,0,0,0,0
Serious injuries,Newfoundland,0,0,0,0
Accidents,Nova Scotia,1,0,0,0
Fatalities,Nova Scotia,0,0,0,0
Serious injuries,Nova Scotia,0,0,0,0
Accidents,New Brunswick,0,0,0,0
Fatalities,New Brunswick,0,0,0,0
Serious injuries,New Brunswick,0,0,0,0
Accidents,Quebec,4,0,1,0


In [9]:
# The index labels are still in file order rather than sorted, so sort them.
# With no arguments sort_index() sorts ascending on the index.
# The sorted frame is assigned back rather than sorted with inplace=True.

incidents = incidents.sort_index()
incidents.head(n=10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Public automated,Public passive,Private,Farm
EventType,Region,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Accidents,Alberta,3,7,2,0
Accidents,British Columbia,4,1,0,0
Accidents,Manitoba,1,3,0,2
Accidents,New Brunswick,0,0,0,0
Accidents,Newfoundland,0,0,0,0
Accidents,Northwest Territories,0,0,0,0
Accidents,Nova Scotia,1,0,0,0
Accidents,Ontario,7,0,2,0
Accidents,Quebec,4,0,1,0
Accidents,Saskatchewan,1,3,0,0


In [12]:
# Each index level can get its own sort direction: `ascending` takes one flag
# per level, here EventType ascending and Region descending.
# The sorted frame is assigned back rather than sorted with inplace=True.

incidents = incidents.sort_index(ascending=[True, False])
incidents.head(n=10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Public automated,Public passive,Private,Farm
EventType,Region,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Accidents,Saskatchewan,1,3,0,0
Accidents,Quebec,4,0,1,0
Accidents,Ontario,7,0,2,0
Accidents,Nova Scotia,1,0,0,0
Accidents,Northwest Territories,0,0,0,0
Accidents,Newfoundland,0,0,0,0
Accidents,New Brunswick,0,0,0,0
Accidents,Manitoba,1,3,0,2
Accidents,British Columbia,4,1,0,0
Accidents,Alberta,3,7,2,0
