# Lesson 21b: Multiindex, transpose, swaplevel, stack and unstack

## Import libraries and load data

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math as math

# Read the crossing-incident table, index it by (Region, EventType) and sort the
# rows by that two-level index, all in one method chain.
incidents = (
    pd.read_csv("Canadian Railway Crossing Incidents.csv")
    .set_index(["Region", "EventType"])
    .sort_index()
)

# Preview the first ten rows.
incidents.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Public automated,Public passive,Private,Farm
Region,EventType,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alberta,Accidents,3,7,2,0
Alberta,Fatalities,0,0,0,0
Alberta,Serious injuries,0,1,2,0
British Columbia,Accidents,4,1,0,0
British Columbia,Fatalities,0,0,0,0
British Columbia,Serious injuries,3,0,0,0
Manitoba,Accidents,1,3,0,2
Manitoba,Fatalities,0,1,0,0
Manitoba,Serious injuries,0,0,0,1
New Brunswick,Accidents,0,0,0,0


## Searching by multiindex

In [2]:
# Look up a single row by its full MultiIndex key. The key is passed to .loc as
# one tuple with one label per index level: (Region, EventType).

incidents.loc[("Alberta","Accidents")]

# The parentheses "()" make the key a tuple (an immutable sequence: its elements
# cannot be modified). The result is a Series holding the values of the four
# crossing-type columns for that row.

Public automated    3
Public passive      7
Private             2
Farm                0
Name: (Alberta, Accidents), dtype: int64

In [3]:
# We can also select by the first index level only ("Region"); this returns all
# rows of that region, indexed by the remaining level ("EventType"):
incidents.loc[("Alberta")]

# The parentheses are optional here: ("Alberta") without a trailing comma is just
# the string "Alberta", not a tuple, so incidents.loc["Alberta"] is the same lookup.

Unnamed: 0_level_0,Public automated,Public passive,Private,Farm
EventType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Accidents,3,7,2,0
Fatalities,0,0,0,0
Serious injuries,0,1,2,0


In [4]:
# We can also use "iloc[]", which takes the integer position of a row (counted from 0):

incidents.iloc[2]

Public automated    0
Public passive      1
Private             2
Farm                0
Name: (Alberta, Serious injuries), dtype: int64

In [5]:
# If we need a specific cell, we can chain two lookups (row first, then column):

incidents.loc[("Alberta","Accidents")].loc["Public passive"]

# which is equivalent to passing the row key and the column label in a single .loc call:

incidents.loc[("Alberta","Accidents"),"Public passive"]
# This (row, column) pair works like a cell address in Excel. Only this last
# expression is displayed as the output of the cell.

7

## Transposition

In [6]:
# To swap rows and columns we use transpose(). The (Region, EventType) row index
# becomes a two-level column index, and the crossing types become the row index:

events = incidents.transpose()
events

Region,Alberta,Alberta,Alberta,British Columbia,British Columbia,British Columbia,Manitoba,Manitoba,Manitoba,New Brunswick,...,Nova Scotia,Ontario,Ontario,Ontario,Quebec,Quebec,Quebec,Saskatchewan,Saskatchewan,Saskatchewan
EventType,Accidents,Fatalities,Serious injuries,Accidents,Fatalities,Serious injuries,Accidents,Fatalities,Serious injuries,Accidents,...,Serious injuries,Accidents,Fatalities,Serious injuries,Accidents,Fatalities,Serious injuries,Accidents,Fatalities,Serious injuries
Public automated,3,0,0,4,0,3,1,0,0,0,...,0,7,1,1,4,1,0,1,0,0
Public passive,7,0,1,1,0,0,3,1,0,0,...,0,0,0,0,0,0,0,3,0,0
Private,2,0,2,0,0,0,0,0,0,0,...,0,2,0,0,1,0,0,0,0,0
Farm,0,0,0,0,0,0,2,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
# Select one row of the transposed frame by its label; the result is a Series
# indexed by (Region, EventType).
events.loc["Private"]

Region                 EventType       
Alberta                Accidents           2
                       Fatalities          0
                       Serious injuries    2
British Columbia       Accidents           0
                       Fatalities          0
                       Serious injuries    0
Manitoba               Accidents           0
                       Fatalities          0
                       Serious injuries    0
New Brunswick          Accidents           0
                       Fatalities          0
                       Serious injuries    0
Newfoundland           Accidents           0
                       Fatalities          0
                       Serious injuries    0
Northwest Territories  Accidents           0
                       Fatalities          0
                       Serious injuries    0
Nova Scotia            Accidents           0
                       Fatalities          0
                       Serious injuries    0
Ontario        

In [8]:
# To show a specific cell, pass the row label and then the column key; because the
# columns are now a MultiIndex, the column key is a (Region, EventType) tuple:
events.loc["Private",("Alberta","Accidents")]

2

In [9]:
# or, by integer position (row 2 is "Private", column 0 is ("Alberta", "Accidents")):
events.iloc[2,0]

2

In [10]:
# If we transpose "events" again (events.transpose()), we get back a dataframe with the same layout as the original "incidents".

## The swaplevel method: changing the order of index levels

In [11]:
# Reminder of the current layout: "Region" is the outer index level and
# "EventType" is the inner one.
incidents.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Public automated,Public passive,Private,Farm
Region,EventType,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alberta,Accidents,3,7,2,0
Alberta,Fatalities,0,0,0,0
Alberta,Serious injuries,0,1,2,0
British Columbia,Accidents,4,1,0,0
British Columbia,Fatalities,0,0,0,0


In [12]:
# To make "EventType" the first index level, we first reset the index (turning both
# index levels back into ordinary columns) and then set the index again in the new
# order. The result is assigned back to "incidents" rather than using inplace=True,
# so the two steps can be chained and the style matches the swaplevel() cell below.

incidents = incidents.reset_index().set_index(["EventType","Region"])
incidents.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Public automated,Public passive,Private,Farm
EventType,Region,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Accidents,Alberta,3,7,2,0
Fatalities,Alberta,0,0,0,0
Serious injuries,Alberta,0,1,2,0
Accidents,British Columbia,4,1,0,0
Fatalities,British Columbia,0,0,0,0


In [17]:
# Sort the rows by the index labels so that all rows sharing the same outer-level
# label are grouped together. The sorted frame is assigned back to the name instead
# of using inplace=True, consistent with the swaplevel() cell.
incidents = incidents.sort_index()

In [18]:
# If we want to get back to the original order of index levels, we do not have to
# reset the index: swaplevel() swaps the two index levels directly, and
# sort_index() then re-sorts the rows by the new outer level.
# NOTE: the result is assigned back to "incidents", so running this cell a second
# time would swap the levels again.

incidents = incidents.swaplevel().sort_index()
incidents.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Public automated,Public passive,Private,Farm
Region,EventType,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alberta,Accidents,3,7,2,0
Alberta,Fatalities,0,0,0,0
Alberta,Serious injuries,0,1,2,0
British Columbia,Accidents,4,1,0,0
British Columbia,Fatalities,0,0,0,0


## Stack and unstack methods

In [21]:
# The method stack() moves the column labels into the row index, where they become
# a new innermost (3rd) index level; the result here is a Series:

incidents.stack().head(10)

Region   EventType                         
Alberta  Accidents         Public automated    3
                           Public passive      7
                           Private             2
                           Farm                0
         Fatalities        Public automated    0
                           Public passive      0
                           Private             0
                           Farm                0
         Serious injuries  Public automated    0
                           Public passive      1
dtype: int64

In [23]:
# To present these data as a dataframe, we use to_frame(); the single resulting
# column gets the default label 0:

stackedIncidents = incidents.stack().to_frame()
stackedIncidents.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0
Region,EventType,Unnamed: 2_level_1,Unnamed: 3_level_1
Alberta,Accidents,Public automated,3
Alberta,Accidents,Public passive,7
Alberta,Accidents,Private,2
Alberta,Accidents,Farm,0
Alberta,Fatalities,Public automated,0


In [24]:
# The method unstack() works the opposite way: by default the innermost index level
# is turned into columns:

stackedIncidents.unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,0,0,0,0
Unnamed: 0_level_1,Unnamed: 1_level_1,Public automated,Public passive,Private,Farm
Region,EventType,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Alberta,Accidents,3,7,2,0
Alberta,Fatalities,0,0,0,0
Alberta,Serious injuries,0,1,2,0
British Columbia,Accidents,4,1,0,0
British Columbia,Fatalities,0,0,0,0
British Columbia,Serious injuries,3,0,0,0
Manitoba,Accidents,1,3,0,2
Manitoba,Fatalities,0,1,0,0
Manitoba,Serious injuries,0,0,0,1
New Brunswick,Accidents,0,0,0,0


In [25]:
# If we use unstack() twice, the second call moves the next index level
# ("EventType") into the columns as well:

stackedIncidents.unstack().unstack()

Unnamed: 0_level_0,0,0,0,0,0,0,0,0,0,0,0,0
Unnamed: 0_level_1,Public automated,Public automated,Public automated,Public passive,Public passive,Public passive,Private,Private,Private,Farm,Farm,Farm
EventType,Accidents,Fatalities,Serious injuries,Accidents,Fatalities,Serious injuries,Accidents,Fatalities,Serious injuries,Accidents,Fatalities,Serious injuries
Region,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
Alberta,3,0,0,7,0,1,2,0,2,0,0,0
British Columbia,4,0,3,1,0,0,0,0,0,0,0,0
Manitoba,1,0,0,3,1,0,0,0,0,2,0,1
New Brunswick,0,0,0,0,0,0,0,0,0,0,0,0
Newfoundland,0,0,0,0,0,0,0,0,0,0,0,0
Northwest Territories,0,0,0,0,0,0,0,0,0,0,0,0
Nova Scotia,1,0,0,0,0,0,0,0,0,0,0,0
Ontario,7,1,1,0,0,0,2,0,0,0,0,0
Quebec,4,1,0,0,0,0,1,0,0,0,0,0
Saskatchewan,1,0,0,3,0,0,0,0,0,0,0,0


In [28]:
# We can also turn the leading index level "Region" into columns by choosing
# level=0 (or, equivalently, level="Region"):

stackedIncidents.unstack(0).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,0,0,0,0,0,0,0,0,0,0
Unnamed: 0_level_1,Region,Alberta,British Columbia,Manitoba,New Brunswick,Newfoundland,Northwest Territories,Nova Scotia,Ontario,Quebec,Saskatchewan
EventType,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
Accidents,Public automated,3,4,1,0,0,0,1,7,4,1
Accidents,Public passive,7,1,3,0,0,0,0,0,0,3
Accidents,Private,2,0,0,0,0,0,0,2,1,0
Accidents,Farm,0,0,2,0,0,0,0,0,0,0
Fatalities,Public automated,0,0,0,0,0,0,0,1,1,0
Fatalities,Public passive,0,0,1,0,0,0,0,0,0,0
Fatalities,Private,0,0,0,0,0,0,0,0,0,0
Fatalities,Farm,0,0,0,0,0,0,0,0,0,0
Serious injuries,Public automated,0,3,0,0,0,0,0,1,0,0
Serious injuries,Public passive,1,0,0,0,0,0,0,0,0,0


In [29]:
# Several index levels can be moved into the columns at once by passing a list of
# level names:
stackedIncidents.unstack(level=["Region","EventType"]).head()

Unnamed: 0_level_0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Region,Alberta,Alberta,Alberta,British Columbia,British Columbia,British Columbia,Manitoba,Manitoba,Manitoba,New Brunswick,...,Nova Scotia,Ontario,Ontario,Ontario,Quebec,Quebec,Quebec,Saskatchewan,Saskatchewan,Saskatchewan
EventType,Accidents,Fatalities,Serious injuries,Accidents,Fatalities,Serious injuries,Accidents,Fatalities,Serious injuries,Accidents,...,Serious injuries,Accidents,Fatalities,Serious injuries,Accidents,Fatalities,Serious injuries,Accidents,Fatalities,Serious injuries
Public automated,3,0,0,4,0,3,1,0,0,0,...,0,7,1,1,4,1,0,1,0,0
Public passive,7,0,1,1,0,0,3,1,0,0,...,0,0,0,0,0,0,0,3,0,0
Private,2,0,2,0,0,0,0,0,0,0,...,0,2,0,0,1,0,0,0,0,0
Farm,0,0,0,0,0,0,2,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [31]:
# Because the index now has 3 levels, a row key is a 3-element tuple; the trailing 0
# is the label of the only column:

stackedIncidents.loc[("Alberta","Accidents","Private"),0]

2