# Lesson 20c: Reindexing

## Import libraries and load the data

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math as math

# Load the mammals data set. No index column is requested here,
# so the rows get the automatic index 0, 1, 2, ...
frame = pd.read_csv("mammals.csv")
frame.head(5)

Unnamed: 0,name,body,brain
0,Arctic fox,3.385,44.5
1,Owl monkey,0.48,15.5
2,Mountain beaver,1.35,8.1
3,Cow,465.0,423.0
4,Grey wolf,36.33,119.5


In [2]:
# Setting a new index, option 1: choose the index column while reading the file.
# This method is weak: we do not want to read the data again in the middle of some work.
frame = pd.read_csv("mammals.csv", index_col="name")
frame.head(5)


Unnamed: 0_level_0,body,brain
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Arctic fox,3.385,44.5
Owl monkey,0.48,15.5
Mountain beaver,1.35,8.1
Cow,465.0,423.0
Grey wolf,36.33,119.5


In [3]:
# Setting a new index, option 2: set it by hand.
# Start again from a freshly loaded frame that has the automatic index.
frame = pd.read_csv("mammals.csv")
frame.head(5)

Unnamed: 0,name,body,brain
0,Arctic fox,3.385,44.5
1,Owl monkey,0.48,15.5
2,Mountain beaver,1.35,8.1
3,Cow,465.0,423.0
4,Grey wolf,36.33,119.5


In [4]:
# Make the "name" column the index of the frame.
# The result is assigned back to "frame" instead of passing "inplace=True":
# in-place calls give no performance benefit and cannot be chained.
frame = frame.set_index("name")
frame.head()

Unnamed: 0_level_0,body,brain
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Arctic fox,3.385,44.5
Owl monkey,0.48,15.5
Mountain beaver,1.35,8.1
Cow,465.0,423.0
Grey wolf,36.33,119.5


In [5]:
# Now go back to the original (automatic) index.
# "reset_index()" turns the current index ("name") into an ordinary column again.
# The result is assigned back to "frame" rather than using "inplace=True".
frame = frame.reset_index()
frame.head()

Unnamed: 0,name,body,brain
0,Arctic fox,3.385,44.5
1,Owl monkey,0.48,15.5
2,Mountain beaver,1.35,8.1
3,Cow,465.0,423.0
4,Grey wolf,36.33,119.5


In [6]:
# Note: calling "set_index()" several times in a row loses data!
# This is because "set_index()" replaces the current index with the new one, and the old index is thrown away.

# To avoid this, call "reset_index()" after each use of "set_index()" to return to the automatic index.
# The old index then becomes an ordinary column again, so no data are lost.

In [7]:
# Use "name" as the index again, so that rows can be looked up by animal name.
# The result is assigned back to "frame" rather than using "inplace=True".
frame = frame.set_index("name")
frame.head()

Unnamed: 0_level_0,body,brain
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Arctic fox,3.385,44.5
Owl monkey,0.48,15.5
Mountain beaver,1.35,8.1
Cow,465.0,423.0
Grey wolf,36.33,119.5


In [8]:
# How can we improve the efficiency of searching?

# We need the data about "Cow", but to get them Python will search through all the rows.
# To avoid this, we just sort the data; then Python can stop when it reaches "C...".

# Select the row labelled "Cow" with all of its columns.
frame.loc["Cow", :]

body     465.0
brain    423.0
Name: Cow, dtype: float64

In [10]:
# Sort the rows by their index label (the animal name), in ascending order.
# The result is assigned back to "frame" rather than using "inplace=True".
frame = frame.sort_index()

frame.head(20)

Unnamed: 0_level_0,body,brain
name,Unnamed: 1_level_1,Unnamed: 2_level_1
African elephant,6654.0,5712.0
African giant pouched rat,1.0,6.6
Arctic fox,3.385,44.5
Arctic ground squirrel,0.92,5.7
Asian elephant,2547.0,4603.0
Baboon,10.55,179.5
Big brown bat,0.023,0.3
Brazilian tapir,160.0,169.0
Cat,3.3,25.6
Chimpanzee,52.16,440.0


In [11]:
# To display all the rows between two values we slice by the index
# (here the index holds names; with the automatic index it would hold numbers).
# ".loc" makes it explicit that the rows are selected by label, and both end labels
# ("Cat" and "Donkey") are included in the result.
# The shorter form frame["Cat":"Donkey"] selects the same rows.
frame.loc["Cat":"Donkey"]

Unnamed: 0_level_0,body,brain
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Cat,3.3,25.6
Chimpanzee,52.16,440.0
Chinchilla,0.425,6.4
Cow,465.0,423.0
Desert hedgehog,0.55,2.4
Donkey,187.1,419.0


In [12]:
# To get the animals from "A" up to "C":
# Note that names starting with "C" (such as "Cat") are NOT included, because they
# sort after the plain string "C". Use "D" as the end label to include them.
frame.loc["A":"C"]

Unnamed: 0_level_0,body,brain
name,Unnamed: 1_level_1,Unnamed: 2_level_1
African elephant,6654.0,5712.0
African giant pouched rat,1.0,6.6
Arctic fox,3.385,44.5
Arctic ground squirrel,0.92,5.7
Asian elephant,2547.0,4603.0
Baboon,10.55,179.5
Big brown bat,0.023,0.3
Brazilian tapir,160.0,169.0
