## Learning Objectives:

1. Index of a DataFrame
2. Adding new values to specific indices
3. Index object in pandas
4. Reindexing a DataFrame
5. Setting and resetting the index

In [1]:
import numpy as np
import pandas as pd


In [2]:
# Build a small demo DataFrame from a dictionary: each key becomes a column
# and each value supplies that column's five rows.
# A seeded generator is used so the random ages and scores are identical on
# every Restart & Run All (an unseeded draw changes the table on each run).
rng = np.random.default_rng(42)

scores_data = {
    "Student": ["Tom", "Katey", "Mak", "Bill", "Rahul"],
    "Age": rng.integers(20, 25, size=5),          # upper bound is exclusive
    "Sub1_Score": rng.integers(40, 100, size=5),
    "Sub2_Score": rng.integers(40, 100, size=5),
    "Sub3_Score": rng.integers(40, 100, size=5),
}

scores_df = pd.DataFrame(scores_data)
scores_df

Unnamed: 0,Student,Age,Sub1_Score,Sub2_Score,Sub3_Score
0,Tom,22,98,78,66
1,Katey,22,96,84,80
2,Mak,23,44,54,90
3,Bill,21,65,83,51
4,Rahul,23,41,47,75


In [3]:
# Inspect the row index; no index was supplied when the frame was built.
scores_df.index

RangeIndex(start=0, stop=5, step=1)

In [4]:
# Materialise the row labels as a plain Python list.
scores_df.index.tolist()

[0, 1, 2, 3, 4]

In [5]:
# Replace the row labels with custom letters, one label per row.
row_labels = list("ABCDE")
scores_df.index = row_labels
scores_df

Unnamed: 0,Student,Age,Sub1_Score,Sub2_Score,Sub3_Score
A,Tom,22,98,78,66
B,Katey,22,96,84,80
C,Mak,23,44,54,90
D,Bill,21,65,83,51
E,Rahul,23,41,47,75


## Add new values to specific indices

In [6]:
# Add a new column filled with NaN as a placeholder for scores not yet known.
scores_df['sub4_score'] = np.nan
scores_df

Unnamed: 0,Student,Age,Sub1_Score,Sub2_Score,Sub3_Score,sub4_score
A,Tom,22,98,78,66,
B,Katey,22,96,84,80,
C,Mak,23,44,54,90,
D,Bill,21,65,83,51,
E,Rahul,23,41,47,75,


In [7]:
# Some of the sub4 scores have come in.
# The Series is keyed by row label, so the assignment aligns on the index:
# rows B, D and E receive a score, and the rows missing from the Series
# (A and C) are left as NaN.
sub4_partial = pd.Series({'B': 98, 'D': 78, 'E': 65})
scores_df['sub4_score'] = sub4_partial
scores_df

Unnamed: 0,Student,Age,Sub1_Score,Sub2_Score,Sub3_Score,sub4_score
A,Tom,22,98,78,66,
B,Katey,22,96,84,80,98.0
C,Mak,23,44,54,90,
D,Bill,21,65,83,51,78.0
E,Rahul,23,41,47,75,65.0


In [8]:
# Build a standalone pandas Index object from an integer array.
index = pd.Index(np.arange(5))
index

Int64Index([0, 1, 2, 3, 4], dtype='int64')

In [11]:
# Index objects are immutable: item assignment raises TypeError.
# The error is the point of this cell, so it is caught and displayed rather
# than left uncaught, which would abort a Restart & Run All.
try:
    index[3] = 'A'
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: Index does not support mutable operations

In [12]:
# An Index object can be assigned directly as the frame's row index.
scores_df.index = index
scores_df

Unnamed: 0,Student,Age,Sub1_Score,Sub2_Score,Sub3_Score,sub4_score
0,Tom,22,98,78,66,
1,Katey,22,96,84,80,98.0
2,Mak,23,44,54,90,
3,Bill,21,65,83,51,78.0
4,Rahul,23,41,47,75,65.0


In [14]:
# The column labels are an Index too, and it supports membership tests.
'Student' in scores_df.columns

True

In [15]:
# A hand-made Index of column labels, used below to try out set-style
# operations against the frame's real columns.
candidate_labels = ['Names', 'Sub1_Score', 'Sub6_Score']
dummy_cols = pd.Index(candidate_labels)
dummy_cols

Index(['Names', 'Sub1_Score', 'Sub6_Score'], dtype='object')

In [16]:
# Keep a reference to the frame's column Index for the comparisons below.
cols = scores_df.columns
cols

Index(['Student', 'Age', 'Sub1_Score', 'Sub2_Score', 'Sub3_Score',
       'sub4_score'],
      dtype='object')

In [17]:
# Labels present in both Index objects.
dummy_cols.intersection(cols)

Index(['Sub1_Score'], dtype='object')

In [18]:
# Labels present in either Index object, with duplicates removed.
dummy_cols.union(cols)

Index(['Age', 'Names', 'Student', 'Sub1_Score', 'Sub2_Score', 'Sub3_Score',
       'Sub6_Score', 'sub4_score'],
      dtype='object')

## Setting and resetting index


In [19]:
# Promote the 'Student' column to be the row index; set_index returns a new
# frame, which is bound back to the same name.
scores_df = scores_df.set_index('Student')
scores_df

Unnamed: 0_level_0,Age,Sub1_Score,Sub2_Score,Sub3_Score,sub4_score
Student,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Tom,22,98,78,66,
Katey,22,96,84,80,98.0
Mak,23,44,54,90,
Bill,21,65,83,51,78.0
Rahul,23,41,47,75,65.0


In [20]:
# Restore a default integer index. drop=False keeps the old index ('Student')
# as a regular column instead of discarding it.
scores_df = scores_df.reset_index(drop=False)
scores_df

Unnamed: 0,Student,Age,Sub1_Score,Sub2_Score,Sub3_Score,sub4_score
0,Tom,22,98,78,66,
1,Katey,22,96,84,80,98.0
2,Mak,23,44,54,90,
3,Bill,21,65,83,51,78.0
4,Rahul,23,41,47,75,65.0
