In [1]:
# Import libraries

import pandas as pd
import numpy as np

### Load Excel File

In [2]:
# Location of the workbook, relative to the notebook's working directory
filename = 'data/car_financing.xlsx'

# Parse the workbook into a DataFrame (pandas reads the first sheet by default)
df = pd.read_excel(io=filename)

## Renaming and Deleting Columns
You will often need to rename columns or remove columns you don't need.

### Rename columns

Here are two popular ways to rename dataframe columns.
1. <b>dictionary substitution</b>: very useful if you only want to rename a few of the columns.
2. <b>list replacement</b>: requires a full list of names, one for every column and in the existing column order (in my experience, this is more error-prone).

In [3]:
# DataFrame before renaming columns: several headers contain spaces
# (e.g. 'Starting Balance'), which the rename cells below clean up
df.head()

Unnamed: 0,Month,Starting Balance,Repayment,Interest Paid,Principal Paid,New Balance,term,interest_rate,car_type
0,1,34689.96,687.23,202.93,484.3,34205.66,60,0.0702,Toyota Sienna
1,2,34205.66,687.23,200.1,487.13,33718.53,60,0.0702,Toyota Sienna
2,3,33718.53,687.23,197.25,489.98,33228.55,60,0.0702,Toyota Sienna
3,4,33228.55,687.23,194.38,492.85,32735.7,60,0.0702,Toyota Sienna
4,5,32735.7,687.23,191.5,495.73,32239.97,60,0.0702,Toyota Sienna


In [4]:
# Bracket indexing works even though the column name contains a space,
# but attribute-style access (df.Principal Paid) is a syntax error.
# Renaming the column to a name without spaces fixes that.
df['Principal Paid']

0      484.30
1      487.13
2      489.98
3      492.85
4      495.73
        ...  
403    786.47
404    788.37
405    790.27
406    792.18
407    794.10
Name: Principal Paid, Length: 408, dtype: float64

In [5]:
# Approach 1: dictionary substitution with the rename method.
# Only the columns listed as keys are renamed; all others keep their names.
snake_case_names = {
    'Starting Balance': 'starting_balance',
    'Interest Paid': 'interest_paid',
    'Principal Paid': 'principal_paid',
    'New Balance': 'new_balance',
}
df = df.rename(columns=snake_case_names)

In [6]:
# DataFrame after renaming columns: the four renamed headers are now
# lowercase with underscores instead of spaces

df.head()

Unnamed: 0,Month,starting_balance,Repayment,interest_paid,principal_paid,new_balance,term,interest_rate,car_type
0,1,34689.96,687.23,202.93,484.3,34205.66,60,0.0702,Toyota Sienna
1,2,34205.66,687.23,200.1,487.13,33718.53,60,0.0702,Toyota Sienna
2,3,33718.53,687.23,197.25,489.98,33228.55,60,0.0702,Toyota Sienna
3,4,33228.55,687.23,194.38,492.85,32735.7,60,0.0702,Toyota Sienna
4,5,32735.7,687.23,191.5,495.73,32239.97,60,0.0702,Toyota Sienna


In [7]:
# Approach 2: list replacement.
# Assigning to df.columns replaces every name by position, so the list must
# name all columns in their current order even though only Month -> month changes.
new_column_names = [
    'month',
    'starting_balance',
    'Repayment',
    'interest_paid',
    'principal_paid',
    'new_balance',
    'term',
    'interest_rate',
    'car_type',
]
df.columns = new_column_names

In [8]:
# 'Month' now appears as 'month'; the other column names are unchanged
df.head()

Unnamed: 0,month,starting_balance,Repayment,interest_paid,principal_paid,new_balance,term,interest_rate,car_type
0,1,34689.96,687.23,202.93,484.3,34205.66,60,0.0702,Toyota Sienna
1,2,34205.66,687.23,200.1,487.13,33718.53,60,0.0702,Toyota Sienna
2,3,33718.53,687.23,197.25,489.98,33228.55,60,0.0702,Toyota Sienna
3,4,33228.55,687.23,194.38,492.85,32735.7,60,0.0702,Toyota Sienna
4,5,32735.7,687.23,191.5,495.73,32239.97,60,0.0702,Toyota Sienna


### Deleting Columns

In [9]:
# Approach 1: DataFrame.drop with the columns keyword.
# The argument is a list, so several columns can be dropped in one call.
columns_to_drop = ['term']
df = df.drop(columns=columns_to_drop)

In [10]:
# 'term' is no longer among the columns
df.head()

Unnamed: 0,month,starting_balance,Repayment,interest_paid,principal_paid,new_balance,interest_rate,car_type
0,1,34689.96,687.23,202.93,484.3,34205.66,0.0702,Toyota Sienna
1,2,34205.66,687.23,200.1,487.13,33718.53,0.0702,Toyota Sienna
2,3,33718.53,687.23,197.25,489.98,33228.55,0.0702,Toyota Sienna
3,4,33228.55,687.23,194.38,492.85,32735.7,0.0702,Toyota Sienna
4,5,32735.7,687.23,191.5,495.73,32239.97,0.0702,Toyota Sienna


In [11]:
# Approach 2: the del statement.
# Removes a single column from df directly; no reassignment is needed.

del df['Repayment']

In [12]:
# 'Repayment' is no longer among the columns
df.head()

Unnamed: 0,month,starting_balance,interest_paid,principal_paid,new_balance,interest_rate,car_type
0,1,34689.96,202.93,484.3,34205.66,0.0702,Toyota Sienna
1,2,34205.66,200.1,487.13,33718.53,0.0702,Toyota Sienna
2,3,33718.53,197.25,489.98,33228.55,0.0702,Toyota Sienna
3,4,33228.55,194.38,492.85,32735.7,0.0702,Toyota Sienna
4,5,32735.7,191.5,495.73,32239.97,0.0702,Toyota Sienna


In [14]:
# Approach 3: drop() with a label plus an axis; axis='columns' is the same as axis=1
df = df.drop(labels='new_balance', axis='columns')

In [15]:
# 'new_balance' is no longer among the columns
df.head()

Unnamed: 0,month,starting_balance,interest_paid,principal_paid,interest_rate,car_type
0,1,34689.96,202.93,484.3,0.0702,Toyota Sienna
1,2,34205.66,200.1,487.13,0.0702,Toyota Sienna
2,3,33718.53,197.25,489.98,0.0702,Toyota Sienna
3,4,33228.55,194.38,492.85,0.0702,Toyota Sienna
4,5,32735.7,191.5,495.73,0.0702,Toyota Sienna
