# Collinearity Diagnostics

In [1]:
#=============================================================================================
# CODE NAME     : Collinearity Diagnostics.py
# AUTHOR        : Urvish Shah
# PURPOSE       : Demonstrate multicollinearity diagnostics (VIF) using pandas and statsmodels
# APPLICATION   : Analyzing Fitness dataset
#==============================================================================================

In [2]:
# Load input data
import os

import numpy as np
import pandas as pd

# Location of the fitness workbook. The default is the author's original local
# path; set the FITNESS_XLSX environment variable to run this notebook on
# another machine without editing the cell.
FITNESS_XLSX = os.environ.get(
    "FITNESS_XLSX",
    "C:\\Users\\Urvish\\Data Science using SAS and Python\\Data\\fitness.xlsx",
)

fitness = pd.read_excel(FITNESS_XLSX)

# Show the first rows of the loaded frame as the cell output.
fitness.head()

Unnamed: 0,Name,Gender,RunTime,Age,Weight,Oxygen_Consumption,Run_Pulse,Rest_Pulse,Maximum_Pulse,Performance
0,Donna,F,8.17,42,68.15,59.57,166,40,172,90
1,Gracie,F,8.63,38,81.87,60.06,170,48,186,94
2,Luanne,F,8.65,43,85.84,54.3,156,45,168,83
3,Mimi,F,8.92,50,70.87,54.63,146,48,155,67
4,Chris,M,8.95,49,81.42,49.16,180,44,185,72


In [3]:
# Full model: regress Oxygen_Consumption on every candidate predictor
from statsmodels.formula.api import ols

full_model = ols(
    formula='Oxygen_Consumption ~ RunTime + Age + Weight + Run_Pulse + Rest_Pulse + Maximum_Pulse + Performance',
    data=fitness,
).fit()

# Get the VIF of all the predictors

In [6]:
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Design matrix of the fitted full model; one VIF is computed per column.
variables = full_model.model.exog
vif = [
    variance_inflation_factor(variables, col_idx)
    for col_idx, _ in enumerate(variables.T)
]

# NOTE(review): rows below are positional, not labelled. Row 0 is presumably
# the intercept column added by the formula interface -- confirm against
# full_model.model.exog_names before interpreting individual values.
pd.DataFrame(vif)

Unnamed: 0,0
0,28841.926715
1,88.862507
2,51.011759
3,1.763833
4,8.544976
5,1.444246
6,8.78755
7,162.853987


In [7]:
# Reduced model: same response and predictors as the full model, but with
# the Performance term removed from the formula
from statsmodels.formula.api import ols

without_performance_model = ols(
    formula='Oxygen_Consumption ~ RunTime + Age + Weight + Run_Pulse + Rest_Pulse + Maximum_Pulse',
    data=fitness,
).fit()

In [8]:
# Design matrix of the reduced (no Performance) model; one VIF per column.
variables = without_performance_model.model.exog
vif = [
    variance_inflation_factor(variables, col_idx)
    for col_idx, _ in enumerate(variables.T)
]

# NOTE(review): rows below are positional, not labelled. Row 0 is presumably
# the intercept column added by the formula interface -- confirm against
# without_performance_model.model.exog_names.
pd.DataFrame(vif)

Unnamed: 0,0
0,862.676948
1,1.584325
2,1.489533
3,1.159726
4,8.46034
5,1.410043
6,8.755346
