In [1]:
import warnings
warnings.simplefilter('ignore')
import numpy as np
import pandas as pd
import joblib
import os

In [2]:
# Prediction
# Restore the persisted model and scaler from their pickle files.
# NOTE(review): joblib.load unpickles its input, and unpickling can execute
# arbitrary code -- only point these paths at artifacts from a trusted source.
model_path = r'Model/model.pickle'
scaler_path = r'Scaler/scaler.pickle'

model, scaler = joblib.load(model_path), joblib.load(scaler_path)

In [3]:
# Assemble the records to predict on, written column by column.
new_data = pd.DataFrame({
    'Name': ['Ninad', 'Thomas'],
    'OverallGrade': ['F', 'A'],
    'Obedient': ['N', 'Y'],
    'ResearchScore': [30, 78],
    'ProjectScore': [20, 80],
})
new_data

Unnamed: 0,Name,OverallGrade,Obedient,ResearchScore,ProjectScore
0,Ninad,F,N,30,20
1,Thomas,A,Y,78,80


In [4]:
# Keep only the columns used as model inputs ('Name' is left out).
features_names = ['OverallGrade', 'Obedient', 'ResearchScore', 'ProjectScore']

# Take an explicit copy: later cells assign into prediction_features, and
# assigning into a frame derived from new_data by selection is the pattern
# pandas flags with SettingWithCopyWarning (silenced here by the global
# warnings filter). The copy makes the frame independent of new_data.
prediction_features = new_data[features_names].copy()

['OverallGrade', 'Obedient', 'ResearchScore', 'ProjectScore']

In [6]:
# Show the selected feature columns before any scaling or encoding is applied.
prediction_features

Unnamed: 0,OverallGrade,Obedient,ResearchScore,ProjectScore
0,F,N,30,20
1,A,Y,78,80


In [7]:
# Group the feature columns by the kind of values they hold:
# the numeric ones are scaled, the categorical ones are one-hot encoded.
numeric_feature_names = [
    'ResearchScore',
    'ProjectScore',
]
categorical_feature_names = [
    'OverallGrade',
    'Obedient',
]

In [8]:
# Run the numeric columns through the loaded scaler, then write the
# transformed values back over the raw ones.
# Note: this overwrites its own input, so running the cell a second time
# would transform the already-transformed values.
scaled_numeric_values = scaler.transform(prediction_features[numeric_feature_names])
prediction_features[numeric_feature_names] = scaled_numeric_values
prediction_features

Unnamed: 0,OverallGrade,Obedient,ResearchScore,ProjectScore
0,F,N,-1.127647,-1.430636
1,A,Y,0.494137,1.160705


In [9]:
# One-hot encode the categorical columns. get_dummies only creates indicator
# columns for values that actually occur in this frame (here grades A and F),
# so indicator columns for any other category are absent at this point.
# Note: the cell replaces prediction_features with its encoded form, so the
# categorical source columns no longer exist afterwards.
prediction_features = pd.get_dummies(prediction_features, columns=categorical_feature_names)
prediction_features

Unnamed: 0,ResearchScore,ProjectScore,OverallGrade_A,OverallGrade_F,Obedient_N,Obedient_Y
0,-1.127647,-1.430636,0,1,1,0
1,0.494137,1.160705,1,0,0,1


In [10]:
# Inspect the column names present after one-hot encoding.
set(prediction_features.columns)

{'Obedient_N',
 'Obedient_Y',
 'OverallGrade_A',
 'OverallGrade_F',
 'ProjectScore',
 'ResearchScore'}

In [11]:
# The numeric column names, as a set, for comparison with the columns above.
set(numeric_feature_names)

{'ProjectScore', 'ResearchScore'}

In [12]:
# Work out which one-hot columns exist so far: every column of the encoded
# frame that is not one of the numeric features.
current_categorical_engineered_features = set(prediction_features.columns).difference(
    numeric_feature_names
)
current_categorical_engineered_features

{'Obedient_N', 'Obedient_Y', 'OverallGrade_A', 'OverallGrade_F'}

In [13]:
# Full set of one-hot columns the frame is expected to contain.
# NOTE(review): presumably the indicator columns produced at training time --
# confirm against the training notebook rather than trusting this literal.
categorical_engineered_features = [
    'Obedient_Y',
    'OverallGrade_C',
    'OverallGrade_A',
    'OverallGrade_B',
    'OverallGrade_F',
    'OverallGrade_E',
    'Obedient_N',
]
categorical_engineered_features

['Obedient_Y',
 'OverallGrade_C',
 'OverallGrade_A',
 'OverallGrade_B',
 'OverallGrade_F',
 'OverallGrade_E',
 'Obedient_N']

In [14]:
# Expected one-hot columns that the encoded frame does not have yet.
missing_features = set(categorical_engineered_features).difference(
    current_categorical_engineered_features
)
missing_features

{'OverallGrade_B', 'OverallGrade_C', 'OverallGrade_E'}

In [16]:
# Add every expected-but-absent indicator column, filled with zeros: none of
# the rows belongs to a category that did not appear in the data.
# The previous right-hand side, `0 * len(prediction_features)`, is just the
# scalar 0 (it reads like a typo for `[0] * len(...)`); assigning the scalar
# directly states the intent and pandas broadcasts it to every row.
# Note: missing_features is a set, so the order in which these columns are
# appended is not guaranteed to be the same on every kernel start.
for feature in missing_features:
    prediction_features[feature] = 0

In [17]:
# Show the frame now that the absent indicator columns have been added.
prediction_features

Unnamed: 0,ResearchScore,ProjectScore,OverallGrade_A,OverallGrade_F,Obedient_N,Obedient_Y,OverallGrade_C,OverallGrade_E,OverallGrade_B
0,-1.127647,-1.430636,0,1,1,0,0,0,0
1,0.494137,1.160705,1,0,0,1,0,0,0


In [18]:
# Predict with the loaded model.
# The frame now has every expected column, but its column ORDER is whatever
# the cells above happened to produce, which need not match the order used
# when the model was fitted. If the model recorded the column names it was
# fitted on (scikit-learn estimators fitted on a DataFrame expose them as
# `feature_names_in_`), select the columns in exactly that order; a KeyError
# here means an expected column is still absent.
expected_columns = getattr(model, 'feature_names_in_', None)
if expected_columns is not None:
    model_input = prediction_features[list(expected_columns)]
else:
    # NOTE(review): the model does not expose its training column order, so
    # the frame is passed through unchanged -- verify manually that this
    # column order matches the one used for training.
    model_input = prediction_features

predictions = model.predict(model_input)
predictions

array(['No', 'Yes'], dtype=object)

In [20]:
# Attach the model's output to the original records as a 'Recommend' column.
new_data = new_data.assign(Recommend=predictions)
new_data

Unnamed: 0,Name,OverallGrade,Obedient,ResearchScore,ProjectScore,Recommend
0,Ninad,F,N,30,20,No
1,Thomas,A,Y,78,80,Yes
