# Connect

https://www.linkedin.com/in/dr-darshan-ingle-corporate-trainer/

In [1]:
import warnings
warnings.simplefilter('ignore')

In [2]:
# Data Retrieval: read the student records CSV into a DataFrame
import pandas as pd

records_path = "student_records.csv"  # resolved against the working directory
df = pd.read_csv(records_path)
df

Unnamed: 0,Name,OverallGrade,Obedient,ResearchScore,ProjectScore,Recommend
0,Henry,A,Y,90,85,Yes
1,John,C,N,85,51,Yes
2,David,F,N,10,17,No
3,Holmes,B,Y,75,71,No
4,Marvin,E,N,20,30,No
5,Simon,A,Y,92,79,Yes
6,Robert,B,Y,60,59,No
7,Trent,C,Y,75,33,No


In [3]:
# Step 2: Data preparation

In [4]:
# Step 3: Feature Extraction and Engineering

# Columns used as model inputs.
features_names = ['OverallGrade','Obedient','ResearchScore','ProjectScore']
# Take an explicit copy: a bare df[...] selection is tracked by pandas as a
# slice of `df`, and assigning columns into it later raises
# SettingWithCopyWarning. With .copy(), `training_features` is independent
# of `df`, and `df` keeps the raw values.
training_features = df[features_names].copy()
training_features

Unnamed: 0,OverallGrade,Obedient,ResearchScore,ProjectScore
0,A,Y,90,85
1,C,N,85,51
2,F,N,10,17
3,B,Y,75,71
4,E,N,20,30
5,A,Y,92,79
6,B,Y,60,59
7,C,Y,75,33


In [5]:
# Target column. The selector is a list (not a bare string), so the result
# is a one-column DataFrame rather than a Series.
outcome_name = ['Recommend']
outcome_labels = df.loc[:, outcome_name]
outcome_labels

Unnamed: 0,Recommend
0,Yes
1,Yes
2,No
3,No
4,No
5,Yes
6,No
7,No


In [6]:
# Sanity check: (number of rows, number of columns) of the target frame.
outcome_labels.shape

(8, 1)

In [7]:
# Group the feature columns by the kind of data they hold:
# numeric columns get scaled, categorical columns get one-hot encoded.

numeric_feature_names = [
    "ResearchScore",
    "ProjectScore",
]
categorical_feature_names = [
    "OverallGrade",
    "Obedient",
]

In [8]:
# Scaling/Normalization
# Try with MinMax Scaler and Robust Scaler
from sklearn.preprocessing import StandardScaler

# Standardise (z-score) the numeric columns.
# The scaler is always fitted on the raw values in `df`, never on
# `training_features`: this cell overwrites the numeric columns of
# `training_features`, so fitting on it would make a second run of the cell
# refit the scaler on already-scaled data.
ss = StandardScaler() # Z-Score
raw_numeric = df[numeric_feature_names]
ss.fit(raw_numeric)

# Assign into an explicit copy: `training_features` was selected out of `df`,
# and writing columns into such a selection triggers SettingWithCopyWarning.
training_features = training_features.copy()
training_features[numeric_feature_names] = ss.transform(raw_numeric)
print(training_features)

  OverallGrade Obedient  ResearchScore  ProjectScore
0            A        Y       0.899583      1.376650
1            C        N       0.730648     -0.091777
2            F        N      -1.803390     -1.560203
3            B        Y       0.392776      0.772004
4            E        N      -1.465519     -0.998746
5            A        Y       0.967158      1.117516
6            B        Y      -0.114032      0.253735
7            C        Y       0.392776     -0.869179


In [9]:
# One Hot Encoding
# Each categorical column is replaced by one indicator column per category
# value (named <column>_<value>); the numeric columns pass through unchanged.
# Note: this rebinds `training_features`, so the cell cannot be re-run without
# re-running the cells above (the categorical columns no longer exist).

training_features = pd.get_dummies(
    data=training_features,
    columns=categorical_feature_names,
)
print(training_features)

   ResearchScore  ProjectScore  OverallGrade_A  OverallGrade_B  \
0       0.899583      1.376650               1               0   
1       0.730648     -0.091777               0               0   
2      -1.803390     -1.560203               0               0   
3       0.392776      0.772004               0               1   
4      -1.465519     -0.998746               0               0   
5       0.967158      1.117516               1               0   
6      -0.114032      0.253735               0               1   
7       0.392776     -0.869179               0               0   

   OverallGrade_C  OverallGrade_E  OverallGrade_F  Obedient_N  Obedient_Y  
0               0               0               0           0           1  
1               1               0               0           1           0  
2               0               0               1           1           0  
3               0               0               0           0           1  
4               0        

In [10]:
# Column labels of the feature frame as it currently stands.
training_features.columns

Index(['ResearchScore', 'ProjectScore', 'OverallGrade_A', 'OverallGrade_B',
       'OverallGrade_C', 'OverallGrade_E', 'OverallGrade_F', 'Obedient_N',
       'Obedient_Y'],
      dtype='object')

In [11]:
# Column labels of the numeric features, selected from the original frame.
df[numeric_feature_names].columns

Index(['ResearchScore', 'ProjectScore'], dtype='object')

In [12]:
# Columns of the feature frame that are not numeric features (set difference).
set(training_features.columns) - set(df[numeric_feature_names].columns)

{'Obedient_N',
 'Obedient_Y',
 'OverallGrade_A',
 'OverallGrade_B',
 'OverallGrade_C',
 'OverallGrade_E',
 'OverallGrade_F'}

In [13]:
# Engineered categorical features = every column of the feature frame that is
# not one of the numeric columns.
# The list is built from a set, so its element order is arbitrary and should
# not be relied upon.
all_feature_columns = set(training_features.columns)
numeric_columns = set(df[numeric_feature_names].columns)
categorical_engineered_features = list(all_feature_columns - numeric_columns)
print(categorical_engineered_features)

['Obedient_Y', 'OverallGrade_C', 'OverallGrade_A', 'OverallGrade_B', 'OverallGrade_F', 'OverallGrade_E', 'Obedient_N']


In [14]:
# Modeling - fit a logistic regression classifier on the engineered features

from sklearn.linear_model import LogisticRegression
import numpy as np

# Targets as a 1-D array of 'Yes'/'No' labels taken from the Recommend column.
train_targets = np.array(outcome_labels['Recommend'])

# fit() returns the estimator itself, so `model` and `lr` name the same
# fitted object.
lr = LogisticRegression()
model = lr.fit(training_features, train_targets)
print(model)

LogisticRegression()


In [15]:
# Model Evaluation
# NOTE(review): the predictions are made on the same rows the model was
# fitted on, so any metric computed from them measures fit to the training
# data, not performance on unseen data.

actual_labels = np.array(outcome_labels['Recommend'])
pred_labels = model.predict(training_features)

In [16]:
# Labels predicted by the model.
pred_labels

array(['Yes', 'Yes', 'No', 'No', 'No', 'Yes', 'No', 'No'], dtype=object)

In [17]:
# Ground-truth labels, for comparison with the predictions.
actual_labels

array(['Yes', 'Yes', 'No', 'No', 'No', 'Yes', 'No', 'No'], dtype=object)

In [18]:
# For a classification problem, model quality is judged with metrics;
# accuracy_score is the fraction of labels predicted correctly.

from sklearn.metrics import accuracy_score
accuracy_score(y_true=actual_labels, y_pred=pred_labels)

1.0

In [19]:
from sklearn.metrics import confusion_matrix
# Rows are the actual classes, columns the predicted classes.
print(confusion_matrix(y_true=actual_labels, y_pred=pred_labels))

[[5 0]
 [0 3]]


In [20]:
from sklearn.metrics import classification_report
# Per-class precision, recall, f1-score and support, plus the averages.
print(classification_report(y_true=actual_labels, y_pred=pred_labels))

              precision    recall  f1-score   support

          No       1.00      1.00      1.00         5
         Yes       1.00      1.00      1.00         3

    accuracy                           1.00         8
   macro avg       1.00      1.00      1.00         8
weighted avg       1.00      1.00      1.00         8



In [27]:
# Model Deployment

import joblib
import os

# Persist the fitted model and the fitted scaler, so new data can be scaled
# the same way before it is passed to the model.
# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: a single call
# that succeeds whether or not the directory is already there, with no gap
# between the check and the creation.
os.makedirs('Model', exist_ok=True)
os.makedirs('Scaler', exist_ok=True)

# NOTE(review): the one-hot column layout (training_features.columns) is not
# saved; whoever loads the model must rebuild the features with the same
# columns in the same order.
joblib.dump(model,r'Model/model.pickle')
joblib.dump(ss,r'Scaler/scaler.pickle')

['Scaler/scaler.pickle']

In [28]:
# Working directory: the relative Model/ and Scaler/ paths resolve against it.
# NOTE(review): the saved output exposes an absolute local path; clear it
# before sharing the notebook.
os.getcwd()

'E:\\eBooks\\Data_Science\\Machine Learning\\INSAID\\Machine Learning\\ML-Foundation track\\Module 1_ Introduction to Machine Learning'

__The model is successfully saved. We can now send it to the client, either by email or by deploying it to a server (GCP, AWS, Heroku, etc.).__