In [59]:
# Import the required libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import BatchNormalization
from keras.layers import InputLayer
from keras.optimizers import SGD,Adam

In [24]:
# Mount the Google Drive folder and load the leads data set (leads.csv)

# google.colab is supplied by the Colab runtime, so this cell is Colab-specific.
from google.colab import drive

# Mount Drive under /content/gdrive (force_remount is requested explicitly).
drive.mount('/content/gdrive', force_remount = True)
# Switch the working directory to "My Drive" so the relative file names used
# in this notebook (leads.csv, model_1.h5, ...) resolve inside the Drive folder.
os.chdir('/content/gdrive/My Drive')

# Read the leads CSV from the current working directory into a DataFrame.
data = pd.read_csv("leads.csv")

Mounted at /content/gdrive


In [44]:
# Sanity check: display the first five rows of the loaded dataframe
# (five is the default row count of DataFrame.head)

data.head()

Unnamed: 0,Lead_Origin,Lead_Source,Do_Not_Email,Do_Not_Call,Converted,TotalVisits,Total_Time_On_Website,Page_Views_Per_Visit,Last_Activity,Specialization,Current_Occupation,Search,Newspaper_Article,X_Forums,Newspaper,Digital_Advertisement,Through_Recommendations,City,Free_Copy,Last_Notable_Activity
0,API,Olark_Chat,No,No,0,0,0,0.0,Page_Visited_on_Website,Others,Unemployed,No,No,No,No,No,No,Mumbai,No,Modified
1,API,Organic_Search,No,No,0,5,674,2.5,Email_Opened,Others,Unemployed,No,No,No,No,No,No,Mumbai,No,Email_Opened
2,Landing_Page_Submission,Direct_Traffic,No,No,1,2,1532,2.0,Email_Opened,Business_Administration,Student,No,No,No,No,No,No,Mumbai,Yes,Email_Opened
3,Landing_Page_Submission,Direct_Traffic,No,No,0,1,305,1.0,Unreachable,Media_and_Advertising,Unemployed,No,No,No,No,No,No,Mumbai,No,Modified
4,Landing_Page_Submission,Google,No,No,1,2,1428,1.0,Converted_to_Lead,Others,Unemployed,No,No,No,No,No,No,Mumbai,No,Modified


In [45]:
# 1. Separate the predictor columns from the label column ('Converted')
# 2. Perform an 80/20 train - test split with a fixed random seed
#    (the label is kept as a single column; no one hot encoding is applied)

y = data['Converted']
X = data.drop(columns = ['Converted'])

X_train_temp, X_test_temp, y_train, y_test = train_test_split(
    X,
    y,
    test_size = 0.2,
    random_state = 100,
)

In [46]:
# Label encoding of categorical columns in train and test data sets
#
# Every categorical predictor gets its own LabelEncoder, fitted on the training
# split only and then applied to both splits. The fitted encoders and the encoded
# arrays keep their original module-level names because later cells use them.
#
# NOTE(review): the classes printed at the end of this notebook list both
# 'Google' and 'google' for Lead_Source, so the two spellings receive different
# codes - consider normalising the raw values upstream.

def _label_encode_column(column):
    """Fit a LabelEncoder on one training column and encode both splits.

    Parameters
    ----------
    column : str
        Name of a column present in both X_train_temp and X_test_temp.

    Returns
    -------
    tuple
        (fitted LabelEncoder, encoded training values, encoded test values).

    Raises
    ------
    ValueError
        If the test split contains a category that was not seen during fitting.
        The message names the offending column.
    """
    # Values are cast to str before encoding, exactly as the per-column code did.
    train_values = X_train_temp[column].astype(str)
    test_values = X_test_temp[column].astype(str)

    encoder = LabelEncoder()
    encoder.fit(train_values)
    train_codes = encoder.transform(train_values)

    try:
        test_codes = encoder.transform(test_values)
    except ValueError as err:
        # LabelEncoder's own message does not say which column failed.
        raise ValueError(
            f"Column '{column}' has categories in the test split that were "
            f"not seen in the training split: {err}"
        ) from err

    return encoder, train_codes, test_codes


Lead_Origin_LE, X_train_temp_Lead_Origin, X_test_temp_Lead_Origin = _label_encode_column('Lead_Origin')
Lead_Source_LE, X_train_temp_Lead_Source, X_test_temp_Lead_Source = _label_encode_column('Lead_Source')
Do_Not_Email_LE, X_train_temp_Do_Not_Email, X_test_temp_Do_Not_Email = _label_encode_column('Do_Not_Email')
Do_Not_Call_LE, X_train_temp_Do_Not_Call, X_test_temp_Do_Not_Call = _label_encode_column('Do_Not_Call')

Last_Activity_LE, X_train_temp_Last_Activity, X_test_temp_Last_Activity = _label_encode_column('Last_Activity')
Specialization_LE, X_train_temp_Specialization, X_test_temp_Specialization = _label_encode_column('Specialization')
Current_Occupation_LE, X_train_temp_Current_Occupation, X_test_temp_Current_Occupation = _label_encode_column('Current_Occupation')
Search_LE, X_train_temp_Search, X_test_temp_Search = _label_encode_column('Search')

Newspaper_Article_LE, X_train_temp_Newspaper_Article, X_test_temp_Newspaper_Article = _label_encode_column('Newspaper_Article')
X_Forums_LE, X_train_temp_X_Forums, X_test_temp_X_Forums = _label_encode_column('X_Forums')
Newspaper_LE, X_train_temp_Newspaper, X_test_temp_Newspaper = _label_encode_column('Newspaper')
Digital_Advertisement_LE, X_train_temp_Digital_Advertisement, X_test_temp_Digital_Advertisement = _label_encode_column('Digital_Advertisement')

Through_Recommendations_LE, X_train_temp_Through_Recommendations, X_test_temp_Through_Recommendations = _label_encode_column('Through_Recommendations')
City_LE, X_train_temp_City, X_test_temp_City = _label_encode_column('City')
Free_Copy_LE, X_train_temp_Free_Copy, X_test_temp_Free_Copy = _label_encode_column('Free_Copy')
Last_Notable_Activity_LE, X_train_temp_Last_Notable_Activity, X_test_temp_Last_Notable_Activity = _label_encode_column('Last_Notable_Activity')

In [47]:
# Build the final training frame: start from a deep copy of the raw training
# split, then overwrite each categorical column with its label-encoded values.

X_train = X_train_temp.copy(deep = True)

train_encoded_columns = {
    'Lead_Origin': X_train_temp_Lead_Origin,
    'Lead_Source': X_train_temp_Lead_Source,
    'Do_Not_Email': X_train_temp_Do_Not_Email,
    'Do_Not_Call': X_train_temp_Do_Not_Call,
    'Last_Activity': X_train_temp_Last_Activity,
    'Specialization': X_train_temp_Specialization,
    'Current_Occupation': X_train_temp_Current_Occupation,
    'Search': X_train_temp_Search,
    'Newspaper_Article': X_train_temp_Newspaper_Article,
    'X_Forums': X_train_temp_X_Forums,
    'Newspaper': X_train_temp_Newspaper,
    'Digital_Advertisement': X_train_temp_Digital_Advertisement,
    'Through_Recommendations': X_train_temp_Through_Recommendations,
    'City': X_train_temp_City,
    'Free_Copy': X_train_temp_Free_Copy,
    'Last_Notable_Activity': X_train_temp_Last_Notable_Activity,
}

# Assigning to an existing column replaces its values and keeps its position.
for column_name, encoded_values in train_encoded_columns.items():
    X_train[column_name] = encoded_values

In [52]:
# Build the final test frame: start from a deep copy of the raw test split,
# then overwrite each categorical column with its label-encoded values.

X_test = X_test_temp.copy(deep = True)

test_encoded_columns = {
    'Lead_Origin': X_test_temp_Lead_Origin,
    'Lead_Source': X_test_temp_Lead_Source,
    'Do_Not_Email': X_test_temp_Do_Not_Email,
    'Do_Not_Call': X_test_temp_Do_Not_Call,
    'Last_Activity': X_test_temp_Last_Activity,
    'Specialization': X_test_temp_Specialization,
    'Current_Occupation': X_test_temp_Current_Occupation,
    'Search': X_test_temp_Search,
    'Newspaper_Article': X_test_temp_Newspaper_Article,
    'X_Forums': X_test_temp_X_Forums,
    'Newspaper': X_test_temp_Newspaper,
    'Digital_Advertisement': X_test_temp_Digital_Advertisement,
    'Through_Recommendations': X_test_temp_Through_Recommendations,
    'City': X_test_temp_City,
    'Free_Copy': X_test_temp_Free_Copy,
    'Last_Notable_Activity': X_test_temp_Last_Notable_Activity,
}

# Assigning to an existing column replaces its values and keeps its position.
for column_name, encoded_values in test_encoded_columns.items():
    X_test[column_name] = encoded_values

In [96]:
# Define and compose model

# Width of the input layer, read from the training frame so the network always
# matches the number of predictor columns instead of relying on a literal.
n_features = X_train.shape[1]

model_1 = Sequential()

# Define the layers
# Keras documents input_shape as a shape tuple (batch axis excluded); a bare
# integer is not accepted by every Keras version, so pass a 1-tuple.
model_1.add(InputLayer(input_shape = (n_features,)))
# Normalise the raw inputs per batch before the dense stack.
model_1.add(BatchNormalization())
model_1.add(Dense(100, activation = 'relu'))
model_1.add(Dense(100, activation = 'relu'))
model_1.add(Dense(20, activation = 'relu'))
model_1.add(Dense(10, activation = 'relu'))
# Single sigmoid unit: the output is one value in (0, 1) for the 'Converted' label.
model_1.add(Dense(1, activation = 'sigmoid'))

# Compile the model with the required optimizer, loss function and metric
model_1.compile(
    optimizer = SGD(learning_rate = 0.001, momentum = 0.7),
    loss = 'binary_crossentropy',
    metrics = ['accuracy'],
)

# Print the model summary
model_1.summary()

Model: "sequential_24"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_23 (Batc (None, 19)                76        
_________________________________________________________________
dense_115 (Dense)            (None, 100)               2000      
_________________________________________________________________
dense_116 (Dense)            (None, 100)               10100     
_________________________________________________________________
dense_117 (Dense)            (None, 20)                2020      
_________________________________________________________________
dense_118 (Dense)            (None, 10)                210       
_________________________________________________________________
dense_119 (Dense)            (None, 1)                 11        
Total params: 14,417
Trainable params: 14,379
Non-trainable params: 38
________________________________________________

In [97]:
# Train the model for 100 epochs on the encoded training split; the encoded
# test split is supplied as validation data. The returned object is kept in
# history_1.

history_1 = model_1.fit(
    x = X_train,
    y = y_train,
    epochs = 100,
    validation_data = (X_test, y_test),
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [98]:
# Save the trained model as model_1.h5 (HDF5 file) in the current working directory

model_1.save(filepath = "model_1.h5")

In [99]:
# Convert the keras model to tensorflow.js

!pip install tensorflowjs 

Collecting tensorflowjs
[?25l  Downloading https://files.pythonhosted.org/packages/0f/55/e84ad85e325d239e46b7630a4be48cea9909bf57bd082cd5fe0efe7006de/tensorflowjs-2.8.2-py3-none-any.whl (63kB)
[K     |█████▏                          | 10kB 18.6MB/s eta 0:00:01[K     |██████████▍                     | 20kB 21.9MB/s eta 0:00:01[K     |███████████████▌                | 30kB 10.8MB/s eta 0:00:01[K     |████████████████████▊           | 40kB 9.2MB/s eta 0:00:01[K     |█████████████████████████▉      | 51kB 8.8MB/s eta 0:00:01[K     |███████████████████████████████ | 61kB 7.4MB/s eta 0:00:01[K     |████████████████████████████████| 71kB 4.1MB/s 
Collecting tensorflow-hub<0.10,>=0.7.0
[?25l  Downloading https://files.pythonhosted.org/packages/ac/83/a7df82744a794107641dad1decaad017d82e25f0e1f761ac9204829eef96/tensorflow_hub-0.9.0-py2.py3-none-any.whl (103kB)
[K     |███▏                            | 10kB 18.1MB/s eta 0:00:01[K     |██████▍                         | 20kB 24.

In [100]:
# Convert the model to tfjs

!mkdir model_1

!tensorflowjs_converter --input_format keras model_1.h5 model/

2021-01-04 15:30:43.447718: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [101]:
# Zip and download the model

!zip -r model.zip model 

from google.colab import files
files.download('model.zip')

updating: model/ (stored 0%)
updating: model/group1-shard1of1.bin (deflated 7%)
updating: model/model.json (deflated 82%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [103]:
# Classes for predictors
# Print the classes_ array of every fitted label encoder; LabelEncoder maps a
# category to its position in this array.

fitted_encoders = [
    ("Lead Origin", Lead_Origin_LE),
    ("Lead Source", Lead_Source_LE),
    ("Do Not Email", Do_Not_Email_LE),
    ("Do Not Call", Do_Not_Call_LE),
    ("Last Activity", Last_Activity_LE),
    ("Specialization", Specialization_LE),
    ("Current Occupation", Current_Occupation_LE),
    ("Search", Search_LE),
    ("Newspaper Article", Newspaper_Article_LE),
    ("X Forums", X_Forums_LE),
    ("Newspaper", Newspaper_LE),
    ("Digital Advertisement", Digital_Advertisement_LE),
    ("Through Recommendations", Through_Recommendations_LE),
    ("City", City_LE),
    ("Free Copy", Free_Copy_LE),
    ("Last Notable Activity", Last_Notable_Activity_LE),
]

for display_name, fitted_encoder in fitted_encoders:
    print(f"\n{display_name} Classes: ", fitted_encoder.classes_)


Lead Origin Classes:  ['API' 'Landing_Page_Submission' 'Lead_Add_Form' 'Lead_Import']

Lead Source Classes:  ['Bing' 'Click2call' 'Direct_Traffic' 'Facebook' 'Google' 'Olark_Chat'
 'Organic_Search' 'Reference' 'Referral_Sites' 'Welingak_Website' 'google']

Do Not Email Classes:  ['No' 'Yes']

Do Not Call Classes:  ['No' 'Yes']

Last Activity Classes:  ['Approached_upfront' 'Converted_to_Lead' 'Email_Bounced'
 'Email_Link_Clicked' 'Email_Opened' 'Form_Submitted_on_Website'
 'Had_a_Phone_Conversation' 'Olark_Chat_Conversation'
 'Page_Visited_on_Website' 'SMS_Sent' 'Unreachable' 'Unsubscribed'
 'View_in_browser_link_Clicked']

Specialization Classes:  ['Banking_Investment_And_Insurance' 'Business_Administration' 'E-Business'
 'E-COMMERCE' 'Finance_Management' 'Healthcare_Management'
 'Hospitality_Management' 'Human_Resource_Management'
 'IT_Projects_Management' 'International_Business' 'Marketing_Management'
 'Media_and_Advertising' 'Operations_Management' 'Others'
 'Retail_Management' '