In [1]:
import torch

In [2]:
print("Number of GPU: ", torch.cuda.device_count())
print("GPU Name: ", torch.cuda.get_device_name())

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

Number of GPU:  1
GPU Name:  NVIDIA GeForce RTX 2070 Super with Max-Q Design
Using device: cuda


In [1]:
import pandas as pd
import numpy as np
import pickle

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder

## Read Data

In [2]:
main_data = pd.read_csv('Churn_Modelling.csv')
df = pd.DataFrame(main_data)

In [3]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


### Remove unwanted features

In [4]:
df.drop(["RowNumber","Surname", "CustomerId"],axis = 1, inplace = True)

In [5]:
tran_Gen = LabelEncoder().fit_transform(df["Gender"])
tran_Gen = pd.DataFrame(tran_Gen, columns = ["Gender"])

In [6]:
tran_Gen

Unnamed: 0,Gender
0,0
1,0
2,0
3,0
4,0
...,...
9995,1
9996,1
9997,0
9998,1


#### <font color = 'red'> -- Not applying LabelEncoder on Geography as it will convert items --> 1,2,3..</color> 

In [7]:
# Both will do the same thing
#tran_Geo = pd.get_dummies(df['Geography']).replace({True:1,False:0})
tran_Geo = pd.get_dummies(df['Geography'],drop_first = False).astype(int)
tran_Geo = pd.DataFrame(tran_Geo)

#### <font color = 'red'> -- OneHotEncoder Expected a 2-dimensional container</color>

In [8]:
tran_Geo_One = OneHotEncoder().fit_transform([df['Geography']])
tran_Geo_One.toarray()

array([[1., 1., 1., ..., 1., 1., 1.]])

In [9]:
with open('tran_Geo.pkl', 'wb') as file:
    pickle.dump(tran_Geo,file)
with open('tran_gen.pkl','wb') as file:
    pickle.dump(tran_Gen,file)

#### Drop Geography, Gender

In [10]:
df.drop(["Geography","Gender"],axis = 1, inplace = True)

#### Concatination

In [11]:
result = pd.concat([df, tran_Gen, tran_Geo],axis = 1)

In [12]:
result

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Gender,France,Germany,Spain
0,619,42,2,0.00,1,1,1,101348.88,1,0,1,0,0
1,608,41,1,83807.86,1,0,1,112542.58,0,0,0,0,1
2,502,42,8,159660.80,3,1,0,113931.57,1,0,1,0,0
3,699,39,1,0.00,2,0,0,93826.63,0,0,1,0,0
4,850,43,2,125510.82,1,1,1,79084.10,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,39,5,0.00,2,1,0,96270.64,0,1,1,0,0
9996,516,35,10,57369.61,1,1,1,101699.77,0,1,1,0,0
9997,709,36,7,0.00,1,0,1,42085.58,1,0,1,0,0
9998,772,42,3,75075.31,2,1,0,92888.52,1,1,0,1,0


#### Train Test Split

In [13]:
y = df['Exited']
x = result.drop('Exited',axis=1)

In [14]:
x.columns

Index(['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
       'IsActiveMember', 'EstimatedSalary', 'Gender', 'France', 'Germany',
       'Spain'],
      dtype='object')

In [15]:
with open ('Feature.pkl','wb') as file:
    pickle.dump(y,file)
with open ('Label.pkl','wb') as file:
    pickle.dump(x,file)

In [18]:
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size = 0.3, random_state = 12)

In [19]:
x_train = StandardScaler().fit_transform(x_train)
x_test = StandardScaler().fit_transform(x_test)

In [20]:
x_test

array([[-0.02618629, -0.57636637,  0.71445295, ..., -1.002002  ,
         1.73051257, -0.57529746],
       [ 0.41689225, -0.19210078,  1.40197783, ...,  0.998002  ,
        -0.57786347, -0.57529746],
       [ 2.05525243,  0.28823122,  1.05821539, ...,  0.998002  ,
        -0.57786347, -0.57529746],
       ...,
       [ 1.20000781,  0.86462962,  1.40197783, ...,  0.998002  ,
        -0.57786347, -0.57529746],
       [-0.23226933, -1.63309677, -0.66059683, ..., -1.002002  ,
        -0.57786347,  1.7382312 ],
       [ 1.80795278,  0.19216482,  0.02692806, ..., -1.002002  ,
        -0.57786347,  1.7382312 ]])

In [21]:
x_train

array([[-0.78281587,  0.77104149,  1.36905231, ..., -1.00314781,
         1.7267882 , -0.5731713 ],
       [-2.22357946,  0.58093246, -0.71149902, ..., -1.00314781,
         1.7267882 , -0.5731713 ],
       [ 0.0774962 ,  0.10565988, -0.01798191, ...,  0.99686207,
        -0.57910982, -0.5731713 ],
       ...,
       [-1.28034574, -0.65477624,  0.6755352 , ...,  0.99686207,
        -0.57910982, -0.5731713 ],
       [-1.31144136,  0.48587794,  1.71581086, ...,  0.99686207,
        -0.57910982, -0.5731713 ],
       [ 0.8134258 , -1.22510333, -0.36474046, ..., -1.00314781,
         1.7267882 , -0.5731713 ]])

In [22]:
import tensorflow
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime

In [23]:
model = Sequential([
    Dense(60, activation = 'relu', input_shape = (x_train.shape[1],)), # Layer 1 Hidden, Input -- x_train parameters {13}
    Dense(24, activation = 'relu'), # Hidden 2
    Dense(1, activation = 'sigmoid') # Output Layer
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [24]:
model.summary()

In [25]:
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

In [26]:
log_dir="logs/fit/" + datetime.datetime.now().strftime("%d_%m_%y--%H_%M_")
tensorflow_callback=TensorBoard(log_dir=log_dir,histogram_freq=1)

In [27]:
early_stopping_callback=EarlyStopping(monitor='val_loss',patience=20,restore_best_weights=True)

In [28]:
model_ = model.fit(
    x_train, y_train, 
    validation_data=(x_test, y_test),  # make sure to use y_test for validation
    epochs=1000, 
    callbacks=[tensorflow_callback, early_stopping_callback]
)

Epoch 1/1000
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.7501 - loss: 0.5242 - val_accuracy: 0.8107 - val_loss: 0.4291
Epoch 2/1000
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8317 - loss: 0.4023 - val_accuracy: 0.8410 - val_loss: 0.3914
Epoch 3/1000
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8524 - loss: 0.3635 - val_accuracy: 0.8540 - val_loss: 0.3673
Epoch 4/1000
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8626 - loss: 0.3445 - val_accuracy: 0.8530 - val_loss: 0.3655
Epoch 5/1000
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8600 - loss: 0.3377 - val_accuracy: 0.8550 - val_loss: 0.3551
Epoch 6/1000
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8604 - loss: 0.3390 - val_accuracy: 0.8573 - val_loss: 0.3590
Epoch 7/1000
[1

In [30]:
model.save('model.h5')



In [30]:
model.save('model.keras')

In [38]:
!pip install shap

Collecting shap
  Downloading shap-0.46.0-cp39-cp39-win_amd64.whl.metadata (25 kB)
Collecting tqdm>=4.27.0 (from shap)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting slicer==0.0.8 (from shap)
  Downloading slicer-0.0.8-py3-none-any.whl.metadata (4.0 kB)
Collecting numba (from shap)
  Downloading numba-0.60.0-cp39-cp39-win_amd64.whl.metadata (2.8 kB)
Collecting cloudpickle (from shap)
  Downloading cloudpickle-3.1.0-py3-none-any.whl.metadata (7.0 kB)
Collecting llvmlite<0.44,>=0.43.0dev0 (from numba->shap)
  Downloading llvmlite-0.43.0-cp39-cp39-win_amd64.whl.metadata (4.9 kB)
Downloading shap-0.46.0-cp39-cp39-win_amd64.whl (456 kB)
Downloading slicer-0.0.8-py3-none-any.whl (15 kB)
Using cached tqdm-4.67.1-py3-none-any.whl (78 kB)
Downloading cloudpickle-3.1.0-py3-none-any.whl (22 kB)
Downloading numba-0.60.0-cp39-cp39-win_amd64.whl (2.7 MB)
   ---------------------------------------- 0.0/2.7 MB ? eta -:--:--
   --------------- ------------------------ 1.0/2.7 M

In [None]:
import shap

# Assuming your model is a trained TensorFlow model and X_train is your training data
explainer = shap.KernelExplainer(model.predict, x_train)
shap_values = explainer.shap_values(x_train)

# SHAP summary plot to visualize feature importance
shap.summary_plot(shap_values, X_train)


  from .autonotebook import tqdm as notebook_tqdm


[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 881us/step


  0%|                                                                                                                                                                                                                                        | 0/7000 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m453250/453250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m318s[0m 701us/step


  0%|                                                                                                                                                                                                                          | 1/7000 [08:39<1010:14:51, 519.63s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m453250/453250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m353s[0m 779us/step


  0%|                                                                                                                                                                                                                           | 2/7000 [16:57<985:10:57, 506.81s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m453250/453250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m283s[0m 624us/step


  0%|                                                                                                                                                                                                                           | 3/7000 [24:03<913:30:14, 470.00s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m453250/453250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m293s[0m 647us/step


  0%|▏                                                                                                                                                                                                                          | 4/7000 [31:34<898:56:27, 462.58s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m453250/453250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m276s[0m 608us/step


  0%|▏                                                                                                                                                                                                                          | 5/7000 [38:31<866:48:47, 446.11s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m453250/453250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m287s[0m 633us/step


  0%|▏                                                                                                                                                                                                                          | 6/7000 [45:40<855:08:27, 440.16s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m453250/453250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m333s[0m 734us/step


  0%|▏                                                                                                                                                                                                                          | 7/7000 [53:36<877:49:03, 451.90s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m453250/453250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m295s[0m 651us/step


  0%|▏                                                                                                                                                                                                                        | 8/7000 [1:00:54<869:03:50, 447.46s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m453250/453250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m289s[0m 636us/step


  0%|▎                                                                                                                                                                                                                        | 9/7000 [1:08:06<859:24:28, 442.55s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m453250/453250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m261s[0m 577us/step


  0%|▎                                                                                                                                                                                                                       | 10/7000 [1:14:49<835:43:44, 430.42s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m453250/453250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m274s[0m 604us/step


  0%|▎                                                                                                                                                                                                                       | 11/7000 [1:21:44<826:37:23, 425.79s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m453250/453250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16586s[0m 37ms/step


  0%|▎                                                                                                                                                                                                                    | 12/7000 [6:00:42<10459:00:08, 5388.15s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m453250/453250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m306s[0m 676us/step


  0%|▍                                                                                                                                                                                                                     | 13/7000 [6:08:15<7556:01:27, 3893.19s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m453250/453250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m290s[0m 641us/step


  0%|▍                                                                                                                                                                                                                     | 14/7000 [6:15:32<5528:34:38, 2848.97s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m453250/453250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m289s[0m 637us/step


  0%|▍                                                                                                                                                                                                                     | 15/7000 [6:22:46<4115:44:30, 2121.21s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m453250/453250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m275s[0m 606us/step


  0%|▍                                                                                                                                                                                                                     | 16/7000 [6:29:45<3121:07:03, 1608.82s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m453250/453250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m255s[0m 563us/step


  0%|▌                                                                                                                                                                                                                     | 17/7000 [6:36:22<2413:49:23, 1244.42s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m453250/453250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m645s[0m 1ms/step


  0%|▌                                                                                                                                                                                                                     | 18/7000 [6:49:30<2147:30:40, 1107.28s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m453250/453250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m271s[0m 598us/step


  0%|▌                                                                                                                                                                                                                      | 19/7000 [6:56:29<1746:18:44, 900.55s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m453250/453250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m275s[0m 607us/step


  0%|▌                                                                                                                                                                                                                      | 20/7000 [7:03:33<1468:43:17, 757.51s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m301441/453250[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m1:35[0m 632us/step

In [42]:
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [43]:
%tensorboard --logdir logs/fit