In [1]:
#Import the necessary libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten,Dense

In [2]:
# Report the installed TensorFlow version so the environment used for this
# run is on record next to the results.
print(tf.__version__)

2.0.0-rc0


In [3]:
#Import necessary libraries for EDA
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [4]:
# Load the bank-customer churn data into a DataFrame.
dataset = pd.read_csv(filepath_or_buffer='Churn_Modelling.csv')

In [5]:
# Preview the first five rows of the raw data.
# Exited is the last column and takes the values 1 and 0:
#   1 -> the customer exits the bank
#   0 -> the customer stays with the bank
# It is the dependent variable, i.e. y.
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [6]:
# Independent variables: every column except the identifier-like ones
# (RowNumber, CustomerId, Surname) and the target (Exited).
X = dataset.drop(columns=['CustomerId', 'Surname', 'RowNumber', 'Exited'])
# Dependent variable: the Exited column.
y = dataset['Exited']

In [7]:
# Preview the first five rows of the feature frame.
X.head(n=5)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.0,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.8,3,1,0,113931.57
3,699,France,Female,39,1,0.0,2,0,0,93826.63
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1


In [8]:
#Artificial Neural Networks work on Numerical Data and not String Data
#Geography,Gender Columns are having string values
#Import Label Encoder
from sklearn.preprocessing import LabelEncoder


In [9]:
# Learn the set of Geography categories, then replace each string in the
# column with its integer code.
label1 = LabelEncoder().fit(X['Geography'])
X['Geography'] = label1.transform(X['Geography'])

In [10]:
# Inspect the first ten rows to confirm Geography now holds integer codes.
X.head(n=10)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,0,Female,42,2,0.0,1,1,1,101348.88
1,608,2,Female,41,1,83807.86,1,0,1,112542.58
2,502,0,Female,42,8,159660.8,3,1,0,113931.57
3,699,0,Female,39,1,0.0,2,0,0,93826.63
4,850,2,Female,43,2,125510.82,1,1,1,79084.1
5,645,2,Male,44,8,113755.78,2,1,0,149756.71
6,822,0,Male,50,7,0.0,2,1,1,10062.8
7,376,1,Female,29,4,115046.74,4,1,0,119346.88
8,501,0,Male,44,4,142051.07,2,0,1,74940.5
9,684,0,Male,27,2,134603.88,1,1,1,71725.73


In [11]:
# Learn the Gender categories, then replace each string in the column with
# its integer code.
label2 = LabelEncoder().fit(X['Gender'])
X['Gender'] = label2.transform(X['Gender'])

In [12]:
# Inspect the first ten rows to confirm Gender now holds integer codes.
X.head(n=10)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,0,0,42,2,0.0,1,1,1,101348.88
1,608,2,0,41,1,83807.86,1,0,1,112542.58
2,502,0,0,42,8,159660.8,3,1,0,113931.57
3,699,0,0,39,1,0.0,2,0,0,93826.63
4,850,2,0,43,2,125510.82,1,1,1,79084.1
5,645,2,1,44,8,113755.78,2,1,0,149756.71
6,822,0,1,50,7,0.0,2,1,1,10062.8
7,376,1,0,29,4,115046.74,4,1,0,119346.88
8,501,0,1,44,4,142051.07,2,0,1,74940.5
9,684,0,1,27,2,134603.88,1,1,1,71725.73


In [13]:
# One-hot encode Geography; drop_first=True removes one of the resulting
# columns, which avoids the dummy variable trap.
# Gender is left as a single 0/1 column because it has only two classes.
X = pd.get_dummies(data=X, columns=['Geography'], drop_first=True)

In [14]:
# Inspect the first ten rows: Geography has been replaced by dummy columns.
X.head(n=10)

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_1,Geography_2
0,619,0,42,2,0.0,1,1,1,101348.88,0,0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,1
2,502,0,42,8,159660.8,3,1,0,113931.57,0,0
3,699,0,39,1,0.0,2,0,0,93826.63,0,0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,1
5,645,1,44,8,113755.78,2,1,0,149756.71,0,1
6,822,1,50,7,0.0,2,1,1,10062.8,0,0
7,376,0,29,4,115046.74,4,1,0,119346.88,1,0
8,501,1,44,4,142051.07,2,0,1,74940.5,0,0
9,684,1,27,2,134603.88,1,1,1,71725.73,0,0


## Train Test Data Creation

In [15]:
# Hold out 20% of the rows for testing. stratify=y keeps the proportion of
# 0 and 1 labels the same in the 80% training split and the 20% test split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=0,
)

## Feature Standardization
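- The scales of the features (i.e., the value ranges of the columns) vary widely
- Such variation is not recommended in machine learning, because features with large values can dominate the others
- All the features should therefore be brought to the same scale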

In [16]:
#import the libraries required to perform Feature Scaling
from sklearn.preprocessing import StandardScaler

In [17]:
# Learn the scaling statistics from the training split only, then apply that
# same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
# The dependent variable y is left unscaled: it only has the two output
# classes zero and one.

In [18]:
# View X_train after scaling; the scaler returns a NumPy array, so the
# column names of the original DataFrame are no longer attached.
X_train

array([[-1.24021723, -1.09665089,  0.77986083, ...,  1.64099027,
        -0.57812007, -0.57504086],
       [ 0.75974873,  0.91186722, -0.27382717, ..., -1.55587522,
         1.72974448, -0.57504086],
       [-1.72725557, -1.09665089, -0.9443559 , ...,  1.1038111 ,
        -0.57812007, -0.57504086],
       ...,
       [-0.51484098,  0.91186722,  0.87565065, ..., -1.01507508,
         1.72974448, -0.57504086],
       [ 0.73902369, -1.09665089, -0.36961699, ..., -1.47887193,
        -0.57812007, -0.57504086],
       [ 0.95663657,  0.91186722, -1.32751517, ...,  0.50945854,
        -0.57812007,  1.73900686]])

In [19]:
# Largest and smallest value anywhere in the scaled training features.
X_train.max(), X_train.min()

(5.090402652968747, -3.1158329756545826)

In [20]:
# Largest and smallest value anywhere in the scaled test features.
X_test.max(), X_test.min()

(4.707243379956429, -3.1158329756545826)

## Build ANN

In [41]:
# Seed NumPy and TensorFlow before any weights are created so that layer
# initialisation is repeatable from run to run (same seed value as the
# random_state used for the train/test split).
# NOTE(review): some hardware/kernels may still introduce small
# run-to-run differences.
np.random.seed(0)
tf.random.set_seed(0)

model = Sequential()
# First hidden layer: one neuron per input feature.
# X.shape[1] is the number of features, i.e. columns of X.
model.add(Dense(X.shape[1], activation='relu', input_dim = X.shape[1]))

# Second hidden layer with 128 neurons.
model.add(Dense(128, activation='relu'))

# Output layer: a single sigmoid node, because the target is either 0 or 1.
model.add(Dense(1, activation = 'sigmoid'))

In [42]:
# Sanity check: the value passed to Dense as the layer size / input_dim
# is a plain Python int.
type(X.shape[1])

int

## Compile Model

In [43]:
# The target has only two classes (zero and one), hence binary
# cross-entropy as the loss; accuracy is tracked as the reported metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [44]:
# Check the container types before fitting: X_train is a NumPy array
# (output of the scaler) while y_train is still a pandas Series.
type(X_train),type(y_train)

(numpy.ndarray, pandas.core.series.Series)

## Fit the Model

In [45]:
# X_train is a NumPy array, so y_train is converted to an array as well.
# Epochs are kept at 10: the original note is that the loss stops decreasing
# when more epochs are added.
# NOTE(review): a flat training loss indicates convergence; detecting
# overfitting requires a validation loss, which is not tracked here.
model.fit(
    x=X_train,
    y=y_train.to_numpy(),
    epochs=10,
    batch_size=10,
    verbose=1,
)

Train on 8000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x2681985bb48>

## Model Prediction

In [46]:
# Sequential.predict_classes is deprecated and was removed in TensorFlow 2.6.
# For a single sigmoid output it is equivalent to thresholding the predicted
# probability at 0.5, which works on every TF 2.x release.
# The result keeps the (n_samples, 1) int32 layout predict_classes returned.
y_pred=(model.predict(X_test) > 0.5).astype('int32')

In [47]:
# Inspect the predicted class labels (one 0/1 value per test row).
y_pred

(array([[0],
        [0],
        [0],
        ...,
        [0],
        [1],
        [0]]),
 1344    1
 8167    0
 4747    0
 5004    1
 3124    1
 1940    1
 2090    0
 3298    0
 8364    1
 9485    0
 3088    0
 617     1
 7211    0
 9253    0
 9743    0
 1546    0
 3475    0
 6623    0
 7594    0
 2347    1
 9583    0
 6473    0
 8104    0
 6486    0
 9747    1
 6187    0
 2458    0
 5875    0
 9317    1
 7170    0
        ..
 9086    0
 2249    0
 1971    0
 8379    1
 6450    0
 7223    1
 6143    0
 7582    0
 4679    0
 7060    0
 9709    0
 1045    0
 7141    0
 5148    0
 4868    0
 3407    0
 5400    0
 9241    0
 9454    0
 1046    1
 7791    0
 959     0
 6488    0
 1487    1
 9574    0
 9107    0
 8249    0
 8337    0
 6279    1
 412     0
 Name: Exited, Length: 2000, dtype: int64)

In [48]:
# Inspect the true labels of the test split for comparison with y_pred.
y_test

1344    1
8167    0
4747    0
5004    1
3124    1
1940    1
2090    0
3298    0
8364    1
9485    0
3088    0
617     1
7211    0
9253    0
9743    0
1546    0
3475    0
6623    0
7594    0
2347    1
9583    0
6473    0
8104    0
6486    0
9747    1
6187    0
2458    0
5875    0
9317    1
7170    0
       ..
9086    0
2249    0
1971    0
8379    1
6450    0
7223    1
6143    0
7582    0
4679    0
7060    0
9709    0
1045    0
7141    0
5148    0
4868    0
3407    0
5400    0
9241    0
9454    0
1046    1
7791    0
959     0
6488    0
1487    1
9574    0
9107    0
8249    0
8337    0
6279    1
412     0
Name: Exited, Length: 2000, dtype: int64

## Model Evaluation

In [49]:
# Score the model on the held-out test split; the result lists the loss
# followed by the accuracy metric configured at compile time.
model.evaluate(x=X_test, y=y_test.to_numpy())



[0.3564716470241547, 0.8545]

In [50]:
#Calculate Confusion Matrix
from sklearn.metrics import confusion_matrix,accuracy_score

In [52]:
# Confusion matrix on the test split: rows are the true classes, columns
# are the predicted classes.
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[1523,   70],
       [ 221,  186]], dtype=int64)

In [53]:
# Fraction of test rows whose predicted class matches the true class.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.8545