# Artificial Neural Network - Predict bank customer churn

## Importing the libraries

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
# Display the installed TensorFlow version so the environment is recorded alongside the results.
tf.__version__

'2.20.0'

## Data Preprocessing

### Loading the dataset

In [3]:
# Read the raw churn data from a CSV file (path is relative to the working directory) and display it.
dataset = pd.read_csv('Churn_Modelling.csv')
dataset

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [4]:
# Work on a copy so the frame loaded from disk (`dataset`) stays untouched by later preprocessing.
df = dataset.copy()
df

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


### Dropping irrelevant columns

In [5]:
# Remove the identifier columns (row number, customer id, surname) before modelling.
# `df` is rebuilt from the untouched `dataset` instead of being mutated in place, so this
# cell is idempotent: re-running it cannot raise KeyError on columns that are already gone.
df = dataset.drop(columns=['RowNumber', 'CustomerId', 'Surname'])
df

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


### Check missing data

In [6]:
# Count the missing values in every column (`isnull` is the pandas alias of `isna`).
df.isnull().sum()

CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

### Create dependent & independent variables

In [7]:
# Features are every column except the last one; the target is the last column.
X, y = df.iloc[:, :-1], df.iloc[:, -1]
(X, y)

(      CreditScore Geography  Gender  Age  Tenure    Balance  NumOfProducts  \
 0             619    France  Female   42       2       0.00              1   
 1             608     Spain  Female   41       1   83807.86              1   
 2             502    France  Female   42       8  159660.80              3   
 3             699    France  Female   39       1       0.00              2   
 4             850     Spain  Female   43       2  125510.82              1   
 ...           ...       ...     ...  ...     ...        ...            ...   
 9995          771    France    Male   39       5       0.00              2   
 9996          516    France    Male   35      10   57369.61              1   
 9997          709    France  Female   36       7       0.00              1   
 9998          772   Germany    Male   42       3   75075.31              2   
 9999          792    France  Female   28       4  130142.79              1   
 
       HasCrCard  IsActiveMember  EstimatedSalary 

### Encoding categorical data

#### One-hot encoding of the independent categorical variables

In [8]:
# One-hot encode the two categorical features. drop_first=True removes the first level of
# each feature, which becomes the implicit baseline, so only the remaining levels get a column.
# The encoding starts from the feature slice of `df` rather than from `X` itself, so this cell
# is idempotent: re-running it no longer fails with KeyError because 'Gender' and 'Geography'
# were already replaced by their dummy columns.
X = pd.get_dummies(df.iloc[:, :-1], columns=['Gender', 'Geography'], drop_first=True)
X

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Gender_Male,Geography_Germany,Geography_Spain
0,619,42,2,0.00,1,1,1,101348.88,False,False,False
1,608,41,1,83807.86,1,0,1,112542.58,False,False,True
2,502,42,8,159660.80,3,1,0,113931.57,False,False,False
3,699,39,1,0.00,2,0,0,93826.63,False,False,False
4,850,43,2,125510.82,1,1,1,79084.10,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,39,5,0.00,2,1,0,96270.64,True,False,False
9996,516,35,10,57369.61,1,1,1,101699.77,True,False,False
9997,709,36,7,0.00,1,0,1,42085.58,False,False,False
9998,772,42,3,75075.31,2,1,0,92888.52,True,True,False


In [9]:
# Keep the encoded feature names, in order, so new observations can be aligned to them later.
train_columns = list(X.columns)
train_columns

['CreditScore',
 'Age',
 'Tenure',
 'Balance',
 'NumOfProducts',
 'HasCrCard',
 'IsActiveMember',
 'EstimatedSalary',
 'Gender_Male',
 'Geography_Germany',
 'Geography_Spain']

### Splitting the dataset into the Training and Test sets

In [10]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1234,
)

In [11]:
# Inspect the feature frames produced by the split.
X_train, X_test

(      CreditScore  Age  Tenure    Balance  NumOfProducts  HasCrCard  \
 5525          624   51      10  123401.43              2          1   
 5034          631   29       2   96863.52              2          1   
 4709          554   31       1       0.00              2          0   
 4218          589   19       9   83495.11              1          1   
 917           646   45       3   47134.75              1          1   
 ...           ...  ...     ...        ...            ...        ...   
 664           661   41       1       0.00              2          0   
 7540          553   37       1       0.00              1          1   
 7221          625   41       6   97663.16              2          1   
 1318          598   43       5       0.00              3          1   
 8915          606   36       1  155655.46              1          1   
 
       IsActiveMember  EstimatedSalary  Gender_Male  Geography_Germany  \
 5525               1        127825.25         True         

In [12]:
# Inspect the target series produced by the split.
y_train, y_test

(5525    0
 5034    0
 4709    0
 4218    1
 917     0
        ..
 664     0
 7540    0
 7221    0
 1318    1
 8915    1
 Name: Exited, Length: 8000, dtype: int64,
 2374    0
 1784    0
 6301    0
 1600    0
 7920    0
        ..
 8623    0
 5928    0
 6714    0
 5885    0
 7289    0
 Name: Exited, Length: 2000, dtype: int64)

### Feature Scaling

In [13]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training features only, then apply that same
# transformation to both splits so nothing from the test set influences the scaler.
std_scaler = StandardScaler().fit(X_train)
X_train = std_scaler.transform(X_train)
X_test = std_scaler.transform(X_test)

In [14]:
# Inspect the scaled feature arrays.
X_train, X_test

(array([[-0.28222475,  1.14919188,  1.72112731, ...,  0.91393836,
         -0.57985213, -0.57407859],
        [-0.2101288 , -0.95483262, -1.04325013, ..., -1.0941657 ,
          1.7245776 , -0.57407859],
        [-1.00318425, -0.76355766, -1.38879731, ...,  0.91393836,
         -0.57985213, -0.57407859],
        ...,
        [-0.27192532,  0.19281711,  0.33893859, ...,  0.91393836,
         -0.57985213, -0.57407859],
        [-0.5500097 ,  0.38409206, -0.00660859, ..., -1.0941657 ,
         -0.57985213, -0.57407859],
        [-0.46761433, -0.28537028, -1.38879731, ...,  0.91393836,
         -0.57985213, -0.57407859]], shape=(8000, 11)),
 array([[-0.12773342, -0.95483262, -0.35215577, ...,  0.91393836,
         -0.57985213,  1.74192178],
        [ 0.06795558, -0.28537028,  1.03003295, ...,  0.91393836,
         -0.57985213, -0.57407859],
        [ 1.1802931 ,  0.57536702,  0.33893859, ..., -1.0941657 ,
          1.7245776 , -0.57407859],
        ...,
        [ 1.78795896, -0.28537028, -

## Designing the ANN

### Initialize the ANN (Sequence of Layers)

In [15]:
from keras.models import Sequential
from keras.utils import set_random_seed

# Seed the Python, NumPy and backend random generators before any layer is created, so that
# weight initialisation and the shuffling done during training are repeatable between runs.
# The value matches the random_state used for the train/test split.
set_random_seed(1234)

# Empty sequential model; layers are appended in the following cells.
ann_model = Sequential()

### Adding the input layer and the first hidden layer

In [16]:
from keras.layers import Input, Dense

# Take the input width from the training matrix instead of hard-coding 11, so the model
# keeps matching the data if the set of encoded feature columns changes.
ann_model.add(Input(shape=(X_train.shape[1],)))
# First hidden layer: 16 units with ReLU activation.
ann_model.add(Dense(units=16, activation='relu'))

In [17]:
import visualkeras
# visualkeras.layered_view(ann_model)

### Adding the second hidden layer

In [18]:
# Second hidden layer: 8 units with ReLU activation.
ann_model.add(Dense(8, activation='relu'))
# visualkeras.layered_view(ann_model)

### Adding the output layer

In [19]:
# Output layer: a single sigmoid unit, giving one value between 0 and 1 per row.
ann_model.add(Dense(1, activation='sigmoid'))
# visualkeras.layered_view(ann_model)

In [20]:
# from tensorflow.keras.utils import plot_model
# plot_model(ann_model, to_file='model.png', show_shapes=True, show_layer_names=True)

## Training the model

### Compiling the ANN

In [21]:
# Binary cross-entropy pairs with the single sigmoid output; accuracy and AUC are reported
# alongside the loss during training and evaluation.
ann_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', 'AUC'],
)

### Training the ANN on the training set

In [22]:
# Train for 100 passes over the training split in mini-batches of 32 rows.
ann_model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=100,
)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 493us/step - AUC: 0.6750 - accuracy: 0.7969 - loss: 0.4783
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 464us/step - AUC: 0.7594 - accuracy: 0.7969 - loss: 0.4371
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 456us/step - AUC: 0.7817 - accuracy: 0.7968 - loss: 0.4231
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 451us/step - AUC: 0.7998 - accuracy: 0.8119 - loss: 0.4108
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 452us/step - AUC: 0.8152 - accuracy: 0.8235 - loss: 0.3997
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 460us/step - AUC: 0.8261 - accuracy: 0.8320 - loss: 0.3904
Epoch 7/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 455us/step - AUC: 0.8339 - accuracy: 0.8409 - loss: 0.3830
Epoch 8/100
[1m250/250[0m [32m━━━━━━━━

[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 457us/step - AUC: 0.8805 - accuracy: 0.8709 - loss: 0.3163
Epoch 61/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 455us/step - AUC: 0.8804 - accuracy: 0.8726 - loss: 0.3160
Epoch 62/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 443us/step - AUC: 0.8809 - accuracy: 0.8714 - loss: 0.3161
Epoch 63/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 439us/step - AUC: 0.8809 - accuracy: 0.8712 - loss: 0.3158
Epoch 64/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 443us/step - AUC: 0.8820 - accuracy: 0.8734 - loss: 0.3148
Epoch 65/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 453us/step - AUC: 0.8811 - accuracy: 0.8705 - loss: 0.3152
Epoch 66/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 452us/step - AUC: 0.8816 - accuracy: 0.8731 - loss: 0.3146
Epoch 67/100
[1m250/250[0m [32m━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x164609dc0>

## Making the prediction

### Prediction for Test dataset

In [23]:
# Despite its name, `accuracy_test` receives everything evaluate() returns: the loss followed
# by the compiled metrics (accuracy, AUC), all computed on the held-out test split.
accuracy_test = ann_model.evaluate(x=X_test, y=y_test)
accuracy_test

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 590us/step - AUC: 0.8516 - accuracy: 0.8525 - loss: 0.3507


[0.3507489562034607, 0.8525000214576721, 0.8516314625740051]

In [24]:
# Turn the predicted probabilities into class labels with a 0.5 cut-off.
y_pred = ann_model.predict(X_test) > 0.5
# Show predicted and actual labels side by side: column 0 = predicted, column 1 = actual.
np.hstack((y_pred.reshape(-1, 1), y_test.values.reshape(-1, 1)))

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 530us/step


array([[0, 0],
       [0, 0],
       [1, 0],
       ...,
       [0, 0],
       [0, 0],
       [1, 0]], shape=(2000, 2))

### Evaluate the confusion matrix and accuracy_score

In [25]:
from sklearn.metrics import confusion_matrix

# Rows are the actual classes and columns the predicted classes.
confusion_matrix(y_true=y_test.values, y_pred=y_pred)

array([[1503,   85],
       [ 210,  202]])

In [26]:
from sklearn.metrics import accuracy_score

# Fraction of test rows whose thresholded prediction equals the actual label.
accuracy_score(y_true=y_test.values, y_pred=y_pred)

0.8525

### Predicting the result of a single observation

#### Create dataframe for prediction

In [27]:
# One-row frame describing the customer to score. Column order does not matter here
# because the frame is re-aligned to the training columns in the next step.
df_prediction = pd.DataFrame(
    data=[
        dict(
            Geography="France",
            CreditScore=600,
            Gender="Male",
            Age=40,
            Tenure=3,
            Balance=60000,
            NumOfProducts=2,
            EstimatedSalary=50000,
            HasCrCard=1,
            IsActiveMember=1,
        )
    ]
)

#### Applying one-hot encoding to the categorical variables

In [28]:
# Encode without drop_first: with a single row, dropping the first level would remove the
# only dummy column produced. Re-indexing to the training columns then discards dummy
# columns the model never saw and fills the ones absent from this row with 0.
df_prediction = (
    pd.get_dummies(df_prediction, columns=['Gender', 'Geography'])
    .reindex(columns=train_columns, fill_value=0)
)
df_prediction

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Gender_Male,Geography_Germany,Geography_Spain
0,600,40,3,60000,2,1,1,50000,True,0,0


#### Apply the training-set scaling to the observation

In [29]:
# Re-use the scaler fitted on the training features; it must not be re-fitted on this row.
df_prediction_scaled = std_scaler.transform(X=df_prediction)
df_prediction_scaled

array([[-0.52941086,  0.09717963, -0.69770295, -0.26388705,  0.82546853,
         0.64434291,  0.97725852, -0.87812086,  0.91393836, -0.57985213,
        -0.57407859]])

#### Predict the churn probability (the sigmoid output layer returns a probability)

In [30]:
# The model's sigmoid output for the scaled observation (one row, one value).
df_prediction_prob = ann_model.predict(x=df_prediction_scaled)
df_prediction_prob

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step


array([[0.05307873]], dtype=float32)

In [31]:
# Apply the same 0.5 cut-off used for the test set to the single observation's row.
pred = df_prediction_prob[0] > 0.5
pred

array([False])