### Data pre-processing part

In [1]:
# Import necessary libraries
# pandas for data manipulation, sklearn for preprocessing and splitting, pickle for saving encoders and scalers

import pickle

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

In [2]:
# Step 1: Read the raw data
# Churn_Modelling.csv (path relative to the working directory) is read into `data`,
# the DataFrame that the following cells transform.

data = pd.read_csv(filepath_or_buffer="Churn_Modelling.csv")
data


Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [3]:
# Step 2: Remove columns that are not used as features
# RowNumber, CustomerId and Surname are treated as irrelevant to the prediction and dropped.

data = data.drop(columns=['RowNumber', 'CustomerId', 'Surname'])
data

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [4]:
# Step 3: Turn categorical columns into numbers
# Gender is replaced by the integer codes of a LabelEncoder
# (Female -> 0, Male -> 1, as the output below shows).

label_encoder_gender = LabelEncoder()
label_encoder_gender.fit(data['Gender'])  # learn the category -> code mapping
data['Gender'] = label_encoder_gender.transform(data['Gender'])
data

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,0,42,2,0.00,1,1,1,101348.88,1
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,502,France,0,42,8,159660.80,3,1,0,113931.57,1
3,699,France,0,39,1,0.00,2,0,0,93826.63,0
4,850,Spain,0,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,France,1,39,5,0.00,2,1,0,96270.64,0
9996,516,France,1,35,10,57369.61,1,1,1,101699.77,0
9997,709,France,0,36,7,0.00,1,0,1,42085.58,1
9998,772,Germany,1,42,3,75075.31,2,1,0,92888.52,1


In [5]:
# One-hot encode the Geography column
# Geography has more than two categories, so every country gets its own 0/1 column.
# This keeps the model from reading one country as "greater" than another,
# which a single integer code would imply.

onehot_encoder_geo = OneHotEncoder()
# The encoder expects a 2-D input, hence the reshape to a single-column array
geo_encoder = onehot_encoder_geo.fit_transform(data['Geography'].values.reshape(-1, 1))

geo_encoder


<10000x3 sparse matrix of type '<class 'numpy.float64'>'
	with 10000 stored elements in Compressed Sparse Row format>

In [6]:
# List the column names the fitted encoder generates for the Geography input

onehot_encoder_geo.get_feature_names_out(input_features=['Geography'])

array(['Geography_France', 'Geography_Germany', 'Geography_Spain'],
      dtype=object)

In [7]:
# Wrap the one-hot result in a DataFrame:
# the sparse matrix is densified and labelled with the encoder's own column names

geo_encoder_df = pd.DataFrame(
    data=geo_encoder.toarray(),
    columns=onehot_encoder_geo.get_feature_names_out(input_features=['Geography']),
)
geo_encoder_df

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,0.0,1.0
...,...,...,...
9995,1.0,0.0,0.0
9996,1.0,0.0,0.0
9997,1.0,0.0,0.0
9998,0.0,1.0,0.0


In [8]:
# Replace the text Geography column with its one-hot columns
# (the two frames are placed side by side, aligned on the row index)

data = pd.concat(
    objs=[data.drop(columns='Geography'), geo_encoder_df],
    axis='columns',
)
data.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [9]:
# Persist the fitted encoders for later reuse
# Each encoder is pickled to its own file: the Gender LabelEncoder first,
# then the Geography OneHotEncoder.

for pickle_path, fitted_encoder in (
    ('label_encoder_gender.pkl', label_encoder_gender),
    ('onehot_encoder_geo.pkl', onehot_encoder_geo),
):
    with open(pickle_path, 'wb') as file:
        pickle.dump(fitted_encoder, file)

In [10]:
# Separate the target column (y) from the feature columns (x)
y = data['Exited']
x = data.drop(columns='Exited')

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Standardise the features: the scaler is fitted on the training rows only
# and then applied unchanged to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [11]:
# Pickle the fitted StandardScaler so that new data can be scaled
# exactly like the training data

with open('scaler.pkl', mode='wb') as file:
    pickle.dump(scaler, file)

### ANN implementation

Steps to Code an Artificial Neural Network (ANN)

1. Define Network Architecture:
Use a Sequential model.
Add Dense layers (e.g., 64 units per layer).

2. Choose Activation Functions:
Options: Sigmoid, tanh, ReLU, Leaky ReLU.

3. Set Optimizer:
Use an optimizer (e.g., Adam, SGD) to update the weights from the gradients computed by backpropagation.

4. Specify Loss Function:
Select a loss function suitable for the problem (e.g., Mean Squared Error, Cross-Entropy).

5. Define Metrics:
Example metrics: accuracy, MAE (Mean Absolute Error), MSE (Mean Squared Error).

6. Train the Model:
Log training progress to a folder.
Use TensorBoard for visualization of metrics like loss and accuracy.

In [12]:
'''
TensorFlow: A popular library for building and training machine learning models, including neural networks.
Sequential: A type of model in Keras where layers are added one after another (sequentially).
Dense Layer: A fully connected layer in which each neuron receives input from every neuron in the previous layer.
EarlyStopping: A callback that stops training if the model's performance stops improving (to avoid overfitting).
TensorBoard: A visualization tool to monitor training progress, including metrics and losses.
datetime: Used to create unique folder names based on the current date and time.
'''
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime

In [13]:
# Build the ANN model

# Network architecture
#
# Sequential([...]): stacks the listed layers, in order, into a single model.
# tf.keras.Input(shape=(X_train.shape[1],)): declares the input as one value per
#   feature column of X_train. The input is declared with an explicit Input object
#   instead of passing input_shape= to the first Dense layer, because Keras warns
#   about input_shape on a layer inside a Sequential model.
# Dense(64, activation='relu'): first hidden layer with 64 neurons.
#   ReLU passes positive values through and outputs 0 otherwise, which adds non-linearity.
# Dense(32, activation='relu'): second hidden layer with 32 neurons and ReLU activation.
# Dense(1, activation='sigmoid'): output layer with a single neuron, since this is a
#   binary classification (churn or not); sigmoid yields a probability between 0 and 1.

model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),  # input: one value per feature column
    Dense(64, activation='relu'),               # hidden layer 1
    Dense(32, activation='relu'),               # hidden layer 2
    Dense(1, activation='sigmoid'),             # output layer
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [14]:
# Print an overview of the model built above
model.summary()

In [15]:
# Optimizer and loss objects for compiling the model.
# `tf` is the TensorFlow alias imported in the ANN imports cell, so TensorFlow is not imported again here.
opt = tf.keras.optimizers.Adam(learning_rate=0.01)
loss = tf.keras.losses.BinaryCrossentropy()

'''  
Optimizer (Adam): Adjusts the weights of the model to reduce error during training.
learning_rate=0.01: Controls how big the steps are when adjusting weights.
BinaryCrossentropy: A loss function used for binary classification problems.
It calculates the difference between the predicted probability and the actual label.
'''

'  \nOptimizer (Adam): Adjusts the weights of the model to reduce error during training.\nlearning_rate=0.01: Controls how big the steps are when adjusting weights.\nBinaryCrossentropy: A loss function used for binary classification problems.\nIt calculates the difference between the predicted probability and the actual label.\n'

In [16]:
# Compile the model
# `opt` (Adam) and `loss` (BinaryCrossentropy) are the objects created in the previous cell;
# passing `loss` itself keeps the compiled loss in sync with that definition
# instead of repeating it as a string.
model.compile(optimizer=opt, loss=loss, metrics=['accuracy'])

'''
Prepares the model for training by specifying:
optimizer: Adam optimizer to adjust weights.
loss: Binary cross-entropy to calculate errors.
metrics: Measures performance during training (e.g., accuracy).
'''

'\nPrepares the model for training by specifying:\noptimizer: Adam optimizer to adjust weights.\nloss: Binary cross-entropy to calculate errors.\nmetrics: Measures performance during training (e.g., accuracy).\n'

In [22]:
# Set up the training callbacks
# TensorBoard, EarlyStopping and datetime are already imported in the ANN imports cell,
# so they are not imported a second time here.

# Every run logs into its own timestamped folder under logs/fit/
log_dir = 'logs/fit/' + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Stop training once val_loss has not improved for 10 epochs
# and restore the weights of the best epoch
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

In [23]:
# Train the model
# Runs for at most 100 epochs with the TensorBoard and EarlyStopping callbacks attached.
# NOTE(review): the test split is also passed as the validation set that EarlyStopping
# monitors, so metrics on X_test are not from fully unseen data — consider carving a
# separate validation split out of the training data.

history = model.fit(
    x=X_train,
    y=y_train,
    epochs=100,
    callbacks=[tensorboard_callback, early_stopping],
    validation_data=(X_test, y_test),
)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.8560 - loss: 0.3417 - val_accuracy: 0.8570 - val_loss: 0.3470
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.8631 - loss: 0.3371 - val_accuracy: 0.8545 - val_loss: 0.3487
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.8608 - loss: 0.3368 - val_accuracy: 0.8625 - val_loss: 0.3374
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8642 - loss: 0.3287 - val_accuracy: 0.8590 - val_loss: 0.3476
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.8710 - loss: 0.3212 - val_accuracy: 0.8610 - val_loss: 0.3407
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.8678 - loss: 0.3141 - val_accuracy: 0.8620 - val_loss: 0.3458
Epoch 7/100
[1m250/2

In [24]:
# Save the trained model to the file 'model.h5' (path relative to the working directory)
model.save('model.h5')



In [27]:
# Load the TensorBoard notebook extension
# (%reload_ext reloads it if it is already loaded in this kernel)
%reload_ext tensorboard


In [28]:
# Open TensorBoard inside the notebook on the runs logged under logs/fit
%tensorboard --logdir logs/fit

Reusing TensorBoard on port 6008 (pid 23800), started 2 days, 21:43:15 ago. (Use '!kill 23800' to kill it.)