# ANN-Classification-model-to-predict-the-Customer-Churn

Step 1: Import Libraries

In [27]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import confusion_matrix, classification_report
from keras.models import Sequential
from keras.layers import Dense

Step 2: Load the Dataset

In [28]:
url = "https://drive.google.com/file/d/............."  # Your File drive link
path = 'https://drive.google.com/uc?id=' + url.split('/')[-2]
df = pd.read_csv(path)

In [29]:
print(df.head())

   RowNumber  CustomerId   Surname  CreditScore Geography  Gender  Age  \
0          1    15634602  Hargrave          619    France  Female   42   
1          2    15647311      Hill          608     Spain  Female   41   
2          3    15619304      Onio          502    France  Female   42   
3          4    15701354      Boni          699    France  Female   39   
4          5    15737888  Mitchell          850     Spain  Female   43   

   Tenure    Balance  NumOfProducts  HasCrCard  IsActiveMember  \
0       2       0.00              1          1               1   
1       1   83807.86              1          0               1   
2       8  159660.80              3          1               0   
3       1       0.00              2          0               0   
4       2  125510.82              1          1               1   

   EstimatedSalary  Exited  
0        101348.88       1  
1        112542.58       0  
2        113931.57       1  
3         93826.63       0  
4         790

Step 3: Data Preprocessing

In [30]:
# Remove null values
df = df.dropna()

# Drop unnecessary columns
df = df.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1)

# Label Encoding for categorical columns
label_encoder = LabelEncoder()
df['Geography'] = label_encoder.fit_transform(df['Geography'])
df['Gender'] = label_encoder.fit_transform(df['Gender'])

print(df.head())

   CreditScore  Geography  Gender  Age  Tenure    Balance  NumOfProducts  \
0          619          0       0   42       2       0.00              1   
1          608          2       0   41       1   83807.86              1   
2          502          0       0   42       8  159660.80              3   
3          699          0       0   39       1       0.00              2   
4          850          2       0   43       2  125510.82              1   

   HasCrCard  IsActiveMember  EstimatedSalary  Exited  
0          1               1        101348.88       1  
1          0               1        112542.58       0  
2          1               0        113931.57       1  
3          0               0         93826.63       0  
4          1               1         79084.10       0  


In [31]:
# Handling duplicate records
df = df.drop_duplicates()

# Splitting into x (independent variables) and y (dependent variable)
x = df.drop('Exited', axis=1)
y = df['Exited']

# Standardize the data
scaler = StandardScaler()
x = scaler.fit_transform(x)

In [32]:
print("x (Independent Variables):")
print(x[:5])  # Displaying the first 5 rows
print("\ny (Dependent Variable):")
print(y[:5])  # Displaying the first 5 rows

x (Independent Variables):
[[-0.32622142 -0.90188624 -1.09598752  0.29351742 -1.04175968 -1.22584767
  -0.91158349  0.64609167  0.97024255  0.02188649]
 [-0.44003595  1.51506738 -1.09598752  0.19816383 -1.38753759  0.11735002
  -0.91158349 -1.54776799  0.97024255  0.21653375]
 [-1.53679418 -0.90188624 -1.09598752  0.29351742  1.03290776  1.33305335
   2.52705662  0.64609167 -1.03067011  0.2406869 ]
 [ 0.50152063 -0.90188624 -1.09598752  0.00745665 -1.38753759 -1.22584767
   0.80773656 -1.54776799 -1.03067011 -0.10891792]
 [ 2.06388377  1.51506738 -1.09598752  0.38887101 -1.04175968  0.7857279
  -0.91158349  0.64609167  0.97024255 -0.36527578]]

y (Dependent Variable):
0    1
1    0
2    1
3    0
4    0
Name: Exited, dtype: int64


Step 4: Split Data into Training and Test Sets

In [33]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

print("Training set shape:", x_train.shape, y_train.shape)
print("Test set shape:", x_test.shape, y_test.shape)

Training set shape: (8000, 10) (8000,)
Test set shape: (2000, 10) (2000,)


Step 5: Build and Train the ANN Model

In [34]:
# Step 5: Build and Train the ANN Model
model = Sequential()
model.add(Dense(units=8, activation='relu', input_dim=10))  # Adjust input_dim to 10
model.add(Dense(units=8, activation='relu'))
model.add(Dense(units=1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.fit(x_train, y_train, epochs=10, batch_size=32, validation_split=0.2)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1a5e9e987c0>

Step 6: Predict the Exited Status for Test Data

In [35]:
y_pred = model.predict(x_test)
y_pred = (y_pred > 0.5)

print(y_pred)

[[False]
 [False]
 [False]
 ...
 [False]
 [False]
 [False]]


Step 7: Compute Classification Metrics

In [36]:
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

In [37]:
print("Confusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", class_report)

Confusion Matrix:
 [[1559   48]
 [ 269  124]]

Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.97      0.91      1607
           1       0.72      0.32      0.44       393

    accuracy                           0.84      2000
   macro avg       0.79      0.64      0.67      2000
weighted avg       0.83      0.84      0.82      2000



In [38]:
_, accuracy = model.evaluate(x_test, y_test)
print(f"\nStep 8: Report the ANN Model Accuracy\nANN Model Accuracy: {accuracy * 100:.2f}%")


Step 8: Report the ANN Model Accuracy
ANN Model Accuracy: 84.15%
