## **Predicting Customer Churn**

Data Load and Preprocessing

In [1]:
import pandas as pd 
import numpy as np 

In [2]:
df = pd.read_csv('BankChurners.csv')
df

Unnamed: 0,CLIENTNUM,Attrition_Flag,Customer_Age,Gender,Dependent_count,Education_Level,Marital_Status,Income_Category,Card_Category,Months_on_book,...,Credit_Limit,Total_Revolving_Bal,Avg_Open_To_Buy,Total_Amt_Chng_Q4_Q1,Total_Trans_Amt,Total_Trans_Ct,Total_Ct_Chng_Q4_Q1,Avg_Utilization_Ratio,Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_1,Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_2
0,768805383,Existing Customer,45,M,3,High School,Married,$60K - $80K,Blue,39,...,12691.0,777,11914.0,1.335,1144,42,1.625,0.061,0.000093,0.999910
1,818770008,Existing Customer,49,F,5,Graduate,Single,Less than $40K,Blue,44,...,8256.0,864,7392.0,1.541,1291,33,3.714,0.105,0.000057,0.999940
2,713982108,Existing Customer,51,M,3,Graduate,Married,$80K - $120K,Blue,36,...,3418.0,0,3418.0,2.594,1887,20,2.333,0.000,0.000021,0.999980
3,769911858,Existing Customer,40,F,4,High School,Unknown,Less than $40K,Blue,34,...,3313.0,2517,796.0,1.405,1171,20,2.333,0.760,0.000134,0.999870
4,709106358,Existing Customer,40,M,3,Uneducated,Married,$60K - $80K,Blue,21,...,4716.0,0,4716.0,2.175,816,28,2.500,0.000,0.000022,0.999980
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10122,772366833,Existing Customer,50,M,2,Graduate,Single,$40K - $60K,Blue,40,...,4003.0,1851,2152.0,0.703,15476,117,0.857,0.462,0.000191,0.999810
10123,710638233,Attrited Customer,41,M,2,Unknown,Divorced,$40K - $60K,Blue,25,...,4277.0,2186,2091.0,0.804,8764,69,0.683,0.511,0.995270,0.004729
10124,716506083,Attrited Customer,44,F,1,High School,Married,Less than $40K,Blue,36,...,5409.0,0,5409.0,0.819,10291,60,0.818,0.000,0.997880,0.002118
10125,717406983,Attrited Customer,30,M,2,Graduate,Unknown,$40K - $60K,Blue,36,...,5281.0,0,5281.0,0.535,8395,62,0.722,0.000,0.996710,0.003294


Remove Unnecessary columns

In [3]:
df = df.drop(columns=[
    'CLIENTNUM',
    'Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_1',
    'Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_2'
])

In [None]:
leak_check = (df['Avg_Open_To_Buy'] == df['Credit_Limit'] - df['Total_Revolving_Bal']).mean()
print(f"Leakage check: {leak_check * 100:.2f}% of Avg_Open_To_Buy = Credit_Limit - Total_Revolving_Bal")

Leakage check: 99.60% of Avg_Open_To_Buy = Credit_Limit - Total_Revolving_Bal


Encoding 

In [12]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [7]:
categorical_columns = df.select_dtypes(include = 'object').columns
le = LabelEncoder()
for col in categorical_columns:
    df[col] = le.fit_transform(df[col])

Split ans scale 

In [14]:
from sklearn.model_selection import train_test_split

In [8]:
x = df.drop(columns = ['Avg_Open_To_Buy'])
y = df['Avg_Open_To_Buy']

In [13]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2)

In [17]:
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

Build ANN Model

In [24]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

from sklearn.metrics import r2_score

In [19]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(x_train.shape[1],)),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='linear')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [20]:
model.compile(optimizer = 'adam', loss = 'mean_squared_error', metrics=['mae'])

In [21]:
model.fit(x_train,y_train,epochs=8, batch_size=32, validation_split=0.2)

Epoch 1/8
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 140095552.0000 - mae: 7539.4302 - val_loss: 141150192.0000 - val_mae: 7411.9839
Epoch 2/8
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 122345688.0000 - mae: 6982.6260 - val_loss: 117995312.0000 - val_mae: 6678.7637
Epoch 3/8
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 104464248.0000 - mae: 6326.8066 - val_loss: 74684560.0000 - val_mae: 5224.3955
Epoch 4/8
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 59441208.0000 - mae: 4724.7549 - val_loss: 37417904.0000 - val_mae: 3807.6289
Epoch 5/8
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 31243308.0000 - mae: 3555.0049 - val_loss: 21522264.0000 - val_mae: 3006.5432
Epoch 6/8
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 20537180.0000 - mae: 3015.6428 - val_loss: 1520

<keras.src.callbacks.history.History at 0x2662ff30110>

In [22]:
model.evaluate(x_test, y_test)

[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 956us/step - loss: 7664838.0000 - mae: 1973.2972


[7856127.0, 1967.6387939453125]

In [25]:
r2_score(y_test,model.predict(x_test))

[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step  


0.9034602744606124