In [27]:
import pandas as pd
import pickle

### Load Dataset

In [28]:
df_raw = pd.read_csv('Churn_Modelling.csv')
df_raw.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [29]:
# Drop unnecessary features
df_raw.drop(labels=['RowNumber','CustomerId','Surname','Exited'],inplace=True,axis=1)
df_raw.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.0,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.8,3,1,0,113931.57
3,699,France,Female,39,1,0.0,2,0,0,93826.63
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1


In [30]:
# identify Feature types
feats_target = 'EstimatedSalary'
feats_catg = [col for col in df_raw.columns if df_raw[col].dtypes == 'O']
feats_numr = [col for col in df_raw.columns if df_raw[col].dtypes != 'O' and col != feats_target]
print(feats_catg,feats_numr)

['Geography', 'Gender'] ['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard', 'IsActiveMember']


In [31]:
# Train test split
from sklearn.model_selection import train_test_split

X = df_raw.drop(feats_target,axis=1)
Y = df_raw[feats_target]
x_train,x_vald,y_train,y_vald = train_test_split(X,Y,test_size=0.3,random_state=22)
x_vald,x_test,y_vald,y_test = train_test_split(x_vald,y_vald,test_size=0.5,random_state=22)

In [32]:
x_test

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember
5880,537,France,Male,28,0,88963.31,2,1,1
9903,606,France,Female,36,10,0.00,2,0,1
486,641,France,Male,37,7,0.00,2,1,0
1171,693,Germany,Male,40,0,120711.73,1,0,0
2111,642,France,Male,25,7,0.00,2,1,0
...,...,...,...,...,...,...,...,...,...
7395,721,Germany,Female,45,7,138523.20,1,0,0
2640,625,Spain,Female,31,8,0.00,2,1,0
1943,728,Germany,Male,39,6,152182.83,1,0,0
6288,689,France,Male,40,8,160272.27,1,1,0


In [33]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

# Create a preprocessing pipeline object
ppln_prpc = ColumnTransformer(transformers=[
    ('Categorical',OneHotEncoder(sparse_output=False,drop='if_binary'),feats_catg),
    ('Numerical',StandardScaler(),feats_numr),
],remainder="passthrough", verbose_feature_names_out=False, force_int_remainder_cols=False, n_jobs=-1).set_output(transform='pandas')

ppln_prpc   

0,1,2
,transformers,"[('Categorical', ...), ('Numerical', ...)]"
,remainder,'passthrough'
,sparse_threshold,0.3
,n_jobs,-1
,transformer_weights,
,verbose,False
,verbose_feature_names_out,False
,force_int_remainder_cols,False

0,1,2
,categories,'auto'
,drop,'if_binary'
,sparse_output,False
,dtype,<class 'numpy.float64'>
,handle_unknown,'error'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,copy,True
,with_mean,True
,with_std,True


In [34]:
# Data Transformation
x_train_tf = ppln_prpc.fit_transform(x_train)
x_vald_tf = ppln_prpc.transform(x_vald)
x_test_tf = ppln_prpc.transform(x_test)

with open('ppln_rg.pkl','wb') as file:
    pickle.dump(ppln_prpc,file)



In [35]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from datetime import datetime

In [None]:
# Setup ANN model

model = Sequential([
    Dense(64,activation='relu',input_shape=(x_train_tf.shape[1],)),
    Dense(32,activation='relu'),
    Dense(1),
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-08-08 12:19:45.122104: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [37]:
model.summary()

In [None]:
# optm = tf.keras.optimizers.Adam(learning_rate=0.01)
# loss = tf.keras.losses.BinaryCrossentropy()
# mtrc = [tf.keras.metrics.Accuracy()]

In [38]:
# model compilation

model.compile(optimizer='adam',loss='mean_squared_error',metrics=['mse'])


In [39]:
# Setup Tensorboard
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard


log_dir = f'logs/fit/{datetime.now().strftime('%Y-%m-%d_%H-%S-%M')}'
tf_callback = TensorBoard(log_dir=log_dir,histogram_freq=1)


In [40]:
# Setup Earlystop
es_callback = EarlyStopping(monitor='val_loss',patience=10,restore_best_weights=True)


In [41]:
# Model Training
history = model.fit(x_train_tf,y_train,validation_data=(x_vald_tf,y_vald),epochs=100,callbacks=[tf_callback,es_callback])



Epoch 1/100
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 13436465152.0000 - mse: 13436465152.0000 - val_loss: 12603882496.0000 - val_mse: 12603882496.0000
Epoch 2/100
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 13270909952.0000 - mse: 13270909952.0000 - val_loss: 12273647616.0000 - val_mse: 12273647616.0000
Epoch 3/100
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 12655650816.0000 - mse: 12655650816.0000 - val_loss: 11405624320.0000 - val_mse: 11405624320.0000
Epoch 4/100
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 11425360896.0000 - mse: 11425360896.0000 - val_loss: 9951380480.0000 - val_mse: 9951380480.0000
Epoch 5/100
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 9658695680.0000 - mse: 9658695680.0000 - val_loss: 8126912000.0000 - val_mse: 8126912000.0000
Epoch 6/100
[1m219/219[0m [32m━━━━━━━━

In [42]:
model.save('model_rg.keras')

In [18]:
# # Tensorboard

# %load_ext tensorboard
# %tensorboard --logdir logs/fit

In [43]:
# predict using model
from tensorflow.keras.models import load_model

model = load_model('model_rg.keras')


In [44]:
y_train_pred = model.predict(x_train_tf)
y_train_pred = pd.Series(y_train_pred.flatten())
y_train_pred = (y_train_pred > 0.7).astype(int)
# y_train_pred

[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [45]:
y_vald_pred = model.predict(x_vald_tf)
y_vald_pred = pd.Series(y_vald_pred.flatten())
y_vald_pred = (y_vald_pred > 0.7).astype(int)
# y_vald_pred

[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [46]:
y_test_pred = model.predict(x_test_tf)
y_test_pred = pd.Series(y_test_pred.flatten())
y_test_pred = (y_test_pred > 0.7).astype(int)
# y_test_pred

[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [49]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

print(f'MSE_Train: {mean_squared_error(y_train,y_train_pred)}')
print(f'MAE_Train: {mean_absolute_error(y_train,y_train_pred)}')

print(f'MSE_Vald: {mean_squared_error(y_vald,y_vald_pred)}')
print(f'MAE_Vald: {mean_absolute_error(y_vald,y_vald_pred)}')

print(f'MSE_Test: {mean_squared_error(y_test,y_test_pred)}')
print(f'MAE_Test: {mean_absolute_error(y_test,y_test_pred)}')

MSE_Train: 13446509854.834337
MAE_Train: 100691.44123571429
MSE_Vald: 12643897548.601
MAE_Vald: 96522.23034
MSE_Test: 13438936318.255152
MAE_Test: 100845.97643333334
