In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

from keras.wrappers import SKLearnClassifier
from scikeras.wrappers import KerasClassifier

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.callbacks import EarlyStopping

In [3]:
df_raw = pd.read_csv('./Churn_Modelling.csv')
df_raw.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
# Drop unnecessary features
df_raw.drop(labels=['RowNumber','CustomerId','Surname'],inplace=True,axis=1)
df_raw.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [5]:
# identify Feature types
feats_target = 'Exited'
feats_catg = [col for col in df_raw.columns if df_raw[col].dtypes == 'O']
feats_numr = [col for col in df_raw.columns if df_raw[col].dtypes != 'O' and col != feats_target]
print(feats_catg,feats_numr)

['Geography', 'Gender'] ['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary']


In [6]:
# Train test split

X = df_raw.drop(feats_target,axis=1)
Y = df_raw[feats_target]
x_train,x_inter,y_train,y_inter = train_test_split(X,Y,test_size=0.3,random_state=22)
x_vald,x_test,y_vald,y_test = train_test_split(x_inter,y_inter,test_size=0.5,random_state=22)

In [7]:
x_test

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
5880,537,France,Male,28,0,88963.31,2,1,1,189839.93
9903,606,France,Female,36,10,0.00,2,0,1,155641.46
486,641,France,Male,37,7,0.00,2,1,0,75248.30
1171,693,Germany,Male,40,0,120711.73,1,0,0,27345.18
2111,642,France,Male,25,7,0.00,2,1,0,102083.78
...,...,...,...,...,...,...,...,...,...,...
7395,721,Germany,Female,45,7,138523.20,1,0,0,59604.45
2640,625,Spain,Female,31,8,0.00,2,1,0,151843.54
1943,728,Germany,Male,39,6,152182.83,1,0,0,161203.60
6288,689,France,Male,40,8,160272.27,1,1,0,49656.24


In [8]:
# Create a preprocessing pipeline object
ppln_prpc = ColumnTransformer(transformers=[
    ('Categorical',OneHotEncoder(sparse_output=False,drop='if_binary'),feats_catg),
    ('Numerical',StandardScaler(),feats_numr),
],remainder="passthrough", verbose_feature_names_out=False, n_jobs=-1).set_output(transform='pandas')

ppln_prpc

0,1,2
,transformers,"[('Categorical', ...), ('Numerical', ...)]"
,remainder,'passthrough'
,sparse_threshold,0.3
,n_jobs,-1
,transformer_weights,
,verbose,False
,verbose_feature_names_out,False
,force_int_remainder_cols,'deprecated'

0,1,2
,categories,'auto'
,drop,'if_binary'
,sparse_output,False
,dtype,<class 'numpy.float64'>
,handle_unknown,'error'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,copy,True
,with_mean,True
,with_std,True


In [9]:
# Data Transformation
x_train_tf = ppln_prpc.fit_transform(x_train)
x_vald_tf = ppln_prpc.transform(x_vald)
x_test_tf = ppln_prpc.transform(x_test)

x_train_tf

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain,Gender_Male,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
1261,0.0,1.0,0.0,1.0,0.963410,-0.092953,0.350589,-0.061374,0.790944,0.650203,-1.026054,-0.381159
4448,0.0,1.0,0.0,0.0,-0.881748,0.098280,-1.384753,0.210579,0.790944,-1.537981,0.974608,0.354560
2390,0.0,1.0,0.0,1.0,-0.500347,-0.666655,0.697657,1.728398,0.790944,0.650203,-1.026054,0.227593
1622,1.0,0.0,0.0,0.0,0.478927,-1.335973,-0.343548,1.423464,0.790944,0.650203,-1.026054,-1.481830
438,1.0,0.0,0.0,0.0,1.540666,-0.092953,-1.037685,1.463592,0.790944,0.650203,-1.026054,-0.567530
...,...,...,...,...,...,...,...,...,...,...,...,...
4587,1.0,0.0,0.0,0.0,-1.551778,-0.475421,0.350589,-1.231503,-0.910011,0.650203,0.974608,0.688123
6646,0.0,0.0,1.0,0.0,0.355229,0.289514,-1.037685,-1.231503,0.790944,-1.537981,-1.026054,1.726567
5478,0.0,1.0,0.0,1.0,-0.026172,0.480748,1.391794,0.553095,0.790944,0.650203,0.974608,-0.152047
8548,0.0,0.0,1.0,1.0,-1.376540,0.671982,-0.343548,-1.231503,-0.910011,0.650203,-1.026054,-1.361774


In [10]:
# Model creation
from typing import List, Tuple, Literal

nn_arch_dict ={
    'neurons_layer_wise': [
        [64, 32, 16, 4, 1],
        [64, 32, 1],
        [128, 64, 1]
    ],
    'activation_layer_wise': [
        ['elu', 'elu', 'elu', 'elu', 'elu', 'sigmoid'],
        ['elu', 'elu', 'sigmoid'],
        ['elu', 'elu', 'sigmoid']
    ],
}

def create_model(
    index: int = 0,
    neurons_layer_wise: List[int] = [64,32,32,1],
    activation_layer_wise : List[Literal['relu', 'elu', 'sigmoid','softmax','linear']] = ['relu','relu','relu','sigmoid'],
    input_shape: Tuple[int,...] = (10,),
    optimizer : Literal['adam','sgd'] = 'adam',
    loss : Literal['binary_crossentropy','categorical_crossentropy','mean_squared_error'] = 'binary_crossentropy',
    metrics : List[Literal['accuracy','mse','mae']] = ['accuracy'],
    ):
    neurons_layer_wise = nn_arch_dict['neurons_layer_wise'][index]
    activation_layer_wise = nn_arch_dict['activation_layer_wise'][index]
    
    model = Sequential(
        [Input(shape=input_shape)] + [
            Dense(
            neurons_layer_wise[i],
            activation=activation_layer_wise[i],
            )
            for i in range(len(neurons_layer_wise))
    ])
    model.compile(optimizer=optimizer,loss=loss,metrics=metrics)

    return model



In [None]:
# Creat Keras Classifier

param_grid = {
    'index': range(len(nn_arch_dict['activation_layer_wise'])),
    'input_shape': [(x_train_tf.shape[1],)],
    'epochs': [100],
    'batch_size': [10],
}

gs = GridSearchCV(estimator=KerasClassifier(build_fn=create_model,input_shape=(12,),index=0,verbose=0),param_grid=param_grid,n_jobs=-1,cv=3)
gs.fit(x_train_tf,y_train)
print(gs.best_params_,gs.best_score_)


2025-08-08 15:27:28.742634: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-08-08 15:27:28.749361: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-08-08 15:27:28.769969: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754647048.803930   16929 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754647048.814138   16929 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1754647048.839265   16929 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linkin

{'batch_size': 10, 'epochs': 100, 'index': 1, 'input_shape': (12,)} 0.8469997488930051


TypeError: BaseSearchCV.predict() takes 2 positional arguments but 3 were given

In [14]:
y_pred_train = gs.predict(x_train_tf)
y_pred_vald = gs.predict(x_vald_tf)
y_pred_test = gs.predict(x_test_tf)

In [18]:
from sklearn.metrics import accuracy_score

print(f'Accuracy_train= {accuracy_score(y_train,y_pred_train)}')
print(f'Accuracy_vald= {accuracy_score(y_vald,y_pred_vald)}')
print(f'Accuracy_test= {accuracy_score(y_test,y_pred_test)}')


Accuracy_train= 0.879
Accuracy_vald= 0.8506666666666667
Accuracy_test= 0.8686666666666667
