### Experiments

#### 0 Load libraries

In [94]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping, TensorBoard, ReduceLROnPlateau
import datetime
import pandas as pd
import pickle
import tensorflow.python  as tf
import tensorflow


#### 1 Load the dataset

In [15]:
## Load CSV file
data=pd.read_csv('Churn_Modelling.csv')

In [16]:
## Show all data into CSV file
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


#### 2 Preprocessing data

##### Initial Step - Drop columns

In [17]:
## Preprocess the data, dropping irrelevant columns.
data=data.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1)

In [30]:
## Show all data into CSV file
data.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,0,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,502,France,0,42,8,159660.8,3,1,0,113931.57,1
3,699,France,0,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,0,43,2,125510.82,1,1,1,79084.1,0


##### Encode categorical variables - Gender

In [19]:
## Encode gender - Transforming text to number.
label_encoder_gender=LabelEncoder()
data['Gender']=label_encoder_gender.fit_transform(data['Gender'])

In [20]:
data

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,0,42,2,0.00,1,1,1,101348.88,1
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,502,France,0,42,8,159660.80,3,1,0,113931.57,1
3,699,France,0,39,1,0.00,2,0,0,93826.63,0
4,850,Spain,0,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,France,1,39,5,0.00,2,1,0,96270.64,0
9996,516,France,1,35,10,57369.61,1,1,1,101699.77,0
9997,709,France,0,36,7,0.00,1,0,1,42085.58,1
9998,772,Germany,1,42,3,75075.31,2,1,0,92888.52,1


##### Encode categorical variables - Geography

In [23]:
## Onehot encode 'Geography'
onehot_encoder_geo=OneHotEncoder()
geo_encoder=onehot_encoder_geo.fit_transform(data[['Geography']])

In [24]:
geo_encoder

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 10000 stored elements and shape (10000, 3)>

In [27]:
geo_encoder.toarray()

array([[1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       ...,
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.]])

In [25]:
onehot_encoder_geo.get_feature_names_out(['Geography'])

array(['Geography_France', 'Geography_Germany', 'Geography_Spain'],
      dtype=object)

In [28]:
geo_encoded_df=pd.DataFrame(geo_encoder.toarray(), columns=onehot_encoder_geo.get_feature_names_out(['Geography']))

In [29]:
geo_encoded_df

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,0.0,1.0
...,...,...,...
9995,1.0,0.0,0.0
9996,1.0,0.0,0.0
9997,1.0,0.0,0.0
9998,0.0,1.0,0.0


##### Combine columns with orginal data

In [31]:
## Drop Geography Column
data=pd.concat([data.drop('Geography', axis=1), geo_encoded_df],axis=1)

In [32]:
data.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


##### 3 Save encoders

In [34]:
## Saving encoders to pickle file
with open('label_encoder_gender.pkl', 'wb') as file:
    pickle.dump(label_encoder_gender, file)

with open('onehot_encoder_geo.pkl','wb') as file:
    pickle.dump(onehot_encoder_geo, file)

##### 4 Scaler

###### Head

In [52]:
data.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


###### Divide dataset into indepent and dependent features

In [38]:
X=data.drop('Exited', axis=1)
y=data['Exited']

###### Split data in training and testing sets

In [39]:
X_train, X_test, y_train, y_test=train_test_split(X,y,test_size=0.2, random_state=42)

###### Scale these features

In [40]:
scaler=StandardScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)

In [44]:
#X_train

In [43]:
#X_test

###### Save scaler to file

In [45]:
with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler, file)

#### 3 ANN Implementation - Tensorflow

##### Build Our ANN Model

In [55]:
model=Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)), ## HL1 connected with input layer
    Dense(32, activation='relu'), ## Hide layer 2
    Dense(1, activation='sigmoid') ## output layer
])

In [57]:
#model.summary()

##### Compile ANN Model

In [81]:
## creating optimizer to mode.compile
opt=tensorflow.keras.optimizers.Adam(learning_rate=0.001)

In [82]:
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

##### Set up TensorBoard for ANN Model

In [108]:
##Variable to dir
log_dir='logs/fit/' + datetime.datetime.now().strftime('%Y%m%d-%H%M%S')



##### Tensorflow Callback

In [110]:
##Callback
tensorflow_callback=TensorBoard(log_dir=log_dir, histogram_freq=1)

##### Set up Early Stopping 

In [111]:
##  Set up Early Stopping 
early_stopping_callback=EarlyStopping(
    monitor='val_loss',  # Monitora a perda no conjunto de validação
    patience=10,          # Para se não houver melhora por 5 épocas consecutivas
    restore_best_weights=True  # Restaura os melhores pesos ao final
)

# Configurando Redução da taxa de aprendizado
reduce_lr_callback = ReduceLROnPlateau(
    monitor='val_loss',    # Monitora a perda no conjunto de validação
    factor=0.5,            # Reduz a taxa de aprendizado pela metade
    patience=5,            # Espera 5 épocas sem melhora
    min_lr=0.00001         # Limite mínimo da taxa de aprendizado
)

##### Training model

In [112]:
# Treinando o modelo com Early Stopping
model.fit(
    X_train, y_train, 
    validation_data=(X_test, y_test), 
    epochs=50,       # Número máximo de épocas (será interrompido antes se necessário)
    batch_size=32,
    callbacks=[tensorflow_callback, early_stopping_callback, reduce_lr_callback]
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50


<keras.src.callbacks.History at 0x1a6a593fad0>

##### Save Model

In [113]:
model.save('model.keras')

##### Load Tensorboard Extension

In [121]:
#import tensorboard

In [122]:
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [120]:
%tensorboard --logdir logs/fit/20241021-194101/

Reusing TensorBoard on port 6014 (pid 10228), started 0:00:26 ago. (Use '!kill 10228' to kill it.)