# Load related library

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from keras.models import Sequential
from keras.layers import Dense, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam, SGD, RMSprop, Adadelta, Adagrad, Adamax, Nadam, Ftrl
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.wrappers.scikit_learn import KerasClassifier
from math import floor
from sklearn.metrics import make_scorer, accuracy_score
from bayes_opt import BayesianOptimization
from sklearn.model_selection import StratifiedKFold
from keras.layers import LeakyReLU
LeakyReLU = LeakyReLU(alpha=0.1)
import warnings
warnings.filterwarnings('ignore')
pd.set_option("display.max_columns", None)

In [2]:
# Make scorer accuracy
score_acc = make_scorer(accuracy_score)

# Load dataset

In [3]:
trainSet = pd.read_csv("./Data/train.csv", header=0)

# Data cleaning

## Drop useless columns

In [4]:
train = trainSet.drop(columns=['Name', 'PassengerId', 'Ticket', 'Cabin'])

## Drop rows with missing values

In [5]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 8 columns):
 #   Column    Non-Null Count   Dtype  
---  ------    --------------   -----  
 0   Survived  100000 non-null  int64  
 1   Pclass    100000 non-null  int64  
 2   Sex       100000 non-null  object 
 3   Age       96708 non-null   float64
 4   SibSp     100000 non-null  int64  
 5   Parch     100000 non-null  int64  
 6   Fare      99866 non-null   float64
 7   Embarked  99750 non-null   object 
dtypes: float64(2), int64(4), object(2)
memory usage: 6.1+ MB


In [6]:
train = train.dropna(axis=0)

In [7]:
train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 96332 entries, 2 to 99999
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Survived  96332 non-null  int64  
 1   Pclass    96332 non-null  int64  
 2   Sex       96332 non-null  object 
 3   Age       96332 non-null  float64
 4   SibSp     96332 non-null  int64  
 5   Parch     96332 non-null  int64  
 6   Fare      96332 non-null  float64
 7   Embarked  96332 non-null  object 
dtypes: float64(2), int64(4), object(2)
memory usage: 6.6+ MB


## Dummy coding of the categorical variables

In [8]:
train = pd.get_dummies(train)

# Train, validation data set split

In [9]:
X_train, X_val, y_train, y_val = train_test_split(train.drop(columns="Survived", axis=0)
                                                , train['Survived']
                                                , test_size=0.2
                                                , random_state=2023
                                                , stratify=train['Survived'])

# Tuning hyperparameters (**NOT including `layers`**)

- `Optimizer`
- `Activation`
- `Neurons`
- `Batch size`
- `Epochs`

## Create function

In [10]:
# Create function
def nn_cl_bo1(neurons, activation, optimizer, learning_rate,  batch_size, epochs ):
    optimizerL = ['Adam', 'SGD', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Ftrl']
    optimizerD= {'Adam':Adam(lr=learning_rate), 'SGD':SGD(lr=learning_rate),
                 'RMSprop':RMSprop(lr=learning_rate), 'Adadelta':Adadelta(lr=learning_rate),
                 'Adagrad':Adagrad(lr=learning_rate), 'Adamax':Adamax(lr=learning_rate),
                 'Nadam':Nadam(lr=learning_rate), 'Ftrl':Ftrl(lr=learning_rate)}
    activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu',
                   'elu', 'exponential', LeakyReLU,'relu']
    neurons = round(neurons)
    activation = activationL[round(activation)]
    optimizer = optimizerD[optimizerL[round(optimizer)]]
    batch_size = round(batch_size)
    epochs = round(epochs)
    def nn_cl_fun():
        nn = Sequential()
        nn.add(Dense(neurons, input_dim=10, activation=activation))
        nn.add(Dense(neurons, activation=activation))
        nn.add(Dense(1, activation='sigmoid'))
        nn.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
        return nn
    es = EarlyStopping(monitor='accuracy', mode='max', verbose=0, patience=20)
    nn = KerasClassifier(build_fn=nn_cl_fun, epochs=epochs, batch_size=batch_size,
                         verbose=0)
    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=123)
    score = cross_val_score(nn, X_train, y_train, scoring=score_acc, cv=kfold, fit_params={'callbacks':[es]}).mean()
    return score

## Set parameters

In [11]:
params_nn = {
    'neurons': (10, 100)
  , 'activation': (0, 9) # `activationL` has 10 elements
  , 'optimizer': (0, 7) # `optimizerL` has 8 elements
  , 'learning_rate': (0.01, 1)
  , 'batch_size': (200, 1000)
  , "epochs": (20, 100)
}

## Run Bayesian Optimization

In [12]:
nn_bo1 = BayesianOptimization(nn_cl_bo1, params_nn, random_state=2023)
nn_bo1.maximize(init_points=25, n_iter=4)

|   iter    |  target   | activa... | batch_... |  epochs   | learni... |  neurons  | optimizer |
-------------------------------------------------------------------------------------------------


2023-02-12 22:05:46.578805: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


| [0m1        [0m | [0m0.7675   [0m | [0m2.898    [0m | [0m912.3    [0m | [0m67.04    [0m | [0m0.1353   [0m | [0m22.72    [0m | [0m3.275    [0m |
| [0m2        [0m | [0m0.7641   [0m | [0m0.1988   [0m | [0m781.8    [0m | [0m61.95    [0m | [0m0.5495   [0m | [0m51.07    [0m | [0m3.51     [0m |
| [0m3        [0m | [0m0.7279   [0m | [0m3.55     [0m | [0m320.9    [0m | [0m48.87    [0m | [0m0.1705   [0m | [0m40.42    [0m | [0m1.262    [0m |
| [95m4        [0m | [95m0.7687   [0m | [95m3.519    [0m | [95m228.5    [0m | [95m65.19    [0m | [95m0.2114   [0m | [95m38.85    [0m | [95m2.636    [0m |
| [0m5        [0m | [0m0.6947   [0m | [0m1.656    [0m | [0m283.2    [0m | [0m56.39    [0m | [0m0.2039   [0m | [0m44.07    [0m | [0m6.514    [0m |
| [0m6        [0m | [0m0.5719   [0m | [0m6.841    [0m | [0m816.6    [0m | [0m67.74    [0m | [0m0.7937   [0m | [0m82.93    [0m | [0m6.864    [0m |
| [0m7        [0m 

## Best hyperparameters

In [14]:
params_nn_1 = nn_bo1.max['params']

activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu',
               'elu', 'exponential', LeakyReLU,'relu']
params_nn_1['activation'] = activationL[round(params_nn_1['activation'])]

optimizerL = ['Adam', 'SGD', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Ftrl']
params_nn_1['optimizer'] = optimizerL[round(params_nn_1['optimizer'])]

params_nn_1['bach_size'] = round(params_nn_1['batch_size'])
params_nn_1['epochs'] = round(params_nn_1['epochs'])

params_nn_1

{'activation': 'selu',
 'batch_size': 210.56450490241733,
 'epochs': 77,
 'learning_rate': 0.19787923530180895,
 'neurons': 76.82203661959858,
 'optimizer': 'Adagrad',
 'bach_size': 211}

# Tuning hyperparameters (**Including `layers`**)

- `Optimizer`
- `Activation`
- `Neurons`
- `Batch size`
- `Epochs`
- `Layers`

## Create function

In [15]:
# Create function
def nn_cl_bo2(neurons, activation, optimizer, learning_rate,  batch_size, epochs, layers1, layers2, normalization, dropout, dropout_rate):
    optimizerL = ['Adam', 'SGD', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Ftrl']
    optimizerD= {'Adam':Adam(lr=learning_rate), 'SGD':SGD(lr=learning_rate),
                 'RMSprop':RMSprop(lr=learning_rate), 'Adadelta':Adadelta(lr=learning_rate),
                 'Adagrad':Adagrad(lr=learning_rate), 'Adamax':Adamax(lr=learning_rate),
                 'Nadam':Nadam(lr=learning_rate), 'Ftrl':Ftrl(lr=learning_rate)}
    activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu',
                   'elu', 'exponential', LeakyReLU,'relu']
    neurons = round(neurons)
    activation = activationL[round(activation)]
    optimizer = optimizerD[optimizerL[round(optimizer)]]
    batch_size = round(batch_size)
    epochs = round(epochs)
    layers1 = round(layers1)
    layers2 = round(layers2)

    def nn_cl_fun():
        nn = Sequential()
        nn.add(Dense(neurons, input_dim=10, activation=activation))
        if normalization > 0.5:
            nn.add(BatchNormalization())

        for i in range(layers1):
            nn.add(Dense(neurons, activation=activation))

        if dropout > 0.5:
            nn.add(Dropout(dropout_rate, seed=2023))

        for i in range(layers2):
            nn.add(Dense(neurons, activation=activation))

        nn.add(Dense(1, activation='sigmoid'))
        nn.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
        return nn
    es = EarlyStopping(monitor='accuracy', mode='max', verbose=0, patience=20)
    nn = KerasClassifier(build_fn=nn_cl_fun, epochs=epochs, batch_size=batch_size,
                         verbose=0)
    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=123)
    score = cross_val_score(nn, X_train, y_train, scoring=score_acc, cv=kfold, fit_params={'callbacks':[es]}).mean()
    return score

## Set parameters

In [16]:
params_nn2 ={
    'neurons': (10, 100),
    'activation':(0, 9),
    'optimizer':(0,7),
    'learning_rate':(0.01, 1),
    'batch_size':(200, 1000),
    'epochs':(20, 100),
    'layers1':(1,3),
    'layers2':(1,3),
    'normalization':(0,1),
    'dropout':(0,1),
    'dropout_rate':(0,0.3)
}

## Run Bayesian Optimization

In [17]:
nn_bo2 = BayesianOptimization(nn_cl_bo2, params_nn2, random_state=2023)
nn_bo2.maximize(init_points=25, n_iter=4)

|   iter    |  target   | activa... | batch_... |  dropout  | dropou... |  epochs   |  layers1  |  layers2  | learni... |  neurons  | normal... | optimizer |
-------------------------------------------------------------------------------------------------------------------------------------------------------------
| [0m1        [0m | [0m0.7675   [0m | [0m2.898    [0m | [0m912.3    [0m | [0m0.5881   [0m | [0m0.03798  [0m | [0m31.31    [0m | [0m1.936    [0m | [0m1.044    [0m | [0m0.73     [0m | [0m57.19    [0m | [0m0.5449   [0m | [0m3.195    [0m |
| [0m2        [0m | [0m0.7527   [0m | [0m4.512    [0m | [0m515.6    [0m | [0m0.1512   [0m | [0m0.1083   [0m | [0m32.97    [0m | [0m1.676    [0m | [0m1.361    [0m | [0m0.3971   [0m | [0m13.21    [0m | [0m0.5649   [0m | [0m1.424    [0m |
| [0m3        [0m | [0m0.7584   [0m | [0m2.885    [0m | [0m501.3    [0m | [0m0.1841   [0m | [0m0.03119  [0m | [0m56.39    [0m | [0m1.392    [0m 

## Best hyperparameters

In [19]:
params_nn_2 = nn_bo2.max['params']

activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu',
               'elu', 'exponential', LeakyReLU,'relu']
params_nn_2['activation'] = activationL[round(params_nn_2['activation'])]

optimizerL = ['Adam', 'SGD', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Ftrl']
learning_rate = params_nn_2['learning_rate']
optimizerD= {'Adam':Adam(lr=learning_rate), 'SGD':SGD(lr=learning_rate),
                 'RMSprop':RMSprop(lr=learning_rate), 'Adadelta':Adadelta(lr=learning_rate),
                 'Adagrad':Adagrad(lr=learning_rate), 'Adamax':Adamax(lr=learning_rate),
                 'Nadam':Nadam(lr=learning_rate), 'Ftrl':Ftrl(lr=learning_rate)}
params_nn_2['optimizer'] = optimizerD[optimizerL[round(params_nn_2['optimizer'])]]

params_nn_2['batch_size'] = round(params_nn_2['batch_size'])
params_nn_2['epochs'] = round(params_nn_2['epochs'])
params_nn_2['layers1'] = round(params_nn_2['layers1'])
params_nn_2['layers2'] = round(params_nn_2['layers2'])
params_nn_2['neurons'] = round(params_nn_2['neurons'])
params_nn_2['dropout'] = round(params_nn_2['dropout'])
params_nn_2['normalization'] = round(params_nn_2['normalization'])

params_nn_2

{'activation': 'softsign',
 'batch_size': 912,
 'dropout': 1,
 'dropout_rate': 0.03797882805128737,
 'epochs': 31,
 'layers1': 2,
 'layers2': 1,
 'learning_rate': 0.7300019655294788,
 'neurons': 57,
 'normalization': 1,
 'optimizer': <keras.optimizer_v2.adadelta.Adadelta at 0x7f9946340be0>}

# Build and fit the model using the best hyperparameters

In [20]:
def nn_cl_fun_final():
    nn_final = Sequential()
    nn_final.add(Dense(params_nn_2['neurons'], input_dim=10, activation=params_nn_2['activation']))
    if params_nn_2['normalization'] > 0.5:
        nn_final.add(BatchNormalization())

    for i in range(params_nn_2['layers1']):
        nn_final.add(Dense(params_nn_2['neurons'], activation=params_nn_2['activation']))

    if params_nn_2['dropout'] > 0.5:
        nn_final.add(Dropout(params_nn_2['dropout_rate'], seed=2023))

    for i in range(params_nn_2['layers2']):
        nn_final.add(Dense(params_nn_2['neurons'], activation=params_nn_2['activation']))

    nn_final.add(Dense(1, activation='sigmoid'))
    nn_final.compile(loss='binary_crossentropy', optimizer=params_nn_2['optimizer'], metrics=['accuracy'])

    return nn_final

es = EarlyStopping(monitor='accuracy', mode='max', verbose=0, patience=20)
nn = KerasClassifier(build_fn=nn_cl_fun_final, epochs=params_nn_2['epochs'], batch_size=params_nn_2['batch_size'],
                         verbose=0)

In [21]:
nn.fit(X_train, y_train, validation_data= (X_val, y_val), verbose = 1)

Epoch 1/31
Epoch 2/31
Epoch 3/31
Epoch 4/31
Epoch 5/31
Epoch 6/31
Epoch 7/31
Epoch 8/31
Epoch 9/31
Epoch 10/31
Epoch 11/31
Epoch 12/31
Epoch 13/31
Epoch 14/31
Epoch 15/31
Epoch 16/31
Epoch 17/31
Epoch 18/31
Epoch 19/31
Epoch 20/31
Epoch 21/31
Epoch 22/31
Epoch 23/31
Epoch 24/31
Epoch 25/31
Epoch 26/31
Epoch 27/31
Epoch 28/31
Epoch 29/31
Epoch 30/31
Epoch 31/31


<keras.callbacks.History at 0x7f9933683e50>