<a href="https://colab.research.google.com/github/zhh25/Titanic/blob/main/Fine_Tune_Neural_Network_Using_CrossValidation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; every path used below lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import os
# Project folder on the mounted Drive; all data paths below are relative to it.
path_root = '/content/drive/MyDrive/titanic'

# Read the three CSV files in one pass (the generator keeps its loop variable
# out of the notebook namespace).
train, test, sample_submission = (
    pd.read_csv(os.path.join(path_root, relative_path))
    for relative_path in ('data/train.csv', 'data/test.csv', 'data/sample_submission.csv')
)

# Split the training frame into features and the `Survived` target.
titanic_label = train['Survived'].copy()
titanic = train.drop(columns='Survived')

# Transformation Pipeline




In [None]:
# Run the shared preprocessing script inside this notebook's namespace.
# Presumably it defines `full_pipeline`, which the next cell uses — confirm against the script.
# NOTE(review): exec() runs whatever code is in that Drive file; importing the
# script as a module would make the dependency explicit and easier to audit.
# The `with` block closes the file handle, which the bare open() never did.
with open('/content/drive/MyDrive/Colab Notebooks/titanic_data_pipeline.py') as pipeline_script:
    exec(pipeline_script.read())


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Fit the preprocessing pipeline on the training features, then reuse the fitted
# pipeline (transform only, no refit) on the test features.
titanic_prepared = full_pipeline.fit_transform(titanic)
test_prepared = full_pipeline.transform(test)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the prepared training data for validation.
# A fixed seed makes the split, and the epoch count later derived from it,
# reproducible across kernel restarts.
X_train, X_val, y_train, y_val = train_test_split(
    titanic_prepared, titanic_label, test_size=0.25, random_state=42
)

# Fine Tune Neural Network

In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Input, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
! pip install keras_tuner
import keras_tuner as kt
from tensorflow.keras.regularizers import l2

### Define the search space

In [None]:
def call_existing_code(units=2, hiddens=3, lr=0.001,
                       input_shape=[titanic_prepared.shape[-1]]):
    """Build and compile a funnel-shaped binary classifier.

    The network stacks `hiddens` Dense layers (SELU activation, LeCun-normal
    initialisation) whose widths halve from `units * 2**(hiddens - 1)` down to
    `units`, followed by a single sigmoid output unit.

    Args:
        units: Width of the last (narrowest) hidden layer.
        hiddens: Number of hidden layers.
        lr: Learning rate handed to the Nadam optimizer.
        input_shape: Shape of one input sample; given to the first hidden
            layer only. The default is evaluated once, at definition time,
            from `titanic_prepared`.

    Returns:
        A compiled Sequential model using binary cross-entropy loss and the
        accuracy metric.
    """
    # Widest layer first: units * 2**(hiddens-1), ..., units * 2, units.
    layer_widths = [units * (2 ** exponent) for exponent in reversed(range(hiddens))]

    model = Sequential()
    for position, width in enumerate(layer_widths):
        # Only the very first layer needs to be told the input shape.
        first_layer_kwargs = {'input_shape': input_shape} if position == 0 else {}
        model.add(Dense(width,
                        activation='selu',
                        kernel_initializer='lecun_normal',
                        **first_layer_kwargs))

    model.add(Dense(1, activation='sigmoid'))

    model.compile(
        optimizer=tf.keras.optimizers.Nadam(lr),
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=['accuracy'],
    )
    return model

In [None]:
#call_existing_code(units = 3, hiddens = 4).summary()

### Subclass HyperModel

In [None]:
from sklearn import model_selection
class CVHyperModel(kt.HyperModel):
    """HyperModel whose trial score is the negated mean 4-fold validation accuracy.

    `build` maps the sampled hyperparameters onto `call_existing_code`;
    `fit` trains a fresh model on every fold and returns a single float for
    the tuner to minimize.
    """

    def build(self, hp):
        """Sample `units`, `hiddens` and `lr` from `hp` and build the model."""
        units = hp.Int('units', min_value = 3, max_value = 20, step = 1)
        hiddens = hp.Int('hiddens', min_value = 2, max_value = 10)
        lr = hp.Float('lr', min_value = 5e-4, max_value = 1e-2, sampling = 'log')
        # Call the existing model-building code with the hyperparameter values.
        return call_existing_code(units=units, hiddens=hiddens, lr=lr)

    def fit(self, hp, model, x, y, *args, **kwargs):
        """Cross-validate the hyperparameters in `hp` and return the score.

        Args:
            hp: Hyperparameters of the current trial.
            model: Model built by the tuner for this execution. It is not
                trained here; every fold gets its own freshly built model.
            x: Feature matrix; must support indexing by an integer index array.
            y: Targets aligned with the rows of `x` (array-like or pandas Series).
            *args, **kwargs: Forwarded unchanged to each fold's `model.fit`.

        Returns:
            Negated mean validation accuracy over the 4 folds (lower is better).
        """
        # Convert to ndarray so the fold indices select rows by position. A
        # pandas Series would treat them as index labels, which is only
        # correct when the index is the default 0..n-1 range.
        y = np.asarray(y)

        cv = model_selection.KFold(4, shuffle=True)
        val_acc = []
        for train_indices, test_indices in cv.split(x):
            x_train, x_test = x[train_indices], x[test_indices]
            y_train, y_test = y[train_indices], y[test_indices]

            # Fresh weights per fold so no fold benefits from another's training.
            fold_model = self.build(hp)
            fold_model.fit(x_train, y_train,
                           *args,
                           validation_data=(x_test, y_test),
                           **kwargs)

            # evaluate() returns [loss, accuracy]; keep the accuracy and skip
            # the per-fold progress bar.
            val_acc.append(fold_model.evaluate(x_test, y_test, verbose=0)[1])
        # Return a single float to minimize.
        return -np.mean(val_acc)

In [None]:
# Bayesian search over the CVHyperModel search space.
# No `objective` is given: the tuner minimizes the value returned by
# `CVHyperModel.fit()`. With `overwrite=False` an existing project found in
# the directory is reloaded instead of being started from scratch.
tuner = kt.BayesianOptimization(
    hypermodel=CVHyperModel(),
    max_trials=40,
    executions_per_trial=3,
    directory=os.path.join(path_root, 'tuner'),
    project_name='keras_tuner_7_cv',
    overwrite=False,
)

INFO:tensorflow:Reloading Oracle from existing project /content/drive/MyDrive/titanic/tuner/keras_tuner_7_cv/oracle.json
INFO:tensorflow:Reloading Tuner from /content/drive/MyDrive/titanic/tuner/keras_tuner_7_cv/tuner0.json


### Start the search

All the arguments passed to `search` are passed to `model.fit()` in each execution.

In [None]:
# Stop a fit after 20 epochs without a new best validation accuracy and roll
# the model back to the weights of its best epoch.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=20,
    restore_best_weights=True,
)
# Halve the learning rate when validation loss has not improved for 5 epochs.
lr_schedule = ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.5,
    patience=5,
)

In [None]:
# Search on the full prepared training set; CVHyperModel.fit does its own
# K-fold split, so no separate validation data is passed here.
# epochs=1000 is only an upper bound: early_stopping ends each fit sooner.
tuner.search(titanic_prepared, titanic_label,
            batch_size=32,
            epochs = 1000,
            callbacks=[early_stopping, lr_schedule],
            verbose = 2)

INFO:tensorflow:Oracle triggered exit


In [None]:
# Scores are the negated mean cross-validation accuracy, so lower is better.
tuner.results_summary()

Results summary
Results in /content/drive/MyDrive/titanic/tuner/keras_tuner_7_cv
Showing 10 best trials
<keras_tuner.engine.objective.DefaultObjective object at 0x7fdb8cc0bbd0>
Trial summary
Hyperparameters:
units: 10
hiddens: 5
lr: 0.0005911057170266328
Score: -0.8421454081932703
Trial summary
Hyperparameters:
units: 2
hiddens: 3
lr: 0.01
Score: -0.8417498171329498
Trial summary
Hyperparameters:
units: 10
hiddens: 5
lr: 0.0005730157594327513
Score: -0.8413811773061752
Trial summary
Hyperparameters:
units: 10
hiddens: 5
lr: 0.0006339556224852895
Score: -0.8406388560930887
Trial summary
Hyperparameters:
units: 10
hiddens: 5
lr: 0.0005
Score: -0.8406169613202413
Trial summary
Hyperparameters:
units: 10
hiddens: 5
lr: 0.0005
Score: -0.8398931423823038
Trial summary
Hyperparameters:
units: 2
hiddens: 3
lr: 0.01
Score: -0.839145764708519
Trial summary
Hyperparameters:
units: 10
hiddens: 5
lr: 0.00475165515741009
Score: -0.8391205171744028
Trial summary
Hyperparameters:
units: 3
hiddens: 3
l

In [None]:
# Only the top-ranked model is used, so ask the tuner for one model instead of
# loading two and discarding the second.
best_model = tuner.get_best_models(num_models=1)[0]
best_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 160)               4320      
                                                                 
 dense_1 (Dense)             (None, 80)                12880     
                                                                 
 dense_2 (Dense)             (None, 40)                3240      
                                                                 
 dense_3 (Dense)             (None, 20)                820       
                                                                 
 dense_4 (Dense)             (None, 10)                210       
                                                                 
 dense_5 (Dense)             (None, 1)                 11        
                                                                 
Total params: 21,481
Trainable params: 21,481
Non-traina

### Retrain the model


Find the optimal number of epochs to train the model with the hyperparameters obtained from the search.

In [None]:
# Take the hyperparameters of the top-ranked trial (index 0 of the two requested).
best_hps = tuner.get_best_hyperparameters(2)[0]
print(best_hps.get('units'), best_hps.get('hiddens'))

10 5


In [None]:
# Build an untrained model from the best hyperparameters and fit it on the
# hold-out split; the resulting history is used to pick the epoch count.
model = tuner.hypermodel.build(best_hps)
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping, lr_schedule],
    epochs=1000,
    batch_size=32,
    verbose=1,
)


In [None]:
# Epoch (1-based) at which validation accuracy first reached its maximum.
val_acc_per_epoch = history.history['val_accuracy']
best_val_acc = max(val_acc_per_epoch)
best_epoch = val_acc_per_epoch.index(best_val_acc) + 1
print('Best epoch: %d' % (best_epoch,))

Best epoch: 11


If you want to train the model with the entire dataset, you may retrieve the best hyperparameters and retrain the model by yourself.

In [None]:
# The retraining run has no validation data, so both callbacks monitor
# training metrics instead.
lr_schedule_retrain = ReduceLROnPlateau(monitor='loss', factor=0.5, patience=3, mode='min')
# Write checkpoints to their own folder under models/. The previous target,
# <path_root>/tuner, is the KerasTuner project directory, so the saved model
# ended up mixed in with the tuner's own state.
checkpoint_retrain = ModelCheckpoint(os.path.join(path_root, 'models/retrain_checkpoint'),
                                     save_best_only=True,
                                     monitor='accuracy',
                                     mode='max')

In [None]:
# Build the model with the best hp.
best_model = tuner.hypermodel.build(best_hps)
# Fit with the entire dataset.
history = best_model.fit(titanic_prepared, titanic_label, 
                batch_size=32,
                epochs = best_epoch,
                callbacks=[lr_schedule_retrain, checkpoint_retrain],
                #validation_split=0.2,
                #validation_data = (X_val, y_val),
                verbose = 1
                )

Epoch 1/11
INFO:tensorflow:Assets written to: /content/drive/MyDrive/titanic/tuner/assets
Epoch 2/11
Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11


### Predict

In [None]:
# Sigmoid outputs of the retrained model for every row of the prepared test set.
test_prob = best_model.predict(test_prepared)

In [None]:
# Number of test rows predicted as survivors at the 0.5 threshold.
np.sum(test_prob >= 0.5)

137

In [None]:
# Flatten the model output to 1-D before thresholding so the column is
# assigned from a plain vector rather than an (n, 1) 2-D array, and cast to
# int8 in the same step.
sample_submission['Survived'] = (test_prob.ravel() >= 0.5).astype('int8')
sample_submission

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,0
2,894,0
3,895,0
4,896,1
...,...,...
413,1305,0
414,1306,1
415,1307,0
416,1308,0


In [None]:
# Write the single-model submission; index=False keeps the row index out of the CSV.
sample_submission.to_csv(os.path.join(path_root, 'submission/neural_network_11_cv.csv'), index = False)

# Ensemble

In [None]:
# Fetch the top-ranked hyperparameters again so the ensemble section can be
# run without re-running the retraining section.
best_hps = tuner.get_best_hyperparameters(2)[0]
print(best_hps.get('units'), best_hps.get('hiddens'))

10 5


In [None]:
# Soft-voting ensemble: repeat a shuffled 4-fold split `trial_num` times, train
# one model per fold with the best hyperparameters, and collect each model's
# test-set probabilities as one column of `pred`.
trial_num = 3
val_acc = []
fold_predictions = []

# Index labels by position. A pandas Series would treat the fold indices as
# index labels, which is only right for a default 0..n-1 index.
labels = np.asarray(titanic_label)

for i in range(trial_num):
    cv = model_selection.KFold(4, shuffle=True)
    for train_indices, test_indices in cv.split(titanic_prepared):
        # Fold-specific names: the previous `y_train` here silently replaced
        # the hold-out `y_train` created by train_test_split.
        x_fold_train, x_fold_val = titanic_prepared[train_indices], titanic_prepared[test_indices]
        y_fold_train, y_fold_val = labels[train_indices], labels[test_indices]

        fold_model = tuner.hypermodel.build(best_hps)
        fold_model.fit(x_fold_train, y_fold_train,
            validation_data = (x_fold_val, y_fold_val),
            batch_size=32,
            epochs = 1000,
            callbacks=[early_stopping, lr_schedule],
            verbose = 0 )

        # evaluate() returns [loss, accuracy]; keep the accuracy.
        val_acc.append(fold_model.evaluate(x_fold_val, y_fold_val)[1])

        # Collect predictions in a list and join once after the loop instead
        # of re-concatenating a growing array on every fold.
        fold_predictions.append(fold_model.predict(test_prepared))

        # Same progress line as before: accuracies so far and the shape the
        # stacked prediction matrix has at this point.
        print(val_acc, (fold_predictions[0].shape[0], len(fold_predictions)))

# One column of test-set probabilities per trained model.
pred = np.concatenate(fold_predictions, axis=1)
print(np.mean(val_acc))

[0.8565022349357605] (418, 1)
[0.8565022349357605, 0.8520179390907288] (418, 2)
[0.8565022349357605, 0.8520179390907288, 0.8385650515556335] (418, 3)
[0.8565022349357605, 0.8520179390907288, 0.8385650515556335, 0.7972972989082336] (418, 4)
[0.8565022349357605, 0.8520179390907288, 0.8385650515556335, 0.7972972989082336, 0.8206278085708618] (418, 5)
[0.8565022349357605, 0.8520179390907288, 0.8385650515556335, 0.7972972989082336, 0.8206278085708618, 0.8699551820755005] (418, 6)
[0.8565022349357605, 0.8520179390907288, 0.8385650515556335, 0.7972972989082336, 0.8206278085708618, 0.8699551820755005, 0.8565022349357605] (418, 7)
[0.8565022349357605, 0.8520179390907288, 0.8385650515556335, 0.7972972989082336, 0.8206278085708618, 0.8699551820755005, 0.8565022349357605, 0.8468468189239502] (418, 8)
[0.8565022349357605, 0.8520179390907288, 0.8385650515556335, 0.7972972989082336, 0.8206278085708618, 0.8699551820755005, 0.8565022349357605, 0.8468468189239502, 0.8385650515556335] (418, 9)
[0.8565022

In [None]:
# Soft vote: average the probabilities of all ensemble members per test row,
# then threshold the mean at 0.5.
sample_submission['Survived'] =(pred.mean(axis = 1) >= 0.5).astype('int8')


# Column sums; the `Survived` entry is the number of predicted survivors.
sample_submission.sum(axis = 0)

PassengerId    460009
Survived          157
dtype: int64

In [None]:
# Write the soft-voting ensemble submission; index=False keeps the row index out of the CSV.
sample_submission.to_csv(os.path.join(path_root, 'submission/neural_network_12_softvote.csv'), index = False)

# Save model

In [None]:
# Persist the current `best_model` under <path_root>/models/best_nn.
best_model.save(os.path.join(path_root, 'models/best_nn'))

INFO:tensorflow:Assets written to: /content/drive/MyDrive/titanic/models/best_nn/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/titanic/models/best_nn/assets


In [None]:
# Reload the model saved above to check that it round-trips from disk.
best_nn = tf.keras.models.load_model(os.path.join(path_root, 'models/best_nn'))

In [None]:
# Predict with the reloaded model and write its submission file.
test_prob = best_nn.predict(test_prepared)
# Flatten the model output to 1-D before thresholding so the column is
# assigned from a plain vector rather than an (n, 1) 2-D array.
sample_submission['Survived'] = (test_prob.ravel() >= 0.5).astype('int8')
sample_submission.to_csv(os.path.join(path_root, 'submission/neural_network_best.csv'), index = False)