In [1]:
import pandas as pd
import numpy as np

from keras import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.optimizers import SGD, Adam

import tensorflow as tf

import optuna

  from .autonotebook import tqdm as notebook_tqdm


#### **Getting training & validation data**

In [2]:
df_train = pd.read_csv('../databases/training.csv')
df_valid = pd.read_csv('../databases/validation.csv')

In [3]:
X_train, y_train = df_train.drop(['IND_BOM_1_1'], axis=1), df_train['IND_BOM_1_1']
X_val, y_val = df_valid.drop(['IND_BOM_1_1'], axis=1), df_valid['IND_BOM_1_1']

In [4]:
X_train = np.array(X_train)
X_val = np.array(X_val)

#### **Parameters selection**

In [5]:
params = {
    'hidden_layer_units': [32, 128, 256],
    'hidden_layers': {
        'low': 1,
        'high': 2
    },
    'alpha': {
        'low': 0.001,
        'high': 0.1
    },
    'max_iter': [100, 150, 300],
    'batch_size': [32, 64, 128],
    'activation': ['tanh', 'relu', 'sigmoid'],
    'optimizer': ['SGD', 'Adam'],
    'loss_function': ['binary_crossentropy', 'mse'],
    'dropout_rate': {
        'low': 0.1,
        'high': 0.5
    }
}

reduce_lr = ReduceLROnPlateau(factor=0.5, patience=5, verbose=1)

early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    verbose=1,
    restore_best_weights=True,
    min_delta=0.001
)

def objective(trial):
    model = Sequential()
    
    model.add(
        tf.keras.Input(shape=X_train.shape[1:])
    )

    hidden_layers = trial.suggest_int('hidden_layers', params['hidden_layers']['low'], params['hidden_layers']['high'])
    
    for _ in range(hidden_layers):
        model.add(
            Dense(
                units=trial.suggest_categorical(
                    'hidden_layer_units',
                    params['hidden_layer_units']
                ),
                activation=trial.suggest_categorical(
                    'activation',
                    params['activation']
                )
            )
        )

    model.add(
        Dropout(
            trial.suggest_float(
                'dropout_rate',
                params['dropout_rate']['low'],
                params['dropout_rate']['high'],
            )
        )
    )
    
    model.add(
        Dense(
            units=1,
            activation='sigmoid'
        )
    )

    alpha = trial.suggest_float(
        'alpha',
        params['alpha']['low'],
        params['alpha']['high']
    )
    optimizer_name = trial.suggest_categorical('optimizer', params['optimizer'])

    if optimizer_name == 'SGD':
        optimizer = SGD(learning_rate=alpha)
    elif optimizer_name == 'Adam':
        optimizer = Adam(learning_rate=alpha)

    model.compile(
        optimizer=optimizer,
        loss=trial.suggest_categorical('loss_function', params['loss_function']),
        metrics=['binary_accuracy']
    )

    model.fit(
        X_train,
        y_train,
        batch_size=trial.suggest_categorical(
            'batch_size',
            params['batch_size']
        ),
        epochs=trial.suggest_categorical(
            'max_iter',
            params['max_iter']
        ),
        validation_data=(X_val, y_val),
        callbacks=[early_stopping]
    )

    loss, accuracy = model.evaluate(X_val, y_val)

    return loss

In [6]:
n_trials = 20

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=n_trials)

[32m[I 2023-04-29 11:43:22,362][0m A new study created in memory with name: no-name-847fea20-bf04-4010-a628-21d61712ae18[0m


Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 18: early stopping


[32m[I 2023-04-29 11:45:15,166][0m Trial 0 finished with value: 0.21986554563045502 and parameters: {'hidden_layers': 2, 'hidden_layer_units': 32, 'activation': 'relu', 'dropout_rate': 0.43113765575570817, 'alpha': 0.03444872012912931, 'optimizer': 'SGD', 'loss_function': 'mse', 'batch_size': 64, 'max_iter': 150}. Best is trial 0 with value: 0.21986554563045502.[0m


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 32: early stopping


[32m[I 2023-04-29 11:54:35,342][0m Trial 1 finished with value: 0.22074295580387115 and parameters: {'hidden_layers': 1, 'hidden_layer_units': 32, 'activation': 'sigmoid', 'dropout_rate': 0.23872141021158866, 'alpha': 0.04887805248668813, 'optimizer': 'SGD', 'loss_function': 'mse', 'batch_size': 32, 'max_iter': 100}. Best is trial 0 with value: 0.21986554563045502.[0m


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 32: early stopping


[32m[I 2023-04-29 11:58:28,254][0m Trial 2 finished with value: 0.22869907319545746 and parameters: {'hidden_layers': 1, 'hidden_layer_units': 128, 'activation': 'sigmoid', 'dropout_rate': 0.2560219032793872, 'alpha': 0.021093308101432298, 'optimizer': 'SGD', 'loss_function': 'mse', 'batch_size': 128, 'max_iter': 100}. Best is trial 0 with value: 0.21986554563045502.[0m


Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 32: early stopping


[32m[I 2023-04-29 12:04:26,177][0m Trial 3 finished with value: 0.22766849398612976 and parameters: {'hidden_layers': 1, 'hidden_layer_units': 32, 'activation': 'sigmoid', 'dropout_rate': 0.4360227924299974, 'alpha': 0.04304684018413363, 'optimizer': 'SGD', 'loss_function': 'mse', 'batch_size': 64, 'max_iter': 300}. Best is trial 0 with value: 0.21986554563045502.[0m


Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 24: early stopping


[32m[I 2023-04-29 12:07:36,965][0m Trial 4 finished with value: 0.21679122745990753 and parameters: {'hidden_layers': 1, 'hidden_layer_units': 128, 'activation': 'sigmoid', 'dropout_rate': 0.15427649234273072, 'alpha': 0.08792649533054528, 'optimizer': 'SGD', 'loss_function': 'mse', 'batch_size': 64, 'max_iter': 150}. Best is trial 4 with value: 0.21679122745990753.[0m


Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 11: early stopping


[32m[I 2023-04-29 12:10:30,951][0m Trial 5 finished with value: 0.344547837972641 and parameters: {'hidden_layers': 1, 'hidden_layer_units': 256, 'activation': 'sigmoid', 'dropout_rate': 0.407890749961101, 'alpha': 0.061461908063882166, 'optimizer': 'Adam', 'loss_function': 'mse', 'batch_size': 32, 'max_iter': 150}. Best is trial 4 with value: 0.21679122745990753.[0m


Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 11: early stopping


[32m[I 2023-04-29 12:13:45,805][0m Trial 6 finished with value: 0.6554521918296814 and parameters: {'hidden_layers': 2, 'hidden_layer_units': 256, 'activation': 'relu', 'dropout_rate': 0.21921498769087464, 'alpha': 0.07422169322292364, 'optimizer': 'Adam', 'loss_function': 'mse', 'batch_size': 32, 'max_iter': 300}. Best is trial 4 with value: 0.21679122745990753.[0m


Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 11: early stopping


[32m[I 2023-04-29 12:16:43,774][0m Trial 7 finished with value: 0.344547837972641 and parameters: {'hidden_layers': 2, 'hidden_layer_units': 128, 'activation': 'relu', 'dropout_rate': 0.2855557763377695, 'alpha': 0.060275433161892655, 'optimizer': 'Adam', 'loss_function': 'mse', 'batch_size': 32, 'max_iter': 300}. Best is trial 4 with value: 0.21679122745990753.[0m


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 23: early stopping


[32m[I 2023-04-29 12:20:00,349][0m Trial 8 finished with value: 0.6255545616149902 and parameters: {'hidden_layers': 1, 'hidden_layer_units': 128, 'activation': 'relu', 'dropout_rate': 0.39179624784777367, 'alpha': 0.028850228959888688, 'optimizer': 'SGD', 'loss_function': 'binary_crossentropy', 'batch_size': 64, 'max_iter': 100}. Best is trial 4 with value: 0.21679122745990753.[0m


Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 15: early stopping


[32m[I 2023-04-29 12:23:50,210][0m Trial 9 finished with value: 0.2129877209663391 and parameters: {'hidden_layers': 1, 'hidden_layer_units': 256, 'activation': 'relu', 'dropout_rate': 0.17692317531246174, 'alpha': 0.08692594001033588, 'optimizer': 'SGD', 'loss_function': 'mse', 'batch_size': 32, 'max_iter': 300}. Best is trial 9 with value: 0.2129877209663391.[0m


Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 19: early stopping


[32m[I 2023-04-29 12:25:49,508][0m Trial 10 finished with value: 0.6451866626739502 and parameters: {'hidden_layers': 2, 'hidden_layer_units': 256, 'activation': 'tanh', 'dropout_rate': 0.10659998344274094, 'alpha': 0.09587317261432643, 'optimizer': 'Adam', 'loss_function': 'binary_crossentropy', 'batch_size': 128, 'max_iter': 300}. Best is trial 9 with value: 0.2129877209663391.[0m


Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 12: early stopping


[32m[I 2023-04-29 12:27:20,416][0m Trial 11 finished with value: 0.6126320958137512 and parameters: {'hidden_layers': 1, 'hidden_layer_units': 128, 'activation': 'tanh', 'dropout_rate': 0.14535877385797674, 'alpha': 0.09682279756827701, 'optimizer': 'SGD', 'loss_function': 'binary_crossentropy', 'batch_size': 64, 'max_iter': 150}. Best is trial 9 with value: 0.2129877209663391.[0m


Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 25: early stopping


[32m[I 2023-04-29 12:33:56,832][0m Trial 12 finished with value: 0.22406095266342163 and parameters: {'hidden_layers': 1, 'hidden_layer_units': 256, 'activation': 'sigmoid', 'dropout_rate': 0.16845021173422786, 'alpha': 0.004445996345245264, 'optimizer': 'SGD', 'loss_function': 'mse', 'batch_size': 32, 'max_iter': 150}. Best is trial 9 with value: 0.2129877209663391.[0m


Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 12: early stopping


[32m[I 2023-04-29 12:36:04,428][0m Trial 13 finished with value: 0.21965989470481873 and parameters: {'hidden_layers': 1, 'hidden_layer_units': 256, 'activation': 'relu', 'dropout_rate': 0.17787772848306782, 'alpha': 0.08358559344314806, 'optimizer': 'SGD', 'loss_function': 'mse', 'batch_size': 64, 'max_iter': 150}. Best is trial 9 with value: 0.2129877209663391.[0m


Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 24: early stopping


[32m[I 2023-04-29 12:38:08,438][0m Trial 14 finished with value: 0.2187960147857666 and parameters: {'hidden_layers': 1, 'hidden_layer_units': 128, 'activation': 'tanh', 'dropout_rate': 0.10074543259490526, 'alpha': 0.07998995086309096, 'optimizer': 'SGD', 'loss_function': 'mse', 'batch_size': 128, 'max_iter': 300}. Best is trial 9 with value: 0.2129877209663391.[0m


Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 28: early stopping


[32m[I 2023-04-29 12:44:05,575][0m Trial 15 finished with value: 0.620918869972229 and parameters: {'hidden_layers': 1, 'hidden_layer_units': 256, 'activation': 'relu', 'dropout_rate': 0.3486957952212951, 'alpha': 0.09994649917208756, 'optimizer': 'SGD', 'loss_function': 'binary_crossentropy', 'batch_size': 64, 'max_iter': 300}. Best is trial 9 with value: 0.2129877209663391.[0m


Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 17: early stopping


[32m[I 2023-04-29 12:50:10,260][0m Trial 16 finished with value: 0.22214536368846893 and parameters: {'hidden_layers': 1, 'hidden_layer_units': 128, 'activation': 'sigmoid', 'dropout_rate': 0.486822111554875, 'alpha': 0.07447169521774907, 'optimizer': 'SGD', 'loss_function': 'mse', 'batch_size': 32, 'max_iter': 150}. Best is trial 9 with value: 0.2129877209663391.[0m


Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 15: early stopping


[32m[I 2023-04-29 12:56:55,127][0m Trial 17 finished with value: 0.21424543857574463 and parameters: {'hidden_layers': 2, 'hidden_layer_units': 256, 'activation': 'sigmoid', 'dropout_rate': 0.19462650156548966, 'alpha': 0.08718431503862642, 'optimizer': 'SGD', 'loss_function': 'mse', 'batch_size': 32, 'max_iter': 150}. Best is trial 9 with value: 0.2129877209663391.[0m


Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 30: early stopping


[32m[I 2023-04-29 13:10:35,643][0m Trial 18 finished with value: 0.6648998856544495 and parameters: {'hidden_layers': 2, 'hidden_layer_units': 256, 'activation': 'relu', 'dropout_rate': 0.19568878823973251, 'alpha': 0.06647238049565501, 'optimizer': 'Adam', 'loss_function': 'binary_crossentropy', 'batch_size': 32, 'max_iter': 300}. Best is trial 9 with value: 0.2129877209663391.[0m


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 23: early stopping


[32m[I 2023-04-29 13:21:39,793][0m Trial 19 finished with value: 0.20740830898284912 and parameters: {'hidden_layers': 2, 'hidden_layer_units': 256, 'activation': 'tanh', 'dropout_rate': 0.28993682622002964, 'alpha': 0.08742105156554487, 'optimizer': 'SGD', 'loss_function': 'mse', 'batch_size': 32, 'max_iter': 100}. Best is trial 19 with value: 0.20740830898284912.[0m


#### **Saving study**

In [7]:
import joblib

In [8]:
save_path = './optuna_studies/mlp_study.pkl'

joblib.dump(study, save_path)

['./optuna_studies/mlp_study.pkl']