In [2]:
import keras
import random
import tensorflow as tf
import keras.backend as K
from keras.models import Model
from keras.layers import Dense, Input, Dropout, BatchNormalization, Activation
from keras.optimizers import Adam, Nadam
from keras.callbacks import Callback
from sklearn.metrics import roc_auc_score
import pandas as pd
import numpy as np
# Show every column and every row when a DataFrame is displayed (no truncation).
pd.options.display.max_columns = None
pd.options.display.max_rows = None

# Fix the seed of each random number generator used in this notebook.
random.seed(42)         # Python standard library
np.random.seed(42)      # NumPy global generator
tf.random.set_seed(42)  # TensorFlow global seed

In [3]:
import optuna

In [4]:
# Load the training DataFrame from a pickle file in the current working directory.
# NOTE(review): unpickling can execute arbitrary code - only load this file if it
# was produced by a trusted step of this project; confirm its provenance.
train_data = pd.read_pickle('final_train_data.pkl')

In [5]:
# Separate the target column ('isFraud') from the predictor columns
y = train_data['isFraud']
X = train_data.drop(columns=['isFraud'])
# Hold out 20% of the rows for validation; the fixed seed keeps the split repeatable
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)


In [6]:
from imblearn.over_sampling import SMOTE

In [7]:
# Oversample the training split only; the validation split is left untouched.
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)
# Report the resampled feature-matrix and target shapes, one per line.
print(X_train_smote.shape, y_train_smote.shape, sep='\n')

(911646, 171)
(911646,)


In [10]:
import optuna
import tensorflow as tf
from sklearn.metrics import roc_auc_score
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential

def objective(trial):
    """Optuna objective: train one feed-forward classifier and score it on the validation split.

    The trial samples the number of hidden layers, the width of each layer,
    a shared dropout rate, the optimizer type and its learning rate. The
    network is trained on the SMOTE-resampled training data
    (``X_train_smote`` / ``y_train_smote``) and evaluated on ``X_val`` /
    ``y_val``, all of which are read from the notebook's global scope.

    Args:
        trial: The trial object supplied by ``study.optimize``; its
            ``suggest_*`` methods are used to draw the hyperparameters.

    Returns:
        The ROC AUC computed by ``roc_auc_score`` from the model's predicted
        probabilities on the validation split.
    """
    # A new model is built on every trial; clear Keras' global state first so
    # that models from earlier trials do not accumulate in memory.
    tf.keras.backend.clear_session()

    # Define the hyperparameters to optimize
    num_layers = trial.suggest_int('num_layers', 1, 5)
    dropout_rate = trial.suggest_float('dropout_rate', 0.0, 0.5)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
    optimizer_name = trial.suggest_categorical('optimizer', ['adam', 'sgd'])

    # Define the number of neurons in each layer
    layer_units = [
        trial.suggest_int(f'layer_{i}_units', 32, 512, log=True)
        for i in range(num_layers)
    ]

    # Define the model architecture: Dense(relu) + Dropout per hidden layer,
    # followed by a single sigmoid unit for the binary target.
    model = Sequential()
    for i, units in enumerate(layer_units):
        if i == 0:
            model.add(Dense(units, activation='relu', input_shape=[X_train_smote.shape[1]]))
        else:
            model.add(Dense(units, activation='relu'))
        model.add(Dropout(dropout_rate))
    model.add(Dense(1, activation='sigmoid'))

    # Build the optimizer explicitly so the sampled learning rate is actually
    # applied. Passing the bare string ('adam' / 'sgd') to compile() would use
    # the optimizer's default learning rate and ignore the sampled value.
    if optimizer_name == 'adam':
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    else:
        optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)

    # Compile the model
    auc_metric = tf.keras.metrics.AUC(name='auc')
    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=[auc_metric])

    # Train the model
    model.fit(X_train_smote, y_train_smote, epochs=50, batch_size=512, validation_data=(X_val, y_val), verbose=1)
    # Predict probabilities for validation set; flatten the single-column
    # prediction array to a 1-D score vector for scikit-learn.
    y_pred_proba = model.predict(X_val).ravel()

    # Calculate AUC
    val_auc = roc_auc_score(y_val, y_pred_proba)

    return val_auc

# Seed the sampler explicitly: the Python/NumPy/TensorFlow seeds set at the top
# of the notebook are separate calls and are not passed to Optuna, so without
# this the sequence of suggested hyperparameters is not pinned by this notebook.
# TPESampler is Optuna's documented default sampler for single-objective
# studies, so only the seed changes.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
# Run 100 trials of `objective`, maximizing the validation ROC AUC it returns.
study.optimize(objective, n_trials=100)

best_params = study.best_params
print("Best Hyperparameters:", best_params)

[32m[I 2023-05-30 22:32:36,687][0m A new study created in memory with name: no-name-b46680ef-07e6-4f32-b3e0-0cd8d31902b7[0m


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50

[33m[W 2023-05-30 22:48:14,866][0m Trial 0 failed with parameters: {'num_layers': 3, 'dropout_rate': 0.41557737461959254, 'learning_rate': 1.538965638842784e-05, 'optimizer': 'sgd', 'layer_0_units': 111, 'layer_1_units': 43, 'layer_2_units': 343} because of the following error: KeyboardInterrupt().[0m
Traceback (most recent call last):
  File "/Users/tugus/miniconda3/envs/tf-dev/lib/python3.10/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/var/folders/t3/cqddr6dn4s596__8jnjm6s700000gn/T/ipykernel_33451/384906920.py", line 36, in objective
    model.fit(X_train_smote, y_train_smote, epochs=50, batch_size=512, validation_data=(X_val, y_val), verbose=1)
  File "/Users/tugus/miniconda3/envs/tf-dev/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
    return fn(*args, **kwargs)
  File "/Users/tugus/miniconda3/envs/tf-dev/lib/python3.10/site-packages/keras/engine/training.py", line 1685, in

KeyboardInterrupt: 