# NN hyperparameters
### * n layers
### * n units
### * activation fn
### * dropout pct
### * loss fn
### * optimizer
### * ...

### load, sample, split data

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv('fraud_0.csv').sample(10000)

y = df['isFraud']
X = df.drop(columns = ['isFraud'], axis = 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

del df, X, y, train_test_split

In [2]:
import optuna

In [155]:
pg_url = 'postgresql+pg8000://admin@localhost:5433/optuna'
study = optuna.create_study(study_name='nn_ieee_fraud_mac', storage=pg_url)

[32m[I 2020-01-23 22:53:31,753][0m A new study created with name: nn_ieee_fraud_mac[0m


In [156]:
# https://keras.io/activations/
activations = ['relu','tanh','sigmoid','softplus'] #,'elu','softmax','selu','softsign','hard_sigmoid','exponential','linear']
# https://keras.io/optimizers/
optimizers = ['sgd','adam']#,'rmsprop','adadelta','adamax','nadam']
# https://keras.io/losses/
losses = ['binary_crossentropy']#,'mean_squared_error','mean_absolute_error','mean_absolute_percentage_error','mean_squared_logarithmic_error','squared_hinge','hinge','categorical_hinge','logcosh']

In [157]:
import keras
from keras import backend as K
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Input, InputLayer, Lambda
from sklearn.metrics import roc_curve, auc, roc_auc_score, average_precision_score
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.base import BaseEstimator
import numpy as np
import math

In [158]:
def nn_auc_objective(trial):
        
    # n Layers
    n_layers =  trial.suggest_int('n_estimators', 1, 4)    
    layers = []
    in_lyer = Input(shape=(X_train.shape[1],))
    layers.append(in_lyer)
    
    for i in range(n_layers):
        
        # n Units
        n_units = int(trial.suggest_loguniform('n_units_l{}'.format(i), 2, 410))        
        # activation fn
        activation = trial.suggest_categorical('act_fn_l{}'.format(i), activations)
        
        layers.append(Dense(n_units, activation=activation)(layers[-1]))
        
        # dropout pct
        if i < (n_layers-1):
            drop_pct = np.nan_to_num(trial.suggest_uniform('drop_pct_l{}'.format(i), 0.00,0.99))
            layers.append(Dropout(drop_pct)(layers[-1]))
    
    # OUTPUT LAYER
    #output_activation = trial.suggest_categorical('output_activation', activations)
    out_lyr = Dense(1, activation='sigmoid')(layers[-1])
    
    # loss fn
    #loss_fn = trial.suggest_categorical('loss_fn', losses)
    # optimizer
    optimizer = trial.suggest_categorical('optimizer', optimizers)
    
    
    model = Model(inputs=[in_lyer], outputs=[out_lyr])
    model.summary()
    
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['acc'])
    
    epchs = trial.suggest_int('epchs', 1, 10)
    btch_sz = trial.suggest_int('btch_sz', 1, 1024)
    
    history = model.fit(
        X_train.values, 
        y_train.values, 
        epochs=epchs, 
        batch_size=btch_sz)
    
    rnd_lyr = Lambda(lambda x: K.round(x)) (out_lyr)    
    m1 = Model(inputs=[in_lyer], outputs=[rnd_lyr])
    # print(m1.predict(X_test.values))
    
    class EST(BaseEstimator):
        def predict(self, X):
            a = m1.predict(X)
            a = a.reshape(a.shape[0],)
            a = [np.nan_to_num(i) for i in a]
            return [int(round(i,0)) for i in a]
    
    print()
    
    fpr, tpr, _ = metrics.roc_curve(y_test,EST().predict(X_test.values))
    return 1 - np.nan_to_num(metrics.auc(fpr, tpr) )

In [159]:
study.optimize(nn_auc_objective, n_trials=100, timeout=300)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_118 (InputLayer)       (None, 401)               0         
_________________________________________________________________
dense_2215 (Dense)           (None, 19)                7638      
_________________________________________________________________
dropout_1981 (Dropout)       (None, 19)                0         
_________________________________________________________________
dense_2216 (Dense)           (None, 310)               6200      
_________________________________________________________________
dense_2217 (Dense)           (None, 1)                 311       
Total params: 14,149
Trainable params: 14,149
Non-trainable params: 0
_________________________________________________________________
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6



[32m[I 2020-01-23 22:56:20,489][0m Finished trial#0 resulted in value: 0.5. Current best value is 0.5 with parameters: {'act_fn_l0': 'sigmoid', 'act_fn_l1': 'sigmoid', 'btch_sz': 579, 'drop_pct_l0': 0.41722633926567626, 'epchs': 6, 'n_estimators': 2, 'n_units_l0': 19.149210984678923, 'n_units_l1': 310.1030458069705, 'optimizer': 'sgd'}.[0m


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_119 (InputLayer)       (None, 401)               0         
_________________________________________________________________
dense_2218 (Dense)           (None, 40)                16080     
_________________________________________________________________
dropout_1982 (Dropout)       (None, 40)                0         
_________________________________________________________________
dense_2219 (Dense)           (None, 340)               13940     
_________________________________________________________________
dropout_1983 (Dropout)       (None, 340)               0         
_________________________________________________________________
dense_2220 (Dense)           (None, 3)                 1023      
_________________________________________________________________
dropout_1984 (Dropout)       (None, 3)                 0         
__________

[32m[I 2020-01-23 22:57:30,290][0m Finished trial#1 resulted in value: 0.5. Current best value is 0.5 with parameters: {'act_fn_l0': 'sigmoid', 'act_fn_l1': 'sigmoid', 'btch_sz': 579, 'drop_pct_l0': 0.41722633926567626, 'epchs': 6, 'n_estimators': 2, 'n_units_l0': 19.149210984678923, 'n_units_l1': 310.1030458069705, 'optimizer': 'sgd'}.[0m


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_120 (InputLayer)       (None, 401)               0         
_________________________________________________________________
dense_2223 (Dense)           (None, 351)               141102    
_________________________________________________________________
dropout_1985 (Dropout)       (None, 351)               0         
_________________________________________________________________
dense_2224 (Dense)           (None, 6)                 2112      
_________________________________________________________________
dropout_1986 (Dropout)       (None, 6)                 0         
_________________________________________________________________
dense_2225 (Dense)           (None, 4)                 28        
_________________________________________________________________
dense_2226 (Dense)           (None, 1)                 5         
Total para

[32m[I 2020-01-23 22:58:40,516][0m Finished trial#2 resulted in value: 0.49904831901100555. Current best value is 0.49904831901100555 with parameters: {'act_fn_l0': 'relu', 'act_fn_l1': 'relu', 'act_fn_l2': 'relu', 'btch_sz': 677, 'drop_pct_l0': 0.2840917857728824, 'drop_pct_l1': 0.7245909014674915, 'epchs': 5, 'n_estimators': 3, 'n_units_l0': 351.1866872613872, 'n_units_l1': 6.009419583724631, 'n_units_l2': 4.123964346359425, 'optimizer': 'adam'}.[0m


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_121 (InputLayer)       (None, 401)               0         
_________________________________________________________________
dense_2227 (Dense)           (None, 27)                10854     
_________________________________________________________________
dropout_1987 (Dropout)       (None, 27)                0         
_________________________________________________________________
dense_2228 (Dense)           (None, 331)               9268      
_________________________________________________________________
dropout_1988 (Dropout)       (None, 331)               0         
_________________________________________________________________
dense_2229 (Dense)           (None, 157)               52124     
_________________________________________________________________
dense_2230 (Dense)           (None, 1)                 158       
Total para

[32m[I 2020-01-23 22:59:50,359][0m Finished trial#3 resulted in value: 0.5. Current best value is 0.49904831901100555 with parameters: {'act_fn_l0': 'relu', 'act_fn_l1': 'relu', 'act_fn_l2': 'relu', 'btch_sz': 677, 'drop_pct_l0': 0.2840917857728824, 'drop_pct_l1': 0.7245909014674915, 'epchs': 5, 'n_estimators': 3, 'n_units_l0': 351.1866872613872, 'n_units_l1': 6.009419583724631, 'n_units_l2': 4.123964346359425, 'optimizer': 'adam'}.[0m


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_122 (InputLayer)       (None, 401)               0         
_________________________________________________________________
dense_2231 (Dense)           (None, 24)                9648      
_________________________________________________________________
dense_2232 (Dense)           (None, 1)                 25        
Total params: 9,673
Trainable params: 9,673
Non-trainable params: 0
_________________________________________________________________
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8



[32m[I 2020-01-23 23:00:58,805][0m Finished trial#4 resulted in value: 0.5. Current best value is 0.49904831901100555 with parameters: {'act_fn_l0': 'relu', 'act_fn_l1': 'relu', 'act_fn_l2': 'relu', 'btch_sz': 677, 'drop_pct_l0': 0.2840917857728824, 'drop_pct_l1': 0.7245909014674915, 'epchs': 5, 'n_estimators': 3, 'n_units_l0': 351.1866872613872, 'n_units_l1': 6.009419583724631, 'n_units_l2': 4.123964346359425, 'optimizer': 'adam'}.[0m


In [153]:
df = study.trials_dataframe(attrs=('number', 'value', 'params', 'state'))
df.sort_values(by='value')

Unnamed: 0,number,value,params_act_fn_l0,params_act_fn_l1,params_act_fn_l2,params_act_fn_l3,params_btch_sz,params_drop_pct_l0,params_drop_pct_l1,params_drop_pct_l2,params_epchs,params_loss_fn,params_n_estimators,params_n_units_l0,params_n_units_l1,params_n_units_l2,params_n_units_l3,params_optimizer,params_output_activation,state
0,0,0.5,tanh,tanh,sigmoid,softplus,288,0.043032,0.791751,0.793716,2,binary_crossentropy,4,23.512683,57.871778,259.387902,47.381963,sgd,tanh,COMPLETE
1,1,0.5,softplus,sigmoid,sigmoid,relu,728,0.25478,0.085958,0.46541,2,binary_crossentropy,4,245.4182,47.452048,63.835692,46.468002,sgd,softplus,COMPLETE
2,2,0.5,softplus,tanh,tanh,,1002,0.267952,0.372788,,1,binary_crossentropy,3,28.783537,6.49367,158.603137,,adam,tanh,COMPLETE
3,3,0.5,softplus,tanh,relu,,452,0.355564,0.961752,,7,binary_crossentropy,3,37.47015,10.055097,169.69868,,sgd,softplus,COMPLETE
4,4,0.5,tanh,sigmoid,tanh,,288,0.402552,0.225885,,2,binary_crossentropy,3,175.839399,10.270365,88.316813,,adam,tanh,COMPLETE


### visualizations

In [154]:
optuna.visualization.plot_optimization_history(study)