In [None]:
# https://towardsdatascience.com/understanding-variational-autoencoders-vaes-f70510919f73

In [1]:
import pandas as pd 
import numpy as np 
import tensorflow as tf 
import keras_tuner as kt 
from keras_tuner import HyperParameters as hp 
from sklearn.model_selection import train_test_split

## Step 1 - Loading Data

In [2]:
X = np.loadtxt('Processed_Files/X_MinMaxScaler.csv', delimiter = ',')
y = np.loadtxt('Processed_Files/y_LabelEncoder.csv', delimiter = ',')

In [3]:
# Creating a train and test split
globals()['train_X'], test_X, train_y, test_y = train_test_split(X, y, test_size = 0.2, random_state = 42, shuffle = True)

## Step 2 - Defining the function that builds the VAE + its hyperparameters

In [8]:
from tensorflow.keras import backend as K

def tune_VAE(hp):

    ## DEFINING THE HYPERPARAMETERS TO BE TUNED

    # Number of hiddewn layers
    n_hidden = hp.Int('Hidden_Layers', min_value = 3, max_value = 7)
    # Drop between each layer, which will define the size of the subsequent layer
    layers_drop = []
    for i in range(n_hidden):
        layers_drop.append(hp.Float(f"drop_{i}-{i+1}", min_value = 1.2, max_value = 1.8))
    # Layer dimensions, which depend on drop between layers
    layers_dims = []
    for i in range(n_hidden):
        if i == 0:      # first layer
            layers_dims.append(int(globals()['train_X'].shape[1]/layers_drop[i]))
        else:
            layers_dims.append(int(layers_dims[i-1]/layers_drop[i]))
    # Activation function - https://keras.io/2.15/api/layers/activations/
    activation_function = hp.Choice('Activation_Function', values = ['relu', 'sigmoid', 'softmax', 'softplus', 'softsign', 'tanh', 'selu', 'elu'])
    # # Optimizer - https://keras.io/api/optimizers/
    # optimizer = hp.Choice('Optimizer', values = ['SGD', 'RMSprop', 'Adam', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Ftrl'])
    # Batch sizes
    globals()['batch_size'] = hp.Choice('Batch_Size', values = [16, 32, 48, 64])
    # batch_size = hp.Choice('Batch_Size', values = [16, 32, 48, 64])
    # Learning rates
    learning_rate = hp.Choice('Learning_Rate', values = [0.1, 0.01, 0.001, 0.0001, 0.00001])
    # # Cost function weight
    # weight = hp.Float('Reconstruction_Weight', min_value = 0.1, max_value = 0.9)


    ## DEFINE THE SAMPLING FUNCTION FOR THE LATENT SPACE SAMPLE GENERATION
    def sampling(args):
        z_mean, z_log_sigma, latent_dim = args
        epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim), mean=0., stddev=1.)
        return z_mean + K.exp(z_log_sigma) * epsilon


    ## BUILDING THE VAE MODEL

    # Initialiser function
    initializer = tf.keras.initializers.GlorotNormal(seed = 15)

    # Defining the input
    input = tf.keras.Input(shape = (globals()['train_X'].shape[1], ), name = 'Input_Layer')
    x = input

    # Defining the encoder structure
    for i in range(n_hidden-1):
        x = tf.keras.layers.Dense(layers_dims[i], activation = activation_function, kernel_initializer = initializer, name = f'Encoder_{i+1}')(x)
    # Defining the last hidden layer -> latent space
    z_mean = tf.keras.layers.Dense(layers_dims[-1], name = 'Z_mean')(x)
    z_log_sigma = tf.keras.layers.Dense(layers_dims[-1], name = 'Z_Log_Sigma')(x)
    z = tf.keras.layers.Lambda(sampling, name = 'Z_Sampling_Layer')([z_mean, z_log_sigma, layers_dims[-1]])

    # Building the decoder
    latent_inputs = tf.keras.Input(shape = (layers_dims[-1], ), name = 'Input_Z_Sampling')
    x = latent_inputs
    # Decoder layers
    for i in range(len(layers_dims)-1, 0, -1):
        x = tf.keras.layers.Dense(layers_dims[i], activation = activation_function, kernel_initializer = initializer, name = f'Decoder_{len(layers_dims)-i}')(x)
    # Defining the last hidden layer -> output
    output = tf.keras.layers.Dense(globals()['train_X'].shape[1], activation = activation_function, kernel_initializer = initializer, name = 'Decoder_Output')(x)

    # # Splitting also the encoder and decoder structures
    encoder = tf.keras.Model(input, [z_mean, z_log_sigma, z], name = 'Encoder')
    decoder = tf.keras.Model(latent_inputs, output, name = 'Decoder')

    # Defining our VAE
    output_vae = decoder(encoder(input)[2])
    vae = tf.keras.Model(input, output_vae, name = 'VAE')

    # Calculating the losses
    reconstruction = layers_dims[0] * tf.keras.losses.mse(input, output_vae)
    kl = -0.5 * K.sum(1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma), axis = 1)

    # Total loss function
    # vae_loss = reconstruction*weight + kl*(1 - weight)
    vae_loss = K.mean(reconstruction + kl)    
    vae.add_loss(vae_loss)

    # Compiling the model
    vae.compile(optimizer = tf.keras.optimizers.Adam(learning_rate = learning_rate))

    return vae

## Step 3 - Using tuner to tune the VAE

In [46]:
tuner = kt.BayesianOptimization(tune_VAE,
                    objective = 'val_loss',
                    max_trials = 10, 
                    directory = 'AutoML_Experiments',
                    project_name = 'Initial_Trial',
                    overwrite = True
                    )

# Defining a callback that stops the search if the results aren't improving
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor = 'val_loss',
    min_delta = 0.0001,
    patience = 20,
    verbose = 1, 
    mode = 'min',
    restore_best_weights = True)
# Defining a callback that saves our model
cp = tf.keras.callbacks.ModelCheckpoint(filepath = 'Best_Model/best_model.h5',
                                mode = 'min', monitor = 'val_loss', verbose = 2 , save_best_only = True)

# Initializing the tuner search - that will basically iterate over a certain number of different combinations (defined in the tuner above)
tuner.search(globals()['train_X'], globals()['train_X'], epochs = 5, batch_size = globals()['batch_size'], validation_split = 0.1, callbacks = [early_stop])

# Printing a summary with the results obtained during the tuning process
tuner.results_summary()

Trial 10 Complete [00h 00m 19s]
val_loss: 1.4730570316314697

Best val_loss So Far: 0.7556254863739014
Total elapsed time: 00h 02m 58s
INFO:tensorflow:Oracle triggered exit
Results summary
Results in AutoML_Experiments\Initial_Trial
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x0000019B1DF3FEB0>
Trial summary
Hyperparameters:
Hidden_Layers: 4
drop_0-1: 1.7894110598404538
drop_1-2: 1.276538942828444
drop_2-3: 1.5740210084928214
Activation_Function: tanh
Batch_Size: 64
Learning_Rate: 0.01
drop_3-4: 1.3579238577556503
drop_4-5: 1.502892558608145
drop_5-6: 1.387799462100131
drop_6-7: 1.694266214675848
Score: 0.7556254863739014
Trial summary
Hyperparameters:
Hidden_Layers: 7
drop_0-1: 1.752892779860785
drop_1-2: 1.762303800060789
drop_2-3: 1.3001244185052072
Activation_Function: softsign
Batch_Size: 64
Learning_Rate: 0.01
drop_3-4: 1.2499061359256132
drop_4-5: 1.2
drop_5-6: 1.2
drop_6-7: 1.2
Score: 0.7862278819084167
Trial summary
Hyperparameters:
Hidden_Layers:

In [47]:
## RETRIEVING THE BEST MODEL

# Getting the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(best_hps.values)

{'Hidden_Layers': 4, 'drop_0-1': 1.7894110598404538, 'drop_1-2': 1.276538942828444, 'drop_2-3': 1.5740210084928214, 'Activation_Function': 'tanh', 'Batch_Size': 64, 'Learning_Rate': 0.01, 'drop_3-4': 1.3579238577556503, 'drop_4-5': 1.502892558608145, 'drop_5-6': 1.387799462100131, 'drop_6-7': 1.694266214675848}


In [48]:
# Retrieving the best model
model = tuner.hypermodel.build(best_hps)
model.summary()
# Final fitting of the model
history = model.fit(globals()['train_X'], globals()['train_X'], epochs = 20, batch_size = best_hps.values['Batch_Size'], validation_split = 0.1, callbacks = [early_stop, cp]).history


Model: "VAE"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Input_Layer (InputLayer)       [(None, 48)]         0           []                               
                                                                                                  
 Encoder (Functional)           [(None, 8),          2274        ['Input_Layer[0][0]']            
                                 (None, 8),                                                       
                                 (None, 8)]                                                       
                                                                                                  
 Decoder (Functional)           (None, 48)           1448        ['Encoder[0][2]']                
                                                                                                

In [49]:
# encoder = tf.keras.Model(model.input, [model.get_layer('Z_mean'), model.get_layer('Z_Log_Sigma'), model.get_layer('tf.__operators__.add_2').output], name = 'Encoder')
encoder = tf.keras.Model(model.input, model.layers[1].output, name = 'Encoder')
encoder.summary()

Model: "Encoder"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Input_Layer (InputLayer)       [(None, 48)]         0           []                               
                                                                                                  
 Encoder_1 (Dense)              (None, 26)           1274        ['Input_Layer[0][0]']            
                                                                                                  
 Encoder_2 (Dense)              (None, 20)           540         ['Encoder_1[0][0]']              
                                                                                                  
 Encoder_3 (Dense)              (None, 12)           252         ['Encoder_2[0][0]']              
                                                                                            

## Step 4 - Proceeding with Dimensionality Reduction study and comparison

In [75]:
# Getting the encoder reduced data 
vae_reduced_train = encoder.predict(globals()['train_X'])
vae_reduced_test = encoder.predict(test_X)



In [76]:
# I only want the z_mean layer, so I would want only the last output of the encoder
vae_reduced_train = vae_reduced_train[0]
vae_reduced_test = vae_reduced_test[0]

In [70]:
# Importing all the classifiers to be used
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB

# Importing metrics
from sklearn.metrics import f1_score

In [71]:
# Loading each classifier (with their default hyperparameters)
svm = SVC()
rf = RandomForestClassifier()
gb = GradientBoostingClassifier()
nb = GaussianNB()
log_reg = LogisticRegression()

In [59]:
# Fitting and applying all classifiers to the original sized dataset
svm.fit(globals()['train_X'], train_y)
rf.fit(globals()['train_X'], train_y)
gb.fit(globals()['train_X'], train_y)
nb.fit(globals()['train_X'], train_y)
log_reg.fit(globals()['train_X'], train_y)

# Predicting the results of the test set
y_pred_svm = svm.predict(test_X)
y_pred_rf = rf.predict(test_X)
y_pred_gb = gb.predict(test_X)
y_pred_nb = nb.predict(test_X)
y_pred_log_reg = log_reg.predict(test_X)

# Calculating the metrics for each classifier
f1_svm = f1_score(test_y, y_pred_svm, average = 'weighted')
f1_rf = f1_score(test_y, y_pred_rf, average = 'weighted')
f1_gb = f1_score(test_y, y_pred_gb, average = 'weighted')
f1_nb = f1_score(test_y, y_pred_nb, average = 'weighted')
f1_log_reg = f1_score(test_y, y_pred_log_reg, average = 'weighted')

# Storing the metrics under a dataframe
metrics = pd.DataFrame(columns = ['N_vars', 'SVM', 'RF', 'GB', 'NB', 'LogReg'])
metrics.loc[0] = ['All', f1_svm, f1_rf, f1_gb, f1_nb, f1_log_reg]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [77]:
# Fitting and applying all classifiers to the reduced dataset
svm.fit(vae_reduced_train, train_y)
rf.fit(vae_reduced_train, train_y)
gb.fit(vae_reduced_train, train_y)
nb.fit(vae_reduced_train, train_y)
log_reg.fit(vae_reduced_train, train_y)

# Predicting the results of the test set
y_pred_svm_vae = svm.predict(vae_reduced_test)
y_pred_rf_vae = rf.predict(vae_reduced_test)
y_pred_gb_vae = gb.predict(vae_reduced_test)
y_pred_nb_vae = nb.predict(vae_reduced_test)
y_pred_log_reg_vae = log_reg.predict(vae_reduced_test)

# Calculating the metrics for each classifier
f1_svm_vae = f1_score(test_y, y_pred_svm_vae, average = 'weighted')
f1_rf_vae = f1_score(test_y, y_pred_rf_vae, average = 'weighted')
f1_gb_vae = f1_score(test_y, y_pred_gb_vae, average = 'weighted')
f1_nb_vae = f1_score(test_y, y_pred_nb_vae, average = 'weighted')
f1_log_reg_vae = f1_score(test_y, y_pred_log_reg_vae, average = 'weighted')

# Storing the metrics under a dataframe
metrics.loc[1] = [encoder.output[2][1].shape[0], f1_svm_vae, f1_rf_vae, f1_gb_vae, f1_nb_vae, f1_log_reg_vae]

In [78]:
metrics

Unnamed: 0,N_vars,SVM,RF,GB,NB,LogReg
0,All,0.978157,0.987697,0.990248,0.978652,0.983199
1,8,0.928305,0.94771,0.934095,0.82636,0.463874
