Identical notebook to 'autoencoders.ipynb', but in this one I use the MinMaxScaler instead of StandardScaler for the autoencoder data

In [7]:
import pandas as pd 
import numpy as np
import tensorflow as tf
import keras_tuner as kt
from keras_tuner import HyperParameters as hp
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

## Step 1 - Loading data

In [8]:
X = np.loadtxt('Processed_Files/X_MinMaxScaler.csv', delimiter = ',')
y = np.loadtxt('Processed_Files/y_LabelEncoder.csv', delimiter = ',')

In [9]:
# Creating a train and test split
globals()['train_X'], test_X, train_y, test_y = train_test_split(X, y, test_size = 0.2, random_state = 42, shuffle = True)

## Step 2 - Defining the function that builds the autoencoder + its hyperparameters

In [10]:
def tune_autoencoder(hp):

    ## DEFINING THE HYPERPARAMETERS TO BE TUNED
    # # Latent space size, i.e., number of reduced dimensions
    # latent_space = hp.Int('Latent_Dimension', min_value = 2, max_value = X.shhape[1])
    # Number of hiddewn layers
    n_hidden = hp.Int('Hidden_Layers', min_value = 3, max_value = 7)
    # Drop between each layer, which will define the size of the subsequent layer
    layers_drop = []
    for i in range(n_hidden):
        layers_drop.append(hp.Float(f"drop_{i}-{i+1}", min_value = 1.2, max_value = 1.8))
    # Layer dimensions, which depend on drop between layers
    layers_dims = []
    for i in range(n_hidden):
        if i == 0:      # first layer
            layers_dims.append(int(globals()['train_X'].shape[1]/layers_drop[i]))
        else:
            layers_dims.append(int(layers_dims[i-1]/layers_drop[i]))
    # Activation function - https://keras.io/2.15/api/layers/activations/
    activation_function = hp.Choice('Activation_Function', values = ['relu', 'sigmoid', 'softmax', 'softplus', 'softsign', 'tanh', 'selu', 'elu'])
    # # Optimizer - https://keras.io/api/optimizers/
    # optimizer = hp.Choice('Optimizer', values = ['SGD', 'RMSprop', 'Adam', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Ftrl'])
    # Batch sizes
    globals()['batch_size'] = hp.Choice('Batch_Size', values = [16, 32, 48, 64])
    # batch_size = hp.Choice('Batch_Size', values = [16, 32, 48, 64])
    # Learning rates
    learning_rate = hp.Choice('Learning_Rate', values = [0.1, 0.01, 0.001, 0.0001, 0.00001])


    ## BUILDING THE AUTOENCODER

    # Initialiser function
    initializer = tf.keras.initializers.GlorotNormal(seed = 15)

    # Defining the input
    input = tf.keras.Input(shape = (globals()['train_X'].shape[1], ), name = 'Input_Layer')
    x = input

    # Defining the encoder structure
    for i in range(n_hidden-1):
        x = tf.keras.layers.Dense(layers_dims[i], activation = activation_function, kernel_initializer = initializer, name = f'Encoder_{i+1}')(x)
    # Defining the last hidden layer -> latent space
    x = tf.keras.layers.Dense(layers_dims[-1], activation = activation_function, kernel_initializer = initializer, name = 'Encoder_Output')(x)

    # Defining that the encoder output will be equal to the decoder input, that is equal to x for now
    encoder_output = decoder_input = x

    # Defining the decoder structure
    for i in range(len(layers_dims)-1, 0, -1):
        x = tf.keras.layers.Dense(layers_dims[i], activation = activation_function, kernel_initializer = initializer, name = f'Decoder_{len(layers_dims)-i}')(x)
    # Defining the last hidden layer -> output
    output = tf.keras.layers.Dense(globals()['train_X'].shape[1], activation = activation_function, kernel_initializer = initializer, name = 'Decoder_Output')(x)

    # Splitting also the encoder and decoder structures
    encoder = tf.keras.Model(input, encoder_output, name = 'Encoder')
    decoder = tf.keras.Model(decoder_input, output, name = 'Decoder')

    # Defining our autoencoder
    autoencoder = tf.keras.Model(input, decoder(encoder(input)), name = 'Autoencoder')

    # Compiling the model
    autoencoder.compile(optimizer = tf.keras.optimizers.Adam(learning_rate = learning_rate), loss = 'mse', metrics = [tf.keras.metrics.RootMeanSquaredError()])

    return autoencoder

## Step 3 - Using tuner to tune the autoencoder

In [11]:
tuner = kt.BayesianOptimization(tune_autoencoder,
                    objective = 'val_loss',
                    max_trials = 250 , 
                    directory = 'AutoML_Experiments',
                    project_name = 'Initial_Trial',
                    overwrite = True
                    )

# Defining a callback that stops the search if the results aren't improving
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor = 'val_loss',
    min_delta = 0.0001,
    patience = 20,
    verbose = 1, 
    mode = 'min',
    restore_best_weights = True)
# Defining a callback that saves our model
cp = tf.keras.callbacks.ModelCheckpoint(filepath = 'Best_Model/best_model.h5',
                                mode = 'min', monitor = 'val_loss', verbose = 2 , save_best_only = True)

# Initializing the tuner search - that will basically iterate over a certain number of different combinations (defined in the tuner above)
tuner.search(globals()['train_X'], globals()['train_X'], epochs = 5, batch_size = globals()['batch_size'], validation_split = 0.1, callbacks = [early_stop])

# Printing a summary with the results obtained during the tuning process
tuner.results_summary()

Trial 250 Complete [00h 00m 17s]
val_loss: 0.00883884821087122

Best val_loss So Far: 0.0013835941208526492
Total elapsed time: 01h 18m 17s
INFO:tensorflow:Oracle triggered exit
Results summary
Results in AutoML_Experiments\Initial_Trial
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x0000023DCBC6D150>
Trial summary
Hyperparameters:
Hidden_Layers: 3
drop_0-1: 1.3239958845750421
drop_1-2: 1.2
drop_2-3: 1.4523714176020224
Activation_Function: elu
Batch_Size: 32
Learning_Rate: 0.01
drop_3-4: 1.4438585360092966
drop_4-5: 1.2783384428661924
drop_5-6: 1.2
drop_6-7: 1.4400459052613928
Score: 0.0013835941208526492
Trial summary
Hyperparameters:
Hidden_Layers: 3
drop_0-1: 1.3013252027816042
drop_1-2: 1.403428164435384
drop_2-3: 1.6515322097239973
Activation_Function: elu
Batch_Size: 48
Learning_Rate: 0.01
drop_3-4: 1.5595235898578412
drop_4-5: 1.5515910489498532
drop_5-6: 1.434441899698823
drop_6-7: 1.4939991185858474
Score: 0.0016921275528147817
Trial summary
Hyperpa

In [21]:
## RETRIEVING THE BEST MODEL

# Getting the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(best_hps.values)


{'Hidden_Layers': 3, 'drop_0-1': 1.3239958845750421, 'drop_1-2': 1.2, 'drop_2-3': 1.4523714176020224, 'Activation_Function': 'elu', 'Batch_Size': 32, 'Learning_Rate': 0.01, 'drop_3-4': 1.4438585360092966, 'drop_4-5': 1.2783384428661924, 'drop_5-6': 1.2, 'drop_6-7': 1.4400459052613928}


In [22]:
# Retrieving the best model
model = tuner.hypermodel.build(best_hps)
model.summary()
# Final fitting of the model
history = model.fit(globals()['train_X'], globals()['train_X'], epochs = 100, batch_size = best_hps.values['Batch_Size'], validation_split = 0.1, callbacks = [early_stop, cp]).history

Model: "Autoencoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Input_Layer (InputLayer)    [(None, 48)]              0         
                                                                 
 Encoder (Functional)        (None, 20)                3494      
                                                                 
 Decoder (Functional)        (None, 48)                2538      
                                                                 
Total params: 6,032
Trainable params: 6,032
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 1: val_loss did not improve from 0.00079
Epoch 2/100
Epoch 2: val_loss did not improve from 0.00079
Epoch 3/100
Epoch 3: val_loss did not improve from 0.00079
Epoch 4/100
Epoch 4: val_loss did not improve from 0.00079
Epoch 5/100
Epoch 5: val_loss did not improve from 0.00079
Epoch 6/100
Epoch 6: v

In [14]:
# Retrieving the encoder model - what actually matters for Dimensionality Reduction
encoder = tf.keras.Model(model.input, model.layers[-2].output)
encoder.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Input_Layer (InputLayer)    [(None, 48)]              0         
                                                                 
 Encoder_1 (Dense)           (None, 36)                1764      
                                                                 
 Encoder_2 (Dense)           (None, 30)                1110      
                                                                 
 Encoder_Output (Dense)      (None, 20)                620       
                                                                 
Total params: 3,494
Trainable params: 3,494
Non-trainable params: 0
_________________________________________________________________


## Step 4 - Proceeding with Dimensionality Reduction study and comparison

In [23]:
# Getting the encoder reduced data 
encoder_reduced_train = encoder.predict(globals()['train_X'])
encoder_reduced_test = encoder.predict(test_X)

  1/300 [..............................] - ETA: 4s



In [24]:
# Importing all the classifiers to be used
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB

# Importing metrics
from sklearn.metrics import f1_score

In [25]:
# Loading each classifier (with their default hyperparameters)
svm = SVC()
rf = RandomForestClassifier()
gb = GradientBoostingClassifier()
nb = GaussianNB()
log_reg = LogisticRegression()

In [26]:
# Fitting and applying all classifiers to the original sized dataset
svm.fit(globals()['train_X'], train_y)
rf.fit(globals()['train_X'], train_y)
gb.fit(globals()['train_X'], train_y)
nb.fit(globals()['train_X'], train_y)
log_reg.fit(globals()['train_X'], train_y)

# Predicting the results of the test set
y_pred_svm = svm.predict(test_X)
y_pred_rf = rf.predict(test_X)
y_pred_gb = gb.predict(test_X)
y_pred_nb = nb.predict(test_X)
y_pred_log_reg = log_reg.predict(test_X)

# Calculating the metrics for each classifier
f1_svm = f1_score(test_y, y_pred_svm, average = 'weighted')
f1_rf = f1_score(test_y, y_pred_rf, average = 'weighted')
f1_gb = f1_score(test_y, y_pred_gb, average = 'weighted')
f1_nb = f1_score(test_y, y_pred_nb, average = 'weighted')
f1_log_reg = f1_score(test_y, y_pred_log_reg, average = 'weighted')

# Storing the metrics under a dataframe
metrics = pd.DataFrame(columns = ['N_vars', 'SVM', 'RF', 'GB', 'NB', 'LogReg'])
metrics.loc[0] = ['All', f1_svm, f1_rf, f1_gb, f1_nb, f1_log_reg]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [27]:
# Fitting and applying all classifiers to the reduced dataset
svm.fit(encoder_reduced_train, train_y)
rf.fit(encoder_reduced_train, train_y)
gb.fit(encoder_reduced_train, train_y)
nb.fit(encoder_reduced_train, train_y)
log_reg.fit(encoder_reduced_train, train_y)

# Predicting the results of the test set
y_pred_svm_encoder = svm.predict(encoder_reduced_test)
y_pred_rf_encoder = rf.predict(encoder_reduced_test)
y_pred_gb_encoder = gb.predict(encoder_reduced_test)
y_pred_nb_encoder = nb.predict(encoder_reduced_test)
y_pred_log_reg_encoder = log_reg.predict(encoder_reduced_test)

# Calculating the metrics for each classifier
f1_svm_encoder = f1_score(test_y, y_pred_svm_encoder, average = 'weighted')
f1_rf_encoder = f1_score(test_y, y_pred_rf_encoder, average = 'weighted')
f1_gb_encoder = f1_score(test_y, y_pred_gb_encoder, average = 'weighted')
f1_nb_encoder = f1_score(test_y, y_pred_nb_encoder, average = 'weighted')
f1_log_reg_encoder = f1_score(test_y, y_pred_log_reg_encoder, average = 'weighted')

# Storing the metrics under a dataframe
metrics.loc[1] = [encoder.output.shape[1], f1_svm_encoder, f1_rf_encoder, f1_gb_encoder, f1_nb_encoder, f1_log_reg_encoder]

In [28]:
metrics

Unnamed: 0,N_vars,SVM,RF,GB,NB,LogReg
0,All,0.978157,0.987704,0.990669,0.978652,0.983199
1,20.0,0.978912,0.98582,0.983163,0.972681,0.977353
