# In [None]:
# Defining random seeds to enable reproducibility
from numpy.random import seed
seed(1)

import tensorflow as tf
tf.random.set_seed(1)

import random
random.seed(1)
 
import pickle
import numpy as np
import keras_tuner
from tensorflow import keras
from tensorflow.keras import models
from tensorflow.keras import applications
from tensorflow.keras import optimizers
from tensorflow.keras.layers import InputLayer, Flatten, Dense
from tensorflow.python.client import device_lib
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split

 
# Reporting the devices visible to TensorFlow, then how many of them are GPUs,
# so the hardware used by the run is recorded in the output
local_devices = device_lib.list_local_devices()
print(local_devices)

gpu_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))


def load_dataset(samples_path="Insert the x_balanced.pkl file path here",
                 labels_path="Insert the y_balanced.pkl file path here",
                 step=5):
    """
    Loads the datasets encoded in .pkl files and returns its decoded form.

    Parameters
    ----------
    samples_path : str, optional
        Path of the pickled samples file (x_balanced.pkl). The default is the
        original placeholder text and must be replaced by a real path.
    labels_path : str, optional
        Path of the pickled labels file (y_balanced.pkl). The default is the
        original placeholder text and must be replaced by a real path.
    step : int, optional
        Stride used to subsample the dataset: only every `step`-th entry of
        the samples and of the labels is kept. Defaults to 5.

    Returns
    -------
    list
        The decoded samples after subsampling (same container type as the
        pickled object, sliced with the given stride).
    ndarray
        A n-dimensional array with the labels of the kept samples.

    Raises
    ------
    ValueError
        If `step` is smaller than 1, or if the number of samples and labels
        stored in the two files differ.
    """

    # A zero or negative stride would either fail or silently reverse the data
    if step < 1:
        raise ValueError("step must be a positive integer, got {0!r}".format(step))

    print("Loading Sober-Drunk Face Dataset, from Patras University")

    # Defining an empty list for storing the decoded dataset
    loaded_datasets = []

    # Iterating over the sample and label files, in that order
    for set_path in (samples_path, labels_path):
        # Opening the .pkl file in binary read mode
        with open(set_path, 'rb') as file:
            # NOTE: pickle.load can execute arbitrary code stored in the file,
            # so only dataset files from a trusted source should be loaded here
            loaded_datasets.append(pickle.load(file))

    # Unpacking the dataset list into samples and labels
    x, y = loaded_datasets

    # Converting the label list to the n-dimensional array format
    y_arr = np.array(y)

    # Samples and labels are sliced with the same stride below, so they only
    # stay aligned when both sets have the same length
    if len(x) != len(y_arr):
        raise ValueError(
            "samples and labels differ in length: {0} != {1}".format(len(x), len(y_arr))
        )

    # Printing the dataset length for sanity check
    print("\nSamples total: {0}".format(len(x)))

    # Keeping one entry out of every `step` (the original pipeline uses a
    # stride of 5 to thin out the frame sequences)
    x = x[::step]
    y_arr = y_arr[::step]

    # Printing the dataset length after slicing for sanity check
    print("\nSamples total after slicing: {0}".format(len(x)))

    # Returning the samples set and its respective labels
    return x, y_arr


def min_max_norm(dataset):
    """
    Normalizes the keyframes according to the minimum-maximum norm,
    such that pixel values ranges from 0 to 1.

    The minimum and maximum are computed over axes 1 and 2 of every entry,
    so each keyframe (and each slice of any trailing axis) is scaled
    independently. Entries whose minimum equals their maximum carry no
    contrast and are mapped to 0 instead of producing NaN from a 0/0 division.

    Parameters
    ----------
    dataset : list
        A list of n-dimensional arrays representing the subjects keyframes.
        The values are cast to int16 before normalization.

    Returns
    -------
    ndarray
        A float64 n-dimensional array representing keyframes with pixel values
        ranging from 0 to 1.
    """

    # Converting the dataset type from list to n-dimensional array
    dataset = np.asarray(dataset, dtype="int16")

    # Finding the keyframes minimum and maximum values
    x_min = dataset.min(axis=(1, 2), keepdims=True)
    x_max = dataset.max(axis=(1, 2), keepdims=True)

    # Computing the value range in float64 so that a span wider than the
    # int16 limit cannot wrap around
    value_range = np.subtract(x_max, x_min, dtype=np.float64)

    # A constant keyframe has a zero range; dividing by 1 instead keeps its
    # (all zero) numerator unchanged and avoids NaN values
    safe_range = np.where(value_range == 0, 1.0, value_range)

    # Applying the minimum-maximum norm to each keyframe
    norm_dataset = np.subtract(dataset, x_min, dtype=np.float64) / safe_range

    # Printing the minimum and maximum values from a given sample for sanity check
    print("\nMinMax normalization")
    print("dataset shape: ", norm_dataset.shape)
    print("min: ", norm_dataset[0].min())
    print("max: ", norm_dataset[0].max())

    # Returning the normalized dataset
    return norm_dataset


class CVTuner(keras_tuner.engine.tuner.Tuner):
    """
    Keras tuner subclass that scores every trial with a stratified 5-fold
    cross-validation instead of a single train/validation run.

    Methods
    -------
    run_trial(trial, x, y)
        Trains one freshly built model per fold and reports the mean
        validation accuracy of the folds to the oracle.
    """

    def run_trial(self, trial, x, y):
        """
        Runs one hyperparameter trial using stratified 5-fold cross-validation.

        For each fold a new model is built from the trial hyperparameters,
        fitted on the training part with early stopping on the validation loss
        and evaluated on the held-out part. The mean validation accuracy over
        the folds is reported to the oracle as 'avg_cv_accuracy'.

        Parameters
        ----------
        trial : Any
            A trial object from keras_tuner. It carries the hyperparameter
            values of the current search execution; the 'epochs' and
            'batch_size' hyperparameters are registered on it here.
        x : ndarray
            A n-dimensional array representing the training samples.
        y : ndarray
            A n-dimensional array representing the training samples labels.

        Returns
        -------
        None
            The score is not returned; it is handed to the oracle through
            `self.oracle.update_trial` under the 'avg_cv_accuracy' key.

        Notes
        -----
        The Keras tuner approach for comparing trials is not the same method
        used to evaluate the model, hence, after obtaining the best
        hyperparameter settings the model was retrained in order to verify its
        training behavior and to assess its classification performance. See
        this GitHub discussion for details on the Keras tuner score
        calculation: https://github.com/keras-team/keras-tuner/discussions/581.
        """

        hp = trial.hyperparameters

        # Materializing the (train, validation) index pairs of the five
        # stratified, unshuffled folds
        splitter = StratifiedKFold(n_splits=5, shuffle=False, random_state=None)
        cv_splits = list(splitter.split(x, y))

        # Training-length and batch-size search spaces
        epochs = hp.Choice('epochs', values=[10, 20, 40, 60, 80, 100, 200, 300, 400, 500])
        batch_size = hp.Int('batch_size', min_value=20, max_value=480, step=20)

        # Early stopping on the validation loss, restoring the best weights seen
        early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',
                                                       min_delta=0,
                                                       patience=5,
                                                       restore_best_weights=True,
                                                       verbose=1)

        # Validation accuracy obtained on each fold
        fold_accuracies = []

        for fold_number, (train_idx, valid_idx) in enumerate(cv_splits):
            print('\nFold ', fold_number)

            # Every fold starts from a newly built (untrained) model
            model = self.hypermodel.build(hp)

            # Selecting the samples and labels belonging to this fold
            x_train = x[train_idx]
            y_train = y[train_idx]
            x_valid = x[valid_idx]
            y_valid = y[valid_idx]

            print("\nTraining with {0} samples and validating with {1} samples\n".format(len(x_train), len(x_valid)))

            # Fitting on the training part while monitoring the held-out part
            model.fit(x_train, y_train,
                      validation_data=(x_valid, y_valid),
                      shuffle=False,
                      batch_size=batch_size,
                      epochs=epochs,
                      callbacks=[early_stopping])

            # Only the accuracy of the held-out part is kept as the fold score
            _, fold_accuracy = model.evaluate(x_valid, y_valid, verbose=1)
            fold_accuracies.append(fold_accuracy)

        # Reporting the mean cross-validation accuracy as the trial objective
        mean_accuracy = np.mean(fold_accuracies)
        self.oracle.update_trial(trial.trial_id, {'avg_cv_accuracy': mean_accuracy})


def build_model(hp):
    """
    Creates and compiles the single-layer binary classifier used in the search.

    Parameters
    ----------
    hp : Any
        Hyperparameter container; the 'learning_rate' choice is registered
        on it and used for the Adam optimizer.

    Global variables
    ----------------
    features_shape : ndarray
        Shape of the flattened feature maps, read from the module scope and
        used as the model input shape.

    Returns
    -------
    Sequential
        A compiled model made of an input layer followed by one sigmoid unit.
    """

    # The model receives the flattened feature array and classifies it as
    # sober (0) or drunk (1) with a single sigmoid unit
    classifier = models.Sequential([
        InputLayer(input_shape=features_shape, name="input"),
        Dense(1, activation='sigmoid', name="output"),
    ])

    # Adam optimizer with a learning rate picked from the search space
    optimizer = optimizers.Adam(
        learning_rate=hp.Choice("learning_rate", values=[1e-3, 1e-4, 1e-5, 1e-6])
    )

    # The sigmoid output is a probability, hence from_logits=False
    classifier.compile(
        loss=keras.losses.BinaryCrossentropy(from_logits=False),
        optimizer=optimizer,
        metrics=['accuracy'],
    )

    # Printing the model summary
    classifier.summary()

    return classifier


def feature_extraction(x_train):
    """
    Extracts flattened VGG16 feature maps from the training samples.

    A VGG16 network with ImageNet weights and without its dense layers is
    frozen and used as a fixed feature extractor. Its flattened output is
    what the binary classification model is later trained on.

    Parameters
    ----------
    x_train : ndarray
        A n-dimensional array representing the training samples. The network
        is built for inputs of shape (128, 160, 3).

    Returns
    -------
    ndarray
        A n-dimensional array representing the flattened training feature maps.
    ndarray
        The shape of a single flattened feature vector.
    """

    # Pre-trained VGG16 without the dense layers, for 128x160 three-channel inputs
    backbone = applications.VGG16(weights='imagenet', include_top=False, input_shape=(128,160,3))

    # Flattening the feature maps produced by the last layer of the network
    flat_maps = Flatten()(backbone.layers[-1].output)

    # Functional model mapping the VGG input to the flattened feature maps
    extractor = models.Model(backbone.input, flat_maps)

    # The extractor is used for inference only, so it is frozen as a whole
    extractor.trainable = False

    print("\n")

    # Freezing each layer explicitly and printing its flag for sanity check
    for layer in extractor.layers:
        layer.trainable = False
        print("{}: {}".format(layer.name, layer.trainable))

    # Computing the features of the training samples
    train_features = extractor.predict(x_train, verbose=1)

    print('\nTraining Bottleneck Features: {0}'.format(train_features.shape))
    print('\nModel output shape: {0}'.format(extractor.output_shape))

    # Shape of one flattened feature vector, used by the build_model function
    single_feature_shape = train_features[0].shape

    return train_features, single_feature_shape


# Loading the Sober-Drunk Dataset samples and labels
x, y = load_dataset()

# Stratified random split: 70% of the samples for training, 30% for testing
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=1, stratify=y
)

# Scaling the pixel values of both subsets to the [0, 1] interval
x_train = min_max_norm(x_train)
x_test = min_max_norm(x_test)

# Reshaping the datasets to the channel-last tensor format: the first three
# axes are kept and the last axis holds the 3 channels
x_train = x_train.reshape(x_train.shape[:3] + (3,))
x_test = x_test.reshape(x_test.shape[:3] + (3,))

# Extracting the features of the training subset; features_shape is read by
# build_model as a module-level variable
train_features, features_shape = feature_extraction(x_train)

# Random search oracle maximizing the mean cross-validation accuracy
cv_objective = keras_tuner.Objective("avg_cv_accuracy", "max")
random_search_oracle = keras_tuner.oracles.RandomSearch(
    objective=cv_objective,
    max_trials=64,
    seed=1,
)

# Tuner that evaluates each trial with stratified k-fold cross-validation
tuner = CVTuner(
    hypermodel=build_model,
    oracle=random_search_oracle,
    directory=r'random search',
    project_name=r'base model',
)

# Running the random search on the extracted training features
tuner.search(train_features, y_train)

# Printing all results out of the 64 trials
tuner.results_summary(64)