In [56]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import QuantileTransformer, MinMaxScaler
import tensorflow as tf
from pathlib import Path
from time import strftime

In [2]:
%load_ext tensorboard
%tensorboard --logdir=./my_logs
print('Tensorboard launched at localhost:6006')

Launching TensorBoard...

Tensorboard launched at localhost:6006


In [3]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [27]:
def get_run_dir(root='my_logs'):
    return Path(root) / strftime('run_%Y_%m_%d_%H_%M_%S')

In [28]:
def get_score(true_values, predicted_values):
    mae_ = mean_absolute_error(true_values, predicted_values)
    mse_ = mean_squared_error(true_values, predicted_values)
    r2_ = r2_score(true_values, predicted_values)

    return mae_, mse_, r2_

# Loading data

In [29]:
dataset = pd.read_csv('dataset_all_features.csv')

In [30]:
dataset = dataset.loc[dataset['COND'] != '0']

data = dataset.iloc[:, :-3]
labels = dataset.iloc[:, -2:]

# Deleting categorical features

In [31]:
data.drop(columns=['FACEATTRIBUTES-BLUR-BLURLEVEL', 'FACEATTRIBUTES-EXPOSURE-EXPOSURELEVEL', 'FACEATTRIBUTES-GENDER',
                   'FACEATTRIBUTES-GLASSES', 'FACEATTRIBUTES-HAIR-INVISIBLE', 'FACEATTRIBUTES-MAKEUP-EYEMAKEUP',
                   'FACEATTRIBUTES-MAKEUP-LIPMAKEUP', 'FACEATTRIBUTES-NOISE-NOISELEVEL',
                   'FACEATTRIBUTES-ACCESSORIES', 'FACEID'],
          inplace=True)

# Splitting data

In [32]:
data_train_full, data_test, labels_train_full, labels_test = train_test_split(data, labels, test_size=0.2, random_state=123)

data_train, data_validation, labels_train, labels_validation = train_test_split(data_train_full, labels_train_full, test_size=0.2, random_state=123)

data_train = data_train.loc[labels_train['ANS_AROUSAL'] != 1]
data_train = data_train.loc[labels_train['ANS_AROUSAL'] != 5]
data_train = data_train.loc[labels_train['ANS_AROUSAL'] != 9]
labels_train = labels_train.loc[labels_train['ANS_AROUSAL'] != 1]
labels_train = labels_train.loc[labels_train['ANS_AROUSAL'] != 5]
labels_train = labels_train.loc[labels_train['ANS_AROUSAL'] != 9]

data_train = data_train.loc[labels_train['ANS_VALENCE'] != 1]
data_train = data_train.loc[labels_train['ANS_VALENCE'] != 5]
data_train = data_train.loc[labels_train['ANS_VALENCE'] != 9]
labels_train = labels_train.loc[labels_train['ANS_VALENCE'] != 1]
labels_train = labels_train.loc[labels_train['ANS_VALENCE'] != 5]
labels_train = labels_train.loc[labels_train['ANS_VALENCE'] != 9]

# Scaling values to range 0 - 1

In [33]:
mms = MinMaxScaler()

data_train_mms = mms.fit_transform(data_train)
data_train_mms = pd.DataFrame(data_train_mms, columns=data_train.columns)
data_validation_mms = mms.fit_transform(data_validation)
data_validation_mms = pd.DataFrame(data_validation_mms, columns=data_validation.columns)
data_test_mms = mms.fit_transform(data_test)
data_test_mms = pd.DataFrame(data_test_mms, columns=data_test.columns)

# Scaling input values to Gaussian distributions

In [34]:
quantile_transformer = QuantileTransformer(output_distribution='normal', random_state=0)

data_train_qt = quantile_transformer.fit_transform(data_train_mms)
data_train_qt = pd.DataFrame(data_train_qt, columns=data_train_mms.columns)
data_validation_qt = quantile_transformer.fit_transform(data_validation_mms)
data_validation_qt = pd.DataFrame(data_validation_qt, columns=data_validation_mms.columns)
data_test_qt = quantile_transformer.fit_transform(data_test_mms)
data_test_qt = pd.DataFrame(data_test_qt, columns=data_test_mms.columns)

# Training

In [110]:
tf.random.set_seed(123)

n_inputs = data_train_qt.shape[1]

def relu_max(x):
    return tf.keras.activations.relu(x, max_value=9)

model = tf.keras.Sequential(
    [
        tf.keras.layers.Input(shape=n_inputs),
        tf.keras.layers.Dense(n_inputs, activation='relu'),
        tf.keras.layers.Dense(n_inputs, activation='relu'),
        tf.keras.layers.Dense(2, activation=relu_max)
    ]
)

model.summary()

Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_48 (Dense)            (None, 89)                8010      
                                                                 
 dense_49 (Dense)            (None, 89)                8010      
                                                                 
 dense_50 (Dense)            (None, 2)                 180       
                                                                 
Total params: 16,200
Trainable params: 16,200
Non-trainable params: 0
_________________________________________________________________


In [111]:
model.compile(loss=tf.keras.losses.MeanSquaredError(),
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              metrics=[tf.keras.metrics.MeanAbsoluteError()])

In [112]:
n_epochs = 50

run_dir = get_run_dir()
tensorboard_cb = tf.keras.callbacks.TensorBoard(run_dir, profile_batch=(0, n_epochs))

history = model.fit(data_train_qt, labels_train, epochs=n_epochs, batch_size= 50,
                    validation_data=(data_validation_qt, labels_validation), callbacks=[tensorboard_cb])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [113]:
predictions = model.predict(data_test_qt)

mae, mse, r2 = get_score(labels_test, predictions)
print(f'''\nValues for validation set:\nMAE: {mae}\nMSE: {mse}\nRMSE: {mse**.5}\nR2:  {r2}''')


Values for validation set:
MAE: 1.9643370734574488
MSE: 6.0689975445280435
RMSE: 2.4635335484884395
R2:  -0.03127502821219097


# Overfitting?

In [19]:
tf.random.set_seed(123)

norm_layer = tf.keras.layers.Normalization()

input_ = tf.keras.layers.Input(shape=n_inputs)
normalized = norm_layer(input_)
output = tf.keras.layers.Dense(2)(normalized)

model = tf.keras.Model(inputs=[input_], outputs=[output])

model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 89)]              0         
                                                                 
 normalization_2 (Normalizat  (None, 89)               179       
 ion)                                                            
                                                                 
 dense_5 (Dense)             (None, 2)                 180       
                                                                 
Total params: 359
Trainable params: 180
Non-trainable params: 179
_________________________________________________________________


In [20]:
model.compile(loss='mse',
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              metrics=['mae'])

In [21]:
norm_layer.adapt(data_train_qt)

In [22]:
n_epochs = 50

# run_dir = get_run_dir()
# tensorboard_cb = tf.keras.callbacks.TensorBoard(run_dir, profile_batch=(0, n_epochs))

model.fit(data_train_qt, labels_train, epochs=n_epochs, validation_data=(data_validation_qt, labels_validation))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f66f478cb20>

In [23]:
predictions = model.predict(data_validation_qt)

mae, mse, r2 = get_score(labels_validation, predictions)
print(f'''\nValues for validation set:\nMAE: {mae}\nMSE: {mse}\nRMSE: {mse**.5}\nR2:  {r2}''')


Values for validation set:
MAE: 1.9900330094556329
MSE: 5.703879559937679
RMSE: 2.388279623481656
R2:  0.0006626172185835166
