In [51]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [52]:
def get_score(true_values, predicted_values):
    mae_ = mean_absolute_error(true_values, predicted_values)
    mse_ = mean_squared_error(true_values, predicted_values)
    r2_ = r2_score(true_values, predicted_values)

    return mae_, mse_, r2_

# Loading data

### All values (positive, negative, neutral)

In [53]:
dataset = pd.read_csv('dataset.csv')

In [54]:
data = dataset.iloc[:, :-2]
labels = dataset.iloc[:, -2:]

data['COND'].replace(['-', '0', '+'], [-1, 0, 1], inplace=True)

In [55]:
data_train_full, data_test, labels_train_full, labels_test = train_test_split(data, labels, test_size=0.2, random_state=123)

data_train, data_validation, labels_train, labels_validation = train_test_split(data_train_full, labels_train_full, test_size=0.2, random_state=123)

data_train = data_train.loc[labels_train['ANS_AROUSAL'] != 1]
data_train = data_train.loc[labels_train['ANS_AROUSAL'] != 5]
data_train = data_train.loc[labels_train['ANS_AROUSAL'] != 9]
labels_train = labels_train.loc[labels_train['ANS_AROUSAL'] != 1]
labels_train = labels_train.loc[labels_train['ANS_AROUSAL'] != 5]
labels_train = labels_train.loc[labels_train['ANS_AROUSAL'] != 9]

data_train = data_train.loc[labels_train['ANS_VALENCE'] != 1]
data_train = data_train.loc[labels_train['ANS_VALENCE'] != 5]
data_train = data_train.loc[labels_train['ANS_VALENCE'] != 9]
labels_train = labels_train.loc[labels_train['ANS_VALENCE'] != 1]
labels_train = labels_train.loc[labels_train['ANS_VALENCE'] != 5]
labels_train = labels_train.loc[labels_train['ANS_VALENCE'] != 9]

### Values without neutral

In [56]:
dataset_nn = dataset.loc[dataset['COND'] != '0']

data_nn = dataset_nn.iloc[:, :-2]
labels_nn = dataset_nn.iloc[:, -2:]

data_nn['COND'].replace(['-', '0', '+'], [-1, 0, 1], inplace=True)

In [57]:
data_train_full_nn, data_test_nn, labels_train_full_nn, labels_test_nn = train_test_split(data_nn, labels_nn, test_size=0.2, random_state=123)

data_train_nn, data_validation_nn, labels_train_nn, labels_validation_nn = train_test_split(data_train_full_nn, labels_train_full_nn, test_size=0.2, random_state=123)

data_train_nn = data_train_nn.loc[labels_train_nn['ANS_AROUSAL'] != 1]
data_train_nn = data_train_nn.loc[labels_train_nn['ANS_AROUSAL'] != 5]
data_train_nn = data_train_nn.loc[labels_train_nn['ANS_AROUSAL'] != 9]
labels_train_nn = labels_train_nn.loc[labels_train_nn['ANS_AROUSAL'] != 1]
labels_train_nn = labels_train_nn.loc[labels_train_nn['ANS_AROUSAL'] != 5]
labels_train_nn = labels_train_nn.loc[labels_train_nn['ANS_AROUSAL'] != 9]

data_train_nn = data_train_nn.loc[labels_train_nn['ANS_VALENCE'] != 1]
data_train_nn = data_train_nn.loc[labels_train_nn['ANS_VALENCE'] != 5]
data_train_nn = data_train_nn.loc[labels_train_nn['ANS_VALENCE'] != 9]
labels_train_nn = labels_train_nn.loc[labels_train_nn['ANS_VALENCE'] != 1]
labels_train_nn = labels_train_nn.loc[labels_train_nn['ANS_VALENCE'] != 5]
labels_train_nn = labels_train_nn.loc[labels_train_nn['ANS_VALENCE'] != 9]

# ANS_AROUSAL FaceAPI only

In [58]:
data_train.iloc[:, :8].head()

Unnamed: 0,ANGER,CONTEMPT,DISGUST,FEAR,HAPPINESS,NEUTRAL,SADNESS,SURPRISE
44330,0.0,0.0,0.0,0.0,0.0,0.982,0.017,0.0
62965,0.0,0.0,0.0,0.0,0.0,0.998,0.001,0.0
28189,0.0,0.0,0.0,0.0,0.0,0.998,0.0,0.002
36095,0.0,0.0,0.0,0.0,0.0,0.929,0.071,0.0
31155,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [59]:
rfr = RandomForestRegressor(random_state=0)

rfr.fit(data_train.iloc[:, :8], labels_train.iloc[:, 1])
print('Training finished')

Training finished


In [60]:
predictions = rfr.predict(data_validation.iloc[:, :8])

mae, mse, r2 = get_score(labels_validation.iloc[:, 1], predictions)
print(f'''Values for validation set:\nMAE: {mae}\nMSE: {mse}\nRMSE: {mse**.5}\nR2:  {r2}''')

Values for validation set:
MAE: 1.7163781625831083
MSE: 4.652008127780545
RMSE: 2.1568514385048743
R2:  -0.0818242475048283


# ANS_AROUSAL FaceAPI only no neutral values

In [61]:
rfr_nn = RandomForestRegressor(random_state=0)

rfr_nn.fit(data_train_nn.iloc[:, :8], labels_train_nn.iloc[:, 1])
print('Training finished')

Training finished


In [62]:
predictions_nn = rfr_nn.predict(data_validation_nn.iloc[:, :8])

mae_nn, mse_nn, r2_nn = get_score(labels_validation_nn.iloc[:, 1], predictions_nn)
print(f'''Values for validation set:\nMAE: {mae_nn}\nMSE: {mse_nn}\nRMSE: {mse_nn**.5}\nR2:  {r2_nn}''')

Values for validation set:
MAE: 1.6567921005346171
MSE: 4.3473772459589135
RMSE: 2.0850365095026304
R2:  -0.09282751036062264


# ANS_AROUSAL FaceAPI + Personality

In [63]:
data_train.iloc[:, :-1].head()

Unnamed: 0,ANGER,CONTEMPT,DISGUST,FEAR,HAPPINESS,NEUTRAL,SADNESS,SURPRISE,OPENNESS,CONSCIENTIOUSNESS,NEUROTICISM,AGREEABLENESS,EXTRAVERSION
44330,0.0,0.0,0.0,0.0,0.0,0.982,0.017,0.0,3.0,6.0,4.0,7.0,7.0
62965,0.0,0.0,0.0,0.0,0.0,0.998,0.001,0.0,7.0,5.0,3.0,6.0,5.0
28189,0.0,0.0,0.0,0.0,0.0,0.998,0.0,0.002,3.0,10.0,5.0,7.0,7.0
36095,0.0,0.0,0.0,0.0,0.0,0.929,0.071,0.0,7.0,10.0,5.0,4.0,7.0
31155,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,6.0,4.0,7.0,8.0,5.0


In [64]:
rfr_pers = RandomForestRegressor(random_state=0)

rfr_pers.fit(data_train.iloc[:, :-1], labels_train.iloc[:, 1])
print('Training finished')

Training finished


In [65]:
predictions_pers = rfr_pers.predict(data_validation.iloc[:, :-1])

mae_pers, mse_pers, r2_pers = get_score(labels_validation.iloc[:, :-1], predictions_pers)
print(f'''Values for validation set:\nMAE: {mae_pers}\nMSE: {mse_pers}\nRMSE: {mse_pers**.5}\nR2:  {r2_pers}''')

Values for validation set:
MAE: 2.449599635355488
MSE: 8.553965762014426
RMSE: 2.9247163558222917
R2:  -0.26400718318493266


# Both ANS FaceAPI only

In [66]:
rfr_both = RandomForestRegressor(random_state=0)

rfr_both.fit(data_train.iloc[:, :8], labels_train)
print('Training finished')

Training finished


In [67]:
predictions_both = rfr_both.predict(data_validation.iloc[:, :8])

mae_both, mse_both, r2_both = get_score(labels_validation, predictions_both)
print(f'''Values for validation set:\nMAE: {mae_both}\nMSE: {mse_both}\nRMSE: {mse_both**.5}\nR2:  {r2_both}''')

Values for validation set:
MAE: 1.9940882821772499
MSE: 5.948592111735042
RMSE: 2.4389735774983383
R2:  -0.07581322668626733


# Both ANS FaceAPI only no neutral values

In [68]:
rfr_nn_both = RandomForestRegressor(random_state=0)

rfr_nn_both.fit(data_train_nn.iloc[:, :8], labels_train_nn)
print('Training finished')

Training finished


In [69]:
predictions_nn_both = rfr_nn_both.predict(data_validation_nn.iloc[:, :8])

mae_nn_both, mse_nn_both, r2_nn_both = get_score(labels_validation_nn, predictions_nn_both)
print(f'''Values for validation set:\nMAE: {mae_nn_both}\nMSE: {mse_nn_both}\nRMSE: {mse_nn_both**.5}\nR2:  {r2_nn_both}''')

Values for validation set:
MAE: 2.0572041108162797
MSE: 6.236241848074122
RMSE: 2.4972468536518613
R2:  -0.09234677571275218


# Both ANS FaceAPI + Personality

In [70]:
rfr_pers_both = RandomForestRegressor(random_state=0)

rfr_pers_both.fit(data_train.iloc[:, :-1], labels_train)
print('Training finished')

Training finished


In [71]:
predictions_pers_both = rfr_pers_both.predict(data_validation.iloc[:, :-1])

mae_pers_both, mse_pers_both, r2_pers_both = get_score(labels_validation, predictions_pers_both)
print(f'''Values for validation set:\nMAE: {mae_pers_both}\nMSE: {mse_pers_both}\nRMSE: {mse_pers_both**.5}\nR2:  {r2_pers_both}''')

Values for validation set:
MAE: 1.9804207840623045
MSE: 6.1589296623328575
RMSE: 2.481719094162927
R2:  -0.11008669070903598


# Both ANS FaceAPI + Personality no neutral

In [72]:
rfr_pers_nn_both = RandomForestRegressor(random_state=0)

rfr_pers_nn_both.fit(data_train_nn.iloc[:, :-1], labels_train_nn)
print('Training finished')

Training finished


In [73]:
predictions_pers_nn_both = rfr_pers_nn_both.predict(data_validation_nn.iloc[:, :-1])

mae_pers_nn_both, mse_pers_nn_both, r2_pers_nn_both = get_score(labels_validation_nn, predictions_nn_both)
print(f'''Values for validation set:\nMAE: {mae_pers_nn_both}\nMSE: {mse_pers_nn_both}\nRMSE: {mse_pers_nn_both**.5}\nR2:  {r2_pers_nn_both}''')

Values for validation set:
MAE: 2.0572041108162797
MSE: 6.236241848074122
RMSE: 2.4972468536518613
R2:  -0.09234677571275218
