# Random forest 8 emotions

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

Load data and scale it
Remove highly correlated features
Create train and test splits

In [2]:
# data loading
dataset = pd.read_csv('../dataset/labeled_8_combined_dataset.csv', index_col=0)

data = dataset.iloc[:, :-4]
labels = dataset.iloc[:, -1]
unique_labels = np.unique(labels)

# scaling the data
data_scaled = StandardScaler().fit_transform(data)
data_scaled = pd.DataFrame(data_scaled, columns=data.columns)

# removing correlated features
corr = data.corr().abs()
upper = corr.where(np.triu(np.ones(corr.shape), k=1).astype(bool))
to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]
data.drop(to_drop, axis=1, inplace=True)

# creating train and test splits
data_train, data_test, labels_train, labels_test = train_test_split(data_scaled, labels, test_size=0.1, random_state=0)

# ECG

Extract only ecg related features

In [3]:
ecg_columns = [col for col in data.columns if 'ECG' in col or 'HRV' in col]

ecg_data_train = data_train[ecg_columns]
ecg_data_test = data_test[ecg_columns]

Define the model and train it

In [4]:
ecg_rfc = RandomForestClassifier(
    n_estimators=1000,
    criterion='gini',
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features='sqrt',
    random_state=0
)

In [5]:
ecg_rfc.fit(ecg_data_train, labels_train)
ecg_predictions = ecg_rfc.predict(ecg_data_test)

In [6]:
ecg_cr = classification_report(labels_test, ecg_predictions, target_names=unique_labels, zero_division=0, digits=4, output_dict=True)
print(classification_report(labels_test, ecg_predictions, target_names=unique_labels, zero_division=0, digits=4))

              precision    recall  f1-score   support

  Ekscytacja     0.1935    0.0706    0.1034        85
   Neutralna     0.2565    0.4949    0.3379       198
  Odprezenie     0.2955    0.1111    0.1615       117
      Radosc     0.1676    0.1895    0.1779       153
      Smutek     0.0556    0.0130    0.0211        77
      Wstret     0.2462    0.3571    0.2915       182
  Zdziwienie     0.1786    0.0490    0.0769       102
       Zlosc     0.1966    0.1608    0.1769       143

    accuracy                         0.2271      1057
   macro avg     0.1988    0.1808    0.1684      1057
weighted avg     0.2109    0.2271    0.1983      1057



# EDA

Extract only ecg related features

In [7]:
eda_columns = [col for col in data.columns if 'EDA' in col or 'SCR' in col]

eda_data_train = data_train[eda_columns]
eda_data_test = data_test[eda_columns]

Define the model and train it

In [8]:
eda_rfc = RandomForestClassifier(
    n_estimators=1000,
    criterion='gini',
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features='sqrt',
    random_state=0
)

In [9]:
eda_rfc.fit(eda_data_train, labels_train)
eda_predictions = eda_rfc.predict(eda_data_test)

In [10]:
eda_cr = classification_report(labels_test, eda_predictions, target_names=unique_labels, zero_division=0, digits=4, output_dict=True)
print(classification_report(labels_test, eda_predictions, target_names=unique_labels, zero_division=0, digits=4))

              precision    recall  f1-score   support

  Ekscytacja     0.1731    0.1059    0.1314        85
   Neutralna     0.3333    0.4899    0.3967       198
  Odprezenie     0.1728    0.1197    0.1414       117
      Radosc     0.1223    0.1503    0.1349       153
      Smutek     0.0571    0.0260    0.0357        77
      Wstret     0.2621    0.2967    0.2784       182
  Zdziwienie     0.0833    0.0588    0.0690       102
       Zlosc     0.2348    0.2168    0.2255       143

    accuracy                         0.2233      1057
   macro avg     0.1799    0.1830    0.1766      1057
weighted avg     0.2023    0.2233    0.2077      1057



# Combined

In [11]:
rfc = RandomForestClassifier(
    n_estimators=1000,
    criterion='gini',
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features='sqrt',
    random_state=0
)

In [12]:
rfc.fit(data_train, labels_train)
predictions = rfc.predict(data_test)

In [13]:
cr = classification_report(labels_test, predictions, target_names=unique_labels, zero_division=0, digits=4, output_dict=True)
print(classification_report(labels_test, predictions, target_names=unique_labels, zero_division=0, digits=4))

              precision    recall  f1-score   support

  Ekscytacja     0.1667    0.0941    0.1203        85
   Neutralna     0.3000    0.4848    0.3707       198
  Odprezenie     0.2714    0.1624    0.2032       117
      Radosc     0.1390    0.1699    0.1529       153
      Smutek     0.0769    0.0130    0.0222        77
      Wstret     0.2546    0.3022    0.2764       182
  Zdziwienie     0.1132    0.0588    0.0774       102
       Zlosc     0.2200    0.2308    0.2253       143

    accuracy                         0.2308      1057
   macro avg     0.1927    0.1895    0.1810      1057
weighted avg     0.2099    0.2308    0.2109      1057



# Summary

In [14]:
print(f'''
\tAccuracy:
ECG:\t{ecg_cr['accuracy']}
EDA:\t{eda_cr['accuracy']}
Both:\t{cr['accuracy']}
\tAverage F1:
ECG:\t{ecg_cr['macro avg']['f1-score']}
EDA:\t{eda_cr['macro avg']['f1-score']}
Both:\t{cr['macro avg']['f1-score']}
\tWeighted F1:
ECG:\t{ecg_cr['weighted avg']['f1-score']}
EDA:\t{eda_cr['weighted avg']['f1-score']}
Both:\t{cr['weighted avg']['f1-score']}
''')


	Accuracy:
ECG:	0.22705771050141912
EDA:	0.22327341532639547
Both:	0.23084200567644275
	Average F1:
ECG:	0.16839533875712412
EDA:	0.17661390545823358
Both:	0.18104828930766337
	Weighted F1:
ECG:	0.19833041716314917
EDA:	0.2077474529395782
Both:	0.21089112895663728

