# Random forest 8 emotions

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

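Load the data and scale the features.
Note: highly correlated features are not removed in the cell below; every feature column from the CSV is kept (TODO: add this step, or confirm it was already done when the CSV was built).
Create the train and test splits.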

In [2]:
# data loading
dataset = pd.read_csv('../dataset/labeled_8_combined_dataset.csv', index_col=0)

# Features are every column except the last four; the label is the last column.
# NOTE(review): presumably the last four columns are label/metadata columns -
# confirm against the CSV schema.
data = dataset.iloc[:, :-4]
labels = dataset.iloc[:, -1]
unique_labels = np.unique(labels)

# creating train and test splits
# The split is done on the raw features, BEFORE scaling, so that the scaler
# statistics computed below come from training rows only. The same rows end up
# in each split as before, because the split is positional and uses the same
# random_state.
raw_train, raw_test, labels_train, labels_test = train_test_split(data, labels, test_size=0.1, random_state=0)

# scaling the data
# Fit on the training split only; fitting on the full dataset would leak
# test-set mean/variance into the preprocessing step.
scaler = StandardScaler().fit(raw_train)

# The scaled frames keep the original row index so they stay aligned with
# labels_train / labels_test.
data_train = pd.DataFrame(scaler.transform(raw_train), columns=data.columns, index=raw_train.index)
data_test = pd.DataFrame(scaler.transform(raw_test), columns=data.columns, index=raw_test.index)

# Full dataset scaled with the training statistics, kept under its original name.
data_scaled = pd.DataFrame(scaler.transform(data), columns=data.columns, index=data.index)

# ECG

Extract only the ECG-related features (columns whose names contain `ECG` or `HRV`)

In [3]:
# A column belongs to the ECG subset when its name contains any of these markers.
ecg_markers = ('ECG', 'HRV')
ecg_columns = [
    name for name in data.columns
    if any(marker in name for marker in ecg_markers)
]

# Restrict both splits to the ECG subset.
ecg_data_train = data_train.loc[:, ecg_columns]
ecg_data_test = data_test.loc[:, ecg_columns]

Define the model and train it

In [4]:
# Hyper-parameters of the ECG-only forest, gathered in one mapping.
ecg_rfc_params = {
    'n_estimators': 1000,
    'criterion': 'gini',
    'max_depth': None,
    'min_samples_split': 2,
    'min_samples_leaf': 1,
    'max_features': 'sqrt',
    'random_state': 0,  # fixed seed so reruns build the same forest
}
ecg_rfc = RandomForestClassifier(**ecg_rfc_params)

In [5]:
# Train on the ECG training features, then predict labels for the ECG test
# features; `fit` returns the estimator, so the two calls are chained.
ecg_predictions = ecg_rfc.fit(ecg_data_train, labels_train).predict(ecg_data_test)

In [6]:
# Options shared by the dict and the text rendering of the ECG report.
ecg_report_options = dict(target_names=unique_labels, zero_division=0, digits=4)

# Dict form is kept for the summary cell; text form is printed for reading.
ecg_cr = classification_report(labels_test, ecg_predictions, output_dict=True, **ecg_report_options)
print(classification_report(labels_test, ecg_predictions, **ecg_report_options))

              precision    recall  f1-score   support

  Ekscytacja     0.1389    0.0588    0.0826        85
   Neutralna     0.2690    0.5000    0.3498       198
  Odprezenie     0.2353    0.1026    0.1429       117
      Radosc     0.1728    0.1830    0.1778       153
      Smutek     0.0000    0.0000    0.0000        77
      Wstret     0.2500    0.3626    0.2960       182
  Zdziwienie     0.1500    0.0588    0.0845       102
       Zlosc     0.1652    0.1329    0.1473       143

    accuracy                         0.2223      1057
   macro avg     0.1727    0.1748    0.1601      1057
weighted avg     0.1925    0.2223    0.1928      1057



# EDA

Extract only the EDA-related features (columns whose names contain `EDA` or `SCR`)

In [7]:
# A column belongs to the EDA subset when its name contains any of these markers.
eda_markers = ('EDA', 'SCR')
eda_columns = [
    name for name in data.columns
    if any(marker in name for marker in eda_markers)
]

# Restrict both splits to the EDA subset.
eda_data_train = data_train.loc[:, eda_columns]
eda_data_test = data_test.loc[:, eda_columns]

Define the model and train it

In [8]:
# Hyper-parameters of the EDA-only forest, gathered in one mapping.
eda_rfc_params = {
    'n_estimators': 1000,
    'criterion': 'gini',
    'max_depth': None,
    'min_samples_split': 2,
    'min_samples_leaf': 1,
    'max_features': 'sqrt',
    'random_state': 0,  # fixed seed so reruns build the same forest
}
eda_rfc = RandomForestClassifier(**eda_rfc_params)

In [9]:
# Train on the EDA training features, then predict labels for the EDA test
# features; `fit` returns the estimator, so the two calls are chained.
eda_predictions = eda_rfc.fit(eda_data_train, labels_train).predict(eda_data_test)

In [10]:
# Options shared by the dict and the text rendering of the EDA report.
eda_report_options = dict(target_names=unique_labels, zero_division=0, digits=4)

# Dict form is kept for the summary cell; text form is printed for reading.
eda_cr = classification_report(labels_test, eda_predictions, output_dict=True, **eda_report_options)
print(classification_report(labels_test, eda_predictions, **eda_report_options))

              precision    recall  f1-score   support

  Ekscytacja     0.1509    0.0941    0.1159        85
   Neutralna     0.3274    0.4646    0.3841       198
  Odprezenie     0.1923    0.1282    0.1538       117
      Radosc     0.1538    0.1961    0.1724       153
      Smutek     0.1081    0.0519    0.0702        77
      Wstret     0.2327    0.2582    0.2448       182
  Zdziwienie     0.1013    0.0784    0.0884       102
       Zlosc     0.2424    0.2238    0.2327       143

    accuracy                         0.2233      1057
   macro avg     0.1886    0.1869    0.1828      1057
weighted avg     0.2075    0.2233    0.2105      1057



# Combined

In [11]:
# Hyper-parameters of the forest trained on all features, gathered in one mapping.
rfc_params = {
    'n_estimators': 1000,
    'criterion': 'gini',
    'max_depth': None,
    'min_samples_split': 2,
    'min_samples_leaf': 1,
    'max_features': 'sqrt',
    'random_state': 0,  # fixed seed so reruns build the same forest
}
rfc = RandomForestClassifier(**rfc_params)

In [12]:
# Train on every feature column, then predict labels for the full-feature test
# split; `fit` returns the estimator, so the two calls are chained.
predictions = rfc.fit(data_train, labels_train).predict(data_test)

In [13]:
# Options shared by the dict and the text rendering of the combined report.
report_options = dict(target_names=unique_labels, zero_division=0, digits=4)

# Dict form is kept for the summary cell; text form is printed for reading.
cr = classification_report(labels_test, predictions, output_dict=True, **report_options)
print(classification_report(labels_test, predictions, **report_options))

              precision    recall  f1-score   support

  Ekscytacja     0.1667    0.0941    0.1203        85
   Neutralna     0.3000    0.4848    0.3707       198
  Odprezenie     0.2714    0.1624    0.2032       117
      Radosc     0.1390    0.1699    0.1529       153
      Smutek     0.0769    0.0130    0.0222        77
      Wstret     0.2546    0.3022    0.2764       182
  Zdziwienie     0.1132    0.0588    0.0774       102
       Zlosc     0.2200    0.2308    0.2253       143

    accuracy                         0.2308      1057
   macro avg     0.1927    0.1895    0.1810      1057
weighted avg     0.2099    0.2308    0.2109      1057



# Summary

In [14]:
# Reports to compare, in display order.
summary_reports = {'ECG': ecg_cr, 'EDA': eda_cr, 'Both': cr}

# Section title -> how to read that metric out of a classification-report dict.
summary_metrics = {
    'Accuracy': lambda report: report['accuracy'],
    'Average F1': lambda report: report['macro avg']['f1-score'],
    'Weighted F1': lambda report: report['weighted avg']['f1-score'],
}

# The leading and trailing empty entries reproduce the blank first and last
# lines of the printed summary.
summary_lines = ['']
for metric_title, read_metric in summary_metrics.items():
    summary_lines.append(f'  {metric_title}:')
    for model_name, model_report in summary_reports.items():
        summary_lines.append(f'{model_name}:\t{read_metric(model_report):.4f}')
summary_lines.append('')

print('\n'.join(summary_lines))


  Accuracy:
ECG:	0.2223
EDA:	0.2233
Both:	0.2308
  Average F1:
ECG:	0.1601
EDA:	0.1828
Both:	0.1810
  Weighted F1:
ECG:	0.1928
EDA:	0.2105
Both:	0.2109

