# Random forest 4 emotions

In [18]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

Load the data and scale it
Remove highly correlated features (note: the cell below does not perform this step)
Create train and test splits

In [19]:
# Load the labeled dataset; the first CSV column is used as the row index.
dataset = pd.read_csv('../dataset/labeled_4_combined_dataset.csv', index_col=0)

# Features are every column except the last four; the target is the last column.
data = dataset.iloc[:, :-4]
labels = dataset.iloc[:, -1]
unique_labels = np.unique(labels)

# Split BEFORE scaling so that held-out rows never contribute to the scaler's
# mean/variance (fitting the scaler on the full dataset leaks test statistics
# into preprocessing). test_size and random_state are unchanged, and no
# stratification is used, so the split is drawn exactly as before.
data_train_raw, data_test_raw, labels_train, labels_test = train_test_split(
    data, labels, test_size=0.1, random_state=0
)

# Fit the scaler on the training rows only, then apply it to both splits.
scaler = StandardScaler().fit(data_train_raw)

# Rebuild DataFrames so later cells can select columns by name; the original
# row index is kept so features stay aligned with their labels.
data_train = pd.DataFrame(
    scaler.transform(data_train_raw), columns=data.columns, index=data_train_raw.index
)
data_test = pd.DataFrame(
    scaler.transform(data_test_raw), columns=data.columns, index=data_test_raw.index
)

# Full scaled frame (using the train-fitted scaler), kept for backward
# compatibility with any code that still refers to `data_scaled`.
data_scaled = pd.DataFrame(scaler.transform(data), columns=data.columns, index=data.index)

# ECG

Extract only the ECG-related features (columns whose names contain `ECG` or `HRV`)

In [20]:
# A column belongs to the ECG subset when its name contains any of these substrings.
ecg_markers = ('ECG', 'HRV')
ecg_columns = [
    name for name in data.columns
    if any(marker in name for marker in ecg_markers)
]

# Restrict both splits to the ECG subset.
ecg_data_train, ecg_data_test = data_train[ecg_columns], data_test[ecg_columns]

Define the model and train it

In [21]:
# Hyper-parameters of the ECG-only forest, collected in one place.
ecg_rfc_params = {
    'n_estimators': 1000,
    'criterion': 'gini',
    'max_depth': None,
    'min_samples_split': 2,
    'min_samples_leaf': 1,
    'max_features': 'sqrt',
    'random_state': 0,  # fixed seed so re-runs build the same forest
}
ecg_rfc = RandomForestClassifier(**ecg_rfc_params)

In [22]:
# Train on the ECG training features, then predict labels for the ECG test
# features (fit() returns the fitted estimator, so the calls can be chained).
ecg_predictions = ecg_rfc.fit(ecg_data_train, labels_train).predict(ecg_data_test)

In [23]:
# Options shared by the dict form (used in the summary) and the printed form.
ecg_report_kwargs = dict(target_names=unique_labels, zero_division=0, digits=4)

# Dict version of the report, kept for the summary cell.
ecg_cr = classification_report(
    labels_test, ecg_predictions, output_dict=True, **ecg_report_kwargs
)
# Human-readable version of the same report.
print(classification_report(labels_test, ecg_predictions, **ecg_report_kwargs))

              precision    recall  f1-score   support

  Ekscytacja     0.3137    0.3389    0.3258       298
  Odprezenie     0.4167    0.0777    0.1310       193
      Smutek     0.3582    0.6140    0.4524       329
       Zlosc     0.2593    0.1477    0.1882       237

    accuracy                         0.3340      1057
   macro avg     0.3369    0.2946    0.2743      1057
weighted avg     0.3341    0.3340    0.2988      1057



# EDA

Extract only the EDA-related features (columns whose names contain `EDA` or `SCR`)

In [24]:
# A column belongs to the EDA subset when its name contains any of these substrings.
eda_markers = ('EDA', 'SCR')
eda_columns = [
    name for name in data.columns
    if any(marker in name for marker in eda_markers)
]

# Restrict both splits to the EDA subset.
eda_data_train, eda_data_test = data_train[eda_columns], data_test[eda_columns]

Define the model and train it

In [25]:
# Hyper-parameters of the EDA-only forest, collected in one place.
eda_rfc_params = {
    'n_estimators': 1000,
    'criterion': 'gini',
    'max_depth': None,
    'min_samples_split': 2,
    'min_samples_leaf': 1,
    'max_features': 'sqrt',
    'random_state': 0,  # fixed seed so re-runs build the same forest
}
eda_rfc = RandomForestClassifier(**eda_rfc_params)

In [26]:
# Train on the EDA training features, then predict labels for the EDA test
# features (fit() returns the fitted estimator, so the calls can be chained).
eda_predictions = eda_rfc.fit(eda_data_train, labels_train).predict(eda_data_test)

In [27]:
# Options shared by the dict form (used in the summary) and the printed form.
eda_report_kwargs = dict(target_names=unique_labels, zero_division=0, digits=4)

# Dict version of the report, kept for the summary cell.
eda_cr = classification_report(
    labels_test, eda_predictions, output_dict=True, **eda_report_kwargs
)
# Human-readable version of the same report.
print(classification_report(labels_test, eda_predictions, **eda_report_kwargs))

              precision    recall  f1-score   support

  Ekscytacja     0.3000    0.3322    0.3153       298
  Odprezenie     0.2376    0.1244    0.1633       193
      Smutek     0.3492    0.4681    0.4000       329
       Zlosc     0.2973    0.2321    0.2607       237

    accuracy                         0.3141      1057
   macro avg     0.2960    0.2892    0.2848      1057
weighted avg     0.3033    0.3141    0.3016      1057



# Combined

In [28]:
# Hyper-parameters of the forest trained on all features, collected in one place.
rfc_params = {
    'n_estimators': 1000,
    'criterion': 'gini',
    'max_depth': None,
    'min_samples_split': 2,
    'min_samples_leaf': 1,
    'max_features': 'sqrt',
    'random_state': 0,  # fixed seed so re-runs build the same forest
}
rfc = RandomForestClassifier(**rfc_params)

In [29]:
# Train on every feature column, then predict labels for the test split
# (fit() returns the fitted estimator, so the calls can be chained).
predictions = rfc.fit(data_train, labels_train).predict(data_test)

In [30]:
# Options shared by the dict form (used in the summary) and the printed form.
report_kwargs = dict(target_names=unique_labels, zero_division=0, digits=4)

# Dict version of the report, kept for the summary cell.
cr = classification_report(
    labels_test, predictions, output_dict=True, **report_kwargs
)
# Human-readable version of the same report.
print(classification_report(labels_test, predictions, **report_kwargs))

              precision    recall  f1-score   support

  Ekscytacja     0.3104    0.3490    0.3286       298
  Odprezenie     0.3800    0.0984    0.1564       193
      Smutek     0.3577    0.5653    0.4382       329
       Zlosc     0.2763    0.1772    0.2159       237

    accuracy                         0.3321      1057
   macro avg     0.3311    0.2975    0.2848      1057
weighted avg     0.3302    0.3321    0.3060      1057



# Summary

In [31]:
# Reports to compare, in display order: (row label, classification-report dict).
summary_reports = (('ECG', ecg_cr), ('EDA', eda_cr), ('Both', cr))

# Metrics to show, in display order: (section heading, accessor into a report dict).
summary_metrics = (
    ('Accuracy', lambda rep: rep['accuracy']),
    ('Average F1', lambda rep: rep['macro avg']['f1-score']),
    ('Weighted F1', lambda rep: rep['weighted avg']['f1-score']),
)

# The leading and trailing empty entries reproduce the blank line that the
# summary starts and ends with.
summary_lines = ['']
for summary_heading, summary_pick in summary_metrics:
    summary_lines.append(f'  {summary_heading}:')
    summary_lines.extend(
        f'{summary_name}:\t{summary_pick(summary_rep):.4f}'
        for summary_name, summary_rep in summary_reports
    )
summary_lines.append('')

print('\n'.join(summary_lines))


  Accuracy:
ECG:	0.3340
EDA:	0.3141
Both:	0.3321
  Average F1:
ECG:	0.2743
EDA:	0.2848
Both:	0.2848
  Weighted F1:
ECG:	0.2988
EDA:	0.3016
Both:	0.3060

