# Random forest 6 emotions

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

Load data and scale it
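Remove highly correlated features (note: no code in this notebook performs this step — presumably it was done when the dataset was built; TODO confirm)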
Create train and test splits

In [2]:
# data loading
dataset = pd.read_csv('../dataset/labeled_6_combined_dataset.csv', index_col=0)

# Features are every column except the last four; the class label is the last column.
# NOTE(review): what the other three trailing columns contain is not visible here - confirm.
data = dataset.iloc[:, :-4]
labels = dataset.iloc[:, -1]
unique_labels = np.unique(labels)  # sorted unique class names, passed to the reports as target_names

# creating train and test splits
# Split the raw features first so the scaler below only ever sees training rows.
# The split depends only on the number of rows and random_state, so the same rows
# end up in train/test as when the split was done after scaling.
raw_train, raw_test, labels_train, labels_test = train_test_split(
    data, labels, test_size=0.1, random_state=0
)

# scaling the data
# Fit on the training split only: fitting on the full dataset would leak the
# test rows' mean/variance into the features the models are trained on.
scaler = StandardScaler().fit(raw_train)

# Rebuild DataFrames (the scaler returns plain arrays) and keep the original row
# index so that features and labels stay aligned by index as well as by position.
data_train = pd.DataFrame(scaler.transform(raw_train), columns=data.columns, index=raw_train.index)
data_test = pd.DataFrame(scaler.transform(raw_test), columns=data.columns, index=raw_test.index)

# Whole dataset scaled with the training statistics; keeps the `data_scaled` name defined.
data_scaled = pd.DataFrame(scaler.transform(data), columns=data.columns, index=data.index)

# ECG

Extract only the ECG-related features (columns whose names contain 'ECG' or 'HRV')

In [3]:
# A column belongs to the ECG subset when its name contains one of these markers.
ecg_markers = ('ECG', 'HRV')
ecg_columns = [name for name in data.columns if any(marker in name for marker in ecg_markers)]

# Same train/test rows as the full feature set, narrowed to the ECG columns.
ecg_data_train = data_train.loc[:, ecg_columns]
ecg_data_test = data_test.loc[:, ecg_columns]

Define the model and train it

In [4]:
# Classifier for the ECG-only experiment (fitted on ecg_data_train in the next cell).
# All hyperparameters are written out explicitly so the configuration is visible here.
ecg_rfc = RandomForestClassifier(
    random_state=0,        # fixed seed so repeated runs build the same forest
    n_estimators=1000,     # number of trees
    max_features='sqrt',   # features considered at each split
    criterion='gini',
    max_depth=None,        # no depth limit
    min_samples_leaf=1,
    min_samples_split=2,
)

In [5]:
# fit() returns the fitted estimator itself, so training and prediction can be chained.
ecg_predictions = ecg_rfc.fit(ecg_data_train, labels_train).predict(ecg_data_test)

In [6]:
# Options shared by the dictionary report and the printed report.
ecg_report_options = dict(target_names=unique_labels, zero_division=0, digits=4)

# Dictionary form: read later by the summary cell.
ecg_cr = classification_report(labels_test, ecg_predictions, output_dict=True, **ecg_report_options)

# Text form: shown as this cell's output.
print(classification_report(labels_test, ecg_predictions, **ecg_report_options))

              precision    recall  f1-score   support

  Ekscytacja     0.1667    0.0609    0.0892       115
   Neutralna     0.3111    0.6235    0.4151       247
  Odprezenie     0.2941    0.1212    0.1717       165
      Radosc     0.2121    0.1313    0.1622       160
      Smutek     0.1972    0.0927    0.1261       151
       Zlosc     0.2695    0.3470    0.3034       219

    accuracy                         0.2763      1057
   macro avg     0.2418    0.2294    0.2113      1057
weighted avg     0.2529    0.2763    0.2389      1057



# EDA

Extract only the EDA-related features (columns whose names contain 'EDA' or 'SCR')

In [7]:
# A column belongs to the EDA subset when its name contains one of these markers.
eda_markers = ('EDA', 'SCR')
eda_columns = [name for name in data.columns if any(marker in name for marker in eda_markers)]

# Same train/test rows as the full feature set, narrowed to the EDA columns.
eda_data_train = data_train.loc[:, eda_columns]
eda_data_test = data_test.loc[:, eda_columns]

Define the model and train it

In [8]:
# Classifier for the EDA-only experiment (fitted on eda_data_train in the next cell).
# Hyperparameters are identical to the ECG model so the two runs are comparable.
eda_rfc = RandomForestClassifier(
    random_state=0,        # fixed seed so repeated runs build the same forest
    n_estimators=1000,     # number of trees
    max_features='sqrt',   # features considered at each split
    criterion='gini',
    max_depth=None,        # no depth limit
    min_samples_leaf=1,
    min_samples_split=2,
)

In [9]:
# fit() returns the fitted estimator itself, so training and prediction can be chained.
eda_predictions = eda_rfc.fit(eda_data_train, labels_train).predict(eda_data_test)

In [10]:
# Options shared by the dictionary report and the printed report.
eda_report_options = dict(target_names=unique_labels, zero_division=0, digits=4)

# Dictionary form: read later by the summary cell.
eda_cr = classification_report(labels_test, eda_predictions, output_dict=True, **eda_report_options)

# Text form: shown as this cell's output.
print(classification_report(labels_test, eda_predictions, **eda_report_options))

              precision    recall  f1-score   support

  Ekscytacja     0.1429    0.1043    0.1206       115
   Neutralna     0.3901    0.6032    0.4738       247
  Odprezenie     0.2475    0.1515    0.1880       165
      Radosc     0.1793    0.1625    0.1705       160
      Smutek     0.2079    0.1391    0.1667       151
       Zlosc     0.2951    0.3288    0.3110       219

    accuracy                         0.2886      1057
   macro avg     0.2438    0.2482    0.2384      1057
weighted avg     0.2633    0.2886    0.2672      1057



# Combined

In [11]:
# Classifier for the combined experiment (fitted on the full data_train in the next cell).
# Hyperparameters are identical to the single-modality models so the runs are comparable.
rfc = RandomForestClassifier(
    random_state=0,        # fixed seed so repeated runs build the same forest
    n_estimators=1000,     # number of trees
    max_features='sqrt',   # features considered at each split
    criterion='gini',
    max_depth=None,        # no depth limit
    min_samples_leaf=1,
    min_samples_split=2,
)

In [12]:
# fit() returns the fitted estimator itself, so training and prediction can be chained.
predictions = rfc.fit(data_train, labels_train).predict(data_test)

In [13]:
# Options shared by the dictionary report and the printed report.
report_options = dict(target_names=unique_labels, zero_division=0, digits=4)

# Dictionary form: read later by the summary cell.
cr = classification_report(labels_test, predictions, output_dict=True, **report_options)

# Text form: shown as this cell's output.
print(classification_report(labels_test, predictions, **report_options))

              precision    recall  f1-score   support

  Ekscytacja     0.1493    0.0870    0.1099       115
   Neutralna     0.3841    0.6640    0.4866       247
  Odprezenie     0.3188    0.1333    0.1880       165
      Radosc     0.1571    0.1375    0.1467       160
      Smutek     0.2222    0.1060    0.1435       151
       Zlosc     0.2695    0.3470    0.3034       219

    accuracy                         0.2933      1057
   macro avg     0.2502    0.2458    0.2297      1057
weighted avg     0.2671    0.2933    0.2606      1057



# Summary

In [14]:
# Reports to compare, in display order: (row label, classification_report dict).
summary_reports = (('ECG', ecg_cr), ('EDA', eda_cr), ('Both', cr))

# Metrics to show, in display order: (section title, extractor applied to a report dict).
summary_metrics = (
    ('Accuracy', lambda report: report['accuracy']),
    ('Average F1', lambda report: report['macro avg']['f1-score']),
    ('Weighted F1', lambda report: report['weighted avg']['f1-score']),
)

# The leading and trailing empty strings give the blank line before and after the table.
summary_lines = ['']
for title, extract in summary_metrics:
    summary_lines.append(f'  {title}:')
    summary_lines.extend(f'{label}:\t{extract(report):.4f}' for label, report in summary_reports)
summary_lines.append('')

print('\n'.join(summary_lines))


  Accuracy:
ECG:	0.2763
EDA:	0.2886
Both:	0.2933
  Average F1:
ECG:	0.2113
EDA:	0.2384
Both:	0.2297
  Weighted F1:
ECG:	0.2389
EDA:	0.2672
Both:	0.2606

