# Random forest 6 emotions

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

Load data and scale it
Remove highly correlated features
Create train and test splits

In [2]:
# data loading
dataset = pd.read_csv('../dataset/labeled_6_combined_dataset.csv', index_col=0)

data = dataset.iloc[:, :-4]
labels = dataset.iloc[:, -1]
unique_labels = np.unique(labels)

# scaling the data
data_scaled = StandardScaler().fit_transform(data)
data_scaled = pd.DataFrame(data_scaled, columns=data.columns)

# removing correlated features
corr = data.corr().abs()
upper = corr.where(np.triu(np.ones(corr.shape), k=1).astype(bool))
to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]
data.drop(to_drop, axis=1, inplace=True)

# creating train and test splits
data_train, data_test, labels_train, labels_test = train_test_split(data_scaled, labels, test_size=0.1, random_state=0)

# ECG

Extract only ecg related features

In [3]:
ecg_columns = [col for col in data.columns if 'ECG' in col or 'HRV' in col]

ecg_data_train = data_train[ecg_columns]
ecg_data_test = data_test[ecg_columns]

Define the model and train it

In [4]:
ecg_rfc = RandomForestClassifier(
    n_estimators=1000,
    criterion='gini',
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features='sqrt',
    random_state=0
)

In [5]:
ecg_rfc.fit(ecg_data_train, labels_train)
ecg_predictions = ecg_rfc.predict(ecg_data_test)

In [6]:
ecg_cr = classification_report(labels_test, ecg_predictions, target_names=unique_labels, zero_division=0, digits=4, output_dict=True)
print(classification_report(labels_test, ecg_predictions, target_names=unique_labels, zero_division=0, digits=4))

              precision    recall  f1-score   support

  Ekscytacja     0.1818    0.0522    0.0811       115
   Neutralna     0.3050    0.6397    0.4131       247
  Odprezenie     0.3231    0.1273    0.1826       165
      Radosc     0.2165    0.1313    0.1634       160
      Smutek     0.2424    0.1060    0.1475       151
       Zlosc     0.2662    0.3379    0.2978       219

    accuracy                         0.2800      1057
   macro avg     0.2558    0.2324    0.2142      1057
weighted avg     0.2640    0.2800    0.2414      1057



# EDA

Extract only ecg related features

In [7]:
eda_columns = [col for col in data.columns if 'EDA' in col or 'SCR' in col]

eda_data_train = data_train[eda_columns]
eda_data_test = data_test[eda_columns]

Define the model and train it

In [8]:
eda_rfc = RandomForestClassifier(
    n_estimators=1000,
    criterion='gini',
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features='sqrt',
    random_state=0
)

In [9]:
eda_rfc.fit(eda_data_train, labels_train)
eda_predictions = eda_rfc.predict(eda_data_test)

In [10]:
eda_cr = classification_report(labels_test, eda_predictions, target_names=unique_labels, zero_division=0, digits=4, output_dict=True)
print(classification_report(labels_test, eda_predictions, target_names=unique_labels, zero_division=0, digits=4))

              precision    recall  f1-score   support

  Ekscytacja     0.1529    0.1130    0.1300       115
   Neutralna     0.3844    0.5992    0.4684       247
  Odprezenie     0.2475    0.1515    0.1880       165
      Radosc     0.1565    0.1437    0.1498       160
      Smutek     0.1939    0.1258    0.1526       151
       Zlosc     0.2822    0.3105    0.2957       219

    accuracy                         0.2800      1057
   macro avg     0.2362    0.2406    0.2307      1057
weighted avg     0.2550    0.2800    0.2587      1057



# Combined

In [11]:
rfc = RandomForestClassifier(
    n_estimators=1000,
    criterion='gini',
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features='sqrt',
    random_state=0
)

In [12]:
rfc.fit(data_train, labels_train)
predictions = rfc.predict(data_test)

In [13]:
cr = classification_report(labels_test, predictions, target_names=unique_labels, zero_division=0, digits=4, output_dict=True)
print(classification_report(labels_test, predictions, target_names=unique_labels, zero_division=0, digits=4))

              precision    recall  f1-score   support

  Ekscytacja     0.1493    0.0870    0.1099       115
   Neutralna     0.3841    0.6640    0.4866       247
  Odprezenie     0.3188    0.1333    0.1880       165
      Radosc     0.1571    0.1375    0.1467       160
      Smutek     0.2222    0.1060    0.1435       151
       Zlosc     0.2695    0.3470    0.3034       219

    accuracy                         0.2933      1057
   macro avg     0.2502    0.2458    0.2297      1057
weighted avg     0.2671    0.2933    0.2606      1057



# Summary

In [14]:
print(f'''
\tAccuracy:
ECG:\t{ecg_cr['accuracy']}
EDA:\t{eda_cr['accuracy']}
Both:\t{cr['accuracy']}
\tAverage F1:
ECG:\t{ecg_cr['macro avg']['f1-score']}
EDA:\t{eda_cr['macro avg']['f1-score']}
Both:\t{cr['macro avg']['f1-score']}
\tWeighted F1:
ECG:\t{ecg_cr['weighted avg']['f1-score']}
EDA:\t{eda_cr['weighted avg']['f1-score']}
Both:\t{cr['weighted avg']['f1-score']}
''')


	Accuracy:
ECG:	0.2800378429517502
EDA:	0.2800378429517502
Both:	0.293282876064333
	Average F1:
ECG:	0.21423965913027673
EDA:	0.23073735073706292
Both:	0.22968813671405827
	Weighted F1:
ECG:	0.24135662256910165
EDA:	0.2586703179372227
Both:	0.2605891447043834

