In [None]:
#imports
import numpy as np
import pickle
from matplotlib import pyplot as plt
from scipy.stats import randint
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_recall_fscore_support,ConfusionMatrixDisplay
from sklearn.model_selection import RandomizedSearchCV

In [None]:
# Load the embedded SMHD dataset splits from pickle files.
# Replace the filenames below with the paths to your own copies.
#
# NOTE(security): pickle.load can execute arbitrary code while deserializing.
# Only load files that you created yourself or obtained from a trusted source.

def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, 'rb') as file:
        return pickle.load(file)


evaluation_x = _load_pickle('evaluation-x-smhd')
evaluation_y = _load_pickle('evaluation-y-smhd')
X_training = _load_pickle('x-training-smhd')
y_training = _load_pickle('y-training-smhd')

In [None]:
# Materialize each loaded split as a plain Python list (one entry per item
# yielded when iterating over the loaded object).
x_train = list(X_training)
x_test = list(evaluation_x)

y_train = list(y_training)
y_test = list(evaluation_y)

In [None]:
# Random Forest classifier tuned with a randomized hyperparameter search.

# Fixed seed so the sampled candidates, the fitted forests and the reported
# metrics are the same on every Restart & Run All.
SEED = 42

# Search space: number of trees drawn from [50, 700), max tree depth from [1, 20).
param_dist = {'n_estimators': randint(50, 700),
              'max_depth': randint(1, 20)}
model = RandomForestClassifier(random_state=SEED)

# Randomly sample n_iter candidates from param_dist and score each one with
# cv-fold cross-validation on the training data.
rand_search = RandomizedSearchCV(model,
                                 param_distributions=param_dist,
                                 n_iter=5,
                                 cv=5,
                                 random_state=SEED)

rand_search.fit(x_train, y_train)  # fit with training data

predictions = rand_search.predict(x_test)  # predict with the best estimator found

# Evaluation of predictions (precision/recall/F1 are support-weighted averages).
precision, recall, f1, _ = precision_recall_fscore_support(y_test, predictions, average='weighted')
accuracy = accuracy_score(y_test, predictions)

print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1 Score: {f1:.2f}')
print(f'Accuracy: {accuracy:.2f}')

# Confusion matrix.
# Use the union of true and predicted labels for both the matrix and the tick
# labels; otherwise a class that is predicted but absent from y_test makes the
# matrix larger than the label list passed to the display.
labels = np.unique(list(y_test) + list(predictions))
cm = confusion_matrix(y_test, predictions, labels=labels)
print("Confusion Matrix:", cm)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
disp.plot(cmap='Blues', values_format='d')
plt.show()