# Merge all the different data sources into one big DataFrame

In [50]:
# Data handling and plotting
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from dateutil import parser  # More robust date parsing

# MongoDB
from pymongo import MongoClient
from bson import ObjectId


import numpy as np
import imblearn
import seaborn as sns
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from imblearn.metrics import specificity_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import confusion_matrix

import mord


In [None]:
def _load_and_preview(csv_path):
    """Read one CSV file into a DataFrame, print its first rows, and return it."""
    frame = pd.read_csv(csv_path)
    print(frame.head())
    return frame


# One DataFrame per CSV file; each is previewed right after loading.
heart_rate = _load_and_preview("csv/Heart_Rate.csv")
hrv = _load_and_preview("csv/HRV.csv")
mood = _load_and_preview("csv/Mood.csv")
sleep = _load_and_preview("csv/sleep.csv")
steps = _load_and_preview("csv/steps.csv")
resting_heart_rate = _load_and_preview("csv/resting_heart_rate.csv")

In [52]:
# Outer-join every data set on the shared 'date' column so that a date
# present in any single source still gets a row in the combined frame.
merged = heart_rate
for other in (hrv, mood, sleep, steps, resting_heart_rate):
    merged = merged.merge(other, on='date', how='outer')

In [None]:
# Drop every record with too many missing features: a row is kept only
# when it has at least 7 non-missing values.
merged = merged.dropna(thresh=7)

# Only use the records for which a mood score is available.
has_mood = merged['mood_score'].notna()
merged = merged[has_mood]

# The date holds no useful information for what we want to investigate.
merged = merged.drop(columns="date")

# Cell output: summary of the boolean missing-value mask per column.
merged.isna().describe()


## Model training

In [54]:
def train_RandomForestModel(X_train, y_train):
    """Fit a random-forest classifier with default hyperparameters.

    Args:
        X_train: Training feature matrix.
        y_train: Training class labels aligned with ``X_train``.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    # NOTE(review): no random_state is set, so results vary between runs.
    classifier = RandomForestClassifier()
    classifier.fit(X_train, y_train)
    return classifier

def evaluate_model(y_true, y_pred):
    """Compute support-weighted classification metrics for predictions.

    Args:
        y_true: True class labels.
        y_pred: Predicted class labels, aligned with ``y_true``.

    Returns:
        A dict with the keys 'accuracy', 'precision', 'recall' and
        'specificity'. All but accuracy are averaged over the classes with
        ``average='weighted'``.
    """
    acc = accuracy_score(y_true, y_pred)
    # zero_division=0 scores a class with an undefined ratio as 0 without
    # emitting a warning. Recall uses the same setting as precision so the
    # two metrics treat such classes consistently.
    prec = precision_score(y_true, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
    spec = specificity_score(y_true, y_pred, average='weighted')
    return {'accuracy': acc, 'precision': prec, 'recall': recall, 'specificity': spec}

def train_OrdinalLogisticRegrModel(X_train, y_train):
    """Fit a ``mord.LogisticIT`` ordinal regression model with default settings.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels aligned with ``X_train``.

    Returns:
        The fitted ``mord.LogisticIT`` instance.
    """
    ordinal_model = mord.LogisticIT()
    ordinal_model.fit(X_train, y_train)
    return ordinal_model

In [55]:
# Target: the mood score, cast to integer class labels.
y = merged['mood_score'].astype(int)
# Features: every remaining column of the merged frame.
X = merged.drop(columns="mood_score")

In [56]:
# Hold out 30% of the records for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.30,
)

# Fit the random forest on the training part and score it on the held-out part.
rfModel = train_RandomForestModel(X_train, y_train)
y_pred = rfModel.predict(X_test)
metricsRf = evaluate_model(y_test, y_pred)

In [None]:
# Print each random-forest metric as "<name> <value>".
# The loop variables deliberately avoid the names `x` and `y`: this runs at
# notebook top level, so `y` would overwrite the target labels used by the
# train/test split cell.
for metric_name, metric_value in metricsRf.items():
    print(metric_name, metric_value)

In [None]:
# Confusion matrix of the random-forest predictions on the test set.
cm = confusion_matrix(y_test, y_pred)
plt.figure()
# fmt='d' prints the integer counts in full; the default format would show
# counts of 100 or more in scientific notation.
sns.heatmap(cm, annot=True, fmt='d', square=True, cmap='Blues_r')
# The f-string uses double quotes outside and single quotes for the key:
# reusing the same quote inside an f-string is a SyntaxError before Python 3.12.
plt.title(f"Accuracy: {metricsRf['accuracy']}")
plt.xlabel('Predicted')
plt.ylabel('Actual')

In [None]:
# Fit the ordinal logistic regression on the same split and score it with the
# same metrics as the random forest. Note that y_pred is reassigned here.
olrModel = train_OrdinalLogisticRegrModel(X_train, y_train)
y_pred = olrModel.predict(X_test)
metricsOlr = evaluate_model(y_test, y_pred)

# Confusion matrix of the ordinal-regression predictions on the test set.
cm = confusion_matrix(y_test, y_pred)
plt.figure()
# fmt='d' prints the integer counts in full; the default format would show
# counts of 100 or more in scientific notation.
sns.heatmap(cm, annot=True, fmt='d', square=True, cmap='Blues_r')
# The f-string uses double quotes outside and single quotes for the key:
# reusing the same quote inside an f-string is a SyntaxError before Python 3.12.
plt.title(f"Accuracy: {metricsOlr['accuracy']}")
plt.xlabel('Predicted')
plt.ylabel('Actual')