In [None]:
from argparse import Namespace
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import itertools
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, recall_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
import warnings

# Silence every warning for the rest of the session.
# NOTE(review): this also hides pandas/scikit-learn warnings (e.g. chained
# assignment or convergence warnings) raised by later cells — consider
# narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [None]:
# Path (relative to the notebook's working directory) of the CSV that holds
# the entropy measures together with the event columns.
DATA_DIR_BASELINE = 'dataset_entropy_measures_all_events.csv'

# Read the file into a DataFrame and show it as the cell output.
dataset_entropy = pd.read_csv(filepath_or_buffer=DATA_DIR_BASELINE)
dataset_entropy

In [None]:
# Pre-processing: derive a binary-style label per row and keep only the
# identifier, entropy feature and label columns.
#
# A row is "positive" (label 1.0) when at least one of the event columns below
# equals 1; every other row gets label 0.5 (the downstream cells map
# 0.5 -> 0 and 1.0 -> 1).
event_columns = [
    'disturbed_sleep_pattern',
    'agitation_irritability_aggression',
    'depressed_anxiety',
    'accidental_fall',
    'motor_function_behavior',
    'period_of_confusion',
    'hospital',
    'uti',
]
model_columns = [
    'patient_id', 'day_date',
    'entropy_daytime', 'entropy_night',
    'entropy_rate_mk_daytime', 'entropy_rate_mk_night',
    'entropy_production_daytime', 'entropy_production_night',
    'entropy_vn_frequency_daytime', 'entropy_vn_frequency_night',
    'entropy_vn_duration_daytime', 'entropy_vn_duration_night',
    'duration_difference_daytime', 'duration_difference_night',
    'label',
]

# Drop the first column of the raw file (presumably a saved index column —
# TODO confirm) and work on an independent copy with a fresh 0..n-1 index.
dataset_all_events = dataset_entropy.iloc[:, 1:].reset_index(drop=True)

# True where any event flag equals 1 (NaN flags compare as False).
is_positive = dataset_all_events[event_columns].eq(1).any(axis=1)

# Kept for backward compatibility: the positive rows only, labelled 1.
dataset_all_events_positive = dataset_all_events[is_positive].assign(label=1)

# Label rows directly from the mask instead of left-merging the positive subset
# back on every shared column: the merge duplicates rows whenever identical
# positive rows exist and relies on exact equality of float columns.
dataset_all_events = dataset_all_events.assign(
    label=is_positive.map({True: 1.0, False: 0.5})
)

# Strict column selection: a missing column raises KeyError here rather than
# being created as all-NaN and silently emptying the frame in dropna().
dataset_all_events = dataset_all_events[model_columns].dropna()
dataset_all_events

In [None]:
# Number of rows in the least frequent label class; this is the per-class
# sample size used when balancing the dataset.
dataset_all_events['label'].value_counts().min()

In [None]:
# Repeated balanced hold-out evaluation of a linear SVM on the entropy features.
#
# Each of the 30 repetitions:
#   1. draws an equally sized random subset of negative (0.5) and positive (1.0) rows,
#   2. splits it 70/30 into train and test,
#   3. standardises the features with statistics fitted on the training split only,
#   4. fits a linear SVM and records recall and F1 on both splits.
# The per-repetition scores are collected in `svm_result` and shown as a boxplot.
train_recall_svm = []
test_recall_svm = []
train_f1_svm = []
test_f1_svm = []

feature_columns = [
    'entropy_daytime', 'entropy_night',
    'entropy_rate_mk_daytime', 'entropy_rate_mk_night',
    'entropy_production_daytime', 'entropy_production_night',
    'entropy_vn_frequency_daytime', 'entropy_vn_frequency_night',
    'entropy_vn_duration_daytime', 'entropy_vn_duration_night',
    'duration_difference_daytime', 'duration_difference_night',
]

# optimise the format of the labels: 0.5 (negative) -> 0, 1.0 (positive) -> 1
mapping = {0.5: 0, 1.0: 1}

# Size of the smaller class; loop-invariant, so computed once.
dataset_num = dataset_all_events.label.value_counts().min()

for i in range(30):

    # Balance negative and positive labels. The draw is seeded with the
    # repetition index so every repetition sees a different but reproducible
    # subset after a kernel restart.
    dataset_symptoms_negative = dataset_all_events[dataset_all_events['label'] == 0.5].sample(dataset_num, random_state=i)
    dataset_symptoms_positive = dataset_all_events[dataset_all_events['label'] == 1.0].sample(dataset_num, random_state=i)
    dataset_symptoms = pd.concat([dataset_symptoms_negative, dataset_symptoms_positive])

    # Keep the features plus the label; copy so the label rewrite below does
    # not act on a slice of the concatenated frame.
    dataset_symptoms = dataset_symptoms[feature_columns + ['label']].copy()
    dataset_symptoms['label'] = dataset_symptoms['label'].map(mapping)

    X = dataset_symptoms[feature_columns].to_numpy()
    y = dataset_symptoms['label'].to_numpy()

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1234)

    # normalisation: fit on the training split only to avoid leaking test statistics
    X_scaler = StandardScaler().fit(X_train)
    standardized_X_train = X_scaler.transform(X_train)
    standardized_X_test = X_scaler.transform(X_test)

    # Train and predict on the standardised features (previously the scaled
    # arrays were computed but the raw features were used instead).
    linear_svc = SVC(kernel='linear', max_iter=-1, C=0.00001).fit(standardized_X_train, y_train)

    training = linear_svc.predict(standardized_X_train)
    testing = linear_svc.predict(standardized_X_test)

    # scikit-learn metrics take (y_true, y_pred); with the arguments swapped,
    # recall_score actually reports precision.
    test_f1_svm.append(f1_score(y_test, testing))
    test_recall_svm.append(recall_score(y_test, testing))

    train_f1_svm.append(f1_score(y_train, training))
    train_recall_svm.append(recall_score(y_train, training))

# One single-column frame per metric (one row per repetition).
test_recall_svm = pd.DataFrame(test_recall_svm, columns=['test_recall_svm'])
test_f1_svm = pd.DataFrame(test_f1_svm, columns=['test_f1_svm'])

train_recall_svm = pd.DataFrame(train_recall_svm, columns=['train_recall_svm'])
train_f1_svm = pd.DataFrame(train_f1_svm, columns=['train_f1_svm'])

# Side-by-side table of all metrics, renamed to the short labels used on the plot.
svm_result = pd.concat([test_recall_svm, test_f1_svm, train_recall_svm, train_f1_svm], axis=1)
svm_result.columns = ['recall_bs_ts', 'F1_bs_ts', 'recall_bs_tr', 'F1_bs_tr']
svm_result.boxplot()
plt.ylim(0, 1)

In [None]:
# svm_result.to_csv('evaluation_entropy_SVM.csv')