In [None]:
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import csv


def plot_confusion_matrix(actual_val, pred_val, title=None):
    """Show an annotated heatmap of actual-vs-predicted label counts.

    Parameters
    ----------
    actual_val
        Ground-truth labels; they become the rows of the cross-tabulation.
    pred_val
        Predicted labels; they become the columns of the cross-tabulation.
    title : optional
        Title placed on the heatmap. Nothing is set when it is ``None``.

    Returns
    -------
    The ``pd.crosstab`` result, with rows named 'Actual' and columns
    named 'Predicted'.
    """
    # Local is called `counts` so it does not shadow the sklearn
    # `confusion_matrix` function imported at the top of the notebook.
    counts = pd.crosstab(index=actual_val,
                         columns=pred_val,
                         rownames=['Actual'],
                         colnames=['Predicted'])
    ax = sns.heatmap(counts, annot=True, fmt=',.0f')
    if title is not None:
        ax.set_title(title)

    plt.show()
    return counts

# Number of label classes (2 = artefact / no artefact). It sizes the
# n_stage x n_stage confusion-matrix accumulator and the per-class stats.
n_stage = 2


In [None]:
# Accumulators for aggregated results (confusion matrix and per-run scores).
CM = np.zeros([n_stage,n_stage])
acc_list = []
macroF1_list = []
DICE_list = []

# Read training features and labels.
# The paths are handed straight to np.loadtxt so that it opens *and closes*
# the files itself; wrapping them in a bare open(...) left the handles
# dangling. loadtxt already returns an ndarray, so no list()/np.array()
# round trip is needed.
X = np.loadtxt("./params_v2/features_DaLiA_train.csv", delimiter=",", dtype=float)

# Labels are parsed as floats first and then cast, so values written as
# e.g. "1.0" or "1.000e+00" are still accepted.
y = np.loadtxt("./params_v2/labels_DaLiA_train.csv", delimiter=",").astype("int64")

In [None]:
# Train a LightGBM binary classifier on the training set loaded earlier
# (X, y) and evaluate it on the hold-out set selected by `db`.
CM = np.zeros([n_stage,n_stage])
acc_list = []
macroF1_list = []

# Evaluation set; the feature/label/prediction file names are derived from it.
# Other sets that can be selected here: "TROIKA", "WESAD".
db = 'DaLiA_test'

# Only the first N_FEATURES columns of the feature files are fed to the model.
N_FEATURES = 9

X_train = X[:, 0:N_FEATURES]
y_train = y

# Paths go straight to np.loadtxt so the files are closed after reading
# (a bare open(...) passed to loadtxt is never closed).
X_test = np.loadtxt("./params_v2/features_" + db + ".csv", delimiter=",", dtype=float)
X_test = X_test[:, 0:N_FEATURES]
# Parse as float, then cast, so labels written as "1.0" are still accepted.
y_test = np.loadtxt("./params_v2/labels_" + db + ".csv", delimiter=",").astype("int64")

# Model hyperparameters.
# (An earlier configuration used num_leaves=20 and learning_rate=0.15.)
model = lgb.LGBMClassifier(objective='binary',
                           num_leaves=7,
                           num_iterations=100,
                           metric='binary_logloss',
                           learning_rate=0.1)

model.fit(X_train, y_train)
pred = model.predict(X_test)

accuracy = accuracy_score(y_test, pred)
macroF1 = 0.5 * (f1_score(y_test, pred, pos_label=1) + f1_score(y_test, pred, pos_label=0))

# labels=[1, 0] puts class 1 first: rows are actual, columns are predicted.
cm = confusion_matrix(y_test, pred, labels=[1,0])
TP = cm[0,0]    # artefact
TN = cm[1,1]    # no artefact
FP = cm[1,0]    # actual 0, predicted 1
FN = cm[0,1]    # actual 1, predicted 0

TPR = TP / (TP + FN)
TNR = TN / (TN + FP)
PPV = TP / (TP + FP)
NPV = TN / (TN + FN)
F1p = 2 * TPR * PPV / (TPR + PPV)
F1n = f1_score(y_test, pred, pos_label=0)

# Sanity check: the hand-computed positive-class F1 must agree with sklearn's.
# Compared with a tolerance, because exact float equality can fail on
# rounding alone and would then print a spurious "err".
if not np.isclose(macroF1, 0.5 * (F1p + F1n)):
    print("err\n")

DICE = 2*TP / (2*TP + FP + FN)

# Metric labels, in the same order as the values in `pfm`.
pfm_name = ["TPR", "PPV", "TNR", "NPV", "F1p", "F1n", "macroF1", "accuracy", "DICE"]
pfm = [TPR, PPV, TNR, NPV, F1p, F1n, macroF1, accuracy, DICE]

# There is no fold loop in this cell, so the header reports the evaluated
# data set instead of an (undefined) fold index.
print("%s: training_size = %d, testing_size = %d:" % (db, np.shape(X_train)[0], np.shape(y_test)[0]))
prt = np.around(np.array(pfm), decimals=4)
for name, value in zip(pfm_name, prt):
    print("%s = %.4f" % (name, value))

# Per-class statistics, indexed in confusion-matrix order (column 0 = class 1):
#   row 0: diagonal / column sum (precision)
#   row 1: diagonal / row sum    (recall)
#   row 2: harmonic mean of the two (F1)
stat = np.zeros([3, n_stage])
for j in range(n_stage):
    stat[0,j] = cm[j,j]/sum(cm[:,j])
    stat[1,j] = cm[j,j]/sum(cm[j,:])
    stat[2,j] = 2*stat[0,j]*stat[1,j]/(stat[0,j]+stat[1,j])

print('\n')
print(cm)

# Persist the predictions as a single CSV row.
with open('./params_v2/pred_'+db+'.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(pred)
    