In [1]:
import sys
sys.path.append('../')

import numpy as np
import pandas as pd
import pickle
import logging

import utils.download_db as ddb
from utils.global_config import CONFIG

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

In [2]:
# Route all log records (DEBUG and above) to 'run-logs.log'. filemode='w'
# truncates the file, so every run of the notebook starts with an empty log.
log_settings = dict(
    level=logging.DEBUG,
    format='%(asctime)s %(levelname)s: %(message)s',
    filename='run-logs.log',
    filemode='w',
    encoding='utf-8',  # the `encoding` keyword of basicConfig needs Python 3.9+
)
logging.basicConfig(**log_settings)

# Get model

In [3]:
# Load the already-fitted classifier from disk.
# NOTE(security): unpickling executes arbitrary code embedded in the file, so
# only load model files that come from a trusted source.
model_filename = r'../models/XGBClassifier.pickle'
with open(model_filename, mode='rb') as model_file:
    model = pickle.load(model_file)

# Download database №1

In [4]:
# Database №1: MIT-BIH Atrial Fibrillation (PhysioNet zip archive).
name = "MIT-BIH-AtrialFibrillation"
url = "https://physionet.org/static/published-projects/afdb/mit-bih-atrial-fibrillation-database-1.0.0.zip"
data_dir = "../../analyse/data/"

# get_db / get_signals are project helpers from utils.download_db.
# NOTE(review): presumably get_db fetches the archive into `data_dir` and
# returns the local database path, and reload=False reuses previously
# prepared signals - confirm against utils/download_db.py.
db_path = ddb.get_db(url, name, data_dir)
signals = ddb.get_signals(db_path, reload=False)

In [5]:
# Flatten every window of every signal into one (metrics, has_defect) pair,
# keeping signal order and, within a signal, window order.
window_records = [window.get_data() for sig in signals for window in sig.windows]

# Split the pairs into the feature rows and the matching labels.
windows = [metrics for metrics, _ in window_records]
classification = [has_defect for _, has_defect in window_records]

# Row k of `classification_pd` is the label for row k of `windows_pd`.
windows_pd = pd.DataFrame(windows)
classification_pd = pd.DataFrame(classification)

In [6]:
# Preview the per-window feature table (one row per window; pandas elides the
# middle rows and columns of large frames in the rendered output).
windows_pd

Unnamed: 0,median,mean,variance,mean_abs,max,min,sum,AAA,AAB,AAC,...,BCC,CAA,CAB,CAC,CBA,CBB,CBC,CCA,CCB,CCC
0,-0.002326,0.055419,0.099190,0.222344,0.923729,-0.420833,1.662562,4,0,2,...,0,2,0,0,1,4,0,0,0,0
1,-0.004695,0.038917,0.093349,0.207399,0.923729,-0.420833,1.167500,5,0,2,...,0,2,0,0,1,4,0,0,0,0
2,-0.004695,0.040613,0.091894,0.205703,0.923729,-0.420833,1.218393,5,0,3,...,0,2,0,0,1,3,0,0,0,0
3,-0.004695,0.038467,0.091732,0.203557,0.923729,-0.420833,1.154017,5,0,3,...,0,2,0,0,1,3,0,0,0,0
4,-0.004785,0.015771,0.082734,0.188534,0.923729,-0.420833,0.473120,5,0,3,...,0,2,0,1,1,3,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2294577,-0.224906,2.857417,155.126511,3.605519,61.406250,-1.000000,68.578016,0,0,0,...,2,0,1,1,0,2,2,1,2,5
2294578,-0.236220,2.727775,149.324823,3.476644,61.406250,-1.000000,68.194373,0,0,0,...,2,0,1,1,0,2,2,1,2,5
2294579,-0.213592,5.590630,328.090512,6.307107,71.166466,-1.000000,139.765746,0,0,0,...,2,0,1,0,0,2,2,1,2,5
2294580,-0.236220,5.551042,328.571580,6.346694,71.166466,-1.000000,138.776056,0,0,0,...,2,0,1,0,0,2,3,1,2,5


In [7]:
# Preview the per-window labels (single boolean column, same row order as
# `windows_pd`).
classification_pd

Unnamed: 0,0
0,False
1,False
2,False
3,False
4,False
...,...
2294577,True
2294578,True
2294579,True
2294580,True


# Tests (database №1)

**Random**

In [8]:
# Evaluate the pre-trained model on 100 random 10% samples of the window
# table. Each split is seeded with its iteration number, so re-running the
# cell draws the same 100 subsets.
# NOTE(review): the model is loaded already fitted; if it was trained on these
# same windows the scores below are optimistic - confirm.
split_metrics = []

for seed in range(100):
    # Only the held-out 10% is used; the 90% "train" part is discarded.
    _, X_test, _, y_test = train_test_split(
        windows_pd, classification_pd, train_size=0.9, random_state=seed
    )

    # Predict once and derive both metrics from the same predictions. For
    # scikit-learn style classifiers `model.score(X, y)` is the accuracy of
    # `model.predict(X)`, so calling it would only repeat the prediction.
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    split_metrics.append(
        {
            "score": accuracy,
            "error": 1 - accuracy,
            "f1 norm": f1_score(y_test, y_pred),
        }
    )

# One row per seed (row label == seed). Building the frame once avoids
# re-copying all previous rows on every iteration, as repeated pd.concat does.
results = pd.DataFrame(split_metrics)


In [9]:
# Show the per-split metrics collected by the random-split evaluation.
results

Unnamed: 0,score,error,f1 norm
0,0.992920,0.007080,0.966021
1,0.992664,0.007336,0.964940
2,0.992851,0.007149,0.965274
3,0.992686,0.007314,0.965696
4,0.992820,0.007180,0.965662
...,...,...,...
95,0.992881,0.007119,0.965464
96,0.992689,0.007311,0.965289
97,0.992610,0.007390,0.964836
98,0.993011,0.006989,0.965394


In [10]:
# Summarise the random-split metrics: spread of the accuracy ("score") and the
# average F1 ("f1 norm").
summary_rows = (
    ("mean score:\t", np.mean, "score"),
    ("min score:\t", np.min, "score"),
    ("max score:\t", np.max, "score"),
    ("mean f1 norm:\t", np.mean, "f1 norm"),
)
for label, reducer, column in summary_rows:
    print(label, reducer(results[column]))

mean score:	 0.9928212883677611
min score:	 0.9924649981300349
max score:	 0.9931523838811087
mean f1 norm:	 0.9653891780068171


**Test all signals**

In [11]:
# Score the model separately on each signal, so that weak recordings are not
# hidden by the pooled average.
signal_metrics = []

for signal in signals:
    # signal.get_data() supplies the inputs and the expected labels for the
    # whole signal (project helper - exact layout defined in utils).
    X_test, y_test = signal.get_data()
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    signal_metrics.append(
        {
            "score": accuracy,
            "error": 1 - accuracy,
            "f1 norm": f1_score(y_test, y_pred),
        }
    )

# Row k holds the metrics of signals[k]. Building the frame once avoids
# re-copying all previous rows on every iteration, as repeated pd.concat does.
results = pd.DataFrame(signal_metrics)

In [12]:
# Show the per-signal metrics (row k corresponds to signals[k]).
results

Unnamed: 0,score,error,f1 norm
0,0.973256,0.026744,0.520371
1,0.970877,0.029123,0.50269
2,0.958567,0.041433,0.921486
3,0.951141,0.048859,0.90738
4,0.992948,0.007052,0.870761
5,0.994795,0.005205,0.905387
6,0.972461,0.027539,0.853288
7,0.981246,0.018754,0.895025
8,0.99421,0.00579,0.995451
9,0.994446,0.005554,0.995607


In [13]:
# Summarise the per-signal metrics: average and worst accuracy ("score") and
# the average F1 ("f1 norm").
summary_rows = (
    ("mean score:\t", np.mean, "score"),
    ("min score:\t", np.min, "score"),
    ("mean f1 norm:\t", np.mean, "f1 norm"),
)
for label, reducer, column in summary_rows:
    print(label, reducer(results[column]))

mean score:	 0.9696345063702908
min score:	 0.8020210465095146
mean f1 norm:	 0.9087377278095665


# Download database №2

In [14]:
# Database №2: MIT-BIH Malignant Ventricular Ectopy (PhysioNet zip archive).
name = "MIT-BIH-Malignant-Ventricular-Ectopy"
url = "https://physionet.org/static/published-projects/vfdb/mit-bih-malignant-ventricular-ectopy-database-1.0.0.zip"
data_dir = "../../analyse/data/"

# get_db / get_signals are project helpers from utils.download_db.
# NOTE(review): presumably get_db fetches the archive into `data_dir` and
# returns the local database path, and reload=False reuses previously
# prepared signals - confirm against utils/download_db.py.
# `signals` is rebound here: from this cell on it refers to database №2.
db_path = ddb.get_db(url, name, data_dir)
signals = ddb.get_signals(db_path, reload=False)

In [15]:
# Flatten every window of every signal into one (metrics, has_defect) pair,
# keeping signal order and, within a signal, window order.
window_records = [window.get_data() for sig in signals for window in sig.windows]

# Split the pairs into the feature rows and the matching labels.
windows = [metrics for metrics, _ in window_records]
classification = [has_defect for _, has_defect in window_records]

# Row k of `classification_pd` is the label for row k of `windows_pd`.
windows_pd = pd.DataFrame(windows)
classification_pd = pd.DataFrame(classification)

In [16]:
# Preview the per-window feature table (one row per window; pandas elides the
# middle rows and columns of large frames in the rendered output).
windows_pd

Unnamed: 0,median,mean,variance,mean_abs,max,min,sum,AAA,AAB,AAC,...,BCC,CAA,CAB,CAC,CBA,CBB,CBC,CCA,CCB,CCC
0,-0.042558,0.110940,0.314108,0.389761,1.261905,-0.413043,3.328204,3,0,2,...,3,3,1,0,0,0,6,0,3,0
1,-0.021796,0.122727,0.306837,0.377974,1.261905,-0.413043,3.681810,4,0,2,...,3,3,1,0,0,0,5,0,3,0
2,-0.021796,0.082418,0.262118,0.337665,1.259259,-0.413043,2.472537,5,0,2,...,3,3,1,0,0,0,5,0,3,0
3,-0.021796,0.090877,0.257901,0.329206,1.259259,-0.413043,2.726296,6,0,2,...,3,2,1,0,0,0,5,0,3,0
4,-0.032246,0.076556,0.266664,0.343527,1.259259,-0.437037,2.296667,5,0,3,...,3,2,1,0,0,0,5,0,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137633,0.029740,0.144267,0.373776,0.458281,1.949367,-0.658747,4.328006,0,0,0,...,3,0,0,0,0,4,4,0,5,0
137634,-0.048210,0.124958,0.378138,0.456837,1.949367,-0.658747,3.748738,0,0,0,...,2,0,0,0,0,4,5,0,5,0
137635,0.029740,0.153716,0.374756,0.462206,1.949367,-0.658747,4.611483,0,0,0,...,2,0,0,0,0,4,5,0,4,0
137636,0.029740,0.152094,0.376591,0.463828,1.949367,-0.658747,4.562824,0,0,0,...,2,0,0,0,0,4,5,0,4,0


In [17]:
# Preview the per-window labels (single boolean column, same row order as
# `windows_pd`).
classification_pd

Unnamed: 0,0
0,False
1,False
2,False
3,False
4,False
...,...
137633,True
137634,True
137635,True
137636,True


# Tests (database №2)

**Random**

In [18]:
# Evaluate the pre-trained model on 100 random 10% samples of the window
# table. Each split is seeded with its iteration number, so re-running the
# cell draws the same 100 subsets.
split_metrics = []

for seed in range(100):
    # Only the held-out 10% is used; the 90% "train" part is discarded.
    _, X_test, _, y_test = train_test_split(
        windows_pd, classification_pd, train_size=0.9, random_state=seed
    )

    # Predict once and derive both metrics from the same predictions. For
    # scikit-learn style classifiers `model.score(X, y)` is the accuracy of
    # `model.predict(X)`, so calling it would only repeat the prediction.
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    split_metrics.append(
        {
            "score": accuracy,
            "error": 1 - accuracy,
            "f1 norm": f1_score(y_test, y_pred),
        }
    )

# One row per seed (row label == seed). Building the frame once avoids
# re-copying all previous rows on every iteration, as repeated pd.concat does.
results = pd.DataFrame(split_metrics)


In [19]:
# Show the per-split metrics collected by the random-split evaluation.
results

Unnamed: 0,score,error,f1 norm
0,0.750894,0.249106,0.802611
1,0.751790,0.248210,0.806508
2,0.746578,0.253422,0.798655
3,0.749945,0.250055,0.797632
4,0.752281,0.247719,0.802016
...,...,...,...
95,0.748320,0.251680,0.802578
96,0.742620,0.257380,0.795448
97,0.759919,0.240081,0.807678
98,0.748956,0.251044,0.797512


In [20]:
# Summarise the random-split metrics: spread of the accuracy ("score") and the
# average F1 ("f1 norm").
summary_rows = (
    ("mean score:\t", np.mean, "score"),
    ("min score:\t", np.min, "score"),
    ("max score:\t", np.max, "score"),
    ("mean f1 norm:\t", np.mean, "f1 norm"),
)
for label, reducer, column in summary_rows:
    print(label, reducer(results[column]))

mean score:	 0.7473216925562811
min score:	 0.7351834235068668
max score:	 0.7600389970899004
mean f1 norm:	 0.8003927763236177


**Test all signals**

In [21]:
# Score the model separately on each signal, so that weak recordings are not
# hidden by the pooled average.
signal_metrics = []

for signal in signals:
    # signal.get_data() supplies the inputs and the expected labels for the
    # whole signal (project helper - exact layout defined in utils).
    X_test, y_test = signal.get_data()
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    signal_metrics.append(
        {
            "score": accuracy,
            "error": 1 - accuracy,
            "f1 norm": f1_score(y_test, y_pred),
        }
    )

# Row k holds the metrics of signals[k]. Building the frame once avoids
# re-copying all previous rows on every iteration, as repeated pd.concat does.
results = pd.DataFrame(signal_metrics)

In [22]:
# Show the per-signal metrics (row k corresponds to signals[k]).
results

Unnamed: 0,score,error,f1 norm
0,0.912013,0.087987,0.888492
1,0.887635,0.112365,0.863194
2,0.869938,0.130062,0.894947
3,0.811045,0.188955,0.846154
4,0.616508,0.383492,0.470699
5,0.739893,0.260107,0.820242
6,0.69471,0.30529,0.665166
7,0.76166,0.23834,0.757273
8,0.865331,0.134669,0.865536
9,0.740514,0.259486,0.324841


In [23]:
# Summarise the per-signal metrics: average and worst accuracy ("score") and
# the average F1 ("f1 norm").
summary_rows = (
    ("mean score:\t", np.mean, "score"),
    ("min score:\t", np.min, "score"),
    ("mean f1 norm:\t", np.mean, "f1 norm"),
)
for label, reducer, column in summary_rows:
    print(label, reducer(results[column]))

mean score:	 0.7327153050708497
min score:	 0.19112814895947425
mean f1 norm:	 0.7747217416349463


# Test all signals at once

In [24]:
# Rebind `signals` to everything ddb.get_all_signals() returns.
# NOTE(review): presumably this is the union of all previously downloaded
# databases - confirm against utils/download_db.py.
signals = ddb.get_all_signals()

In [25]:
# Score the model separately on each signal of the combined collection, so
# that weak recordings are not hidden by the pooled average.
signal_metrics = []

for signal in signals:
    # signal.get_data() supplies the inputs and the expected labels for the
    # whole signal (project helper - exact layout defined in utils).
    X_test, y_test = signal.get_data()
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    signal_metrics.append(
        {
            "score": accuracy,
            "error": 1 - accuracy,
            "f1 norm": f1_score(y_test, y_pred),
        }
    )

# Row k holds the metrics of signals[k]. Building the frame once avoids
# re-copying all previous rows on every iteration, as repeated pd.concat does.
results = pd.DataFrame(signal_metrics)

In [26]:
# Show the per-signal metrics (row k corresponds to signals[k]).
results

Unnamed: 0,score,error,f1 norm
0,0.973256,0.026744,0.520371
1,0.970877,0.029123,0.502690
2,0.958567,0.041433,0.921486
3,0.951141,0.048859,0.907380
4,0.992948,0.007052,0.870761
...,...,...,...
85,0.985348,0.014652,0.982353
86,0.730044,0.269956,0.816496
87,0.726629,0.273371,0.810855
88,1.000000,0.000000,1.000000


In [27]:
# Summarise the per-signal metrics: average and worst accuracy ("score") and
# the average F1 ("f1 norm").
summary_rows = (
    ("mean score:\t", np.mean, "score"),
    ("min score:\t", np.min, "score"),
    ("mean f1 norm:\t", np.mean, "f1 norm"),
)
for label, reducer, column in summary_rows:
    print(label, reducer(results[column]))

mean score:	 0.8538073412905641
min score:	 0.19112814895947425
mean f1 norm:	 0.8432188012353076
