In [1]:
import sys
sys.path.append('../')

import numpy as np
import pandas as pd
import pickle
import logging

import utils.download_db as ddb
from utils.global_config import CONFIG

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

In [2]:
# Root-logger configuration for this notebook: every record from DEBUG upwards
# goes to run-logs.log, which is opened in 'w' mode (started afresh on each run).
log_settings = dict(
    level=logging.DEBUG,
    format='%(asctime)s %(levelname)s: %(message)s',
    filename='run-logs.log',
    filemode='w',
    encoding='utf-8',
)
logging.basicConfig(**log_settings)

# Get model

In [3]:
# Restore the previously trained classifier from its pickle file.
# NOTE(review): unpickling can execute arbitrary code stored in the file,
# so only load model files that come from a trusted source.
model_filename = r'../models/XGBClassifier.pickle'
with open(model_filename, mode='rb') as model_file:
    model = pickle.loads(model_file.read())

# Download database №1

In [4]:
# Source archive and local name of the first evaluation database.
url, name = (
    "https://physionet.org/static/published-projects/afdb/mit-bih-atrial-fibrillation-database-1.0.0.zip",
    "MIT-BIH-AtrialFibrillation",
)

# ddb.get_db presumably fetches the archive into the given data directory and
# returns where it ended up - confirm against utils.download_db.
db_path = ddb.get_db(url, name, "../../analyse/data/")
# reload=False presumably reuses already prepared signals - TODO confirm.
signals = ddb.get_signals(db_path, reload=False)

In [5]:
# Walk every window of every signal once; each window yields a
# (metrics, has_defect) pair from get_data().
window_data = [window.get_data() for sig in signals for window in sig.windows]

# Split the pairs into the feature rows and the matching labels, keeping order.
windows = [metrics for metrics, _ in window_data]
classification = [has_defect for _, has_defect in window_data]

windows_pd = pd.DataFrame(windows)
classification_pd = pd.DataFrame(classification)

In [6]:
windows_pd

Unnamed: 0,median,mean,variance,mean_abs,max,min,sum,AAA,AAB,AAC,...,BCC,CAA,CAB,CAC,CBA,CBB,CBC,CCA,CCB,CCC
0,-0.002326,0.055419,0.099190,0.222344,0.923729,-0.420833,1.662562,4,0,2,...,0,2,0,0,1,4,0,0,0,0
1,-0.004695,0.038917,0.093349,0.207399,0.923729,-0.420833,1.167500,5,0,2,...,0,2,0,0,1,4,0,0,0,0
2,-0.004695,0.040613,0.091894,0.205703,0.923729,-0.420833,1.218393,5,0,3,...,0,2,0,0,1,3,0,0,0,0
3,-0.004695,0.038467,0.091732,0.203557,0.923729,-0.420833,1.154017,5,0,3,...,0,2,0,0,1,3,0,0,0,0
4,-0.004785,0.015771,0.082734,0.188534,0.923729,-0.420833,0.473120,5,0,3,...,0,2,0,1,1,3,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2294577,-0.224906,2.857417,155.126511,3.605519,61.406250,-1.000000,68.578016,0,0,0,...,2,0,1,1,0,2,2,1,2,5
2294578,-0.236220,2.727775,149.324823,3.476644,61.406250,-1.000000,68.194373,0,0,0,...,2,0,1,1,0,2,2,1,2,5
2294579,-0.213592,5.590630,328.090512,6.307107,71.166466,-1.000000,139.765746,0,0,0,...,2,0,1,0,0,2,2,1,2,5
2294580,-0.236220,5.551042,328.571580,6.346694,71.166466,-1.000000,138.776056,0,0,0,...,2,0,1,0,0,2,3,1,2,5


In [7]:
classification_pd

Unnamed: 0,0
0,False
1,False
2,False
3,False
4,False
...,...
2294577,True
2294578,True
2294579,True
2294580,True


# Tests

**Random subsets** (100 random 10% samples of all windows, one per seed)

In [8]:
# Score the model on 100 random 10 % samples of the windows, one per seed.
# Only the held-out part of each split is used; the 90 % "train" part is dropped.
records = []

for i in range(100):
    _, X_test, _, y_test = train_test_split(windows_pd, classification_pd, train_size=0.9, random_state=i)
    y_pred = model.predict(X_test)
    f1 = f1_score(y_test, y_pred)
    # Accuracy is computed from the predictions we already have. model.score()
    # on a scikit-learn style classifier is the same accuracy, but it would run
    # predict() on X_test a second time.
    result = accuracy_score(y_test, y_pred)
    records.append({"score": result, "error": 1 - result, "f1 norm": f1})

# Build the frame once: pd.concat inside the loop re-copies all earlier rows on
# every iteration. The default 0..99 index matches the seed used for each row.
results = pd.DataFrame(records, columns=["score", "error", "f1 norm"])


In [9]:
results

Unnamed: 0,score,error,f1 norm
0,0.994738,0.005262,0.971831
1,0.994680,0.005320,0.972002
2,0.994557,0.005443,0.971998
3,0.994672,0.005328,0.971725
4,0.994852,0.005148,0.972142
...,...,...,...
95,0.994558,0.005442,0.971667
96,0.994665,0.005335,0.971674
97,0.994656,0.005344,0.971618
98,0.994747,0.005253,0.971820


In [10]:
# Summary statistics over all evaluation rows collected in `results`.
score_col = results["score"]
f1_col = results["f1 norm"]

for label, value in (
    ("mean score:\t", score_col.mean()),
    ("min score:\t", score_col.min()),
    ("max score:\t", score_col.max()),
    ("mean f1 norm:\t", f1_col.mean()),
):
    print(label, value)

mean score:	 0.9947382043820501
min score:	 0.9944240244192761
max score:	 0.9949863977996444
mean f1 norm:	 0.9718269331788788


**Test all signals**

In [11]:
# Evaluate the model on each signal separately: one row of metrics per signal,
# in the order the signals are stored.
records = []

for signal in signals:
    X_test, y_test = signal.get_data()
    y_pred = model.predict(X_test)
    f1 = f1_score(y_test, y_pred)
    result = accuracy_score(y_test, y_pred)
    records.append({"score": result, "error": 1 - result, "f1 norm": f1})

# Build the frame once instead of growing it with pd.concat per iteration
# (which re-copies all earlier rows each time). Row i belongs to signals[i].
# Naming the columns keeps them present even when `signals` is empty.
results = pd.DataFrame(records, columns=["score", "error", "f1 norm"])

In [12]:
results

Unnamed: 0,score,error,f1 norm
0,0.915383,0.084617,0.269798
1,0.967613,0.032387,0.484414
2,0.970116,0.029884,0.943905
3,0.970034,0.029966,0.943865
4,0.990872,0.009128,0.84395
5,0.987275,0.012725,0.795026
6,0.984645,0.015355,0.913485
7,0.952967,0.047033,0.772162
8,0.998173,0.001827,0.998565
9,0.996304,0.003696,0.99708


In [13]:
# Summary statistics over the per-signal rows collected in `results`.
score_col = results["score"]

for label, value in (
    ("mean score:\t", score_col.mean()),
    ("min score:\t", score_col.min()),
    ("mean f1 norm:\t", results["f1 norm"].mean()),
):
    print(label, value)

mean score:	 0.9744991503414784
min score:	 0.8858799367514222
mean f1 norm:	 0.8939142754567536


# Download database №2

In [14]:
# Source archive and local name of the second evaluation database.
url, name = (
    "https://physionet.org/static/published-projects/vfdb/mit-bih-malignant-ventricular-ectopy-database-1.0.0.zip",
    "MIT-BIH-Malignant-Ventricular-Ectopy",
)

# ddb.get_db presumably fetches the archive into the given data directory and
# returns where it ended up - confirm against utils.download_db.
db_path = ddb.get_db(url, name, "../../analyse/data/")
# Rebinds `signals`: from here on the cells work on this database.
# reload=False presumably reuses already prepared signals - TODO confirm.
signals = ddb.get_signals(db_path, reload=False)

In [15]:
# Walk every window of every signal once; each window yields a
# (metrics, has_defect) pair from get_data().
window_data = [window.get_data() for sig in signals for window in sig.windows]

# Split the pairs into the feature rows and the matching labels, keeping order.
windows = [metrics for metrics, _ in window_data]
classification = [has_defect for _, has_defect in window_data]

windows_pd = pd.DataFrame(windows)
classification_pd = pd.DataFrame(classification)

In [16]:
windows_pd

Unnamed: 0,median,mean,variance,mean_abs,max,min,sum,AAA,AAB,AAC,...,BCC,CAA,CAB,CAC,CBA,CBB,CBC,CCA,CCB,CCC
0,-0.042558,0.110940,0.314108,0.389761,1.261905,-0.413043,3.328204,3,0,2,...,3,3,1,0,0,0,6,0,3,0
1,-0.021796,0.122727,0.306837,0.377974,1.261905,-0.413043,3.681810,4,0,2,...,3,3,1,0,0,0,5,0,3,0
2,-0.021796,0.082418,0.262118,0.337665,1.259259,-0.413043,2.472537,5,0,2,...,3,3,1,0,0,0,5,0,3,0
3,-0.021796,0.090877,0.257901,0.329206,1.259259,-0.413043,2.726296,6,0,2,...,3,2,1,0,0,0,5,0,3,0
4,-0.032246,0.076556,0.266664,0.343527,1.259259,-0.437037,2.296667,5,0,3,...,3,2,1,0,0,0,5,0,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137633,0.029740,0.144267,0.373776,0.458281,1.949367,-0.658747,4.328006,0,0,0,...,3,0,0,0,0,4,4,0,5,0
137634,-0.048210,0.124958,0.378138,0.456837,1.949367,-0.658747,3.748738,0,0,0,...,2,0,0,0,0,4,5,0,5,0
137635,0.029740,0.153716,0.374756,0.462206,1.949367,-0.658747,4.611483,0,0,0,...,2,0,0,0,0,4,5,0,4,0
137636,0.029740,0.152094,0.376591,0.463828,1.949367,-0.658747,4.562824,0,0,0,...,2,0,0,0,0,4,5,0,4,0


In [17]:
classification_pd

Unnamed: 0,0
0,False
1,False
2,False
3,False
4,False
...,...
137633,True
137634,True
137635,True
137636,True


# Tests

**Random subsets** (100 random 10% samples of all windows, one per seed)

In [18]:
# Score the model on 100 random 10 % samples of the windows, one per seed.
# Only the held-out part of each split is used; the 90 % "train" part is dropped.
records = []

for i in range(100):
    _, X_test, _, y_test = train_test_split(windows_pd, classification_pd, train_size=0.9, random_state=i)
    y_pred = model.predict(X_test)
    f1 = f1_score(y_test, y_pred)
    # Accuracy is computed from the predictions we already have. model.score()
    # on a scikit-learn style classifier is the same accuracy, but it would run
    # predict() on X_test a second time.
    result = accuracy_score(y_test, y_pred)
    records.append({"score": result, "error": 1 - result, "f1 norm": f1})

# Build the frame once: pd.concat inside the loop re-copies all earlier rows on
# every iteration. The default 0..99 index matches the seed used for each row.
results = pd.DataFrame(records, columns=["score", "error", "f1 norm"])


In [19]:
results

Unnamed: 0,score,error,f1 norm
0,0.852510,0.147490,0.818492
1,0.857414,0.142586,0.825419
2,0.855297,0.144703,0.816775
3,0.856443,0.143557,0.819519
4,0.861201,0.138799,0.822790
...,...,...,...
95,0.852380,0.147620,0.816847
96,0.855379,0.144621,0.821362
97,0.859072,0.140928,0.823925
98,0.851478,0.148522,0.817229


In [20]:
# Summary statistics over all evaluation rows collected in `results`.
score_col = results["score"]
f1_col = results["f1 norm"]

for label, value in (
    ("mean score:\t", score_col.mean()),
    ("min score:\t", score_col.min()),
    ("max score:\t", score_col.max()),
    ("mean f1 norm:\t", f1_col.mean()),
):
    print(label, value)

mean score:	 0.8542222350922373
min score:	 0.8474071445514679
max score:	 0.8612011425795518
mean f1 norm:	 0.8190517790588241


**Test all signals**

In [21]:
# Evaluate the model on each signal separately: one row of metrics per signal,
# in the order the signals are stored.
records = []

for signal in signals:
    X_test, y_test = signal.get_data()
    y_pred = model.predict(X_test)
    f1 = f1_score(y_test, y_pred)
    result = accuracy_score(y_test, y_pred)
    records.append({"score": result, "error": 1 - result, "f1 norm": f1})

# Build the frame once instead of growing it with pd.concat per iteration
# (which re-copies all earlier rows each time). Row i belongs to signals[i].
# Naming the columns keeps them present even when `signals` is empty.
results = pd.DataFrame(records, columns=["score", "error", "f1 norm"])

In [22]:
results

Unnamed: 0,score,error,f1 norm
0,0.935242,0.064758,0.918824
1,0.844382,0.155618,0.81662
2,0.877207,0.122793,0.902615
3,0.779774,0.220226,0.826247
4,0.737092,0.262908,0.665514
5,0.781846,0.218154,0.852425
6,0.796854,0.203146,0.797546
7,0.714573,0.285427,0.701934
8,0.898808,0.101192,0.901506
9,0.76173,0.23827,0.427451


In [23]:
# Summary statistics over the per-signal rows collected in `results`.
score_col = results["score"]

for label, value in (
    ("mean score:\t", score_col.mean()),
    ("min score:\t", score_col.min()),
    ("mean f1 norm:\t", results["f1 norm"].mean()),
):
    print(label, value)

mean score:	 0.762990907449632
min score:	 0.13355953115360888
mean f1 norm:	 0.793689692438507


# Test all signals at once

In [24]:
# Rebind `signals` to whatever ddb.get_all_signals() returns, so the evaluation
# below runs on this collection instead of the previously loaded database.
# NOTE(review): presumably this is every downloaded database combined - confirm
# against utils.download_db.
signals = ddb.get_all_signals()

In [25]:
# Evaluate the model on each signal separately: one row of metrics per signal,
# in the order the signals are stored.
records = []

for signal in signals:
    X_test, y_test = signal.get_data()
    y_pred = model.predict(X_test)
    f1 = f1_score(y_test, y_pred)
    result = accuracy_score(y_test, y_pred)
    records.append({"score": result, "error": 1 - result, "f1 norm": f1})

# Build the frame once instead of growing it with pd.concat per iteration
# (which re-copies all earlier rows each time). Row i belongs to signals[i].
# Naming the columns keeps them present even when `signals` is empty.
results = pd.DataFrame(records, columns=["score", "error", "f1 norm"])

In [26]:
results

Unnamed: 0,score,error,f1 norm
0,0.915383,0.084617,0.269798
1,0.967613,0.032387,0.484414
2,0.970116,0.029884,0.943905
3,0.970034,0.029966,0.943865
4,0.990872,0.009128,0.843950
...,...,...,...
85,0.943427,0.056573,0.935138
86,0.829318,0.170682,0.883101
87,0.869950,0.130050,0.909016
88,1.000000,0.000000,1.000000


In [27]:
# Summary statistics over the per-signal rows collected in `results`.
score_col = results["score"]

for label, value in (
    ("mean score:\t", score_col.mean()),
    ("min score:\t", score_col.min()),
    ("mean f1 norm:\t", results["f1 norm"].mean()),
):
    print(label, value)

mean score:	 0.8710951204832423
min score:	 0.13355953115360888
mean f1 norm:	 0.8449155904256107
