In [2]:
import logging
import pickle
import sys

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Make the parent directory importable before pulling in the local `utils` package.
sys.path.append('../')

import utils.download_db as ddb
from utils.global_config import CONFIG

In [3]:
# Send every record (DEBUG and above) to a UTF-8 log file; filemode='w'
# truncates the file each time this cell configures logging.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(levelname)s: %(message)s',
    filename='run-logs.log',
    filemode='w',
    encoding='utf-8',
)

# Download data

In [4]:
# Archive of the MIT-BIH Atrial Fibrillation Database published on PhysioNet.
url = "https://physionet.org/static/published-projects/afdb/mit-bih-atrial-fibrillation-database-1.0.0.zip"
# Name handed to get_db together with the URL — presumably the local dataset
# folder name; TODO confirm in utils.download_db.
name = "MIT-BIH-AtrialFibrillation"

# NOTE(review): get_db presumably fetches/unpacks the archive under the given
# directory and returns the local path — confirm against utils.download_db.
db_path = ddb.get_db(url, name, "../../analyse/data/")

# NOTE(review): reload=False presumably reuses previously parsed signals
# instead of rebuilding them — confirm. Each returned signal exposes `.windows`.
signals = ddb.get_signals(db_path, reload=False)

In [7]:
# Evaluate every window of every signal exactly once, in iteration order.
# Each call returns a (metrics, has_defect) pair.
window_data = [window.get_data() for sig in signals for window in sig.windows]

# Separate the pairs into model inputs and labels; row i of both frames
# refers to the same window.
windows = [metrics for metrics, _ in window_data]
classification = [has_defect for _, has_defect in window_data]

# One row per window. NOTE(review): `metrics` is presumably a mapping of
# feature name -> value (the frame gets named columns) — confirm in get_data().
windows_pd = pd.DataFrame(windows)
# Single-column frame of labels, aligned with windows_pd by position.
classification_pd = pd.DataFrame(classification)

In [8]:
# Display the per-window feature table (one row per window; pandas elides the middle rows).
windows_pd

Unnamed: 0,median,mean,variance,mean_abs,max,min,sum,AAA,AAB,AAC,...,BCC,CAA,CAB,CAC,CBA,CBB,CBC,CCA,CCB,CCC
0,-0.002326,0.055419,0.099190,0.222344,0.923729,-0.420833,1.662562,4,0,2,...,0,2,0,0,1,4,0,0,0,0
1,-0.004695,0.038917,0.093349,0.207399,0.923729,-0.420833,1.167500,5,0,2,...,0,2,0,0,1,4,0,0,0,0
2,-0.004695,0.040613,0.091894,0.205703,0.923729,-0.420833,1.218393,5,0,3,...,0,2,0,0,1,3,0,0,0,0
3,-0.004695,0.038467,0.091732,0.203557,0.923729,-0.420833,1.154017,5,0,3,...,0,2,0,0,1,3,0,0,0,0
4,-0.004785,0.015771,0.082734,0.188534,0.923729,-0.420833,0.473120,5,0,3,...,0,2,0,1,1,3,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2294577,-0.224906,2.857417,155.126511,3.605519,61.406250,-1.000000,68.578016,0,0,0,...,2,0,1,1,0,2,2,1,2,5
2294578,-0.236220,2.727775,149.324823,3.476644,61.406250,-1.000000,68.194373,0,0,0,...,2,0,1,1,0,2,2,1,2,5
2294579,-0.213592,5.590630,328.090512,6.307107,71.166466,-1.000000,139.765746,0,0,0,...,2,0,1,0,0,2,2,1,2,5
2294580,-0.236220,5.551042,328.571580,6.346694,71.166466,-1.000000,138.776056,0,0,0,...,2,0,1,0,0,2,3,1,2,5


In [9]:
# Display the labels: the has_defect flag returned by window.get_data(), one row per window.
classification_pd

Unnamed: 0,0
0,False
1,False
2,False
3,False
4,False
...,...
2294577,True
2294578,True
2294579,True
2294580,True


# Load the pickled model

In [18]:
# Restore the previously saved classifier from disk.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load model files that were produced by this project.
model_filename = r'../models/XGBClassifier.pickle'
with open(model_filename, mode='rb') as model_file:
    model = pickle.load(model_file)

# Score the model on 100 random 10% hold-out samples

In [38]:
# Number of random hold-out draws used to gauge how stable the score is.
N_SPLITS = 100
# Share of rows put in the (unused) train part; the remaining 10% is scored.
TRAIN_FRACTION = 0.9

# NOTE(review): `model` is loaded already fitted and is never refit here. If it
# was trained on windows from this same database, the hold-out samples below
# are not independent of the training data — confirm before reporting these
# numbers as test performance.
score_records = []
for seed in range(N_SPLITS):
    # Only the held-out part is needed; the seed makes each draw reproducible.
    _, X_test, _, y_test = train_test_split(
        windows_pd,
        classification_pd,
        train_size=TRAIN_FRACTION,
        random_state=seed,
    )
    score = model.score(X_test, y_test)
    score_records.append({"score": score, "error": 1 - score})

# Build the frame once from the collected records instead of calling
# pd.concat inside the loop, which re-copies all earlier rows on every pass.
# Row label i corresponds to random_state=i.
results = pd.DataFrame(score_records, columns=["score", "error"])


In [39]:
# Display model.score() ("score") and 1 - score ("error") for each split; the row label is the random seed used.
results

Unnamed: 0,score,error
0,0.999961,0.000039
1,0.999979,0.000021
2,0.999981,0.000019
3,0.999984,0.000016
4,0.999969,0.000031
...,...,...
95,0.999984,0.000016
96,0.999965,0.000035
97,0.999978,0.000022
98,0.999982,0.000018


In [42]:
# Summarise the spread of the per-split scores.
scores = results["score"]
print("mean:\t", scores.mean())
print("min:\t", scores.min())
print("max:\t", scores.max())

mean:	 0.9999781036165614
min:	 0.9998704701316643
max:	 0.9999903954633762
