In [1]:
import sys
sys.path.append('../')

import numpy as np
import pandas as pd
import pickle
import logging

import utils.download_db as ddb
from utils.global_config import CONFIG

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

In [2]:
# Send every record of level DEBUG and above to a log file that is opened in
# write mode ('w'), using a "timestamp LEVEL: message" layout.
log_settings = {
    'filename': 'run-logs.log',
    'filemode': 'w',
    'encoding': 'utf-8',
    'level': logging.DEBUG,
    'format': '%(asctime)s %(levelname)s: %(message)s',
}
logging.basicConfig(**log_settings)

# Get model

In [3]:
# Restore the previously saved model object that the evaluation cells call
# `predict` / `score` on.
# NOTE: unpickling can execute arbitrary code, so only load model files that
# come from a trusted source.
model_filename = r'../models/RFClassifier.pickle'
with open(model_filename, mode='rb') as bin_file:
    model = pickle.load(bin_file)

# Download database №1

In [4]:
# Archive URL and local name of the first evaluation dataset (the URL points at
# the MIT-BIH Atrial Fibrillation database published on PhysioNet).
url = "https://physionet.org/static/published-projects/afdb/mit-bih-atrial-fibrillation-database-1.0.0.zip"
name = "MIT-BIH-AtrialFibrillation"

# NOTE(review): presumably fetches/unpacks the archive under the given data
# directory and returns its location - confirm in utils.download_db.get_db.
db_path = ddb.get_db(url, name, "../../analyse/data/")

# Signal objects for this database; later cells read `.windows` and call
# `.get_data()` on them.
# NOTE(review): `reload=False` looks like it reuses already prepared data - confirm.
signals = ddb.get_signals(db_path, reload=False)

In [5]:
# Walk every window of every signal and collect its metrics and its defect
# flag into two parallel lists, then wrap each list in a DataFrame.
windows, classification = [], []
for sig in signals:
    for metrics, has_defect in (window.get_data() for window in sig.windows):
        windows.append(metrics)
        classification.append(has_defect)

# One row per window in both tables; rows line up by position.
windows_pd = pd.DataFrame(windows)
classification_pd = pd.DataFrame(classification)

In [6]:
# Display the per-window metrics table (one row per collected window).
windows_pd

Unnamed: 0,median,mean,variance,mean_abs,max,min,sum,AAA,AAB,AAC,...,BCC,CAA,CAB,CAC,CBA,CBB,CBC,CCA,CCB,CCC
0,-0.002326,0.055419,0.099190,0.222344,0.923729,-0.420833,1.662562,4,0,2,...,0,2,0,0,1,4,0,0,0,0
1,-0.004695,0.038917,0.093349,0.207399,0.923729,-0.420833,1.167500,5,0,2,...,0,2,0,0,1,4,0,0,0,0
2,-0.004695,0.040613,0.091894,0.205703,0.923729,-0.420833,1.218393,5,0,3,...,0,2,0,0,1,3,0,0,0,0
3,-0.004695,0.038467,0.091732,0.203557,0.923729,-0.420833,1.154017,5,0,3,...,0,2,0,0,1,3,0,0,0,0
4,-0.004785,0.015771,0.082734,0.188534,0.923729,-0.420833,0.473120,5,0,3,...,0,2,0,1,1,3,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2294577,-0.224906,2.857417,155.126511,3.605519,61.406250,-1.000000,68.578016,0,0,0,...,2,0,1,1,0,2,2,1,2,5
2294578,-0.236220,2.727775,149.324823,3.476644,61.406250,-1.000000,68.194373,0,0,0,...,2,0,1,1,0,2,2,1,2,5
2294579,-0.213592,5.590630,328.090512,6.307107,71.166466,-1.000000,139.765746,0,0,0,...,2,0,1,0,0,2,2,1,2,5
2294580,-0.236220,5.551042,328.571580,6.346694,71.166466,-1.000000,138.776056,0,0,0,...,2,0,1,0,0,2,3,1,2,5


In [7]:
# Display the label table: one `has_defect` flag per collected window.
classification_pd

Unnamed: 0,0
0,False
1,False
2,False
3,False
4,False
...,...
2294577,True
2294578,True
2294579,True
2294580,True


# Tests

**Random**

In [8]:
# Evaluate the loaded model on repeated random hold-out samples of the window
# table. Each iteration uses `random_state=i`, so every split is reproducible.
N_SPLITS = 100     # number of random hold-out samples to score
TRAIN_SIZE = 0.9   # fraction sent to the (unused) train side; the rest is tested

records = []
for i in range(N_SPLITS):
    # train_test_split returns [X_train, X_test, y_train, y_test]; only the
    # test halves are needed, so the large train halves are dropped right away
    # instead of being kept alive under a throwaway name.
    X_test, y_test = train_test_split(
        windows_pd, classification_pd, train_size=TRAIN_SIZE, random_state=i
    )[1::2]

    y_pred = model.predict(X_test)
    f1 = f1_score(y_test, y_pred)
    # Accuracy from the predictions already computed. For scikit-learn
    # classifiers `model.score` is the accuracy of `model.predict`, so calling
    # it would only repeat the prediction pass over the same rows.
    result = accuracy_score(y_test, y_pred)
    records.append({"score": result, "error": 1 - result, "f1 norm": f1})

# Build the frame once: concatenating inside the loop re-copies all previously
# collected rows on every iteration.
results = pd.DataFrame(records, columns=["score", "error", "f1 norm"])


In [9]:
# Display the per-split metrics table (score, error, f1 norm).
results

Unnamed: 0,score,error,f1 norm
0,0.986668,0.013332,0.953886
1,0.986621,0.013379,0.953159
2,0.986623,0.013377,0.953450
3,0.986487,0.013513,0.953509
4,0.986710,0.013290,0.953405
...,...,...,...
95,0.986625,0.013375,0.952847
96,0.986508,0.013492,0.953097
97,0.986310,0.013690,0.952542
98,0.986768,0.013232,0.953500


In [10]:
# Summarise the collected runs: average, worst and best accuracy, plus the
# average F1 value.
scores = results["score"]
f1_values = results["f1 norm"]

print("mean score:\t", scores.mean())
print("min score:\t", scores.min())
print("max score:\t", scores.max())
print("mean f1 norm:\t", f1_values.mean())

mean score:	 0.9866175007773598
min score:	 0.9861429172211156
max score:	 0.9870441299999746
mean f1 norm:	 0.9532576514229788


**Test all signals**

In [11]:
# Evaluate the model on each signal separately: one row of metrics per signal,
# in the order the signals are stored.
records = []
for signal in signals:
    X_test, y_test = signal.get_data()
    y_pred = model.predict(X_test)
    f1 = f1_score(y_test, y_pred)
    result = accuracy_score(y_test, y_pred)
    records.append({"score": result, "error": 1 - result, "f1 norm": f1})

# Build the frame once: concatenating inside the loop re-copies all previously
# collected rows on every iteration. The default 0..n-1 index matches the
# enumerate-based index used before.
results = pd.DataFrame(records, columns=["score", "error", "f1 norm"])

In [12]:
# Display the per-signal metrics table (score, error, f1 norm).
results

Unnamed: 0,score,error,f1 norm
0,0.928788,0.071212,0.291406
1,0.938198,0.061802,0.325639
2,0.942716,0.057284,0.889768
3,0.942639,0.057361,0.890051
4,0.992323,0.007677,0.855936
5,0.990338,0.009662,0.82554
6,0.944603,0.055397,0.742415
7,0.9391,0.0609,0.722986
8,0.994374,0.005626,0.995572
9,0.994609,0.005391,0.995729


In [13]:
# Summarise the per-signal runs: average and worst accuracy, plus the average
# F1 value.
scores = results["score"]
f1_values = results["f1 norm"]

print("mean score:\t", scores.mean())
print("min score:\t", scores.min())
print("mean f1 norm:\t", f1_values.mean())

mean score:	 0.958611395338953
min score:	 0.7969320804783628
mean f1 norm:	 0.8557799651971602


# Download database №2

In [14]:
# Archive URL and local name of the second evaluation dataset (the URL points
# at the MIT-BIH Malignant Ventricular Ectopy database published on PhysioNet).
url = "https://physionet.org/static/published-projects/vfdb/mit-bih-malignant-ventricular-ectopy-database-1.0.0.zip"
name = "MIT-BIH-Malignant-Ventricular-Ectopy"

# NOTE(review): presumably fetches/unpacks the archive under the given data
# directory and returns its location - confirm in utils.download_db.get_db.
db_path = ddb.get_db(url, name, "../../analyse/data/")

# Rebinds `signals` to the second database's signals, replacing the earlier value.
# NOTE(review): `reload=False` looks like it reuses already prepared data - confirm.
signals = ddb.get_signals(db_path, reload=False)

In [15]:
# Walk every window of every signal and collect its metrics and its defect
# flag into two parallel lists, then wrap each list in a DataFrame.
windows, classification = [], []
for sig in signals:
    for metrics, has_defect in (window.get_data() for window in sig.windows):
        windows.append(metrics)
        classification.append(has_defect)

# One row per window in both tables; rows line up by position.
windows_pd = pd.DataFrame(windows)
classification_pd = pd.DataFrame(classification)

In [16]:
# Display the per-window metrics table (one row per collected window).
windows_pd

Unnamed: 0,median,mean,variance,mean_abs,max,min,sum,AAA,AAB,AAC,...,BCC,CAA,CAB,CAC,CBA,CBB,CBC,CCA,CCB,CCC
0,-0.042558,0.110940,0.314108,0.389761,1.261905,-0.413043,3.328204,3,0,2,...,3,3,1,0,0,0,6,0,3,0
1,-0.021796,0.122727,0.306837,0.377974,1.261905,-0.413043,3.681810,4,0,2,...,3,3,1,0,0,0,5,0,3,0
2,-0.021796,0.082418,0.262118,0.337665,1.259259,-0.413043,2.472537,5,0,2,...,3,3,1,0,0,0,5,0,3,0
3,-0.021796,0.090877,0.257901,0.329206,1.259259,-0.413043,2.726296,6,0,2,...,3,2,1,0,0,0,5,0,3,0
4,-0.032246,0.076556,0.266664,0.343527,1.259259,-0.437037,2.296667,5,0,3,...,3,2,1,0,0,0,5,0,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137633,0.029740,0.144267,0.373776,0.458281,1.949367,-0.658747,4.328006,0,0,0,...,3,0,0,0,0,4,4,0,5,0
137634,-0.048210,0.124958,0.378138,0.456837,1.949367,-0.658747,3.748738,0,0,0,...,2,0,0,0,0,4,5,0,5,0
137635,0.029740,0.153716,0.374756,0.462206,1.949367,-0.658747,4.611483,0,0,0,...,2,0,0,0,0,4,5,0,4,0
137636,0.029740,0.152094,0.376591,0.463828,1.949367,-0.658747,4.562824,0,0,0,...,2,0,0,0,0,4,5,0,4,0


In [17]:
# Display the label table: one `has_defect` flag per collected window.
classification_pd

Unnamed: 0,0
0,False
1,False
2,False
3,False
4,False
...,...
137633,True
137634,True
137635,True
137636,True


# Tests

**Random**

In [18]:
# Evaluate the loaded model on repeated random hold-out samples of the window
# table. Each iteration uses `random_state=i`, so every split is reproducible.
N_SPLITS = 100     # number of random hold-out samples to score
TRAIN_SIZE = 0.9   # fraction sent to the (unused) train side; the rest is tested

records = []
for i in range(N_SPLITS):
    # train_test_split returns [X_train, X_test, y_train, y_test]; only the
    # test halves are needed, so the large train halves are dropped right away
    # instead of being kept alive under a throwaway name.
    X_test, y_test = train_test_split(
        windows_pd, classification_pd, train_size=TRAIN_SIZE, random_state=i
    )[1::2]

    y_pred = model.predict(X_test)
    f1 = f1_score(y_test, y_pred)
    # Accuracy from the predictions already computed. For scikit-learn
    # classifiers `model.score` is the accuracy of `model.predict`, so calling
    # it would only repeat the prediction pass over the same rows.
    result = accuracy_score(y_test, y_pred)
    records.append({"score": result, "error": 1 - result, "f1 norm": f1})

# Build the frame once: concatenating inside the loop re-copies all previously
# collected rows on every iteration.
results = pd.DataFrame(records, columns=["score", "error", "f1 norm"])


In [19]:
# Display the per-split metrics table (score, error, f1 norm).
results

Unnamed: 0,score,error,f1 norm
0,0.690940,0.309060,0.757051
1,0.691449,0.308551,0.761166
2,0.686737,0.313263,0.752904
3,0.687367,0.312633,0.755533
4,0.691401,0.308599,0.757090
...,...,...,...
95,0.682444,0.317556,0.752748
96,0.677773,0.322227,0.750295
97,0.696484,0.303516,0.762852
98,0.682656,0.317344,0.750085


In [20]:
# Summarise the collected runs: average, worst and best accuracy, plus the
# average F1 value.
scores = results["score"]
f1_values = results["f1 norm"]

print("mean score:\t", scores.mean())
print("min score:\t", scores.min())
print("max score:\t", scores.max())
print("mean f1 norm:\t", f1_values.mean())

mean score:	 0.6846244652800024
min score:	 0.6717668834654618
max score:	 0.6977442078066911
mean f1 norm:	 0.7550256692753814


**Test all signals**

In [21]:
# Evaluate the model on each signal separately: one row of metrics per signal,
# in the order the signals are stored.
records = []
for signal in signals:
    X_test, y_test = signal.get_data()
    y_pred = model.predict(X_test)
    f1 = f1_score(y_test, y_pred)
    result = accuracy_score(y_test, y_pred)
    records.append({"score": result, "error": 1 - result, "f1 norm": f1})

# Build the frame once: concatenating inside the loop re-copies all previously
# collected rows on every iteration. The default 0..n-1 index matches the
# enumerate-based index used before.
results = pd.DataFrame(records, columns=["score", "error", "f1 norm"])

In [22]:
# Display the per-signal metrics table (score, error, f1 norm).
results

Unnamed: 0,score,error,f1 norm
0,0.770296,0.229704,0.731265
1,0.763987,0.236013,0.724327
2,0.803219,0.196781,0.847362
3,0.79674,0.20326,0.835709
4,0.629416,0.370584,0.462826
5,0.619375,0.380625,0.734997
6,0.703374,0.296626,0.673689
7,0.701629,0.298371,0.675879
8,0.872179,0.127821,0.872276
9,0.754794,0.245206,0.386108


In [23]:
# Summarise the per-signal runs: average and worst accuracy, plus the average
# F1 value.
scores = results["score"]
f1_values = results["f1 norm"]

print("mean score:\t", scores.mean())
print("min score:\t", scores.min())
print("mean f1 norm:\t", f1_values.mean())

mean score:	 0.6808468080273133
min score:	 0.18072289156626506
mean f1 norm:	 0.7244945748641206


# Test all signals at once

In [24]:
# Rebind `signals` to whatever ddb.get_all_signals() returns.
# NOTE(review): presumably the signals of every downloaded database combined -
# confirm in utils.download_db.
signals = ddb.get_all_signals()

In [25]:
# Evaluate the model on each signal separately: one row of metrics per signal,
# in the order the signals are stored.
records = []
for signal in signals:
    X_test, y_test = signal.get_data()
    y_pred = model.predict(X_test)
    f1 = f1_score(y_test, y_pred)
    result = accuracy_score(y_test, y_pred)
    records.append({"score": result, "error": 1 - result, "f1 norm": f1})

# Build the frame once: concatenating inside the loop re-copies all previously
# collected rows on every iteration. The default 0..n-1 index matches the
# enumerate-based index used before.
results = pd.DataFrame(records, columns=["score", "error", "f1 norm"])

In [26]:
# Display the per-signal metrics table (score, error, f1 norm).
results

Unnamed: 0,score,error,f1 norm
0,0.928788,0.071212,0.291406
1,0.938198,0.061802,0.325639
2,0.942716,0.057284,0.889768
3,0.942639,0.057361,0.890051
4,0.992323,0.007677,0.855936
...,...,...,...
85,0.920228,0.079772,0.910909
86,0.789840,0.210160,0.857143
87,0.782955,0.217045,0.850589
88,0.988439,0.011561,0.994186


In [27]:
# Summarise the per-signal runs: average and worst accuracy, plus the average
# F1 value.
scores = results["score"]
f1_values = results["f1 norm"]

print("mean score:\t", scores.mean())
print("min score:\t", scores.min())
print("mean f1 norm:\t", f1_values.mean())

mean score:	 0.822815374875485
min score:	 0.18072289156626506
mean f1 norm:	 0.7915959965898963
