In [1]:
import sys
import os
import numpy as np
import pandas as pd
import pickle
import logging

# Make the project packages importable from this notebook's directory.
sys.path.extend(['../../analyse', '../../'])

from analyse.utils.global_config import GlobalConfig
from analyse.utils.download_db import (
    get_signals,
    get_db,
)

# Route log records of every level (DEBUG and above) to a UTF-8 log file.
# filemode='w' truncates the file each time this cell configures logging.
log_settings = dict(
    filename='run-logs.log',
    filemode='w',
    encoding='utf-8',
    level=logging.DEBUG,
    format='%(asctime)s %(levelname)s: %(message)s',
)
logging.basicConfig(**log_settings)

# Instantiate the project configuration from the JSON parameter file.
# A later cell reads values from it through GlobalConfig.get(...).
GlobalConfig(r'../../analyse/config/params.json')

<analyse.utils.global_config.GlobalConfig at 0x111000130>

In [43]:
# Obtain the MIT-BIH Malignant Ventricular Ectopy database and load its signals.
name = "MIT-BIH-Malignant-Ventricular-Ectopy"
url = "https://physionet.org/static/published-projects/vfdb/mit-bih-malignant-ventricular-ectopy-database-1.0.0.zip"
data_dir = "../../analyse/data/"

# get_db returns the location of the database; it is reused below as the record root.
db_path = get_db(url, name, data_dir)

# NOTE(review): reload=False presumably reuses previously parsed signals — confirm in download_db.
signals = get_signals(db_path, reload=False)

In [52]:
# Preprocessing: flatten every window of every signal into (metrics, label) pairs,
# then separate the pairs into a feature list and a label list.
window_samples = [
    window.get_data()
    for sig in signals
    for window in sig.windows
]
windows = [metrics for metrics, _ in window_samples]
classification = [has_defect for _, has_defect in window_samples]

# Total number of windows collected across all signals.
print(len(windows))

137638


In [53]:
# Build the feature table and the label table, then hold out 20% of the rows for testing.
from sklearn.model_selection import GridSearchCV, train_test_split
from xgboost import XGBClassifier

X = pd.DataFrame(windows)
y = pd.DataFrame(classification)

# A fixed random_state keeps the train/test partition identical between runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=21)
X_train, X_test, y_train, y_test = split_parts

# Show the held-out features (every column except the last) and the training-set size.
print(X_test.iloc[:, :-1].values)
print(len(X_train))

[[-2.19882729e-01 -1.41107676e-03  1.39524628e+00 ...  2.00000000e+00
   1.00000000e+00  2.00000000e+00]
 [ 0.00000000e+00 -6.55222008e-04  3.98165094e-04 ...  0.00000000e+00
   0.00000000e+00  0.00000000e+00]
 [-1.68241793e-02  1.03953335e-02  7.21503758e-02 ...  0.00000000e+00
   1.00000000e+00  0.00000000e+00]
 ...
 [-7.49074178e-03  6.84665386e-02  1.94494871e-01 ...  4.00000000e+00
   0.00000000e+00  0.00000000e+00]
 [-6.09756098e-02  3.29288199e-02  3.33371699e-01 ...  4.00000000e+00
   2.00000000e+00  3.00000000e+00]
 [-3.82072829e-02  1.83617180e-02  5.84644005e-02 ...  0.00000000e+00
   0.00000000e+00  0.00000000e+00]]
110110


In [None]:
# Hyper-parameter search: try every combination from the configured grid and
# rank the candidates by cross-validated ROC AUC.
xgb_estimator = XGBClassifier()
search_settings = {
    "estimator": xgb_estimator,
    "param_grid": GlobalConfig.get("est_params"),
    "scoring": 'roc_auc',
    "refit": True,   # retrain the best candidate on the full training set
    "n_jobs": -1,    # use all available cores
    "verbose": 4,
}
xgb_clsf = GridSearchCV(**search_settings)

# fit() returns the fitted search object itself, so both names refer to the same object.
xgb_clsf_model = xgb_clsf.fit(X_train, y_train)

Fitting 5 folds for each of 48 candidates, totalling 240 fits


In [None]:
# Caching model: persist the fitted grid-search object so it can be reused
# without repeating the search.
model_filename = "../../analyse/models/XGBClassifier_Malignant_Ventricular_Ectopy.pickle"

# Create the target directory if it is missing; otherwise open() raises
# FileNotFoundError and the result of the long-running search is not saved.
os.makedirs(os.path.dirname(model_filename), exist_ok=True)

# xgb_clsf_model is the fitted GridSearchCV wrapper (fit() returns the search object),
# so the whole search, not only the best estimator, is serialized.
# NOTE: only unpickle this file from a trusted location — pickle can execute arbitrary code on load.
with open(model_filename, 'wb') as bin_file:
    pickle.dump(
        xgb_clsf_model,
        file=bin_file,
        protocol=pickle.HIGHEST_PROTOCOL
    )

In [51]:
# Inspect one record (421) of the database: its samples, header and 'atr' annotations.
import wfdb

record_path = f"{db_path}/421"

# rdsamp yields the sample data and a header dictionary describing the record.
data, info = wfdb.rdsamp(record_path)
data = np.array(data)

# Attach the 'atr' annotation object to the header dictionary for convenience.
info['annotation'] = wfdb.rdann(record_path, 'atr')
print(info)

# Display the auxiliary note of every annotation as the cell output.
info['annotation'].aux_note

{'fs': 250, 'sig_len': 525000, 'n_sig': 2, 'base_date': None, 'base_time': None, 'units': ['mV', 'mV'], 'sig_name': ['ECG', 'ECG'], 'comments': [], 'annotation': <wfdb.io.annotation.Annotation object at 0x140eac880>}


['(N\x00',
 '(NOISE\x00',
 '(N\x00',
 '(NOISE\x00',
 '(N\x00',
 '(NOISE\x00',
 '(N\x00',
 '(NOISE\x00',
 '(N\x00',
 '(NOISE\x00',
 '(N\x00',
 '(NOISE\x00',
 '(N\x00',
 '(NOISE\x00',
 '(N\x00',
 '(NOISE\x00',
 '(N\x00',
 '(NOISE\x00',
 '(N\x00',
 '(NOISE\x00',
 '(N\x00',
 '(NOISE\x00',
 '(N\x00',
 '(NOISE\x00',
 '(N\x00',
 '(NOISE\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 '(NOISE\x00',
 '(N\x00',
 '(NOISE\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 '(VT\x00',
 '(N\x00',
 