In [1]:
import sys
import numpy as np
import pandas as pd
import pickle
import logging

# Make the project package importable from this notebook's directory:
# both the `analyse` folder and the repository root two levels up are
# appended (in that order) to the module search path.
sys.path.extend(['../../analyse', '../../'])

from analyse.utils.global_config import GlobalConfig
from analyse.utils.download_db import (
    get_signals,
    get_db,
)

# Send every record from DEBUG upwards to `run-logs.log`. The file is opened
# in 'w' mode, so a previous run's log is overwritten rather than appended to.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(levelname)s: %(message)s',
    filename='run-logs.log',
    filemode='w',
    encoding='utf-8',
)

# Build the project configuration from the JSON parameter file. The instance is
# not bound to a name; as the cell's last expression its repr is what the cell
# displays. NOTE(review): presumably GlobalConfig registers itself for later
# use by the `analyse` helpers -- confirm in analyse/utils/global_config.py.
GlobalConfig(r'../../analyse/config/params.json')

<analyse.utils.global_config.GlobalConfig at 0x103425cd0>

In [2]:
# Fetch the two PhysioNet MIT-BIH databases used in this notebook and load
# their signals. Each database is described by a (download URL, local name) pair.
name1, url1 = (
    "MIT-BIH-Malignant-Ventricular-Ectopy",
    "https://physionet.org/static/published-projects/vfdb/mit-bih-malignant-ventricular-ectopy-database-1.0.0.zip",
)
name2, url2 = (
    "MIT-BIH-AtrialFibrillation",
    "https://physionet.org/static/published-projects/afdb/mit-bih-atrial-fibrillation-database-1.0.0.zip",
)

# get_db is given the URL, the name and the shared data directory; its return
# value is passed on to get_signals as the database path.
db_path1, db_path2 = (
    get_db(db_url, db_name, "../../analyse/data/")
    for db_url, db_name in ((url1, name1), (url2, name2))
)

# reload=False is forwarded unchanged for both databases.
# NOTE(review): presumably this reuses previously loaded signals -- confirm in get_signals.
signals1, signals2 = (
    get_signals(db_path, reload=False)
    for db_path in (db_path1, db_path2)
)

In [9]:
# Split and preprocess each database's signals independently.
# Each call returns four objects, unpacked here as train inputs, train targets,
# test inputs and test targets (roles taken from the variable names -- confirm
# against split_preprocess_signals).
from analyse.utils.download_db import split_preprocess_signals

(X_train1, y_train1,
 X_test1, y_test1) = split_preprocess_signals(signals1)

(X_train2, y_train2,
 X_test2, y_test2) = split_preprocess_signals(signals2)

In [16]:
# Merge the two databases into a single train set and a single test set by
# stacking their rows (pd.concat's default axis). Original index labels are
# kept as-is, so labels may repeat across the two sources.
X_train = pd.concat((X_train1, X_train2))
y_train = pd.concat((y_train1, y_train2))

X_test = pd.concat((X_test1, X_test2))
y_test = pd.concat((y_test1, y_test2))

In [None]:
# Train an XGBoost classifier on the united training set, choosing its
# hyper-parameters with a randomized search scored by ROC AUC under
# 3-fold cross-validation.
from xgboost import XGBClassifier
from sklearn.model_selection import RandomizedSearchCV

# Fixed seed: without it RandomizedSearchCV draws a different set of 300
# candidates on every run, so the selected model cannot be reproduced.
RANDOM_STATE = 42

xgb_estimator = XGBClassifier(random_state=RANDOM_STATE)
xgb_clsf = RandomizedSearchCV(
    xgb_estimator,
    {
        "max_depth" : range(2, 16, 1),
        "n_estimators" : range(60, 261, 40),
        # XGBoost documents the valid range of learning_rate (eta) as [0, 1].
        # The previous upper bound of 2 placed roughly half of the 20 grid
        # points outside that range, wasting search iterations on them.
        "learning_rate": np.linspace(0.01, 1.0, 20),
        "subsample": np.linspace(0.7, 0.9, 20),
        "colsample_bytree": np.linspace(0.5, 0.98, 10),
        "min_child_weight": range(1, 9, 1)
    },
    cv=3,
    scoring='roc_auc',
    n_iter=300,        # number of parameter combinations sampled
    n_jobs=-1,         # use all available CPU cores for the search
    random_state=RANDOM_STATE
)

# fit() returns the fitted search object itself, so `xgb_clsf_model` and
# `xgb_clsf` refer to the same instance.
xgb_clsf_model = xgb_clsf.fit(X_train, y_train)