In [4]:
import sys
import os
import numpy as np
import pandas as pd
import pickle
import logging

# Extend the module search path so the project code two directories up can be
# imported from this notebook; the `analyse.*` imports below rely on '../../'.
# NOTE(review): '../../analyse' is added as well, presumably so modules inside
# the package can import each other by short name — confirm it is still needed.
sys.path.append('../../analyse')
sys.path.append('../../') 

from analyse.utils.global_config import GlobalConfig
from analyse.utils.download_db import (
    get_signals,
    get_db,
)

# Write every record from DEBUG upwards to a UTF-8 log file that is opened in
# 'w' mode, i.e. recreated on each run of this cell.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(levelname)s: %(message)s',
    filename='run-logs.log',
    filemode='w',
    encoding='utf-8',
)

# Instantiate the project's GlobalConfig from the JSON parameter file. As the
# last expression of the cell, the instance is also echoed as the cell output.
GlobalConfig(r'../../analyse/config/params.json')

<analyse.utils.global_config.GlobalConfig at 0x125bf53a0>

In [5]:
# Zip archive of the MIT-BIH Atrial Fibrillation database hosted on PhysioNet.
url = "https://physionet.org/static/published-projects/afdb/mit-bih-atrial-fibrillation-database-1.0.0.zip"
name = "MIT-BIH-AtrialFibrillation"

# NOTE(review): get_db presumably downloads/unpacks the archive under the given
# data directory and returns the local database path — confirm in download_db.
db_path = get_db(url, name, "../../analyse/data/")

# NOTE(review): reload=False presumably reuses previously processed signals
# instead of rebuilding them — confirm in download_db.
signals = get_signals(db_path, reload=False)

In [6]:
# Flatten the signal -> window hierarchy into two parallel lists:
# `windows` receives the first value returned by get_data() (later used as the
# feature row) and `classification` the second (later used as the label).
windows, classification = [], []
for signal in signals:
    for win in signal.windows:
        features, label = win.get_data()
        windows.append(features)
        classification.append(label)

# Total number of windows gathered over all signals.
print(len(windows))

2294582


In [37]:
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

# Features: one row per collected window. Labels: a single-column frame.
X = pd.DataFrame(windows)
y = pd.DataFrame(classification)

# Hold out 20 % of the rows for testing; the fixed seed keeps the split stable
# between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=21,
)

In [38]:
# Peek at the test features as a raw array, leaving out the last column.
X_test.iloc[:, :-1].to_numpy()

array([[ 1.85054883e-02,  5.54341216e-02,  1.28752746e-01, ...,
         2.00000000e+00,  1.00000000e+00,  3.00000000e+00],
       [-1.28792057e-02,  3.43998063e-03,  9.78299775e-03, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  5.08800265e-04,  1.10685736e-04, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       ...,
       [ 1.93325193e-02,  4.30025088e-02,  9.39711754e-02, ...,
         2.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [-1.63794875e-02,  7.80042349e-02,  1.91037307e-01, ...,
         7.00000000e+00,  1.00000000e+00,  0.00000000e+00],
       [ 6.38984158e-03,  1.83597968e-04,  3.66490773e-04, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00]])

In [18]:
# Number of test rows whose 'AAA' value is below 10.
X_test.loc[X_test['AAA'] < 10].shape[0]

227272

In [20]:
# Number of test rows whose 'AAA' value is below 5.
X_test.loc[X_test['AAA'] < 5].shape[0]

196409

**Creating a multi-layer perceptron classifier using arbitrary hyper-parameters found on the Internet**

In [31]:
# First candidate network: three hidden layers (150 -> 100 -> 50 units), ReLU
# activations, Adam optimiser, at most 300 iterations. The seed fixes the
# weight initialisation so the run can be repeated.
classifier = MLPClassifier(
    solver='adam',
    activation='relu',
    hidden_layer_sizes=(150, 100, 50),
    max_iter=300,
    random_state=9831235,
)

In [32]:
# Train on the training split; the single-column label frame is flattened to a
# 1-D array before being handed to fit().
MLP_model = classifier.fit(X_train, y_train.to_numpy().ravel())

In [33]:
model_filename = "../../analyse/models/MLPClassifier.pickle"

# Create the target folder if it is missing, so that opening the file below
# cannot fail with FileNotFoundError and discard the freshly fitted model.
os.makedirs(os.path.dirname(model_filename), exist_ok=True)

# Serialize the fitted classifier using the highest pickle protocol available.
with open(model_filename, 'wb') as bin_file:
    pickle.dump(
        MLP_model,
        file=bin_file,
        protocol=pickle.HIGHEST_PROTOCOL
    )

In [34]:
# Predict labels for the held-out test rows; `y_pred` feeds the classification
# report and confusion matrix cells further down.
# (`classifier` is the estimator that was fitted above and bound to `MLP_model`;
# scikit-learn's fit() returns the estimator itself.)
y_pred = classifier.predict(X_test)

In [46]:
# Mean accuracy on the held-out rows, on the training rows, and on all rows
# (the last figure includes the training rows, so it is not an unseen-data score).
for caption, features, targets in (
    ("Test dataset:", X_test, y_test),
    ("Train dataset:", X_train, y_train),
    ("Full dataset:", X, y),
):
    print(caption, MLP_model.score(features, targets))

Test dataset: 0.9827594096535975
Train dataset: 0.9862099021335592
Full dataset: 0.985519802735313


In [43]:
from sklearn.metrics import classification_report
# Per-class precision / recall / F1 of the test-set predictions.
report_text = classification_report(y_test, y_pred)
print("Classification_report:\n")
print(report_text)

Classification_report:

              precision    recall  f1-score   support

       False       0.98      0.99      0.98    244678
        True       0.99      0.97      0.98    214239

    accuracy                           0.98    458917
   macro avg       0.98      0.98      0.98    458917
weighted avg       0.98      0.98      0.98    458917



In [36]:
from sklearn.metrics import confusion_matrix

# function for prediction accuracy calculation
def accuracy(conf_matrix):
    """Return the fraction of correctly classified samples.

    Parameters
    ----------
    conf_matrix : array-like of shape (n_classes, n_classes)
        Confusion matrix whose main diagonal holds the correct predictions.
        Anything ``np.asarray`` understands is accepted (ndarray, nested
        lists, ...).

    Returns
    -------
    float
        Sum of the diagonal divided by the sum of all entries; ``nan`` when
        the matrix contains no samples at all.
    """
    matrix = np.asarray(conf_matrix)
    all_sum = matrix.sum()
    if all_sum == 0:
        # No samples: the ratio is undefined. Return nan explicitly instead of
        # relying on NumPy's 0/0 division, which also emits a RuntimeWarning.
        return float('nan')
    return float(matrix.trace() / all_sum)

# scikit-learn's signature is confusion_matrix(y_true, y_pred): the ground truth
# goes first so that rows are true classes and columns are predicted classes.
# Passing the arguments the other way round transposes the matrix; the accuracy
# below is unaffected, but any per-class reading of `conf_matrix` would be wrong.
conf_matrix = confusion_matrix(y_test, y_pred)
print("MLPClassifier accuracy:", accuracy(conf_matrix))

MLPClassifier accuracy: 0.9827594096535975


**(TAKES TOO LONG TO RUN) Searching for the best hyper-parameters:**

In [51]:
from sklearn.model_selection import GridSearchCV

# Candidate settings to try exhaustively:
# 5 layouts x 2 activations x 2 solvers x 2 alphas x 2 learning-rate schedules
# = 80 combinations.
parameter_space = {
    'hidden_layer_sizes': [(3,), (10,), (10,20), (50,50,50), (50,100,50)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant','adaptive'],
}

# Same seed as the hand-picked classifiers in this notebook, so that the
# weight initialisation of every candidate is repeatable.
mlp_estimator = MLPClassifier(random_state=9831235)

# The search object itself: the following cells call `enhanced_MLP.fit(...)`,
# `.best_params_` and `.predict(...)`, so it has to be created here.
# 5-fold cross-validation over the 80 candidates gives 400 fits; verbose=1
# prints that summary so the progress of this long run is visible.
enhanced_MLP = GridSearchCV(
    mlp_estimator,
    parameter_space,
    cv=5,
    verbose=1,
)

In [53]:
# Run the hyper-parameter search on the training split (labels flattened to a
# 1-D array). The output recorded below shows this run was interrupted by hand.
enhanced_MLP_model = enhanced_MLP.fit(X_train, y_train.to_numpy().ravel())

Fitting 5 folds for each of 80 candidates, totalling 400 fits




KeyboardInterrupt: 

In [None]:
enhanced_model_filename = "../../analyse/models/MLPClassifier_enhanced.pickle"

# Create the target folder if it is missing, so that the result of the very
# long search is not lost to a FileNotFoundError when the file is opened.
os.makedirs(os.path.dirname(enhanced_model_filename), exist_ok=True)

# Serialize the fitted search result using the highest pickle protocol available.
with open(enhanced_model_filename, 'wb') as bin_file:
    pickle.dump(
        enhanced_MLP_model,
        file=bin_file,
        protocol=pickle.HIGHEST_PROTOCOL
    )

In [None]:
# Show the parameter combination selected by the search.
# NOTE(review): assumes `enhanced_MLP` is a fitted GridSearchCV-style object
# exposing `best_params_` — the fit cell above was interrupted, so confirm.
print('Best hyper-parameters found:\n', enhanced_MLP.best_params_)

In [None]:
# Per-class precision / recall / F1 of the searched model on the held-out test
# rows. `classification_report` is imported in an earlier cell of this notebook.
print('Results of enhanced model on the test set:')
print(classification_report(y_test, enhanced_MLP.predict(X_test)))

**Trying another set of hyper-parameters:**

In [64]:
# Second candidate network: hidden layers of 50 -> 150 -> 50 units; solver,
# activation, iteration cap and seed are the same as for the first candidate.
# Note that this rebinds the name `classifier`.
classifier = MLPClassifier(
    solver='adam',
    activation='relu',
    hidden_layer_sizes=(50, 150, 50),
    max_iter=300,
    random_state=9831235,
)

In [65]:
# Train the second candidate on the same training split (labels flattened to a
# 1-D array). This rebinds `MLP_model` to the newly fitted network.
MLP_model = classifier.fit(X_train, y_train.to_numpy().ravel())

In [66]:
# Mean accuracy of the second candidate on the held-out rows, on the training
# rows, and on all rows (the last figure includes the training rows).
for caption, features, targets in (
    ("Test dataset:", X_test, y_test),
    ("Train dataset:", X_train, y_train),
    ("Full dataset:", X, y),
):
    print(caption, MLP_model.score(features, targets))

Test dataset: 0.9803820734468325
Train dataset: 0.9835928668902005
Full dataset: 0.9829507073619509


In [None]:
model_filename = "../../analyse/models/MLPClassifier.pickle"

# Baseline = test accuracy of the model currently stored on disk, rather than a
# number pasted from an earlier run's printed output, which silently goes stale
# as soon as the data, the split or the stored model changes.
if os.path.exists(model_filename):
    # NOTE: unpickling can execute arbitrary code. It is acceptable here only
    # because this file is written by this notebook itself; never point it at
    # a pickle from an untrusted source.
    with open(model_filename, 'rb') as bin_file:
        saved_model = pickle.load(bin_file)
    baseline_score = saved_model.score(X_test, y_test)
else:
    # Nothing stored yet: any fitted model counts as an improvement.
    baseline_score = float('-inf')

# Score the candidate once and reuse the value.
candidate_score = MLP_model.score(X_test, y_test)

# Overwrite the stored model only when the new one is strictly better on the
# held-out test rows.
if candidate_score > baseline_score:
    os.makedirs(os.path.dirname(model_filename), exist_ok=True)
    with open(model_filename, 'wb') as bin_file:
        pickle.dump(
            MLP_model,
            file=bin_file,
            protocol=pickle.HIGHEST_PROTOCOL
        )