In [None]:
# Mount Google Drive in the Colab runtime and switch to the project folder,
# so that the relative paths used in the cells below resolve inside it.
from google.colab import drive
drive.mount('/content/gdrive')
%cd "/content/gdrive/MyDrive/MIT_BIH_ECG"

Mounted at /content/gdrive
/content/gdrive/MyDrive/MIT_BIH_ECG


### Imports

In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import csv
import pywt
from scipy import stats

%matplotlib inline

### Variable Definitions

In [None]:
# Directory that is scanned for the record (.csv) and annotation (.txt) files.
path = './mitbih_database/'
# NOTE(review): not referenced by any other visible cell — confirm it is still needed.
output_loc = "./ECGs_10_classes.hdf5"

# Annotation symbols that are kept; a symbol's position in this list is its integer label.
classes = ['N', 'L', 'R', 'A', 'V', '/', 'f', 'F', '!', 'j']
# Alternative 5-class setup (disabled):
# classes = ['N', 'L', 'R', 'A', 'V']
n_classes = len(classes)        # 10 with the list above (5 with the disabled alternative)
# Per-class beat tally, incremented by the extraction cell.
count_classes = [0]*n_classes
print(count_classes)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


### Prepare Input Files

In [None]:
# Collect the input file names.
# os.walk() yields (dirpath, dirnames, filenames) tuples; the first tuple it
# yields describes `path` itself, so index 2 holds the names of the files
# located directly inside that directory. They are kept in sorted order.
filenames = sorted(next(os.walk(path))[2])

# Filled by the next cell: paths of the signal (.csv) and annotation (.txt) files.
annotations = []
records = []


In [None]:
# Segregate the file names into signal records (*.csv) and annotations (*.txt).
# Both lists are rebuilt from scratch so that re-running this cell cannot
# append duplicates, and any other file type found in the folder is ignored
# instead of being mistaken for an annotation file.
records = [path + f for f in filenames if os.path.splitext(f)[1] == '.csv']
annotations = [path + f for f in filenames if os.path.splitext(f)[1] == '.txt']

# The extraction cell pairs records[i] with annotations[i], so the two lists
# must have the same length.
if len(records) != len(annotations):
    raise ValueError(
        f'Found {len(records)} record files but {len(annotations)} annotation files in {path}'
    )

### Data Extraction and Preprocessing

In [None]:
# Each beat is cut as signals[pos - window1_size : pos + window2_size],
# i.e. 90 + 166 = 256 samples around the annotated sample position.
window1_size = 90
window2_size = 166
X = list()
y = list()
# Reset the per-class tally so that re-running this cell does not double-count.
count_classes = [0] * n_classes

# Records: records[r] and annotations[r] are paired by index.
for r in range(0, len(records)):
    # Read the signal: skip the header row and keep column 1 of every data row.
    with open(records[r], 'rt') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        next(reader, None)
        signals = [int(row[1]) for row in reader]

    # signals = stats.zscore(signals)

    # Read annotations: sample position and arrhythmia class of every beat.
    with open(annotations[r], 'r') as fileID:
        data = fileID.readlines()

    for line in data[1:]:  # line 0 is the column header
        # Fields are separated by runs of spaces; drop the empty tokens.
        fields = [token for token in line.split(' ') if token]
        # fields[0] is the time stamp, which is not used.
        pos = int(fields[1])         # sample index of the annotation
        arrhythmia_type = fields[2]  # annotation symbol

        # Keep only beats whose full window lies inside the recording.
        if (window1_size <= pos and pos < (len(signals) - window2_size)):
            if (arrhythmia_type in classes):
                arrhythmia_index = classes.index(arrhythmia_type)
                count_classes[arrhythmia_index] += 1
                # TODO (from the original author): replace with R-peak detection.
                beat = signals[pos-window1_size:pos+window2_size]
                X.append(beat)
                y.append(arrhythmia_index)

# data shape
print(np.shape(X), np.shape(y))

(109534, 256) (109534,)


In [None]:
# Persist the integer class labels collected in `y`, one label per row.
df = pd.DataFrame(y[:])
df[:] = df[:].astype(int)
# Written with pandas: header row, no index column.
df.to_csv("targets.csv", index=False)
# Second copy under ./ECG/ via np.savetxt; no fmt is given, so numpy's default
# number formatting applies. NOTE(review): confirm which of the two files is consumed.
np.savetxt("./ECG/targets.csv", df, delimiter=',')

In [None]:
pip install pyts

In [None]:
# Export the extracted beats with one beat per column: the table built from X
# has one beat per row, so it is transposed before being written.
df = pd.DataFrame(list(X)).T
df.to_csv("ECG_signals.csv", index=False)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,109524,109525,109526,109527,109528,109529,109530,109531,109532,109533
0,963,958,949,955,956,958,965,955,953,957,...,970,984,970,968,972,986,977,969,977,987
1,962,955,947,955,956,958,967,953,956,960,...,973,981,969,971,971,986,977,970,979,986
2,964,955,953,954,956,959,968,952,955,959,...,975,980,975,969,970,985,977,970,983,987
3,963,953,952,953,959,961,969,952,953,961,...,973,978,979,966,971,982,978,971,982,988
4,966,954,953,955,959,961,968,951,953,960,...,972,980,981,965,970,980,978,971,981,988
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
251,959,954,958,957,961,971,951,960,958,962,...,991,975,966,975,987,985,970,978,987,967
252,957,956,955,959,961,971,950,960,962,960,...,990,975,968,974,988,983,969,977,985,966
253,956,953,956,958,959,968,955,957,961,961,...,989,975,970,971,987,981,970,979,987,962
254,957,954,957,961,960,971,956,957,959,961,...,984,972,971,970,986,979,970,976,986,961


In [None]:
import cv2

IMG_HEIGHT = 32
IMG_WIDTH = 32
data1 = []

# One sub-folder per class: ./ECG/Difference/images1 ... images10.
# data1[i] becomes the list of preprocessed images of class i.
for i in range(10):
    img_folder = f'./ECG/Difference/images{i+1}'
    img_data_array = []
    # os.listdir() returns names in arbitrary order; sort them so the image
    # order (and therefore the seeded split below) is reproducible.
    for file in sorted(os.listdir(img_folder)):
        image_path = os.path.join(img_folder, file)
        # The second argument of cv2.imread is an IMREAD_* flag, not a colour
        # conversion code: load as 3-channel BGR, then convert explicitly.
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # imread signals failure by returning None; fail with the file name.
            raise ValueError(f'Could not read image file: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # cv2.resize expects dsize as (width, height).
        image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_AREA)
        # Scale the pixel values by 1/255; float16 keeps the memory footprint small.
        image = image.astype('float16')
        image /= 255
        img_data_array.append(image)
    data1.append(img_data_array)
    print(i)
print(len(data1))

0
1
2
3
4
5
6
7
8
9
10


In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split

# label[i] holds one target value i per beat of class i found during extraction.
# This assumes folder images{i+1} contains exactly y.count(i) images;
# train_test_split raises a ValueError if the two lengths differ.
label = [[i] * y.count(i) for i in range(len(data1))]

# Split every class 80/20 on its own so each class is represented in both parts,
# collect the per-class pieces and concatenate them once at the end (avoids
# re-copying the growing image array on every iteration).
train_inputs, test_inputs, train_targets, test_targets = [], [], [], []
for i in range(len(data1)):
    X_train, X_test, y_train, y_test = train_test_split(
        data1[i], label[i], test_size=0.2, random_state=1)
    train_inputs.append(X_train)
    test_inputs.append(X_test)
    train_targets.append(y_train)
    test_targets.append(y_test)

# Merge inputs and targets (class 0 first, then class 1, ...).
X_trains1 = np.concatenate(train_inputs, axis=0)
X_tests1 = np.concatenate(test_inputs, axis=0)
y_trains1 = np.concatenate(train_targets, axis=0)
y_tests1 = np.concatenate(test_targets, axis=0)

print(len(X_trains1), len(y_trains1))


87623 87623


In [None]:
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.losses import sparse_categorical_crossentropy
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import KFold
import keras
import numpy as np

# Model configuration
batch_size = 32
input_shape = (32, 32, 3)   # 32x32 images with 3 channels
loss_function = sparse_categorical_crossentropy   # targets are integer class indices
no_classes = 10
no_epochs = 10
optimizer = Adam()
verbosity = 1
num_folds = 10


# Define per-fold score containers
acc_per_fold = []
loss_per_fold = []

# Merge inputs and targets: the K-fold split below is drawn from the whole set.
inputs1 = np.concatenate((X_trains1, X_tests1), axis=0)
targets1 = np.concatenate((y_trains1, y_tests1), axis=0)

# Define the K-fold Cross Validator; seeded so the fold assignment is reproducible.
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=1)

# Define the model architecture: two conv/pool stages followed by a dense classifier.
model1 = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(no_classes, activation='softmax'),
])
# summary() prints the table itself and returns None, so it is not wrapped in print().
model1.summary()

# Compile the model
model1.compile(loss=loss_function,
            optimizer=optimizer,
            metrics=['accuracy'])
print('Finish!')

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4 (Conv2D)           (None, 30, 30, 32)        896       
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 15, 15, 32)       0         
 2D)                                                             
                                                                 
 conv2d_5 (Conv2D)           (None, 13, 13, 64)        18496     
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 6, 6, 64)         0         
 2D)                                                             
                                                                 
 flatten_2 (Flatten)         (None, 2304)              0         
                                                                 
 dense_6 (Dense)             (None, 256)              

In [None]:
import tensorflow as tf
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

# Start from empty score lists so that re-running this cell does not append duplicates.
acc_per_fold = []
loss_per_fold = []

# K-fold Cross Validation model evaluation
for fold_no, (train, test) in enumerate(kfold.split(inputs1, targets1), start=1):

    # Generate a print
    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')

    # Every fold must start from freshly initialised weights. Re-using one model
    # instance would carry over what it learned in earlier folds, whose training
    # data contains the samples of the current test fold, and inflate the score.
    # clone_model() rebuilds the same architecture with new weights; after the
    # loop `model1` refers to the model trained in the last fold.
    model1 = tf.keras.models.clone_model(model1)
    model1.compile(loss=loss_function,
                   optimizer=tf.keras.optimizers.Adam(),
                   metrics=['accuracy'])

    # Fit data to model
    history1 = model1.fit(inputs1[train], targets1[train],
              batch_size=batch_size,
              epochs=no_epochs,
              verbose=verbosity,
              callbacks=[callback])

    # Generate generalization metrics on the held-out fold
    scores1 = model1.evaluate(inputs1[test], targets1[test], verbose=0)
    print(f'Score for fold {fold_no}: {model1.metrics_names[0]} of {scores1[0]}; {model1.metrics_names[1]} of {scores1[1]*100}%')
    acc_per_fold.append(scores1[1] * 100)
    loss_per_fold.append(scores1[0])

------------------------------------------------------------------------
Training for fold 1 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Score for fold 1: loss of 0.05199152231216431; accuracy of 98.62150549888611%
------------------------------------------------------------------------
Training for fold 2 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Score for fold 2: loss of 0.031159762293100357; accuracy of 99.2422878742218%
------------------------------------------------------------------------
Training for fold 3 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Score for fold 3: loss of 0.020790966227650642; accuracy of 99.40661191940308%
------------------------------------------------------------------------
Training for fold 4 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10


In [None]:
import cv2

IMG_HEIGHT = 32
IMG_WIDTH = 32
data = []

# One sub-folder per class: ./ECG/Summation/images1 ... images10.
# data[i] becomes the list of preprocessed images of class i.
for i in range(10):
    img_folder = f'./ECG/Summation/images{i+1}'
    img_data_array = []
    # os.listdir() returns names in arbitrary order; sort them so the image
    # order (and therefore the seeded split below) is reproducible.
    for file in sorted(os.listdir(img_folder)):
        image_path = os.path.join(img_folder, file)
        # The second argument of cv2.imread is an IMREAD_* flag, not a colour
        # conversion code: load as 3-channel BGR, then convert explicitly.
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # imread signals failure by returning None; fail with the file name.
            raise ValueError(f'Could not read image file: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # cv2.resize expects dsize as (width, height).
        image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_AREA)
        # Scale the pixel values by 1/255; float16 keeps the memory footprint small.
        image = image.astype('float16')
        image /= 255
        img_data_array.append(image)
    data.append(img_data_array)
    print(i)
print(len(data))

0
1
2
3
4
5
6
7
8
9
10


In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split

# label[i] holds one target value i per beat of class i found during extraction.
# This assumes folder images{i+1} contains exactly y.count(i) images;
# train_test_split raises a ValueError if the two lengths differ.
label = [[i] * y.count(i) for i in range(len(data))]

# Split every class 80/20 on its own so each class is represented in both parts,
# collect the per-class pieces and concatenate them once at the end (avoids
# re-copying the growing image array on every iteration).
train_inputs, test_inputs, train_targets, test_targets = [], [], [], []
for i in range(len(data)):
    X_train, X_test, y_train, y_test = train_test_split(
        data[i], label[i], test_size=0.2, random_state=1)
    train_inputs.append(X_train)
    test_inputs.append(X_test)
    train_targets.append(y_train)
    test_targets.append(y_test)

# Merge inputs and targets (class 0 first, then class 1, ...).
X_trains = np.concatenate(train_inputs, axis=0)
X_tests = np.concatenate(test_inputs, axis=0)
y_trains = np.concatenate(train_targets, axis=0)
y_tests = np.concatenate(test_targets, axis=0)

print(len(X_trains), len(y_trains))


87623 87623


In [None]:
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.losses import sparse_categorical_crossentropy
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import KFold
import keras
import numpy as np

# Model configuration
batch_size = 32
input_shape = (32, 32, 3)   # 32x32 images with 3 channels
loss_function = sparse_categorical_crossentropy   # targets are integer class indices
no_classes = 10
no_epochs = 10
optimizer = Adam()
verbosity = 1
num_folds = 10


# Define per-fold score containers
acc_per_fold = []
loss_per_fold = []

# Merge inputs and targets: the K-fold split below is drawn from the whole set.
inputs = np.concatenate((X_trains, X_tests), axis=0)
targets = np.concatenate((y_trains, y_tests), axis=0)

# Define the K-fold Cross Validator; seeded so the fold assignment is reproducible.
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=1)

# Define the model architecture: two conv/pool stages followed by a dense classifier.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(no_classes, activation='softmax'),
])
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

# Compile the model
model.compile(loss=loss_function,
            optimizer=optimizer,
            metrics=['accuracy'])
print('Finish!')

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_20 (Conv2D)          (None, 30, 30, 32)        896       
                                                                 
 max_pooling2d_20 (MaxPoolin  (None, 15, 15, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_21 (Conv2D)          (None, 13, 13, 64)        18496     
                                                                 
 max_pooling2d_21 (MaxPoolin  (None, 6, 6, 64)         0         
 g2D)                                                            
                                                                 
 flatten_10 (Flatten)        (None, 2304)              0         
                                                                 
 dense_30 (Dense)            (None, 256)             

In [None]:

import tensorflow as tf
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

# Start from empty score lists so that re-running this cell does not append duplicates.
acc_per_fold = []
loss_per_fold = []

# K-fold Cross Validation model evaluation
for fold_no, (train, test) in enumerate(kfold.split(inputs, targets), start=1):

    # Generate a print
    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')

    # Every fold must start from freshly initialised weights. Re-using one model
    # instance would carry over what it learned in earlier folds, whose training
    # data contains the samples of the current test fold, and inflate the score.
    # clone_model() rebuilds the same architecture with new weights; after the
    # loop `model` refers to the model trained in the last fold.
    model = tf.keras.models.clone_model(model)
    model.compile(loss=loss_function,
                  optimizer=tf.keras.optimizers.Adam(),
                  metrics=['accuracy'])

    # Fit data to model
    history = model.fit(inputs[train], targets[train],
              batch_size=batch_size,
              epochs=no_epochs,
              verbose=verbosity,
              callbacks=[callback])

    # Generate generalization metrics on the held-out fold
    scores = model.evaluate(inputs[test], targets[test], verbose=0)
    print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
    acc_per_fold.append(scores[1] * 100)
    loss_per_fold.append(scores[0])

------------------------------------------------------------------------
Training for fold 1 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Score for fold 1: loss of 0.07727085053920746; accuracy of 98.33850860595703%
------------------------------------------------------------------------
Training for fold 2 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Score for fold 2: loss of 0.04149759188294411; accuracy of 99.05057549476624%
------------------------------------------------------------------------
Training for fold 3 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Score for fold 3: loss of 0.037436820566654205; accuracy of 99.08708930015564%
------------------------------------------------------------------------
Training for fold 4 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10


In [None]:
from sklearn.metrics import classification_report

# NOTE(review): X_tests was merged back into `inputs` before the K-fold loop, so
# the model has been fitted on part of these samples; this report is not a
# measurement on unseen data.
preds = model.predict(X_tests)
# Take the arg-max over the raw softmax outputs. Casting them to int first
# truncates every probability below 1.0 to 0, which makes argmax fall back to
# index 0 (class 0) for almost every sample.
preds = np.argmax(preds, axis=1)
y_tests = y_tests.flatten()
print(classification_report(y_tests, preds, digits=4))

              precision    recall  f1-score   support

           0     0.9523    1.0000    0.9756     15005
           1     1.0000    0.9882    0.9941      1615
           2     1.0000    0.8415    0.9139      1451
           3     1.0000    0.6961    0.8208       510
           4     1.0000    0.8962    0.9453      1426
           5     1.0000    0.9907    0.9954      1405
           6     1.0000    0.8122    0.8964       197
           7     1.0000    0.2174    0.3571       161
           8     1.0000    1.0000    1.0000        95
           9     1.0000    0.5000    0.6667        46

    accuracy                         0.9657     21911
   macro avg     0.9952    0.7942    0.8565     21911
weighted avg     0.9674    0.9657    0.9628     21911

