# [Capstone Project] ECG Classification Methods

## Introduction
Using the ECG5000 dataset, I will implement both methods A and B discussed in the paper. I am implementing the code for the algorithms from scratch. 

## Importing the Data


In [10]:
import pickle

# Load the ECG5000 training and validation splits from their pickle files.
# Context managers make sure both file handles are closed once the data is read.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load these files if they come from a trusted source.
with open('ECG5000_train.pickle', 'rb') as train_file:
    train_data = pickle.load(train_file, encoding='latin1')
with open('ECG5000_validation.pickle', 'rb') as val_file:
    val_data = pickle.load(val_file, encoding='latin1')

# Keep both splits together in one dictionary keyed by split name.
ECGdataset = {
    'train': train_data,
    'validation': val_data
}

## Method A: 1-D CNNs

This method requires no preprocessing, so the raw data can be used directly. The 1-D CNN is built with the `Conv1D` layer from Keras (TensorFlow). I used this Kaggle notebook as a reference for the implementation: https://www.kaggle.com/code/isaienkov/1d-convolutional-neural-network-starter. The function below tries to cover the framework put forth in the paper. 

In [19]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, Conv1D, BatchNormalization, ReLU,
    GlobalAveragePooling1D, Dense
)
from tensorflow.keras.utils import to_categorical

# Extract features and labels
# Every column except the last is used as the signal; the last column is
# treated as the class label (assumes that layout for both splits).
train_split = ECGdataset['train']
val_split = ECGdataset['validation']
X_train = train_split[:, :-1]
y_train = train_split[:, -1].astype(int)
X_val = val_split[:, :-1]
y_val = val_split[:, -1].astype(int)

# One-hot encode the labels. Each label is mapped to its position in the
# sorted set of labels seen in either split, so the encoding is correct
# whether the labels start at 0 or 1 or contain gaps. For labels 1..K this
# gives exactly the same result as `y - 1`.
class_labels = np.unique(np.concatenate([y_train, y_val]))
num_classes = len(class_labels)
y_train_cat = to_categorical(np.searchsorted(class_labels, y_train), num_classes)
y_val_cat = to_categorical(np.searchsorted(class_labels, y_val), num_classes)

# Add a trailing channel axis so each sample has shape (timesteps, 1).
X_train = np.expand_dims(X_train, axis=-1)
X_val = np.expand_dims(X_val, axis=-1)


# 1-D CNN
def build_1dcnn_model(input_shape, num_classes):
    """Build an (uncompiled) 1-D convolutional classifier.

    The network stacks three Conv1D -> BatchNormalization -> ReLU stages
    (64 filters / kernel 7, 128 filters / kernel 5, 256 filters / kernel 3,
    all with 'same' padding), followed by global average pooling and a
    softmax Dense layer.

    Args:
        input_shape: Shape of one input sample, passed to ``Input(shape=...)``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A Keras ``Model`` mapping the input tensor to class probabilities.
    """
    inputs = Input(shape=input_shape)

    # (filters, kernel_size) for each convolutional stage, in order.
    stage_specs = ((64, 7), (128, 5), (256, 3))

    features = inputs
    for n_filters, width in stage_specs:
        features = Conv1D(n_filters, kernel_size=width, padding='same')(features)
        features = BatchNormalization()(features)
        features = ReLU()(features)

    pooled = GlobalAveragePooling1D()(features)
    class_probs = Dense(num_classes, activation='softmax')(pooled)

    return Model(inputs, class_probs)

# Compile the Model
# Seed Python's `random`, NumPy and TensorFlow before the model is built so
# that weight initialisation and batch shuffling repeat across
# Restart & Run All. (Some GPU kernels may still be non-deterministic.)
SEED = 42
tf.keras.utils.set_random_seed(SEED)

input_shape = X_train.shape[1:]
model = build_1dcnn_model(input_shape, num_classes)

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

# Train the model
# Keep the History object so the per-epoch curves can be inspected afterwards.
history = model.fit(
    X_train, y_train_cat,
    epochs=30,
    batch_size=64,
    validation_data=(X_val, y_val_cat),
)

# Evaluate
loss, acc = model.evaluate(X_val, y_val_cat, verbose=0)
print(f"Validation Accuracy: {acc:.4f}")

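# #Predictions on Validation Set
# NOTE: argmax returns 0-based class indices while y_val holds the original 1-based labels, so shift by one before comparing.
#y_pred = np.argmax(model.predict(X_val), axis=1) + 1
#y_prob = model.predict(X_val)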

# #Calculate Metrics
#accuracy = accuracy_score(y_val, y_pred)
#precision = precision_score(y_val, y_pred, average='macro')
#recall = recall_score(y_val, y_pred, average='macro')
#f1 = f1_score(y_val, y_pred, average='macro')
#auroc = roc_auc_score(y_val, y_prob, average='macro', multi_class='ovr')
#conf_matrix = confusion_matrix(y_val, y_pred)

#print(f"Accuracy: {accuracy:.4f}")
#print(f"Precision: {precision:.4f}")
#print(f"Recall: {recall:.4f}")
#print(f"F1 Score: {f1:.4f}")
#print(f"AUROC: {auroc:.4f}")

# #Visualize
#plt.figure(dpi=100)

#group_counts = ["{0:0.0f}".format(value) for value in conf_matrix.flatten()]
#group_percentages = ["{0:.2%}".format(value) for value in conf_matrix.flatten()/np.sum(conf_matrix)]
#labels = [f"{v1}\n{v2}" for v1, v2 in zip(group_counts, group_percentages)]
#labels = np.asarray(labels).reshape(conf_matrix.shape)

#sns.heatmap(conf_matrix, annot=labels, fmt='', cmap='Blues')

#plt.xlabel('Predicted Labels')
#plt.ylabel('True Labels')
#plt.title('Confusion Matrix')


Epoch 1/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 82ms/step - accuracy: 0.1811 - loss: 2.4100 - val_accuracy: 0.5153 - val_loss: 2.2633
Epoch 2/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - accuracy: 0.5504 - loss: 1.5056 - val_accuracy: 0.5153 - val_loss: 2.1668
Epoch 3/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - accuracy: 0.5406 - loss: 1.2702 - val_accuracy: 0.5153 - val_loss: 2.0758
Epoch 4/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 73ms/step - accuracy: 0.5446 - loss: 1.2425 - val_accuracy: 0.5287 - val_loss: 2.0318
Epoch 5/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 66ms/step - accuracy: 0.5523 - loss: 1.1876 - val_accuracy: 0.5387 - val_loss: 2.0164
Epoch 6/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 72ms/step - accuracy: 0.5505 - loss: 1.1790 - val_accuracy: 0.1387 - val_loss: 2.0711
Epoch 7/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━

## Method B: Block-based Neural Networks

This method also uses the raw data without further preprocessing; each 140-sample signal is only reshaped into a 10×14 single-channel grid so that it can be fed to `Conv2D` layers from Keras (TensorFlow). I used this Kaggle notebook as a reference for the overall implementation: https://www.kaggle.com/code/isaienkov/1d-convolutional-neural-network-starter. The functions below try to cover the framework put forth in the paper. 

In [25]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, Conv2D, BatchNormalization, MaxPooling2D,
    Flatten, Dense
)
from tensorflow.keras.utils import to_categorical

# Extract features and labels
# Every column except the last is used as the signal; the last column is
# treated as the class label (assumes that layout for both splits).
train_split = ECGdataset['train']
val_split = ECGdataset['validation']
X_train = train_split[:, :-1]
y_train = train_split[:, -1].astype(int)
X_val = val_split[:, :-1]
y_val = val_split[:, -1].astype(int)

# One-hot encode the labels. Each label is mapped to its position in the
# sorted set of labels seen in either split, so the encoding is correct
# whether the labels start at 0 or 1 or contain gaps. For labels 1..K this
# gives exactly the same result as `y - 1`.
class_labels = np.unique(np.concatenate([y_train, y_val]))
num_classes = len(class_labels)
y_train_cat = to_categorical(np.searchsorted(class_labels, y_train), num_classes)
y_val_cat = to_categorical(np.searchsorted(class_labels, y_val), num_classes)

# Fold each signal into a GRID_ROWS x GRID_COLS single-channel "image".
# The sample axis is fixed explicitly (instead of -1) so that a feature count
# other than GRID_ROWS * GRID_COLS raises an error rather than silently
# regrouping values across samples.
GRID_ROWS, GRID_COLS = 10, 14
X_train_2d = X_train.reshape(len(X_train), GRID_ROWS, GRID_COLS, 1)
X_val_2d = X_val.reshape(len(X_val), GRID_ROWS, GRID_COLS, 1)

# 2-D block based NN
def conv2d_block(x, filters, kernel_size):
    """Apply one convolutional block to ``x`` and return the result.

    The block is a 'same'-padded, ReLU-activated Conv2D, followed by batch
    normalisation and 2x2 max pooling.

    Args:
        x: Input tensor for the block.
        filters: Number of convolution filters.
        kernel_size: Kernel size passed to ``Conv2D``.

    Returns:
        The tensor produced by the max-pooling layer.
    """
    convolved = Conv2D(filters, kernel_size, padding='same', activation='relu')(x)
    normalised = BatchNormalization()(convolved)
    return MaxPooling2D(pool_size=(2, 2))(normalised)

def build_block2d_model(input_shape, num_classes):
    """Build an (uncompiled) 2-D block-based convolutional classifier.

    Two ``conv2d_block`` stages (32 then 64 filters, both with 3x3 kernels)
    are followed by a Flatten layer and a softmax Dense layer.

    Args:
        input_shape: Shape of one input sample, passed to ``Input(shape=...)``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A Keras ``Model`` mapping the input tensor to class probabilities.
    """
    inputs = Input(shape=input_shape)

    features = inputs
    for n_filters in (32, 64):
        features = conv2d_block(features, n_filters, (3, 3))

    flattened = Flatten()(features)
    class_probs = Dense(num_classes, activation='softmax')(flattened)

    return Model(inputs, class_probs)


# Compile the model
# Seed Python's `random`, NumPy and TensorFlow before the model is built so
# that weight initialisation and batch shuffling repeat across
# Restart & Run All. (Some GPU kernels may still be non-deterministic.)
SEED = 42
tf.keras.utils.set_random_seed(SEED)

input_shape = X_train_2d.shape[1:]
model = build_block2d_model(input_shape, num_classes)

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

# Train the model
# Keep the History object so the per-epoch curves can be inspected afterwards.
history = model.fit(
    X_train_2d, y_train_cat,
    epochs=30,
    batch_size=64,
    validation_data=(X_val_2d, y_val_cat),
)

#Evaluate
loss, acc = model.evaluate(X_val_2d, y_val_cat, verbose=0)
print(f"Validation Accuracy: {acc:.4f}")

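# #Predictions on Validation Set
# NOTE: this model expects the reshaped 2-D input (X_val_2d); argmax returns 0-based class indices while y_val holds the original 1-based labels, so shift by one before comparing.
#y_pred = np.argmax(model.predict(X_val_2d), axis=1) + 1
#y_prob = model.predict(X_val_2d)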

# #Calculate Metrics
#accuracy = accuracy_score(y_val, y_pred)
#precision = precision_score(y_val, y_pred, average='macro')
#recall = recall_score(y_val, y_pred, average='macro')
#f1 = f1_score(y_val, y_pred, average='macro')
#auroc = roc_auc_score(y_val, y_prob, average='macro', multi_class='ovr')
#conf_matrix = confusion_matrix(y_val, y_pred)

#print(f"Accuracy: {accuracy:.4f}")
#print(f"Precision: {precision:.4f}")
#print(f"Recall: {recall:.4f}")
#print(f"F1 Score: {f1:.4f}")
#print(f"AUROC: {auroc:.4f}")

# #Visualize
#plt.figure(dpi=100)

#group_counts = ["{0:0.0f}".format(value) for value in conf_matrix.flatten()]
#group_percentages = ["{0:.2%}".format(value) for value in conf_matrix.flatten()/np.sum(conf_matrix)]
#labels = [f"{v1}\n{v2}" for v1, v2 in zip(group_counts, group_percentages)]
#labels = np.asarray(labels).reshape(conf_matrix.shape)

#sns.heatmap(conf_matrix, annot=labels, fmt='', cmap='Blues')

#plt.xlabel('Predicted Labels')
#plt.ylabel('True Labels')
#plt.title('Confusion Matrix')

Epoch 1/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - accuracy: 0.2859 - loss: 2.6584 - val_accuracy: 0.3287 - val_loss: 2.1217
Epoch 2/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.4865 - loss: 1.3347 - val_accuracy: 0.5187 - val_loss: 1.9924
Epoch 3/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.6335 - loss: 1.0947 - val_accuracy: 0.5373 - val_loss: 1.9826
Epoch 4/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.6384 - loss: 1.0205 - val_accuracy: 0.5413 - val_loss: 1.9530
Epoch 5/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.7061 - loss: 0.8641 - val_accuracy: 0.5680 - val_loss: 1.9619
Epoch 6/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.6885 - loss: 0.8544 - val_accuracy: 0.5760 - val_loss: 1.9535
Epoch 7/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━