# 1. Import libraries

In [None]:
# !pip install wfdb

# Handle data path
from pathlib import Path
import collections

# Read and display data from Physionet
import wfdb
import pprint
import collections
from IPython.display import clear_output

# Data manipulation and plotting
import matplotlib.pyplot as plt
import numpy as np
from scipy.signal import resample
from scipy.signal import butter,filtfilt

# Divide data into train and test set and save to HDF5
import h5py
import os
from sklearn.model_selection import train_test_split
from sklearn import preprocessing


# 2. Classification

## 2.1 Load data

In [None]:
# Deep learning models
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch import nn
from scipy.stats import truncnorm
from torch.nn.parameter import Parameter
import torch.optim as optim
from torch.autograd import Variable
from torch.nn import Conv1d

In [None]:
# Read the pre-processed beats and their labels from the HDF5 file.

process_dir = Path("./") / "data" / "processed_data"

# Use this for inter-patient
file_name = 'ECG_MIT-BIH_processed_data_interpatient_360Hz.h5'

# Use this for intra-patient
# file_name = 'ECG_MIT-BIH_processed_data_intrapatient_360Hz.h5'

# The context manager closes the file even when one of the reads raises.
# `[:]` reads each dataset fully into memory, so the arrays stay usable
# after the file has been closed.
with h5py.File(process_dir / file_name, 'r') as file:
    train_set_d = file['train_data'][:]
    train_set_l = file['train_labels'][:]
    test_set_d = file['test_data'][:]
    test_set_l = file['test_labels'][:]

In [None]:
# Plot one training beat against time; the time axis is built from the
# sample index divided by 360 (the sampling rate named in the legend).
train_ecg_sample = train_set_d[14]
train_time_in_second = np.arange(len(train_ecg_sample)) / 360.

plt.rcParams["figure.figsize"] = (15, 5)
plt.plot(train_time_in_second, train_ecg_sample, color='b', label='360 Hz')
plt.title('Train ECG data for one beat')
plt.xlabel('time in second')
plt.ylabel('ECG value in mV')
plt.legend()
plt.show()
plt.clf()  # clear the current figure so the next plot starts empty

In [None]:
## Plot one test-set beat against time; the time axis is built from the
## sample index divided by 360 (the sampling rate named in the legend).
test_ecg_sample = test_set_d[0]
test_time_in_second = np.arange(len(test_ecg_sample)) / 360.

plt.rcParams["figure.figsize"] = (15, 5)
plt.plot(test_time_in_second, test_ecg_sample, color='b', label='360 Hz')
plt.title('Test ECG data for one beat')
plt.xlabel('time in second')
plt.ylabel('ECG value in mV')
plt.legend()
plt.show()
plt.clf()  # clear the current figure so the next plot starts empty

In [None]:
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.model_selection import StratifiedShuffleSplit
from tensorflow.keras.utils import to_categorical

import torch

def _build_loader(data, labels, batch_size, shuffle):
    """Wrap 2-D beat data and integer labels in a DataLoader.

    Args:
        data: array of shape (samples, length).
        labels: integer class index per sample.
        batch_size: number of samples per batch.
        shuffle: whether the loader reshuffles the samples on every pass.

    Returns:
        Tuple ``(reshaped, dataset, loader)`` where ``reshaped`` has shape
        (samples, 1, length) -- a single input channel for ``Conv1d``.
    """
    reshaped = np.reshape(data, (data.shape[0], 1, data.shape[1]))
    dataset = TensorDataset(torch.FloatTensor(reshaped), torch.LongTensor(labels))
    return reshaped, dataset, DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)


sss = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
# Five stratified 80/20 splits are generated, but only the LAST one is kept
# (this is what the previous loop did implicitly by overwriting its variables).
*_, (train_index, test_index) = sss.split(train_set_d, train_set_l)
cnn_train_data, cnn_val_data = train_set_d[train_index], train_set_d[test_index]
cnn_train_labels, cnn_val_labels = train_set_l[train_index], train_set_l[test_index]

# Labels stay as integer class indices (stored as LongTensor), not one-hot.
cnn_test_labels = test_set_l

batch_size = 128

train_reshape_data, train_tensor, train_dataloader = _build_loader(
    cnn_train_data, cnn_train_labels, batch_size, shuffle=True)

# Evaluation loaders do not need shuffling; a fixed order keeps the batch
# composition (and therefore the mean-of-batch-means loss) reproducible.
val_reshape_data, val_tensor, val_dataloader = _build_loader(
    cnn_val_data, cnn_val_labels, batch_size, shuffle=False)

# The test loader previously used the DataLoader default batch size of 1,
# i.e. one forward pass per beat; batching it only changes the speed.
test_reshape_data, test_tensor, test_dataloader = _build_loader(
    test_set_d, cnn_test_labels, batch_size, shuffle=False)

print(train_reshape_data.shape, val_reshape_data.shape, test_reshape_data.shape)

## 2.2 Set up the model

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

def DataBatch(data, label, batchsize, shuffle=True):
    """Yield ``(data, label)`` mini-batches of at most ``batchsize`` samples.

    Args:
        data: array indexed along its first axis (``data.shape[0]`` samples).
        label: array indexed with the same sample indices as ``data``.
        batchsize: maximum number of samples per batch; the last batch may
            be smaller.
        shuffle: when True the samples are visited in a random order drawn
            from NumPy's global random state, otherwise in index order.

    Yields:
        Tuples ``(data[batch_indices], label[batch_indices])``.
    """
    total = data.shape[0]
    order = np.random.permutation(total) if shuffle else np.arange(total)
    batch_count = int(np.ceil(total / batchsize))
    for batch_no in range(batch_count):
        start = batch_no * batchsize
        stop = min(total, start + batchsize)
        chosen = order[start:stop]
        yield data[chosen], label[chosen]

def test(test_data_loader, classifier):
    """Return the classification accuracy (in percent) over a data loader.

    Args:
        test_data_loader: iterable of ``(data, label)`` tensor batches.
        classifier: callable whose result for a batch can be compared
            element-wise with ``label.numpy()`` (the ``CNN`` class in this
            file returns a NumPy array of predicted class indices).

    Returns:
        ``correct / seen * 100`` as a float.
    """
    hits = 0.
    seen = 0
    with torch.no_grad():
        for batch, targets in test_data_loader:
            # Only the inputs go to the device; targets stay on the CPU so
            # they can be converted to NumPy for the comparison.
            predicted = classifier(batch.to(device))
            seen += targets.size(0)
            hits += (predicted == targets.numpy()).sum().item()
    return hits / seen * 100

def testLoss(test_data_loader, criterion, classifier):
    """Return the mean of the per-batch losses over a data loader.

    Args:
        test_data_loader: iterable of ``(data, labels)`` tensor batches.
        criterion: callable ``criterion(prediction, labels)`` returning a
            scalar tensor.
        classifier: model whose ``forward`` produces the prediction tensor.

    Returns:
        The average of the batch losses (each batch weighs the same,
        regardless of its size).
    """
    with torch.no_grad():
        batch_losses = [
            criterion(classifier.forward(inputs.to(device)), targets.to(device)).item()
            for inputs, targets in test_data_loader
        ]
    return np.mean(np.array(batch_losses))


def Confusion(test_data_loader, classifier, num_class=5):
    """Compute a row-normalised confusion matrix and the accuracy.

    Args:
        test_data_loader: iterable of ``(data, label)`` tensor batches.
        classifier: callable returning a NumPy array of predicted class
            indices per batch (as ``CNN.__call__`` in this file does).
        num_class: number of classes; labels and predictions must lie in
            ``[0, num_class)``.

    Returns:
        Tuple ``(M, acc)``. ``M[i, j]`` is the fraction of samples with true
        class ``i`` that were predicted as class ``j``; rows for classes with
        no samples stay all-zero instead of becoming NaN. ``acc`` is the
        accuracy in percent (NaN when the loader yields no samples).
    """
    M = np.zeros((num_class, num_class))
    correct = 0.
    total = 0
    with torch.no_grad():
        for data, label in test_data_loader:
            prediction = classifier(data.to(device))
            # Labels are converted from the CPU: calling .numpy() on a tensor
            # that was moved to a CUDA device raises.
            label = label.cpu().numpy()
            correct += np.sum(prediction == label)
            total += len(label)
            # Unbuffered add so repeated (true, predicted) pairs all count.
            np.add.at(M, (label, prediction), 1)

    row_totals = M.sum(axis=1, keepdims=True)
    # Rows without samples are skipped and therefore remain zero.
    np.divide(M, row_totals, out=M, where=row_totals > 0)

    # Count the samples actually seen rather than reading dataset.tensors,
    # so any loader (not only one built on a TensorDataset) works.
    acc = correct / total * 100 if total else float('nan')
    return M, acc

def VisualizeConfusion(M):
    """Show the matrix ``M`` as an image with matplotlib's default settings."""
    plt.imshow(M)
    plt.show()
    
def predict(test_data_loader, classifier):
    """Collect the classifier's predictions for every sample of a loader.

    Args:
        test_data_loader: iterable of ``(data, label)`` tensor batches; the
            labels are ignored.
        classifier: callable returning an iterable of per-sample predictions
            for a batch (``CNN.__call__`` in this file returns class indices).

    Returns:
        NumPy array with the predictions of all batches, in loader order.
    """
    preds = []
    with torch.no_grad():
        # Labels are not needed here, so they are no longer moved to the device.
        for data, _ in test_data_loader:
            preds.extend(classifier(data.to(device)))

    return np.array(preds)

def predict_proba(test_data_loader, classifier):
    """Return the raw ``classifier.forward`` outputs for a whole loader.

    Args:
        test_data_loader: iterable of ``(data, label)`` tensor batches; the
            labels are ignored.
        classifier: model whose ``forward`` returns one output row per sample.

    Returns:
        NumPy array with the outputs of all batches concatenated along
        axis 0 (an empty array when the loader yields nothing).
    """
    outputs = []
    with torch.no_grad():
        for data, _ in test_data_loader:
            outputs.append(classifier.forward(data.to(device)))

    if not outputs:
        # torch.cat rejects an empty list; keep the empty-array result.
        return torch.tensor([]).numpy()

    # Concatenate once at the end: growing a tensor with torch.cat on every
    # batch copies all previous rows each time (quadratic in batch count).
    return torch.cat(outputs, 0).cpu().numpy()

def ToOneVsAll(labels, one_label):
    """Binarise multi-class labels into a one-vs-all target.

    Args:
        labels: array-like of class labels (any shape).
        one_label: the class to treat as positive.

    Returns:
        A new array with the same shape and dtype as ``np.asarray(labels)``,
        holding 1 where the label equals ``one_label`` and 0 elsewhere.
        The input is not modified.
    """
    label_array = np.asarray(labels)
    # One vectorised comparison instead of a per-element Python loop; the
    # cast keeps the dtype of the input, as the copy-based version did.
    return np.where(label_array == one_label, 1, 0).astype(label_array.dtype)

In [None]:
from torch.nn.modules.activation import Softmax

class CNN(nn.Module):
    """1-D convolutional classifier for single-channel signals.

    The feature extractor is two identical stages (three Conv1d+ReLU pairs,
    then BatchNorm1d and Dropout) followed by an average pool with kernel and
    stride ``in_dim``. The head is a two-layer MLP ending in a softmax.

    Input is expected as (batch, 1, length). The first linear layer takes
    ``n_filters`` features, so the pooled length must be 1, i.e.
    ``in_dim <= length < 2 * in_dim``.

    NOTE(review): ``forward`` returns softmax probabilities, not logits, and
    the training cell passes them straight to the loss -- confirm the loss in
    use expects probabilities rather than applying its own softmax.

    NOTE: ``__call__`` is overridden to return predicted class indices as a
    NumPy array; use ``forward`` to obtain the probability tensor.
    """

    def __init__(self, in_dim=1024, classes=5, n_filters=256, filter_size=2, hidden=128, dropout_rate=0.5):
        super().__init__()

        # Layer order (and thus the state-dict indices conv.0 ... conv.16)
        # matters for checkpoints saved from this model.
        feature_layers = []
        stage_in_channels = 1
        for _ in range(2):
            feature_layers.extend(
                self._conv_stage(stage_in_channels, n_filters, filter_size, dropout_rate))
            stage_in_channels = n_filters
        feature_layers.append(nn.AvgPool1d(in_dim, stride=in_dim))
        self.conv = nn.Sequential(*feature_layers)

        self.fc = nn.Sequential(
            nn.Linear(n_filters, hidden),
            nn.ReLU(),
            nn.Linear(hidden, classes),
            nn.Softmax(dim=1),
        )

    @staticmethod
    def _conv_stage(in_channels, n_filters, filter_size, dropout_rate):
        """Return the layers of one stage: 3 x (Conv1d, ReLU), BatchNorm1d, Dropout."""
        layers = []
        channels = in_channels
        for _ in range(3):
            layers.append(nn.Conv1d(channels, n_filters, filter_size, padding='same'))
            layers.append(nn.ReLU())
            channels = n_filters
        layers.append(nn.BatchNorm1d(n_filters, affine=True))
        layers.append(nn.Dropout(dropout_rate))
        return layers

    def forward(self, x):
        """Return the class probabilities, shape (batch, classes)."""
        features = self.conv(x)
        return self.fc(torch.flatten(features, 1))

    def __call__(self, x):
        """Return the index of the most probable class per sample (NumPy array)."""
        probabilities = self.forward(x)
        return probabilities.detach().cpu().numpy().argmax(axis=1)



## 2.3 Train model

In [None]:
# Multi-class focal loss loaded through torch.hub from the
# 'adeelh/pytorch-multi-class-focal-loss' repository, with equal class weights.
focal_loss = torch.hub.load(
    'adeelh/pytorch-multi-class-focal-loss',
    model='focal_loss',
    alpha=torch.tensor([0.25, 0.25, 0.25, 0.25, 0.25]),
    gamma=2,
    reduction='mean',
    device=device,
    dtype=torch.float32,
    force_reload=False
)

cnn = CNN()
cnn.to(device)
epochs = 10

optimizer = optim.Adam(cnn.parameters(), lr=1e-3, weight_decay=1e-6)

# Per-epoch history. train_loss and val_accuracy were declared but never
# filled before; they are now appended to once per epoch.
train_loss = []
val_accuracy = []
total_val_loss = []
total_test_loss = []

for epoch in range(epochs):
    cnn.train()  # set network in training mode
    epoch_train_loss = []

    for (data, labels) in train_dataloader:
        data, labels = data.to(device), labels.to(device)
        prediction = cnn.forward(data)
        loss = focal_loss(prediction, labels)
        epoch_train_loss.append(loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    epoch_mean_train_loss = np.mean(np.array(epoch_train_loss))
    train_loss.append(epoch_mean_train_loss)

    cnn.eval()  # set network in evaluation mode
    val_acc = test(val_dataloader, cnn)
    val_loss = testLoss(val_dataloader, focal_loss, cnn)
    val_accuracy.append(val_acc)
    total_val_loss.append(val_loss)

    print ('Epoch:%d Val Accuracy: %f, train loss: %f, val loss: %f'%(epoch+1, val_acc, epoch_mean_train_loss, val_loss))

# Create the output directory first so a missing ./model folder does not
# throw away the trained weights at the very end.
model_path = Path('./model/focal_loss_interpatient.pt')
model_path.parent.mkdir(parents=True, exist_ok=True)
torch.save(cnn.state_dict(), model_path)

## 2.4 Test model

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay

def test_model(model, test_data_loader, device):
    """Evaluate ``model`` on a data loader and summarise the result.

    Args:
        model: module that is moved to ``device`` and called on each batch;
            the call must return one predicted class index per sample
            (``CNN.__call__`` in this file returns a NumPy array of indices).
        test_data_loader: iterable of ``(data, label)`` tensor batches with
            integer class labels in ``0..4``.
        device: torch device used for the model and the input batches.

    Returns:
        Tuple ``(report, report_dict, c_matrix)``:
            - classification report string (for display purposes)
            - the same report as a dictionary (for query purposes)
            - confusion matrix, always 5x5 in the order N, S, V, F, Q
    """
    target_names = ['N', 'S', 'V', 'F', 'Q']
    # Passing the full label set keeps the report and the matrix aligned with
    # target_names even when a class is absent from both the labels and the
    # predictions (without it the report raises on the length mismatch and
    # the matrix has fewer than five rows).
    class_ids = np.arange(len(target_names))

    predictions = []
    labels = []
    model.to(device)
    with torch.no_grad():
        for (data, label) in test_data_loader:
            predictions.extend(model(data.to(device)))
            labels.extend(label.numpy())

    predictions = np.array(predictions)
    labels = np.array(labels)

    report = classification_report(labels, predictions, labels=class_ids,
                                   target_names=target_names, digits=3)
    report_dict = classification_report(labels, predictions, labels=class_ids,
                                        target_names=target_names, output_dict=True)
    c_matrix = confusion_matrix(labels, predictions, labels=class_ids)
    return report, report_dict, c_matrix

# Rebuild the network and restore the weights saved after training.
cnn = CNN()
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine also loads on a CPU-only one.
cnn.load_state_dict(torch.load(Path('./model/focal_loss_interpatient.pt'), map_location=device))
cnn.eval()  # evaluation mode: dropout off, batch norm uses running statistics
report, report_dict, c_matrix = test_model(cnn, test_dataloader, device)

In [None]:
# `report` comes from test_model run on the test loader, so label it as such.
print('Test result:\n', report)

In [None]:
import itertools

## display confusion matrix
## Class names for the matrix axes, listed in class-index order (the same
## order as `target_names` in test_model).
display_labels = ['N', 'S', 'V', 'F', 'Q']

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Print a one-line header and plot the confusion matrix ``cm``.

    NOTE: this definition replaces the ``plot_confusion_matrix`` name that is
    imported from ``sklearn.metrics`` earlier in the file. That import is
    unused, and it fails on scikit-learn releases that no longer ship the
    function, so it can be dropped.

    Args:
        cm: square matrix of counts, rows = true class, columns = predicted.
        classes: tick labels, one per row/column of ``cm``.
        normalize: when True each row is divided by its sum before plotting;
            rows that sum to zero are shown as zeros instead of NaN.
        title: figure title.
        cmap: matplotlib colormap (object or name) for the image.
    """
    cm = np.asarray(cm)
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Skip empty rows: dividing by a zero row sum would yield NaN cells
        # and a NaN colour threshold below.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Integer counts print as integers; any float matrix (normalised or not)
    # prints with two decimals -- the 'd' format raises on float values.
    fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'
    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Lay out after the axis labels exist so they are included in the fit.
    plt.tight_layout()
    plt.show()
    plt.clf()
    
# Show the row-normalised test confusion matrix (title typo fixed).
plot_confusion_matrix(c_matrix, display_labels,
                      title='Normalized Confusion Matrix', normalize=True, cmap='Greys')
