# 236605 Final Project

## KNN Demonstration 

In [1]:
import sklearn as sk
import matplotlib.pyplot as plt
import torchvision.transforms as tvtf
from ECG_multi_lead_dataloader import *
import transforms as tf
import torchvision
import torch
import knn_classifier as knn
import os

In [2]:
# Preprocessing pipeline run on every ECG record before the dataset returns it
tf_ds = tvtf.Compose([tf.ECG_tuple_transform(-1)])  # Reshape to 1D Tensor

In [3]:
# Directory holding the database files. Set the ECG_DATA_DIR environment variable to
# point somewhere else; the fallback is the original author's local folder.
root_dir = os.environ.get('ECG_DATA_DIR', r'C:\Users\noam\Desktop\vadimDB')
# Keep the trailing path separator the original value carried: joining with an empty
# last component appends exactly one separator (and none if one is already there).
root_dir = os.path.join(root_dir, '')

# Despite its name, ECG_test is the whole dataset; the train and test subsets are
# carved out of it in the next cell.
ECG_test=ECG_Multilead_Dataset(root_dir=root_dir,transform= tf_ds) # For KNN demo

In [4]:
# Define how much data to load (only use a subset for speed)
num_train = 35000
num_test = 1000
batch_size = 10000

# Training dataset & loader (kept in fixed order: shuffle=False).
# SubsetDataset(ds, n, offset) presumably exposes n records starting at offset — confirm in transforms.py
ds_train = tf.SubsetDataset(ECG_test, num_train)
dl_train = torch.utils.data.DataLoader(ds_train, batch_size=batch_size, shuffle=False)

# Test dataset & loader: starts where the training subset ends
ds_test = tf.SubsetDataset(ECG_test, num_test, offset=num_train)
dl_test = torch.utils.data.DataLoader(ds_test, batch_size)

# Get all test data to predict in one go (batch_size is larger than num_test,
# so the first batch is the only batch)
test_iter = iter(dl_test)
# Use the built-in next(): DataLoader iterators in current PyTorch have no .next() method
x_test, y_test = next(test_iter)

In [5]:
# Build a KNN classifier with k=10, fit it on the training loader, and predict the test batch
knn_classifier = knn.KNNClassifier(k=10)
knn_classifier.train(dl_train)
y_pred = knn_classifier.predict(x_test)

# Calculate accuracy (it is multiplied by 100 for display, so knn.accuracy
# presumably returns a fraction in [0, 1] — confirm in knn_classifier.py)
accuracy = knn.accuracy(y_test, y_pred)
print(f'Accuracy: {accuracy*100:.2f}%')

Accuracy: 78.50%


## Digitized 12 Lead ECG classification

Clean up the environment from the previous example to free memory (if necessary):

In [None]:
%reset

In [19]:
# torch is imported again here because the %reset cell above clears the namespace
import torch
# Empty PyTorch's CUDA cache as part of freeing memory after the previous example
torch.cuda.empty_cache()

In [1]:
# Load the autoreload extension in mode 2 so edits to imported .py modules are picked up live
%load_ext autoreload
%autoreload 2

### Imports and loadings

Imports and preparations. Change `root_dir` here to the directory containing the data pickles.

In [20]:
import torch
import models
import transforms as tf
import matplotlib.pyplot as plt
from ECG_multi_lead_dataloader import *

##### Change root direrctory here #####
root_dir = r'C:\Users\noam\Desktop\vadimDB'+'\\'

%load_ext autoreload
%autoreload 2

plt.rcParams.update({'font.size': 12})
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Using device: cuda


Load the database (this may take a few minutes)

In [21]:
# Load the dataset from root_dir; no transform is passed here, unlike the KNN demo above
ds = ECG_Multilead_Dataset(root_dir=root_dir)

Prepare the dataloaders

In [42]:
# Subset sizes and mini-batch size (only a subset is used, for speed).
# num_train: 35000 for real training, 3500 for the small-set overfit experiment.
num_train, num_test = 3500, 1000
batch_size = 128

# Carve both subsets out of the loaded dataset; the test subset starts at offset num_train
ds_train = tf.SubsetDataset(ds, num_train)
ds_test = tf.SubsetDataset(ds, num_test, offset=num_train)

# Wrap them in loaders: training batches are shuffled, test batches use the default order
dl_train = torch.utils.data.DataLoader(ds_train, batch_size=batch_size, shuffle=True)
dl_test = torch.utils.data.DataLoader(ds_test, batch_size=batch_size)

Let's see what we loaded:

In [28]:
# Pull one batch from the training loader. The built-in next() is used because
# DataLoader iterators in current PyTorch have no .next() method.
x, y = next(iter(dl_train))
# The input part of a batch is a pair: x1 is the short-lead data, x2 the long-lead data
x1, x2 = x

print('Long lead data of shape: ', x2.shape)
print('Short lead data of shape: ', x1.shape)
print('Labels of shape: ', y.shape)

Long lead data of shape:  torch.Size([128, 1, 5000])
Short lead data of shape:  torch.Size([128, 12, 1250])
Labels of shape:  torch.Size([128])


### Model creation

Use this box to determine the architecture of the digitized ECG classifying model. Note that the input data are tuples structured as $(x_1,x_2)$, in which $x_1$ is a $12 \times N$ matrix containing the digitized signals of the short 12-lead ECG and $x_2$ is a $1 \times 4N$ matrix containing the long lead. Both inputs enter the model through 1D CNNs and are combined into a single set of features before finally being classified by a simple feedforward NN.

In [29]:
### CNNs structure:

# Number of channels in each conv layer of each branch (one list entry per layer)
short_hidden_channels = [16, 32, 64, 128, 256, 512]
long_hidden_channels  = [4, 8, 16, 32, 64, 128, 256, 512]

# Kernel length in each conv layer. The list lengths must correspond to the channel
# lists above, so they are derived from them instead of being hard-coded.
short_kernel_lengths = [5] * len(short_hidden_channels)
long_kernel_lengths = [5] * len(long_hidden_channels)

# which tricks to use: dropout, stride, batch normalization and dilation
short_dropout = None
long_dropout = None
short_stride = 2
long_stride = 2
short_dilation = 1
long_dilation = 1
short_batch_norm = True
long_batch_norm = True

# enter input length here (samples per lead; matches the batch shapes printed above)
short_input_length = 1250
long_input_length = 5000

### FC net structure:

# num of hidden units in every FC layer
fc_hidden_dims = [128]

# num of output classes
# NOTE(review): the sanity-check cell below reports a single class score per sample
# even with this set to 2 — confirm how Ecg12LeadNet maps this value to its output width.
num_of_classes = 2

Build the model:

In [30]:
# Drop the reference to any model left over from a previous run of this cell before
# building a new one. Rebinding to None (unlike `del model`) also works on a fresh
# kernel, where `model` does not exist yet and `del` would raise NameError.
model = None

# Build the two-branch network from the configuration cell above and move it to `device`
model = models.Ecg12LeadNet(short_hidden_channels, long_hidden_channels,
                 short_kernel_lengths, long_kernel_lengths,
                 fc_hidden_dims,
                 short_dropout, long_dropout,
                 short_stride, long_stride,
                 short_dilation, long_dilation,
                 short_batch_norm, long_batch_norm,
                 short_input_length, long_input_length,
                 num_of_classes).to(device)

print(model)

Ecg12LeadNet(
  (short_cnn): ConvNet(
    (cnn): Sequential(
      (0): Conv1d(12, 16, kernel_size=(5,), stride=(2,))
      (1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): Conv1d(16, 32, kernel_size=(5,), stride=(2,))
      (4): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU()
      (6): Conv1d(32, 64, kernel_size=(5,), stride=(2,))
      (7): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (8): ReLU()
      (9): Conv1d(64, 128, kernel_size=(5,), stride=(2,))
      (10): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (11): ReLU()
      (12): Conv1d(128, 256, kernel_size=(5,), stride=(2,))
      (13): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (14): ReLU()
      (15): Conv1d(256, 512, kernel_size=(5,), stride=(2,))
      (16): BatchNorm1d(512, eps=1e-05,

Try the model on a single batch to make sure the dimensions fit:

In [31]:
# Move both parts of the sample batch to the model's device as float tensors
x_try = tuple(part.to(device, dtype=torch.float) for part in (x1, x2))
y_pred = model(x_try)
out_batch, out_scores = y_pred.shape[0], y_pred.shape[1]
print('Output batch size is:', out_batch, ', and number of class scores:', out_scores,'\n')

# A positive score counts as predicting label 1; compare against the true labels
predicted_positive = (y_pred > 0).flatten()
actual_positive = y.to(device, dtype=torch.long)==1
num_correct = torch.sum(predicted_positive == actual_positive)
print(100*num_correct.item()/len(y), '% Accurecy... maybe we should consider training the model')

Output batch size is: 128 , and number of class scores: 1 

47.65625 % Accurecy... maybe we should consider training the model


### Let the game begin - Training

This training section is based on the course's abstract Trainer class from HW 3. We have implemented a custom class for our model that inherits from Trainer.

In [38]:
import numpy as np  # imported explicitly; previously only reachable through a star import
import torch.nn as nn
import torch.optim as optim
from training import Ecg12LeadNetTrainerBinary

# Seeds torch's RNG from this point on; the model above was already built before this call
torch.manual_seed(42)

lr = 0.001
num_epochs = 5
# Print a summary every `print_every` epochs (the first epoch is always reported).
# The original condition hard-coded `% 1`, which is true for every epoch.
print_every = 1

loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
trainer = Ecg12LeadNetTrainerBinary(model, loss_fn, optimizer, device)

for epoch in range(num_epochs):
    epoch_result = trainer.train_epoch(dl_train, verbose=True)

    if epoch == 0 or (epoch+1) % print_every == 0:
        # Average the values the trainer recorded for this epoch
        avg_loss = np.mean(epoch_result.losses)
        accuracy = np.mean(epoch_result.accuracy)
        print(f'\nEpoch #{epoch+1}: Avg. loss = {avg_loss:.3f}, Accuracy = {accuracy:.2f}%')

train_batch (Avg. Loss 0.000, Accuracy 100.0): 100%|███████████████████████████████████| 28/28 [00:08<00:00,  3.34it/s]

Epoch #1: Avg. loss = 0.000, Accuracy = 100.00%
train_batch (Avg. Loss 0.000, Accuracy 100.0): 100%|███████████████████████████████████| 28/28 [00:09<00:00,  3.14it/s]

Epoch #2: Avg. loss = 0.000, Accuracy = 100.00%
train_batch (Avg. Loss 0.000, Accuracy 100.0): 100%|███████████████████████████████████| 28/28 [00:10<00:00,  2.68it/s]

Epoch #3: Avg. loss = 0.000, Accuracy = 100.00%
train_batch (Avg. Loss 0.000, Accuracy 100.0): 100%|███████████████████████████████████| 28/28 [00:11<00:00,  2.59it/s]

Epoch #4: Avg. loss = 0.000, Accuracy = 100.00%
train_batch (Avg. Loss 0.000, Accuracy 100.0): 100%|███████████████████████████████████| 28/28 [00:10<00:00,  3.23it/s]

Epoch #5: Avg. loss = 0.000, Accuracy = 100.00%


In [47]:
# Evaluate on the test loader; index 1 of the result is printed as the accuracy
# (presumably the same field the training cell reads as .accuracy — confirm in training.py)
test_result = trainer.test_epoch(dl_test, verbose=True)
print('Test accurecy is: ', test_result[1], '%')

test_batch (Avg. Loss 1.055, Accuracy 89.9): 100%|███████████████████████████████████████| 8/8 [00:01<00:00,  7.45it/s]
test accurecy is:  89.9 %
