# CSC413 Final Project



## Preparation

### Imports

In [None]:
# need to restart runtime after running this block for the first time
# this code block needs to run twice
!pip install wfdb

Collecting wfdb
  Downloading wfdb-3.4.1-py3-none-any.whl (137 kB)
[?25l[K     |██▍                             | 10 kB 20.9 MB/s eta 0:00:01[K     |████▊                           | 20 kB 7.3 MB/s eta 0:00:01[K     |███████▏                        | 30 kB 6.7 MB/s eta 0:00:01[K     |█████████▌                      | 40 kB 6.3 MB/s eta 0:00:01[K     |████████████                    | 51 kB 3.9 MB/s eta 0:00:01[K     |██████████████▎                 | 61 kB 4.7 MB/s eta 0:00:01[K     |████████████████▋               | 71 kB 5.0 MB/s eta 0:00:01[K     |███████████████████             | 81 kB 4.8 MB/s eta 0:00:01[K     |█████████████████████▍          | 92 kB 5.4 MB/s eta 0:00:01[K     |███████████████████████▉        | 102 kB 5.0 MB/s eta 0:00:01[K     |██████████████████████████▏     | 112 kB 5.0 MB/s eta 0:00:01[K     |████████████████████████████▋   | 122 kB 5.0 MB/s eta 0:00:01[K     |███████████████████████████████ | 133 kB 5.0 MB/s eta 0:00:01[K     |██

In [None]:
!pip install scipy



In [None]:
# libraries

import numpy as np
import torch
import torch.nn as nn

import os
import wfdb
import pickle
from sklearn import preprocessing
from scipy.signal import find_peaks
from tqdm import tqdm
from sklearn import model_selection

from torch.autograd import Variable

### Download Data

In [None]:
# Download the 'mitdb' database into a 'mitdb' folder under the current working directory.
wfdb.dl_database('mitdb', os.path.join(os.getcwd(), 'mitdb'))

Generating record list for: 100
Generating record list for: 101
Generating record list for: 102
Generating record list for: 103
Generating record list for: 104
Generating record list for: 105
Generating record list for: 106
Generating record list for: 107
Generating record list for: 108
Generating record list for: 109
Generating record list for: 111
Generating record list for: 112
Generating record list for: 113
Generating record list for: 114
Generating record list for: 115
Generating record list for: 116
Generating record list for: 117
Generating record list for: 118
Generating record list for: 119
Generating record list for: 121
Generating record list for: 122
Generating record list for: 123
Generating record list for: 124
Generating record list for: 200
Generating record list for: 201
Generating record list for: 202
Generating record list for: 203
Generating record list for: 205
Generating record list for: 207
Generating record list for: 208
Generating record list for: 209
Generati

In [None]:
# Read the first 3000 samples of record 100 together with its 'atr'
# annotations that fall in the same range.
record = wfdb.rdrecord('mitdb/100', sampto=3000)
annotation = wfdb.rdann('mitdb/100', 'atr', sampto=3000)
print(annotation)

<wfdb.io.annotation.Annotation object at 0x7f51305986d0>


`record` is `<class 'wfdb.io.record.Record'>`

`annotation` is `<class 'wfdb.io.annotation.Annotation'>`

### Plot

In [None]:
wfdb.plot_wfdb(record=record, annotation=annotation, plot_sym=True,
                   time_units='seconds', title='MIT-BIH Record 100',
                   figsize=(10,4), ecg_grids='all')

ImportError: ignored

<Figure size 720x288 with 2 Axes>

### Data Description

Information regarding `<class 'wfdb.io.record.Record'>` and `<class 'wfdb.io.annotation.Annotation'>` can be found at this
[link](https://wfdb.readthedocs.io/en/latest/io.html#module-wfdb.io), or by running the code blocks below.

In [None]:
# help(wfdb.Annotation)

In [None]:
# help(wfdb.Record)

### Data Preprocessing

In [None]:
# ONLY RUN THIS IF YOU PLAN ON REPROCESSING DATA
!rm -r processed_data

rm: cannot remove 'processed_data': No such file or directory


In [None]:
# all the files
# Record names to preprocess; preprocess() reads each one as "mitdb/<name>".
file_name = ['100','101','102','103','104','105','106','107','108','109',
             '111','112','113','114','115','116','117','118','119','121',
             '122','123','124','200','201','202','203','205','207','208',
             '209','210','212','213','214','215','217','219','220','221',
             '222','223','228','230','231','232','233','234']

# Smaller alternative record selections (kept for quick experiments):
# file_name = ['222','223','228','230','231']
# file_name = ['111', '101', '104', '222', '228', '207', '209','210','212']

# files used for LSTM
# file_name = ['215', '123']

# these should work but they don't
# '100', '101', '102', '103', '104', '105', '107', '108'

# Lead names; NOTE(review): not referenced by any code visible in this notebook.
features = ['MLII', 'V1', 'V2', 'V4', 'V5']
# Annotation symbols kept by preprocess(); the position in this list is the
# index of the 1 in the one-hot label vector.
labels_list = ['N','S','V','F','Q']


def preprocess(small=False, split=0.0, size=720, save=False):
  """Cut fixed-length, peak-centred windows out of the records in ``file_name``.

  For every record name in the module-level ``file_name`` list, the first
  signal channel is standardised, peaks are located with ``find_peaks`` and a
  window of ``size`` samples is cut around each peak (the first and last peak
  are skipped). A window is labelled with the first annotation symbol wfdb
  returns for the window's sample range. Windows whose symbol is not in the
  module-level ``labels_list`` are dropped, and 'N' windows are randomly
  dropped so that only about 15% of them are kept.

  Args:
    small: if True, only the first 35000 samples of each record are read.
    split: fraction of the windows held out as test set; 0.0 disables the
      split.
    size: window length in samples.
    save: if True and ``split`` is non-zero, the four arrays are written to
      ``processed_data/*.csv``.

  Returns:
    ``(data, labels)`` when ``split == 0.0``, otherwise
    ``(X_train, y_train, X_test, y_test)``. Labels are one-hot rows ordered
    like ``labels_list``.
  """
  full_data = list()
  full_label = list()
  for num in file_name:
    record_path = "mitdb/" + num
    # If small, only read the first 35000 samples of the record
    if small:
      record_file = wfdb.rdrecord(record_path, sampto=35000, smooth_frames=True)
    else:
      record_file = wfdb.rdrecord(record_path, smooth_frames=True)

    # Obtain the standardised first channel and the peak indices
    signals_mlii = preprocessing.scale(np.nan_to_num(record_file.p_signal[:,0]))
    signals_mlii[np.isnan(signals_mlii)] = 0
    signals_mlii = signals_mlii.tolist()
    n_samples = len(signals_mlii)
    peaks_mlii, _ = find_peaks(signals_mlii, distance=150)

    # Create one data point centred at each peak
    for peak in tqdm(peaks_mlii[1:-1]):
      start = peak - size // 2
      # Deriving `end` from `start` guarantees exactly `size` samples, also
      # for odd sizes (the old `end - start != size` check rejected those).
      end = start + size
      # Skip windows that would be truncated at either end of the record;
      # truncated windows would make the final array ragged.
      if start < 0 or end > n_samples:
        continue

      # NOTE(review): the annotation file is re-read for every peak, which
      # dominates the runtime, and the label is the FIRST symbol inside the
      # window, which is not necessarily the beat at `peak` -- confirm intent.
      ann = wfdb.rdann(record_path, extension="atr", sampfrom=start, sampto=end, return_label_elements=["symbol"])
      annSymbol = ann.symbol

      # Ignore windows without a usable label
      if len(annSymbol) == 0 or annSymbol[0] not in labels_list:
        continue

      # Undersample the 'N' class: keep roughly 15% of those windows
      if annSymbol[0] == 'N' and np.random.random() > 0.15:
        continue

      # Create one hot vector
      label_vec = [0] * len(labels_list)
      label_vec[labels_list.index(annSymbol[0])] = 1

      signal = np.asarray(signals_mlii[start:end], dtype=float)
      if np.isnan(signal).any():
        continue
      # append label vector and data vector
      full_label.append(label_vec)
      full_data.append(signal)

  # All windows have the same length, so these conversions cannot fail on
  # ragged input any more; real errors are allowed to propagate.
  full_data = np.nan_to_num(np.asarray(full_data, dtype=float))
  full_label = np.asarray(full_label, dtype=float)
  print(full_data.dtype)

  # If no split, just return
  if split == 0.0:
    return full_data, full_label

  # Split
  X_train, X_test, y_train, y_test = model_selection.train_test_split(full_data, full_label, test_size=split, random_state=11)

  if save:
    # Make sure the output directory exists before writing into it
    os.makedirs("processed_data", exist_ok=True)
    np.savetxt("processed_data/X_train.csv", X_train)
    np.savetxt("processed_data/y_train.csv", y_train)
    np.savetxt("processed_data/X_test.csv", X_test)
    np.savetxt("processed_data/y_test.csv", y_test)
  return X_train, y_train, X_test, y_test

!mkdir processed_data

mkdir: cannot create directory ‘processed_data’: File exists


## Neural Network Initializations

In [None]:
class PrintLayer(nn.Module):
    """Pass-through module that offers a place to hook debug printing.

    ``p`` is a caller-supplied tag stored on the module; ``forward`` returns
    its input unchanged.
    """

    def __init__(self, p):
        super().__init__()
        self.p = p

    def forward(self, x):
        # Enable the line below to trace the tensor flowing through this layer.
        # print(self.p, x.shape)
        passthrough = x
        return passthrough

### CNN


In [None]:
class CNN_ECG(nn.Module):
    """Three-stage 1-D CNN followed by a small fully connected classifier.

    Each stage is Conv1d -> MaxPool1d(2, 2) -> LeakyReLU. The classifier
    expects 600 flattened features (20 channels x 30 positions), which is
    what a 260-sample, single-channel input produces, and emits 5 scores
    per sample.

    ``input_size`` is accepted but not used by the layers.
    """

    def __init__(self, input_size):
        super().__init__()

        def conv_stage(n_in, n_out, width):
            # One convolution stage with max pooling and a leaky ReLU
            return nn.Sequential(
                nn.Conv1d(in_channels=n_in, out_channels=n_out, kernel_size=width, stride=1),
                nn.MaxPool1d(kernel_size=2, stride=2),
                nn.LeakyReLU(),
            )

        # Convolutional feature extractor
        self.cnn_1 = conv_stage(1, 5, 3)
        self.cnn_2 = conv_stage(5, 10, 4)
        self.cnn_3 = conv_stage(10, 20, 4)

        # Fully connected head
        dense_layers = [
            nn.Linear(in_features=600, out_features=30),
            nn.LeakyReLU(),
            nn.Linear(in_features=30, out_features=20),
            nn.LeakyReLU(),
            nn.Linear(in_features=20, out_features=5),
        ]
        self.fc = nn.Sequential(*dense_layers)

    def forward(self, x):
        features = self.cnn_3(self.cnn_2(self.cnn_1(x)))
        # Flatten (batch, channels, length) into (batch, channels * length)
        flat = features.view(features.size(0), features.size(1) * features.size(2))
        return self.fc(flat)

### RNN-LSTM

In [None]:
class RNN_LSTM(nn.Module):
    """Stack of three LSTM cells followed by a sigmoid-activated dense layer.

    The whole input window is presented to the first cell as one feature
    vector, and the three-cell stack is stepped five times on that same
    vector. The hidden state of the last cell after the final step goes
    through dropout and the dense head, giving 5 values in (0, 1) per sample.

    Args:
        input_size: number of features of the (squeezed) input, i.e. the
            window length.
        batch_size: stored on the module for compatibility; ``forward`` sizes
            its state tensors from the actual input batch.
    """

    def __init__(self, input_size, batch_size):
        super(RNN_LSTM, self).__init__()

        self.dense = nn.Sequential(
            nn.Linear(in_features=100, out_features=5),
            nn.Sigmoid()
        )

        self.batch_size = batch_size

        self.lstm1 = nn.LSTMCell(input_size = input_size, hidden_size = 64)
        self.do1 = nn.Dropout(p = 0.2)

        self.lstm2 = nn.LSTMCell(input_size = 64, hidden_size = 256)
        self.do2 = nn.Dropout(p = 0.2)

        self.lstm3 = nn.LSTMCell(input_size = 256, hidden_size = 100)
        self.do3 = nn.Dropout(p = 0.2)

    def forward(self, x):
        """Forward pass of the LSTM.

        ``x`` is squeezed along dimension 1 before being fed to the first
        cell; the result has shape ``(x.shape[0], 5)``.
        """
        batch = x.shape[0]

        # Zero initial states created with the dtype and device of the input.
        h_0 = x.new_zeros(batch, self.lstm1.hidden_size)
        c_0 = x.new_zeros(batch, self.lstm1.hidden_size)

        h_1 = x.new_zeros(batch, self.lstm2.hidden_size)
        c_1 = x.new_zeros(batch, self.lstm2.hidden_size)

        h_2 = x.new_zeros(batch, self.lstm3.hidden_size)
        c_2 = x.new_zeros(batch, self.lstm3.hidden_size)

        x = x.squeeze(1)
        for _ in range(5):
            h_0, c_0 = self.lstm1(x, (h_0, c_0))
            h_1, c_1 = self.lstm2(self.do1(h_0), (h_1, c_1))
            # The third cell must receive its own cell state c_2; passing h_2
            # twice (as before) discarded the cell state on every step.
            # NOTE: weights trained with the old behaviour will give
            # different outputs.
            h_2, c_2 = self.lstm3(self.do2(h_1), (h_2, c_2))

        # Only the state after the last step is used, so the head runs once.
        return self.dense(self.do3(h_2))

## Functions

### CNN

In [None]:
def train_CNN(lr, X_train, y_train, train_iter, epochs=20, batch_size=256):
  """Train a fresh ``CNN_ECG`` with Adam and cross-entropy loss.

  Every epoch draws ``train_iter`` random batches (sampled with replacement)
  from the training tensors. Loss and training accuracy are printed on every
  second epoch.

  Args:
    lr: learning rate for Adam.
    X_train: input tensor indexed along dimension 0 by sample.
    y_train: one-hot label tensor with one row per sample.
    train_iter: number of batches per epoch.
    epochs: index of the last epoch; ``epochs + 1`` passes are run
      (epochs 0..epochs), as before.
    batch_size: number of samples drawn per batch.

  Returns:
    The trained model.
  """
  input_size = X_train.shape[-1]  # CNN_ECG currently ignores this value
  model = CNN_ECG(input_size)
  model.train()

  criterion = nn.CrossEntropyLoss() # not sure if we should use this
  optimizer = torch.optim.Adam(model.parameters(), lr)
  num_samples = X_train.shape[0]

  for epoch in range(epochs + 1):
    # Plain float: accumulating the loss tensor itself would keep every
    # batch's autograd graph reachable.
    total_loss = 0.0

    acc_count = 0
    acc_total = train_iter * batch_size

    for _ in range(train_iter):
      batch_idx = np.random.randint(low=0, high=num_samples, size=(batch_size,))
      x_batch = X_train[batch_idx]
      y_batch = y_train[batch_idx]

      optimizer.zero_grad()
      y_predict = model(x_batch)
      loss = criterion(y_predict, y_batch)
      loss.backward()
      optimizer.step()

      total_loss += loss.item()

      # Accuracy: the arg-max of the raw scores (rounding them first creates
      # artificial ties) turned into a one-hot row must equal the label row.
      with torch.no_grad():
        pred_onehot = nn.functional.one_hot(y_predict.argmax(dim=1), num_classes=y_predict.shape[1])
        acc_count += int((pred_onehot == y_batch.long()).all(dim=1).sum())

    if epoch % 2 == 0 and epoch != 0:
      print("Epoch {}".format(epoch))
      accuracy = acc_count/acc_total if acc_total else 0.0
      print("Loss: ", round(total_loss, 4))
      print("Training Accuracy: ", round(accuracy, 4))
  return model

### LSTM

In [None]:
def train_RNN_LTSM(lr, X_train, y_train, train_iter=3, epochs=40, batch_size=10):
  """Train a fresh ``RNN_LSTM`` with Adam and mean-squared-error loss.

  Every epoch draws ``train_iter`` random batches (sampled with replacement)
  from the training tensors. Loss and training accuracy are printed on every
  second epoch.

  Args:
    lr: learning rate for Adam.
    X_train: input tensor indexed along dimension 0 by sample; its last
      dimension is used as the LSTM input size.
    y_train: one-hot label tensor with one row per sample.
    train_iter: number of batches per epoch.
    epochs: index of the last epoch; ``epochs + 1`` passes are run
      (epochs 0..epochs), as before.
    batch_size: number of samples drawn per batch.

  Returns:
    The trained model.
  """
  # Derive the feature count from the data instead of hard-coding 720.
  input_size = X_train.shape[-1]
  model = RNN_LSTM(input_size, batch_size)
  model.train()

  criterion = nn.MSELoss()
  optimizer = torch.optim.Adam(model.parameters(), lr)
  num_samples = X_train.shape[0]

  for epoch in range(epochs + 1):
    # Plain float: accumulating the loss tensor itself would keep every
    # batch's autograd graph reachable.
    total_loss = 0.0

    acc_count = 0
    acc_total = train_iter * batch_size

    for _ in range(train_iter):
      batch_idx = np.random.randint(low=0, high=num_samples, size=(batch_size,))
      x_batch = X_train[batch_idx]
      y_batch = y_train[batch_idx]

      optimizer.zero_grad()
      y_predict = model(x_batch)
      loss = criterion(y_predict, y_batch)
      loss.backward()
      optimizer.step()

      total_loss += loss.item()

      # Accuracy: the arg-max of the raw outputs (rounding the sigmoid
      # outputs first collapses most rows to ties) turned into a one-hot row
      # must equal the label row.
      with torch.no_grad():
        pred_onehot = nn.functional.one_hot(y_predict.argmax(dim=1), num_classes=y_predict.shape[1])
        acc_count += int((pred_onehot == y_batch.long()).all(dim=1).sum())

    if epoch % 2 == 0 and epoch != 0:
      print("Epoch: {}".format(epoch))
      accuracy = round(acc_count/acc_total, 4) if acc_total else 0.0
      print("Loss: ", round(total_loss, 4))
      print("Training Accuracy: ", accuracy)

  return model

### Testing

In [None]:
def test_model(model, X_test, y_test, model_type="cnn"):
  if model_type == "cnn":
    criterion = nn.CrossEntropyLoss()
  else:
    criterion = nn.MSELoss()

  tp = 0
  fp = 0
  fn = 0
  tn = 0
  with torch.no_grad():
    y_predict = model(X_test)
    # loss
    loss = criterion(torch.squeeze(y_predict), y_test)
    # accuracy
    y_test = y_test.long()
    y_predict = y_predict.detach().numpy()
    y_pred_idx = np.argmax(y_predict, axis=1)
    acount = 0
    for i in range(y_predict.shape[0]):
      pred = np.zeros(5) ## Number of classes, create 1 hot vec
      pred[y_pred_idx[i]] = 1
      y = y_test.detach().numpy()

      actual = y[i]
      if (pred == actual).all():
        acount += 1
        if (pred[0] == 1 and actual[0] == 1):
          tp += 1
        elif (pred[0] != 1 and actual[0] != 1):
          tn += 1
      else:
        if (pred[0] == 1 and actual[0] != 1):
          fp += 1
        elif (pred[0] != 1 and actual[0] == 1):
          fn += 1
    accuracy = round(acount/y_predict.shape[0], 4)
    print("Test Loss: ", round(loss.item(), 4))
    print("Testing Accuracy: ", accuracy)
    print("True Positive: ", tp)
    print("True Negative: ", tn)
    print("False Positive: ", fp)
    print("False Negative: ", fn)
    print("sensitivity: ", tp / (tp + fn))
    print("specificity: ", tn / (tn + fp))

### Hyperparameters & Other Initializations

In [None]:
lr = 0.001

## Calling Functions

In [None]:
!mkdir models

In [None]:
!ls

mitdb	physionet.org	processed_data_challenge
models	processed_data	sample_data


### CNN

In [None]:
# Build the 260-sample dataset, train the CNN, persist it and evaluate it on
# the held-out 30%.
X_train, y_train, X_test, y_test = preprocess(False, split=0.3, size=260, save=False)
X_train = torch.Tensor(X_train).unsqueeze(1)
demo = np.sum(y_train, axis=0)
y_train = torch.Tensor(y_train)

print("Demo of Data: ", demo)

model = train_CNN(lr, X_train, y_train, 3)
# Write through a context manager so the file handle is flushed and closed.
os.makedirs('models', exist_ok=True)
with open('models/cnn', 'wb') as model_file:
  pickle.dump(model, model_file)
#Test
X_test = torch.Tensor(X_test).unsqueeze(1)
demo = np.sum(y_test, axis=0)
y_test = torch.Tensor(y_test)
test_model(model, X_test, y_test, "cnn")

100%|██████████| 2559/2559 [00:53<00:00, 48.06it/s]
100%|██████████| 3626/3626 [01:06<00:00, 54.62it/s]
100%|██████████| 2919/2919 [01:01<00:00, 47.79it/s]
100%|██████████| 3681/3681 [01:13<00:00, 49.95it/s]
100%|██████████| 2643/2643 [01:01<00:00, 42.74it/s]
100%|██████████| 2627/2627 [01:02<00:00, 41.94it/s]
100%|██████████| 3450/3450 [01:11<00:00, 48.16it/s]
100%|██████████| 2306/2306 [00:45<00:00, 51.08it/s]
100%|██████████| 3494/3494 [01:00<00:00, 58.07it/s]
100%|██████████| 2556/2556 [00:58<00:00, 44.02it/s]
100%|██████████| 3494/3494 [01:06<00:00, 52.61it/s]
100%|██████████| 2539/2539 [00:56<00:00, 44.76it/s]
100%|██████████| 3486/3486 [00:58<00:00, 59.42it/s]
100%|██████████| 3485/3485 [00:58<00:00, 59.24it/s]
100%|██████████| 3598/3598 [01:01<00:00, 58.39it/s]
100%|██████████| 2529/2529 [00:52<00:00, 47.98it/s]
100%|██████████| 3079/3079 [00:44<00:00, 69.87it/s]
100%|██████████| 2846/2846 [00:58<00:00, 49.06it/s]
100%|██████████| 3751/3751 [01:15<00:00, 50.01it/s]
100%|███████

float64
Demo of Data:  [7.984e+03 1.000e+00 4.982e+03 5.580e+02 1.800e+01]
Epoch 2
Loss:  4.8048
Training Accuracy:  0.5951
Epoch 4
Loss:  4.2648
Training Accuracy:  0.6185
Epoch 6
Loss:  3.2852
Training Accuracy:  0.599
Epoch 8
Loss:  2.4813
Training Accuracy:  0.6328
Epoch 10
Loss:  2.2262
Training Accuracy:  0.6081
Epoch 12
Loss:  2.3126
Training Accuracy:  0.7188
Epoch 14
Loss:  1.9228
Training Accuracy:  0.776
Epoch 16
Loss:  1.3408
Training Accuracy:  0.8685
Epoch 18
Loss:  1.1073
Training Accuracy:  0.8724
Epoch 20
Loss:  1.0163
Training Accuracy:  0.8815
Test Loss:  0.3976
Testing Accuracy:  0.8682
True Positive:  3168
True Negative:  0
False Positive:  431
False Negative:  244
sensitivity:  0.9284876905041032
specificity:  0.0


In [None]:
!ls models

cnn  lstm


### LSTM

In [None]:
# Build the 720-sample dataset, train the LSTM, persist it and evaluate it on
# the held-out 20%.
X_train, y_train, X_test, y_test = preprocess(False, split=0.2, size=720, save=False)
# X_train = X_train.astype(float)
print(X_train.dtype)
X_train = torch.Tensor(X_train).unsqueeze(1)
# X_train = torch.from_numpy(X_train).unsqueeze(1)

demo = np.sum(y_train, axis=0)
y_train = torch.Tensor(y_train)
print("Demo: ", demo)
model = train_RNN_LTSM(lr, X_train, y_train)
# Write through a context manager so the file handle is flushed and closed.
os.makedirs('models', exist_ok=True)
with open('models/lstm', 'wb') as model_file:
  pickle.dump(model, model_file)
#Test
X_test = torch.Tensor(X_test).unsqueeze(1)
demo = np.sum(y_test, axis=0)
y_test = torch.Tensor(y_test)
test_model(model, X_test, y_test, "lstm")

100%|██████████| 3355/3355 [01:34<00:00, 35.40it/s]
100%|██████████| 3122/3122 [00:43<00:00, 71.56it/s]


float64
float64
Demo:  [736.   0. 110.   1.   0.]
Epoch: 2
Loss:  0.5883
Training Accuracy:  0.8667
Epoch: 4
Loss:  0.2264
Training Accuracy:  0.9
Epoch: 6
Loss:  0.1163
Training Accuracy:  0.9
Epoch: 8
Loss:  0.1772
Training Accuracy:  0.8333
Epoch: 10
Loss:  0.1445
Training Accuracy:  0.8667
Epoch: 12
Loss:  0.2135
Training Accuracy:  0.8
Epoch: 14
Loss:  0.2082
Training Accuracy:  0.8
Epoch: 16
Loss:  0.142
Training Accuracy:  0.8667
Epoch: 18
Loss:  0.08
Training Accuracy:  0.9333
Epoch: 20
Loss:  0.1416
Training Accuracy:  0.8667
Epoch: 22
Loss:  0.1716
Training Accuracy:  0.8333
Epoch: 24
Loss:  0.1199
Training Accuracy:  0.9
Epoch: 26
Loss:  0.0787
Training Accuracy:  0.9333
Epoch: 28
Loss:  0.1936
Training Accuracy:  0.8
Epoch: 30
Loss:  0.1525
Training Accuracy:  0.8333
Epoch: 32
Loss:  0.175
Training Accuracy:  0.8
Epoch: 34
Loss:  0.1324
Training Accuracy:  0.8667
Epoch: 36
Loss:  0.0076
Training Accuracy:  1.0
Epoch: 38
Loss:  0.0786
Training Accuracy:  0.9333
Epoch: 40
Los

## Transfer Learning

Installing Dataset

In [None]:
!wget -r -N -c -np https://physionet.org/files/challenge-2017/1.0.0/training/
!wget -r -N -c -np https://physionet.org/files/challenge-2017/1.0.0/validation/

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Length: 13988 (14K) [text/plain]
Saving to: ‘physionet.org/files/challenge-2017/1.0.0/validation/A00057.mat’


2022-04-20 22:56:46 (300 MB/s) - ‘physionet.org/files/challenge-2017/1.0.0/validation/A00057.mat’ saved [13988/13988]

--2022-04-20 22:56:46--  https://physionet.org/files/challenge-2017/1.0.0/validation/A00058.hea
Reusing existing connection to physionet.org:443.
HTTP request sent, awaiting response... 200 OK
Length: 84 [text/plain]
Saving to: ‘physionet.org/files/challenge-2017/1.0.0/validation/A00058.hea’


2022-04-20 22:56:46 (12.6 MB/s) - ‘physionet.org/files/challenge-2017/1.0.0/validation/A00058.hea’ saved [84/84]

--2022-04-20 22:56:46--  https://physionet.org/files/challenge-2017/1.0.0/validation/A00058.mat
Reusing existing connection to physionet.org:443.
HTTP request sent, awaiting response... 200 OK
Length: 36024 (35K) [text/plain]
Saving to: ‘physionet.org/files/challenge-2017/1.0.0/validation/A00058

In [None]:
# Root folder of the downloaded challenge-2017 files (relative to the working directory)
path = 'physionet.org/files/challenge-2017/1.0.0/'
# first 3000 samples of training record A00/A00001 (no annotation is read here)
record_train = wfdb.rdrecord(path + 'training/A00/A00001', sampto=3000)

In [None]:
wfdb.plot_wfdb(record=record_train, plot_sym=True,
                   time_units='seconds', title='Challenge-2017 Record A00 A00001',
                   figsize=(10,4), ecg_grids='all')

ImportError: ignored

<Figure size 720x288 with 1 Axes>

Initializations

In [None]:
training = True

### Loading Dataset

In [None]:
# Pick the sub-folder that matches the `training` flag.
location = "training/" if training else "validation/"


def _read_record_list(list_name):
  """Return the record names stored one per line in ``path + location + list_name``.

  Surrounding whitespace (including the line ending) is stripped and blank
  lines are skipped, so a last line without a trailing newline is no longer
  truncated by one character.
  """
  with open(path + location + list_name, "r") as record_list:
    return [line.strip() for line in record_list if line.strip()]


# One list of record names per class
file_af = _read_record_list("RECORDS-af")
file_noisy = _read_record_list("RECORDS-noisy")
file_normal = _read_record_list("RECORDS-normal")
file_other = _read_record_list("RECORDS-other")

# Work on a random quarter of all records.
# NOTE: this rebinds the module-level `file_name` that preprocess() also
# reads, and the shuffle is unseeded, so the selection changes between runs.
file_name = file_af + file_noisy + file_normal + file_other
np.random.shuffle(file_name)
file_name = file_name[:len(file_name) // 4]


In [None]:
# NOTE(review): this rebinds the module-level `labels_list` that preprocess()
# also reads; re-run the MIT-BIH preprocessing cell before calling
# preprocess() again after this cell has executed.
labels_list = ['AF','NOISY','NORMAL','OTHER']

def preprocess_challenge(small=False, split=0.0, size=720, save=False):
  """Cut fixed-length, peak-centred windows out of the challenge records.

  For every record name in the module-level ``file_name`` list, the first
  signal channel is standardised, peaks are located with ``find_peaks`` and a
  window of ``size`` samples is cut around each peak (the first and last peak
  are skipped). Every window inherits the class of its record, looked up in
  ``file_af`` / ``file_noisy`` / ``file_normal`` / ``file_other`` (in that
  priority order). Windows of NORMAL records are randomly dropped so that
  only about 15% of them are kept. Records that appear in none of the four
  lists are skipped with a printed warning.

  Args:
    small: if True, only the first 3000 samples of each record are read.
    split: fraction of the windows held out as test set; 0.0 disables the
      split.
    size: window length in samples.
    save: if True, the full data and label arrays are written to
      ``processed_data_challenge/*.csv`` (before any split).

  Returns:
    ``(data, labels)`` when ``split == 0.0``, otherwise
    ``(X_train, y_train, X_test, y_test)``. Label rows are one-hot with
    ``len(labels_list) + 1`` entries.
  """
  # Record name -> class index, built once; setdefault keeps the first list a
  # name appears in, matching the previous if/elif priority.
  label_of = {}
  for index, names in enumerate((file_af, file_noisy, file_normal, file_other)):
    for name in names:
      label_of.setdefault(name, index)

  normal_label = 2  # position of 'NORMAL' in labels_list

  full_data = list()
  full_label = list()
  for num in file_name:
    label = label_of.get(num)
    if label is None:
      # Previously this fell through to a string label that could not be used
      # as an index; an unlabelled record cannot contribute samples.
      print("{} has no class label, skipping".format(num))
      continue

    # If small, only read the first 3000 samples of the record
    if small:
      record_file = wfdb.rdrecord(path + location + num, sampto=3000, smooth_frames=True)
    else:
      record_file = wfdb.rdrecord(path + location + num, smooth_frames=True)

    # One-hot label shared by every window of this record
    label_vec = np.zeros(len(labels_list) + 1, dtype=float)
    label_vec[label] = 1

    # Obtain the standardised first channel and the peak indices
    signals_mlii = preprocessing.scale(np.nan_to_num(record_file.p_signal[:,0]))
    signals_mlii[np.isnan(signals_mlii)] = 0
    signals_mlii = signals_mlii.tolist()
    n_samples = len(signals_mlii)
    peaks_mlii, _ = find_peaks(signals_mlii, distance=150)

    # Create one data point centred at each peak
    for peak in tqdm(peaks_mlii[1:-1]):
      start = peak - size // 2
      end = start + size
      # Skip windows that would be truncated at either end of the record;
      # truncated windows would make the final array ragged.
      if start < 0 or end > n_samples:
        continue

      # Undersample the NORMAL class: keep roughly 15% of those windows
      if label == normal_label and np.random.random() > 0.15:
        continue

      signal = np.asarray(signals_mlii[start:end], dtype=float)
      if np.isnan(signal).any():
        continue
      # append label vector and data vector
      full_label.append(label_vec)
      full_data.append(signal)

  full_data = np.nan_to_num(np.asarray(full_data, dtype=float))
  full_label = np.asarray(full_label, dtype=float)

  if save:
    # Make sure the output directory exists before writing into it
    os.makedirs("processed_data_challenge", exist_ok=True)
    np.savetxt("processed_data_challenge/data_lstm.csv", full_data)
    np.savetxt("processed_data_challenge/label_lstm.csv", full_label)

  # If no split, just return
  if split == 0.0:
    return full_data, full_label

  # Split
  X_train, X_test, y_train, y_test = model_selection.train_test_split(full_data, full_label, test_size=split, random_state=11)

  print(type(X_train))

  print("train shape:")
  print(X_train.shape)
  print(y_train.shape)

  print("test shape:")
  print(X_test.shape)
  print(y_test.shape)

  # Same return layout as preprocess(); previously nothing was returned here.
  return X_train, y_train, X_test, y_test

# !mkdir processed_data
# X_train, y_train, X_test, y_test = preprocess(True, split=0.3, size=260, save=False)
# print(np.sum(y_train, axis=0))
# print(np.sum(y_test, axis=0))
# print(y_train)
# print(y_test)



### Testing Both Networks with the Challenge Dataset

In [None]:
!mkdir processed_data_challenge

CNN

In [None]:
# X_train = X_train.unsqueeze(-1)
# The dataset is built once; a second, identical preprocess_challenge call
# whose result was never used has been removed (it doubled the runtime).
data, label = preprocess_challenge(False, split=0.0, size=260, save=False)
#Test
data = torch.Tensor(data).unsqueeze(1)
demo = np.sum(label, axis=0)
label = torch.Tensor(label)
# pickle executes arbitrary code on load: only unpickle model files that were
# written by this notebook.
with open('models/cnn', 'rb') as model_file:
  model = pickle.load(model_file)
print(data.shape, label.shape)
# NOTE(review): the CNN was trained with class order ['N','S','V','F','Q'],
# while these labels are indexed as ['AF','NOISY','NORMAL','OTHER'], so index 0
# means 'N' to the model but 'AF' here -- confirm the intended class mapping.
test_model(model, data, label, "cnn")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
100%|██████████| 42/42 [00:00<00:00, 9300.50it/s]
100%|██████████| 66/66 [00:00<00:00, 12484.74it/s]
100%|██████████| 37/37 [00:00<00:00, 40235.74it/s]
100%|██████████| 17/17 [00:00<00:00, 27679.80it/s]
100%|██████████| 36/36 [00:00<00:00, 32360.68it/s]
100%|██████████| 50/50 [00:00<00:00, 10582.06it/s]
100%|██████████| 43/43 [00:00<00:00, 35798.94it/s]
100%|██████████| 35/35 [00:00<00:00, 43768.83it/s]
100%|██████████| 47/47 [00:00<00:00, 8915.98it/s]
100%|██████████| 38/38 [00:00<00:00, 52256.90it/s]
100%|██████████| 20/20 [00:00<00:00, 14854.98it/s]
100%|██████████| 33/33 [00:00<00:00, 45695.62it/s]
100%|██████████| 18/18 [00:00<00:00, 23823.75it/s]
100%|██████████| 34/34 [00:00<00:00, 47726.35it/s]
100%|██████████| 38/38 [00:00<00:00, 51984.20it/s]
100%|██████████| 66/66 [00:00<00:00, 56819.39it/s]
100%|██████████| 22/22 [00:00<00:00, 56402.62it/s]
100%|██████████| 39/39 [00:00<00:00, 9167.62it/s]
100%|██████████| 36/

torch.Size([182312, 1, 260]) torch.Size([182312, 5])
Test Loss:  2.9158
Testing Accuracy:  0.179
True Positive:  27460
True Negative:  5165
False Positive:  116945
False Negative:  5551
sensitivity:  0.8318439308109418
specificity:  0.042297928097616906


In [None]:
!ls physionet.org/files/challenge-2017/1.0.0/training/

A00  A04  A08	      RECORDS-af      REFERENCE.csv	REFERENCE-v3.csv
A01  A05  index.html  RECORDS-noisy   REFERENCE-v0.csv	SHA1SUMS
A02  A06  MD5SUMS     RECORDS-normal  REFERENCE-v1.csv	SHA256SUMS
A03  A07  RECORDS     RECORDS-other   REFERENCE-v2.csv


In [None]:
# sensitivity cnn
# Load CNN Data
# Evaluates CNN weights stored in 'cnn_full_model.pth' on the CSV files
# unpacked from 260_processed.zip (presumably 260-sample windows, matching
# CNN_ECG(260) below -- TODO confirm).
from numpy import genfromtxt
import pickle
!unzip 260_processed.zip

X_train = genfromtxt('processed_data_260/X_train.csv')
y_train = genfromtxt('processed_data_260/y_train.csv')
X_test = genfromtxt('processed_data_260/X_test.csv')
y_test = genfromtxt('processed_data_260/y_test.csv')

print("X_train: ", X_train.shape)
print("y_train: ", y_train.shape)
print("X_test: ", X_test.shape)
print("y_test: ", y_test.shape)
model = CNN_ECG(260)
# The .pth file is loaded as a state_dict into the freshly built model.
model.load_state_dict(torch.load('cnn_full_model.pth'))
# model = torch.load('cnn_full_model.pth')

# Per-class sample counts of the training labels (column sums)
demo = np.sum(y_train, axis=0)
print("Demo of Data: ", demo)
# Add the channel dimension expected by the Conv1d layers
X_test = torch.Tensor(X_test).unsqueeze(1)
demo = np.sum(y_test, axis=0)
y_test = torch.Tensor(y_test)
test_model(model, X_test, y_test, "cnn")


In [None]:
# sensitivity LSTM
# Load LSTM Data
from numpy import genfromtxt
import pickle
!unzip processed_data_8f.zip

X_train = genfromtxt('processed_data_8f/X_train.csv')
y_train = genfromtxt('processed_data_8f/y_train.csv')
X_test = genfromtxt('processed_data_8f/X_test.csv')
y_test = genfromtxt('processed_data_8f/y_test.csv')

print("X_train: ", X_train.shape)
print("y_train: ", y_train.shape)
print("X_test: ", X_test.shape)
print("y_test: ", y_test.shape)
model = RNN_LSTM(720, 256)
model.load_state_dict(torch.load('lstm_model.pth'))
# model = torch.load('cnn_full_model.pth')

demo = np.sum(y_train, axis=0)
print("Demo of Data: ", demo)
X_test = torch.Tensor(X_test).unsqueeze(1)
demo = np.sum(y_test, axis=0)
y_test = torch.Tensor(y_test)
test_model(model, X_test, y_test, "cnn")
