In [19]:
import torch
import torch.nn as nn
import torch.nn.functional as F 
import scipy.io as scio
import numpy as np
import random
import torch.utils.data as data
import os
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
from glob import glob
from torch.optim import lr_scheduler

In [20]:
# In this section, we will apply an CNN to extract features and implement a classification task.
# Firstly, we should build the model by PyTorch. We provide a baseline model here.
# You can use your own model for better performance
class Doubleconv_33(nn.Module):
    def __init__(self, ch_in, ch_out):
        super(Doubleconv_33, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv1d(ch_in, ch_out, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.Conv1d(ch_out, ch_out, kernel_size=3),
            nn.ReLU(inplace=True)
        )

    def forward(self, input):
        return self.conv(input)


class Doubleconv_35(nn.Module):
    def __init__(self, ch_in, ch_out):
        super(Doubleconv_35, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv1d(ch_in, ch_out, kernel_size=5),
            nn.ReLU(inplace=True),
            nn.Conv1d(ch_out, ch_out, kernel_size=5),
            nn.ReLU(inplace=True)
        )

    def forward(self, input):
        return self.conv(input)


class Doubleconv_37(nn.Module):
    def __init__(self, ch_in, ch_out):
        super(Doubleconv_37, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv1d(ch_in, ch_out, kernel_size=7),
            nn.ReLU(inplace=True),
            nn.Conv1d(ch_out, ch_out, kernel_size=7),
            nn.ReLU(inplace=True)
        )

    def forward(self, input):
        return self.conv(input)


class Tripleconv(nn.Module):
    def __init__(self, ch_in, ch_out):
        super(Tripleconv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv1d(ch_in, ch_out, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.Conv1d(ch_out, ch_out, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.Conv1d(ch_out, ch_out, kernel_size=3),
            nn.ReLU(inplace=True)
        )

    def forward(self, input):
        return self.conv(input)

class MLP(nn.Module):
    def __init__(self, ch_in, ch_out):
        super(MLP, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(ch_in, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Linear(256, ch_out),
        )

    def forward(self, input):
        return self.fc(input)


class Mscnn(nn.Module):
    # TODO: Build a better model
    def __init__(self, ch_in, ch_out):
        super(Mscnn, self).__init__()
        self.conv11 = Doubleconv_33(ch_in, 64)
        self.pool11 = nn.MaxPool1d(3, stride=3)
        self.conv12 = Doubleconv_33(64, 128)
        self.pool12 = nn.MaxPool1d(3, stride=3)
        self.conv13 = Tripleconv(128, 256)
        self.pool13 = nn.MaxPool1d(2, stride=2)
        self.conv14 = Tripleconv(256, 512)
        self.pool14 = nn.MaxPool1d(2, stride=2)
        self.conv15 = Tripleconv(512, 512)
        self.pool15 = nn.MaxPool1d(2, stride=2)

        self.conv21 = Doubleconv_37(ch_in, 64)
        self.pool21 = nn.MaxPool1d(3, stride=3)
        self.conv22 = Doubleconv_37(64, 128)
        self.pool22 = nn.MaxPool1d(3, stride=3)
        self.conv23 = Tripleconv(128, 256)
        self.pool23 = nn.MaxPool1d(2, stride=2)
        self.conv24 = Tripleconv(256, 512)
        self.pool24 = nn.MaxPool1d(2, stride=2)
        self.conv25 = Tripleconv(512, 512)
        self.pool25 = nn.MaxPool1d(2, stride=2)

        self.out = MLP(512*27*2, ch_out)

    # def __init__(self, ch_in, ch_out):
    #     super(Mscnn, self).__init__()
    #     self.conv11 = Doubleconv_35(ch_in, 64)
    #     self.pool11 = nn.MaxPool1d(3, stride=3)
    #     self.conv12 = Doubleconv_35(64, 128)
    #     self.pool12 = nn.MaxPool1d(3, stride=3)
    #     self.conv13 = Tripleconv(128, 256)
    #     self.pool13 = nn.MaxPool1d(2, stride=2)
    #     self.conv14 = Tripleconv(256, 512)
    #     self.pool14 = nn.MaxPool1d(2, stride=2)
    #     self.conv15 = Tripleconv(512, 512)
    #     self.pool15 = nn.MaxPool1d(2, stride=2)

    #     self.out = MLP(512*27, ch_out)

    # def __init__(self, ch_in, ch_out):
    #     super(Mscnn, self).__init__()
    #     self.conv11 = Doubleconv_37(ch_in, 64)
    #     self.pool11 = nn.MaxPool1d(3, stride=3)
    #     self.conv12 = Doubleconv_37(64, 128)
    #     self.pool12 = nn.MaxPool1d(3, stride=3)
    #     self.conv13 = Tripleconv(128, 256)
    #     self.pool13 = nn.MaxPool1d(2, stride=2)
    #     self.conv14 = Tripleconv(256, 512)
    #     self.pool14 = nn.MaxPool1d(2, stride=2)
    #     self.conv15 = Tripleconv(512, 512)
    #     self.pool15 = nn.MaxPool1d(2, stride=2)

    #     self.out = MLP(512*27, ch_out) 

    def forward(self, x):
        c11 = self.conv11(x)
        p11 = self.pool11(c11)
        c12 = self.conv12(p11)
        p12 = self.pool12(c12)
        c13 = self.conv13(p12)
        p13 = self.pool13(c13)
        c14 = self.conv14(p13)
        p14 = self.pool14(c14)
        c15 = self.conv15(p14)
        p15 = self.pool15(c15)

        c21 = self.conv21(x)
        p21 = self.pool21(c21)
        c22 = self.conv22(p21)
        p22 = self.pool22(c22)
        c23 = self.conv23(p22)
        p23 = self.pool23(c23)
        c24 = self.conv24(p23)
        p24 = self.pool24(c24)
        c25 = self.conv25(p24)
        p25 = self.pool25(c25)

        merge = torch.cat((p15, p25), dim=1)
        merge = merge.view(merge.size()[0], -1)
        output = self.out(merge)
        output = F.sigmoid(output)

        return output

In [23]:
# Next, we need to construct the data loader for training. 
import numpy as np
import pandas as pd
from torch.utils.data import Dataset
import scipy.io as io
from scipy.interpolate import interp1d
import torch
import matplotlib.pyplot as plt 

# Random clipping has been implemented, 
# and you need to add noise and random scaling. 
# Generally, the scaling should be done before the crop.
# In general, do not add scaling and noise enhancement options during testing

class ECG_dataset(Dataset):

    def __init__(self, base_file, cv=0, is_train=True):
        self.is_train = is_train
        self.file_list=[]
        self.base_file=base_file
        
        for i in range(5):
            data=pd.read_csv(base_file+'/cv/cv'+str(i)+'.csv')
            self.file_list.append(data.to_numpy())
        self.file=None
        if is_train:
            del self.file_list[cv]
            self.file=self.file_list[0]
            for i in range(1,4):
                self.file=np.append(self.file,self.file_list[i],axis=0)
        else:
            self.file=self.file_list[cv]

        
    def __len__(self):
        return self.file.shape[0]
    

    def load_data(self,file_name,label):
        #读取数据
        mat_file = self.base_file+'/training2017/'+file_name+'.mat'
        data = io.loadmat(mat_file)['val']
        if label=='N':
            one_hot=torch.tensor([0])
        elif label=='O':
            one_hot=torch.tensor([0])
        elif label=='A':
            one_hot=torch.tensor([1])
        elif label=='~':
            one_hot=torch.tensor([0])
            
        return data, one_hot


    
    def crop_padding(self,data,time):
        #随机crop
        if data.shape[0]<=time:
            data=np.pad(data, (0,time-data.shape[0]), 'constant')
        elif data.shape[0]>time:
            end_index=data.shape[0]-time
            start=np.random.randint(0, end_index)
            data=data[start:start+time]

        return data


    def data_process(self,data):
        # 学习论文以及数据集选择合适和采样率
        # 并完成随机gaussian 噪声和随机时间尺度放缩
        time_scale = np.random.randint(1, 4, 1)
        time_scale = int(time_scale[0])
        data = data[::time_scale]
        # data = data[::3]
        data = data-data.mean()
        data = data/data.std()
        # gassian_noise = np.random.normal(0, 1, data.shape)
        # data += gassian_noise
        data = self.crop_padding(data, 2400)
        data = torch.tensor(data)

        return data


    def __getitem__(self, idx):
        file_name = self.file[idx][1]
        label = self.file[idx][2]
        data,one_hot = self.load_data(file_name,label)
        data = self.data_process(data[0]).unsqueeze(0).float()
        one_hot = one_hot.unsqueeze(0).float()

        return data, one_hot, file_name


In [33]:
# Now, we will build the pipeline for deep learning based training.
# These functions may be useful :)
def save_loss(fold, value):
    path = 'loss' + str(fold) + '.txt'
    file = open(path, mode='a+')
    file.write(str(value)+'\n')  
    
# We will use GPU if cuda is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model = Mscnn(1, 1).to(device)   # ch_in, ch_out

# Build pre-processing transformation 
# Note this pre-processing is in PyTorch
x_transforms = transforms.Compose([
        transforms.ToTensor(),  
])
y_transforms = transforms.ToTensor()

# scheduler = lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=num_epochs)

# TODO: fine tune hyper-parameters
batch_size = 128
criterion = torch.nn.MSELoss()
criterion2 = torch.nn.BCELoss()
criterion3 = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
optimizer1 = torch.optim.SGD(model.parameters(), lr=0.001)
train_ecg_dataset = ECG_dataset('./', is_train=True)
train_dataloader = DataLoader(train_ecg_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
test_ecg_dataset = ECG_dataset('./', is_train=False)
test_dataloaders = DataLoader(test_ecg_dataset, batch_size=1)
num_epochs = 40

cuda


In [34]:
from sklearn.metrics import accuracy_score, matthews_corrcoef, roc_auc_score, f1_score, recall_score, cohen_kappa_score

def validation(model, criterion, test_dataloaders, device):
    # TODO: add more metrics for evaluation?
    # Evaluate 
    model.eval()
    predict = np.array([])
    target = np.array([])
    loss=0
    step=0
    with torch.no_grad():
        for x, mask, name in test_dataloaders:
            step += 1
            mask = mask.to(device)
            y = model(x.to(device))
            loss +=criterion(y, mask.squeeze(2)).item()
            y[y >= 0.5] = 1
            y[y < 0.5] = 0
            predict=np.append(predict, torch.squeeze(y).cpu().numpy())
            target=np.append(target, torch.squeeze(mask).cpu().numpy())
    acc = accuracy_score(target, predict)
    math_c = matthews_corrcoef(target, predict)
    roc_auc_s = roc_auc_score(target, predict)
    f1_sco = f1_score(target, predict)
    print('Accuracy: {}'.format(acc))
    print('Loss:', loss/step)
    model.train()

    return loss/step, acc, math_c, roc_auc_s, f1_sco

train_loss = []
train_acc = []
val_loss = []
val_acc = []
matthews_c = []
ras = []
f1_s = []
recall_s = []
cohen_ks = []

# Start training !
for epoch in range(1, num_epochs + 1):
        predict = np.array([])
        target = np.array([])
        print('Epoch {}/{}'.format(epoch, num_epochs))
        dt_size = len(train_dataloader.dataset)
        epoch_loss = 0
        step = 0
        process = tqdm(train_dataloader)
        for x, y, name in process:
            step += 1
            inputs = x.to(device)
            labels = y.to(device)
            optimizer1.zero_grad()
            outputs = model(inputs)
            loss = criterion3(outputs, labels.squeeze(2))
            loss.backward()
            optimizer1.step()
            epoch_loss += loss.item()
            process.set_description(
                "epoch: %d, train_loss:%0.8f" % (epoch, epoch_loss / step)
            )
            outputs[outputs >= 0.5] = 1
            outputs[outputs < 0.5] = 0
            predict=np.append(predict,torch.squeeze(outputs).detach().cpu().numpy())
            target=np.append(target,torch.squeeze(labels).detach().cpu().numpy())
        epoch_loss /= step
        train_loss.append(epoch_loss)
        acc = accuracy_score(target, predict)
        train_acc.append(acc)
        recall_sco = recall_score(target, predict)
        cohen_kappa_sco = cohen_kappa_score(target, predict)
        print('train_Accuracy: {}'.format(acc))
        save_loss(10, epoch_loss)
        v_loss, v_acc, math_c, roc_auc_s, f1_src = validation(model, criterion3, test_dataloaders, device)
        val_loss.append(v_loss)
        val_acc.append(v_acc)
        matthews_c.append(math_c)
        f1_s.append(f1_src)
        recall_s.append(recall_sco)
        cohen_ks.append(cohen_kappa_sco)
# Save model
torch.save(model.state_dict(), 'weights10_%d.pth' % (epoch))


Epoch 1/40


epoch: 1, train_loss:0.00000000: 100%|██████████| 54/54 [00:19<00:00,  2.83it/s]


train_Accuracy: 0.4758135444151275
Accuracy: 0.08914956011730206
Loss: 0.0
Epoch 2/40


epoch: 2, train_loss:0.00000000: 100%|██████████| 54/54 [00:18<00:00,  2.89it/s]


train_Accuracy: 0.4655526238639695
Accuracy: 0.20234604105571846
Loss: 0.0
Epoch 3/40


epoch: 3, train_loss:0.00000000: 100%|██████████| 54/54 [00:19<00:00,  2.81it/s]


train_Accuracy: 0.46379360891234245
Accuracy: 0.4621700879765396
Loss: 0.0
Epoch 4/40


epoch: 4, train_loss:0.00000000: 100%|██████████| 54/54 [00:19<00:00,  2.80it/s]


train_Accuracy: 0.4621811785400176
Accuracy: 0.48563049853372436
Loss: 0.0
Epoch 5/40


epoch: 5, train_loss:0.00000000:  61%|██████    | 33/54 [00:12<00:07,  2.72it/s]


KeyboardInterrupt: 

In [31]:
import matplotlib.pyplot as plt
import numpy as np

print(max(train_acc), min(train_loss), max(val_acc), min(val_loss))

# Now, we will build the pipeline for deep learning based training.
# These functions may be useful :)

# def save_indicators(indicators_name, indicators, batch_size, x):
#     path = 'model_'+str(x)+'_indicator_' + str(batch_size) +'.txt'
#     file = open(path, mode='a+')
#     for item in indicators_name:
#         file.write(str(item)+", ")
    
#     file.write('\n')
#     for i in range(len(indicators[0])):
#         for j in range(len(indicators)):
#             file.write(str(float(indicators[j][i])))
#             if j < len(indicators) - 1:
#                 file.write(', ')
#         file.write('\n')

# indicators = []
# indicators_name = ['train loss', 'train accuracy', 'validation loss', 'validation accuracy', 'matthews corrcoef',
#             'f1 score', 'recall score', 'cohen kappa score']
# indicators.append(train_loss)
# indicators.append(train_acc)
# indicators.append(val_loss)
# indicators.append(val_acc)
# indicators.append(matthews_c)
# # indicators.append(ras)
# indicators.append(f1_s)
# indicators.append(recall_s)
# indicators.append(cohen_ks)

# save_indicators(indicators_name, indicators, batch_size, 0)

0.9506009967751392 0.12196802637643284 0.955425219941349 0.1137430252095912
