In [2]:
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
import os
import pandas as pd
import pickle
import matplotlib.pyplot as plt
from pyampd.ampd import find_peaks, find_peaks_adaptive
from datetime import datetime
from scipy.stats import norm
from tqdm import tqdm
import timeit
from torch.nn.utils import weight_norm

In [3]:
class Data1Loader(Dataset):
    def __init__(self, fpath, idxs, opt):
        self.fpath = fpath
        self.opt = opt
        self.idxs = idxs
        with open(fpath, 'rb') as f:
            data = pickle.load(f)
        self.dset = data

    def __len__(self):
        return len(self.idxs)

    def __getitem__(self, idx):
        # call opt = opt() beforehand
        # returns [1, signal length, 1]
        subj = np.fromiter(self.dset.keys(), dtype=float)[idx].astype(int).astype(str)
        d = self.dset[subj]
        n = np.random.randint(0, d.shape[0])
        X1 = (d[n, 0, :] - np.median(d[n, 0, :]))/(np.percentile(d[n, 0, :], 75) - np.percentile(d[n, 0, :], 25))
        X1 = torch.tensor(X1, dtype=torch.float32)
        X2 = torch.tensor(d[n, 1, :], dtype=torch.float32)
        X = torch.cat([X1.unsqueeze(0), X2.unsqueeze(0)], axis=0)
        
        y = d[n, 2, :]
        try:
            sbp_idxs = find_peaks(y)
        except:
            sbp_idxs = y.argmin()
        try:
            dbp_idxs = find_peaks(-y)
        except:
            dbp_idxs = y.argmin()
        y = torch.tensor([y[sbp_idxs].mean(), y[dbp_idxs].mean()], dtype=torch.float32).to(opt.device)
        return X, y

In [4]:
class jeong_21(nn.Module):
    def __init__(self, input_size, output_size, num_channels, kernel_size, dropout):
        super(jeong_21, self).__init__()
        self.c = 
        self.l1 = 
        self.l2 = 
        self.l3 = 
        self.bn = 
        self.do = 

    def forward(self, x):
        """Inputs have to have dimension (N, C_in, L_in)"""
        x = x[:, 0] - x[:, 1]
        y1 = self.tcn(inputs)  # input should have dimension (N, C, L)
        o = self.linear(y1[:, :, -1])
        return o

SyntaxError: invalid syntax (<ipython-input-4-b568a104b592>, line 4)

In [38]:
c1 = nn.Conv1d(in_channels=1, out_channels=56, kernel_size=10)
b1 = nn.BatchNorm1d(num_features=56)
d1 = nn.Dropout(p=0.5)
l1 = nn.LSTM(input_size=241, hidden_size=28, num_layers=1, bidirectional=True)
l2 = nn.LSTM(input_size=241, hidden_size=28, num_layers=1, bidirectional=True)

h0 = torch.randn((2, 56, 241))
c0 = torch.randn((2, 56, 241))

a = torch.rand((4, 2, 250))
a = (a[:, 0, :] - a[:, 1, :]).unsqueeze(1)
a = c1(a)
a = b1(a)
a = d1(a).permute(0,2,1)
print(a.shape)
a, (hn, cn) = l1(a, (h0, c0))
print(a.shape)

torch.Size([4, 241, 56])


RuntimeError: input.size(-1) must be equal to input_size. Expected 241, got 56

In [4]:
def loss_fn(X, y, p):
    [m_s, sd_s, m_d, sd_d] = p
    return torch.mean(torch.abs(X[:, 0]-y[:, 0])*(1+torch.abs((y[:, 0]-m_s)/sd_s))**2) + torch.mean(torch.abs(X[:, 1]-y[:, 1])*(1+torch.abs((y[:, 1]-m_d)/sd_d))**2)
#     return torch.mean(torch.abs(X[:, 0]-y[:, 0])/torch.tensor(norm.pdf(y[:, 0].detach().cpu(), 126, 20.5)).cuda(0)) + torch.mean(torch.abs(X[:, 1]-y[:, 1])/torch.tensor(norm.pdf(y[:, 1].detach().cpu(), 68, 12.1)).cuda(0))

def loss_fn_EV(X, y):
    return X[:, 0]-y[:, 0], X[:, 1]-y[:, 1]

In [5]:
class options:
    def __init__(self):
        self.numWorkers = 0
        self.numEpoch = 50
        self.lr = 1e-4
        self.device = 'cuda:3'

        
        
# dataset_names = ['data1_none', 'data1_phys',
#                  'data1_skew_20_30', 'data1_skew_45_55', 'data1_skew_475_525', 'data1_skew_70_80', 'data1_skew_90_100',
#                  'data1_perf_0_25', 'data1_perf_25_50', 'data1_perf_50_75', 'data1_perf_75_100',
#                  'data1_goodness_0_25', 'data1_goodness_25_50', 'data1_goodness_50_75', 'data1_goodness_75_100']
dataset_names = ['data1_all']

start = timeit.default_timer() 
datapath = './datasets/MIMIC-II/'
dataset_names = os.listdir(datapath)

metadata = pd.DataFrame()
for dataset_name in dataset_names:
    
    ### Data Loaders        

    opt = options()
    
    fpath = datapath + dataset_name
    with open(fpath, 'rb') as f:
        data = pickle.load(f)
    idxs = np.arange(len(data.keys()))

    np.random.shuffle(idxs)
    train_idxs = idxs[np.arange(0, int(0.6*len(idxs)))]
    val_idxs = idxs[np.arange(int(0.6*len(idxs)), int(0.8*len(idxs)))]
    test_idxs = idxs[np.arange(int(0.8*len(idxs)), len(idxs))]

    partition = {}
    partition['train'] = train_idxs
    partition['val'] = val_idxs
    partition['test'] = test_idxs
    params_train = {'batch_size': 8,
                  'shuffle': True,
                  'num_workers': opt.numWorkers
                 }
    params_val = {'batch_size': 1,
                  'shuffle': True,
                  'num_workers': opt.numWorkers
                 }
    params_test = {'batch_size': 1,
                  'num_workers': opt.numWorkers
                 }
    device = torch.device(opt.device)
    train_loader = torch.utils.data.DataLoader(Data1Loader(fpath, partition['train'], opt), **params_train)
    val_loader = torch.utils.data.DataLoader(Data1Loader(fpath, partition['val'], opt), **params_val)
    test_loader = torch.utils.data.DataLoader(Data1Loader(fpath, partition['test'], opt), **params_test)

    ### Distribution Data

    tr_sbps = np.array([])
    tr_dbps = np.array([])
    for _, Y_train in train_loader:
        tr_sbps = np.append(tr_sbps, Y_train[:, 0].detach().cpu())
        tr_dbps = np.append(tr_dbps, Y_train[:, 1].detach().cpu())

    val_sbps = np.array([])
    val_dbps = np.array([])
    for _, Y_val in val_loader:
        val_sbps = np.append(val_sbps, Y_val[:, 0].detach().cpu())
        val_dbps = np.append(val_dbps, Y_val[:, 1].detach().cpu())

    te_sbps = np.array([])
    te_dbps = np.array([])
    for _, Y_te in test_loader:
        te_sbps = np.append(te_sbps, Y_te[:, 0].detach().cpu())
        te_dbps = np.append(te_dbps, Y_te[:, 1].detach().cpu())

    tr_sd_sbp = tr_sbps.std()
    tr_m_sbp = tr_sbps.mean()
    tr_sd_dbp = tr_dbps.std()
    tr_m_dbp = tr_dbps.mean()

    val_sd_sbp = val_sbps.std()
    val_m_sbp = val_sbps.mean()
    val_sd_dbp = val_dbps.std()
    val_m_dbp = val_dbps.mean()

    te_sd_sbp = te_sbps.std()
    te_m_sbp = te_sbps.mean()
    te_sd_dbp = te_dbps.std()
    te_m_dbp = te_dbps.mean()

    # fig, ax = plt.subplots(3, 2, figsize=(15,15), sharex=True)
    # ax[0,0].hist(tr_sbps, bins=20, alpha=0.3)
    # ax[0,0].axvline(tr_m_sbp, label='Mean SBP = ' + str(np.round(tr_m_sbp, 2)), linestyle='--', c='g')
    # ax[0,0].axvline(tr_m_sbp+tr_sd_sbp, label='STD SBP = ±' + str(np.round(tr_sd_sbp, 2)), linestyle='--', c='r')
    # ax[0,0].axvline(tr_m_sbp-tr_sd_sbp, linestyle='--', c='r')
    # ax[0,0].legend()
    # ax[0,0].set_xlabel('Training Set SBP (mmHg)')
    # ax[0,0].set_ylabel('Count')
    # ax[0,1].hist(tr_dbps, bins=20, alpha=0.3)
    # ax[0,1].axvline(tr_m_dbp, label='Mean DBP = ' + str(np.round(tr_m_dbp, 2)), linestyle='--', c='g')
    # ax[0,1].axvline(tr_m_dbp+tr_sd_dbp, label='STD DBP = ±' + str(np.round(tr_sd_dbp, 2)), linestyle='--', c='r')
    # ax[0,1].axvline(tr_m_dbp-tr_sd_dbp, linestyle='--', c='r')
    # ax[0,1].legend()
    # ax[0,1].set_xlabel('Training Set DBP (mmHg)')
    # ax[0,1].set_ylabel('Count')

    # ax[1,0].hist(val_sbps, bins=20, alpha=0.3)
    # ax[1,0].axvline(val_m_sbp, label='Mean SBP = ' + str(np.round(val_m_sbp, 2)), linestyle='--', c='g')
    # ax[1,0].axvline(val_m_sbp+val_sd_sbp, label='STD SBP = ±' + str(np.round(val_sd_sbp, 2)), linestyle='--', c='r')
    # ax[1,0].axvline(val_m_sbp-val_sd_sbp, linestyle='--', c='r')
    # ax[1,0].legend()
    # ax[1,0].set_xlabel('Validation Set SBP (mmHg)')
    # ax[1,0].set_ylabel('Count')
    # ax[1,1].hist(val_dbps, bins=20, alpha=0.3)
    # ax[1,1].axvline(val_m_dbp, label='Mean DBP = ' + str(np.round(val_m_dbp, 2)), linestyle='--', c='g')
    # ax[1,1].axvline(val_m_dbp+val_sd_dbp, label='STD DBP = ±' + str(np.round(val_sd_dbp, 2)), linestyle='--', c='r')
    # ax[1,1].axvline(val_m_dbp-val_sd_dbp, linestyle='--', c='r')
    # ax[1,1].legend()
    # ax[1,1].set_xlabel('Validation Set DBP (mmHg)')
    # ax[1,1].set_ylabel('Count')

    # ax[2,0].hist(te_sbps, bins=20, alpha=0.3)
    # ax[2,0].axvline(te_m_sbp, label='Mean SBP = ' + str(np.round(te_m_sbp, 2)), linestyle='--', c='g')
    # ax[2,0].axvline(te_m_sbp+te_sd_sbp, label='STD SBP = ±' + str(np.round(te_sd_sbp, 2)), linestyle='--', c='r')
    # ax[2,0].axvline(te_m_sbp-te_sd_sbp, linestyle='--', c='r')
    # ax[2,0].legend()
    # ax[2,0].set_xlabel('Testing Set SBP (mmHg)')
    # ax[2,0].set_ylabel('Count')
    # ax[2,1].hist(te_dbps, bins=20, alpha=0.3)
    # ax[2,1].axvline(te_m_dbp, label='Mean DBP = ' + str(np.round(te_m_dbp, 2)), linestyle='--', c='g')
    # ax[2,1].axvline(te_m_dbp+te_sd_dbp, label='STD DBP = ±' + str(np.round(te_sd_dbp, 2)), linestyle='--', c='r')
    # ax[2,1].axvline(te_m_dbp-te_sd_dbp, linestyle='--', c='r')
    # ax[2,1].legend()
    # ax[2,1].set_xlabel('Testing Set DBP (mmHg)')
    # ax[2,1].set_ylabel('Count')

    # Req BP Distribution SBP STD:  21.121632046210227
    # Req BP Distribution DBP STD:  12.310057157454578



    ### Training

    # torch.multiprocessing.set_start_method('spawn', force=True)
    model = TCN(2, 2, [25]*8, 7, 0.0).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)

    # Initializing in a separate cell so we can easily add more epochs to the same run
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    model_dir = './models'
    model_name = 'best_model_' + 'tcn1_' + dataset_name
    writer = SummaryWriter('runs/' + model_name + '_{}'.format(timestamp))
    epoch_number = 0

    try:
        EPOCHS = 10
        best_vloss = 1e10
        for epoch in tqdm(range(EPOCHS)):
            print('EPOCH {}:'.format(epoch_number + 1))
            model.train()
            running_loss = 0.0
            last_loss = 0.0

            i = 0
            for X_train, y_train in train_loader:
                optimizer.zero_grad()
                outputs = model(X_train.to(device))
                loss = loss_fn(outputs, y_train.cuda(0), [tr_m_sbp, tr_sd_sbp, tr_m_dbp, tr_sd_dbp])
                loss.backward()
                optimizer.step()
                i+=1
                running_loss += loss.item()
                if i % 10 == 9:
                    last_loss = running_loss / 10 # loss per batch
                    print('  batch {} loss: {}'.format(i + 1, last_loss))
                    tb_x = epoch_number * len(train_loader) + i + 1
                    writer.add_scalar('Loss/train', last_loss, tb_x)
                    running_loss = 0.0
            avg_loss = last_loss

            i=0
            model.eval()    
            running_vloss = 0.0
            vlosses_s = np.array([])
            vlosses_d = np.array([])
            for X_val, y_val in val_loader:
                voutputs = model(X_val.to(device))
                vloss = loss_fn(voutputs, y_val, [val_m_sbp, val_sd_sbp, val_m_dbp, val_sd_dbp])
                vloss_s, vloss_d = loss_fn_EV(voutputs, y_val)
                vlosses_s = np.append(vlosses_s, vloss_s.detach().cpu())
                vlosses_d = np.append(vlosses_d, vloss_d.detach().cpu())
                running_vloss += vloss
                i+=1
            EVd = (val_sd_dbp/vlosses_d.std())**2
            EVs = (val_sd_sbp/vlosses_s.std())**2
            avg_vloss = running_vloss / (i + 1)
            print('train_loss = {} \n valid_loss = {} \n EVs = {} \n EVd = {}'.format(avg_loss, avg_vloss, EVs, EVd))

            writer.add_scalars('Training vs. Validation Loss',
                            { 'Training' : avg_loss, 'Validation' : avg_vloss },
                            epoch_number + 1)
            writer.flush()

            if avg_vloss < best_vloss:
                best_vloss = avg_vloss
                model_path = model_dir + os.sep + model_name
                torch.save(model.state_dict(), model_path)

            epoch_number += 1




        ### Testing

        saved_model = TCN(2, 2, [25]*8, 7, 0.05).to(device)
        saved_model.load_state_dict(torch.load(model_path))
        saved_model.eval()

        i=0
        running_te_loss = 0.0
        running_s_loss = 0.0
        running_d_loss = 0.0
        te_losses_s = np.array([])
        te_losses_d = np.array([])
        est_s = np.array([])
        est_d = np.array([])
        gt_s = np.array([])
        gt_d = np.array([])
        for X_te, y_te in test_loader:
            te_outputs = model(X_te.to(device))
            te_loss = loss_fn(te_outputs, y_te, [te_m_sbp, te_sd_sbp, te_m_dbp, te_sd_dbp])
            te_loss_s, te_loss_d = loss_fn_EV(te_outputs, y_te)
            te_losses_s = np.append(te_losses_s, te_loss_s.detach().cpu())
            te_losses_d = np.append(te_losses_d, te_loss_d.detach().cpu())
            est_s = np.append(est_s, te_outputs[0, 0].detach().cpu())
            est_d = np.append(est_d, te_outputs[0, 1].detach().cpu())
            gt_s = np.append(gt_s, y_te[0, 0].detach().cpu())
            gt_d = np.append(gt_d, y_te[0, 1].detach().cpu())
            running_te_loss += te_loss
            i+=1
        te_EVd = (te_sd_dbp/te_losses_d.std())**2
        te_EVs = (te_sd_sbp/te_losses_s.std())**2
        avg_te_loss = running_te_loss.detach().cpu().item() / (i + 1)
        avg_s_loss = abs(te_losses_s).mean()
        avg_d_loss = abs(te_losses_d).mean()
        print('test_loss = {} \n sbp_loss = {} \n dbp_loss = {} \n EVs = {} \n EVd = {}'.format(avg_te_loss, avg_s_loss, avg_d_loss, te_EVs, te_EVd))

        p = np.polyfit(gt_s, est_s-gt_s, deg=1)
        # fig, ax = plt.subplots(1, 2, figsize=(15, 5))
        # ax[0].scatter(gt_s, est_s-gt_s)
        # ax[0].set_xlabel('Reference SBP (mmHg)')
        # ax[0].set_ylabel('SBP Error (mmHg)')
        # ax[1].scatter(gt_d, est_d-gt_d)
        # ax[1].set_xlabel('Reference DBP (mmHg)')
        # ax[1].set_ylabel('DBP Error (mmHg)')

        # plt.scatter(est_s, gt_s)
        # plt.xlim([gt_s.min(), gt_s.max()])
        # plt.ylim([gt_s.min(), gt_s.max()])       

        metadata = pd.concat([metadata, pd.DataFrame(data = [[dataset_name, len(test_loader), te_m_sbp, te_sd_sbp, te_m_dbp, te_sd_dbp, p[0], p[1], avg_te_loss, avg_s_loss, avg_d_loss, te_EVs, te_EVd]], columns=['dataset', 'num_samples', 'm_sbp', 'sd_sbp', 'm_dbp', 'sd_dbp', 'p0', 'p1', 'test loss', 'SBP MAE', 'DBP MAE', 'EV SBP', 'EV DBP'])])
        print(metadata)
    except Exception as e:
        print(e)

stop = timeit.default_timer()
print('Time: ', stop - start)

NameError: name 'TemporalBlock' is not defined

In [None]:
model_name = 'TCN'
metadata.to_csv('./results/' + model_name + '.csv', index=False)

In [None]:
metadata