In [57]:
import numpy as np
import argparse
import os
import imp
import re
import pickle
import datetime
import random
import math
import copy

from sklearn.model_selection import KFold, StratifiedKFold
import torch
from torch import nn
import torch.nn.utils.rnn as rnn_utils
from torch.utils import data
from torch.autograd import Variable
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader,TensorDataset,random_split,SubsetRandomSampler, ConcatDataset, Subset

from utils import metrics


### Base Transformer model

In [58]:
input_dim = 25
pad_token = np.zeros(input_dim)
def pad_sents(sents, pad_token):

    sents_padded = []

    max_length = max([len(_) for _ in sents])
    for i in sents:
        padded = list(i) + [pad_token]*(max_length-len(i))
        sents_padded.append(np.array(padded))

    return np.array(sents_padded)

In [59]:
def get_bce_loss(y_pred, y_true):
    loss = torch.nn.BCELoss()
    return loss(y_pred[:,-1], y_true)

In [60]:
def predict_last_visit_bce_loss(y_pred, x_lab_length, y_true):
    # print(y_pred.shape, y_true.shape)
    batch_size = len(y_true)
    loss = torch.nn.BCELoss()
    indices = torch.arange(batch_size, dtype=torch.int64)
    # print(type(indices), indices)
    losses = 0
    for i in indices:
        # print(type(i), i)
        # print('y_pred', y_pred[i][0][0], y_pred.shape)
        # print(x_lab_length)
        # print('x_lab_len', x_lab_length[i])
        # print(y_pred[i][[x_lab_length[i]-1])
        # print(y_true[i.long()])
        losses += (loss(y_pred[i][x_lab_length[i].long()-1], y_true[i]))

    # print(loss(y_pred[:,-1], y_true))
    # return loss(y_pred[:,-1], y_true)
    # print(losses/batch_size)
    return losses/batch_size

In [61]:
def predict_last_visit_mse_loss(y_pred, x_lab_length, y_true):
    # print('!!!')
    mini_batch_size = y_pred.shape[0]
    loss = torch.nn.MSELoss()
    indices = torch.arange(mini_batch_size, dtype=torch.int64)
    losses = 0
    # print('bs:', mini_batch_size)
    for i in indices:
        losses += (loss(y_pred[i][x_lab_length[i].long()-1], y_true[i][x_lab_length[i].long()-1]))
    return losses/mini_batch_size

In [62]:
class Dataset(data.Dataset):
    def __init__(self, x_lab, x_lab_length, x_demo, y_outcome, y_los):
        self.x_lab = x_lab
        self.x_lab_length = x_lab_length
        self.x_demo = x_demo
        self.y_outcome = y_outcome
        self.y_los = y_los

    def __getitem__(self, index): # 返回的是tensor
        return self.x_lab[index], self.x_lab_length[index], self.x_demo[index], self.y_outcome[index], self.y_los[index]

    def __len__(self):
        return len(self.y_outcome)

In [63]:
data_path = './dataset/tongji/processed_data/'
file_name = './ckpt/gru.pth'

batch_size = 64
num_epochs = 30
device = torch.device("cuda:0" if torch.cuda.is_available() == True else 'cpu')
#device = torch.device('cpu')
print("available device: {}".format(device))


x_lab = pickle.load(open('./dataset/tongji/processed_data/train_x_labtest.pkl', 'rb'))
x_lab = np.array(x_lab, dtype=object)
x_lab = [torch.Tensor(_) for _ in x_lab]

x_demo = pickle.load(open('./dataset/tongji/processed_data/train_x_demographic.pkl', 'rb'))
x_demo = np.array(x_demo)

y_outcome = pickle.load(open('./dataset/tongji/processed_data/train_y_outcome.pkl', 'rb'))
y_outcome = np.array(y_outcome)

y_los = pickle.load(open('./dataset/tongji/processed_data/train_y_LOS.pkl', 'rb'))
y_los = np.array(y_los, dtype=object)
y_los = [torch.Tensor(_) for _ in y_los]

# print(len(x_lab[0][0]))
# x_lab = pad_sents(x_lab, pad_token)
# print(x_lab.shape)
x_lab_length = [len(_) for _ in x_lab]
x_lab_length = np.array(x_lab_length)
x_lab = torch.nn.utils.rnn.pad_sequence((x_lab), batch_first=True)
y_los = torch.nn.utils.rnn.pad_sequence(y_los, batch_first=True)
# y_los = (y_los-35)/10
# print(x_lab.shape)
# x_lab = torch.nn.utils.rnn.pack_sequence((x_lab), enforce_sorted=False)
# print(x_lab_length)
# len(x_lab[7])

available device: cpu


In [64]:
train_dataset = Dataset(x_lab, x_lab_length, x_demo, y_outcome, y_los)
# train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

print(x_lab.shape)
print(x_demo.shape)
# print(len(y[0]))
# print(len(y[1]))
# print(y)

# print(train_dataset.x_lab)


torch.Size([361, 13, 25])
(361, 2)


In [65]:
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED) # numpy
random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED) # cpu
torch.cuda.manual_seed(RANDOM_SEED) # gpu
torch.backends.cudnn.deterministic=True # cudnn
np.set_printoptions(threshold=np.inf, precision=2, suppress=True)


def train_epoch(model, device, dataloader, loss_fn, optimizer):
    train_loss = []
    model.train()
    for step, data in enumerate(dataloader):   
        batch_x_lab, batch_x_lab_length, batch_x_demo, batch_y_outcome, batch_y_los = data
        batch_x_lab, batch_x_lab_length, batch_x_demo, batch_y_outcome, batch_y_los = batch_x_lab.float(),batch_x_lab_length.float() , batch_x_demo.float().to(device), batch_y_outcome.float(), batch_y_los.float()
        batch_y = batch_y_los
        batch_y = batch_y.unsqueeze(-1)
        optimizer.zero_grad()
        output = model(batch_x_lab, batch_x_lab_length, batch_x_demo)
        # print(output.shape, batch_x_lab_length.shape, batch_y.shape)
        loss = loss_fn(output, batch_x_lab_length, batch_y)
        train_loss.append(loss.item())
        loss.backward()
        optimizer.step()
    return np.array(train_loss).mean()

def val_epoch(model, device, dataloader, loss_fn):
    val_loss = []
    y_pred = []
    y_true = []
    evaluation_scores = {}
    model.eval()
    with torch.no_grad():
        for step, data in enumerate(dataloader):   
            batch_x_lab, batch_x_lab_length, batch_x_demo, batch_y_outcome, batch_y_los = data
            batch_x_lab, batch_x_lab_length, batch_x_demo, batch_y_outcome, batch_y_los = batch_x_lab.float(),batch_x_lab_length.float() , batch_x_demo.float().to(device), batch_y_outcome.float(), batch_y_los.float()
            batch_y = batch_y_los
            batch_y = batch_y.unsqueeze(-1)
            output = model(batch_x_lab, batch_x_lab_length, batch_x_demo)
            loss = loss_fn(output, batch_x_lab_length, batch_y)
            val_loss.append(loss.item())
            y_pred += [output[i][batch_x_lab_length[i].long()-1][0] for i in range(len(batch_y))]
            # y_pred += [output[i][batch_x_lab_length[i].long()-1][0] for i in range(len(batch_y))]
            # y_true += list(batch_y.cpu().numpy().flatten())
            y_true += [batch_y[i][batch_x_lab_length[i].long()-1][0] for i in range(len(batch_y))]
    # y_pred = np.array(y_pred)
    # y_pred = np.stack([1 - y_pred, y_pred], axis=1)
    try:
        evaluation_scores = metrics.print_metrics_regression(y_true, y_pred)
    except:
        print('error')
    return np.array(val_loss).mean(), evaluation_scores

def test_epoch(model, device, dataloader, loss_fn):
    test_loss = []
    y_pred = []
    y_true = []
    evaluation_scores = {}
    model.eval()
    with torch.no_grad():
        for step, data in enumerate(dataloader):   
            batch_x_lab, batch_x_lab_length, batch_x_demo, batch_y_outcome, batch_y_los = data
            batch_x_lab, batch_x_lab_length, batch_x_demo, batch_y_outcome, batch_y_los = batch_x_lab.float(),batch_x_lab_length.float() , batch_x_demo.float().to(device), batch_y_outcome.float(), batch_y_los.float()
            batch_y = batch_y_los
            batch_y = batch_y.unsqueeze(-1)
            output = model(batch_x_lab, batch_x_lab_length, batch_x_demo)
            loss = loss_fn(output, batch_x_lab_length, batch_y)
            test_loss.append(loss.item())
            
            # y_pred += list(output.cpu().numpy().flatten())
            y_pred += [output[i][batch_x_lab_length[i].long()-1].cpu().numpy().flatten() for i in range(len(batch_y))]
            # y_true += list(batch_y.cpu().numpy().flatten())
            y_true += [batch_y[i][batch_x_lab_length[i].long()-1][0] for i in range(len(batch_y))]
    # y_pred = np.array(y_pred)
    # y_pred = np.stack([1 - y_pred, y_pred], axis=1)
    try:
        evaluation_scores = metrics.print_metrics_regression(y_true, y_pred)
    except:
        print('error')
    return np.array(test_loss).mean(), evaluation_scores

# train_epoch(model, device, train_loader, get_bce_loss, optimizer)

In [66]:
class Transformer(nn.Module):
    def __init__(self, input_lab_dim, input_demo_dim, hidden_dim, output_dim, act_layer=nn.GELU, drop=0.):
        super(Transformer, self).__init__()

        # hyperparameters
        self.input_lab_dim = input_lab_dim
        self.input_demo_dim = input_demo_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        
        self.demo_proj = nn.Linear(input_demo_dim, hidden_dim)
        self.lab_proj = nn.Linear(input_lab_dim, hidden_dim)

        self.encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=4, dim_feedforward=512, activation='gelu')
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=1)

        self.gru = nn.GRU(input_size = hidden_dim, hidden_size = hidden_dim, num_layers = 1, batch_first = True)
        
        self.act = act_layer()
        self.fc = nn.Linear(2*hidden_dim, output_dim)
        self.drop = nn.Dropout(drop)

        self.sigmoid = nn.Sigmoid()

    def forward(self, x_lab, x_lab_length, x_demo):
        batch_size, max_length, input_dim = x_lab.shape

        x_lab = self.lab_proj(x_lab)
        x_lab = self.act(x_lab)
        # print('1', x_lab.shape)
        x_lab, h_n = self.gru(x_lab)
        # x_lab = self.transformer_encoder(x_lab)
        # print('2', x_lab.shape)

        x_demo = self.demo_proj(x_demo)
        x_demo = self.act(x_demo) # (batch_size, hidden_dim)

        x_demo = torch.reshape(x_demo.repeat(1, max_length), (batch_size, max_length, self.hidden_dim))

        # print(x_lab.shape, x_demo.shape)
        x = torch.cat((x_lab, x_demo), 2) # (batch_size, 2*hidden_dim)

        # print(x.shape)
        x = self.drop(x)
        x = self.fc(x)
        x = self.drop(x)
        
        x = self.sigmoid(x)
        return x


In [67]:
model = Transformer(input_lab_dim=25, input_demo_dim=2, hidden_dim=32, output_dim=1, act_layer=nn.GELU, drop=0.).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = predict_last_visit_mse_loss
train_loader = DataLoader(train_dataset, batch_size=batch_size)
# train_loss = train_epoch(model, device, train_loader, criterion, optimizer)
train_loss, evaluation_scores = val_epoch(model, device, train_loader, criterion)

Custom bins confusion matrix:
[[194   0   0   0]
 [  8   0   0   0]
 [153   0   0   0]
 [  6   0   0   0]]
Mean absolute deviation (MAD) = 31.737485885620117
Mean squared error (MSE) = 2076.579345703125
Mean absolute percentage error (MAPE) = 142.18716621398926
Cohen kappa score = 0.0


In [68]:
# print(evaluation_scores['mad'])
# print(evaluation_scores['mse'])
# print(evaluation_scores['mape'])

### Train, Validate and Test

In [69]:
all_history={}
test_performance={'test_loss': [],'test_mad':[], 'test_mse':[], 'test_mape':[]}
dataset = train_dataset
num_folds = 5
kfold_test = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=RANDOM_SEED)

for fold_test, (train_and_val_idx, test_idx) in enumerate(kfold_test.split(np.arange(len(dataset)), dataset.y_outcome)):
    print('====== Test Fold {} ======'.format(fold_test + 1))
    kfold_val = StratifiedKFold(n_splits=num_folds-1, shuffle=True, random_state=RANDOM_SEED)
    sub_dataset = Dataset(dataset.x_lab[train_and_val_idx], dataset.x_lab_length[train_and_val_idx], dataset.x_demo[train_and_val_idx], dataset.y_outcome[train_and_val_idx], dataset.y_los[train_and_val_idx])
    all_history['test_fold_{}'.format(fold_test+1)] = {}
    for fold_val, (train_idx, val_idx) in enumerate(kfold_val.split(np.arange(len(sub_dataset)), sub_dataset.y_outcome)):
        print('Val Fold {}'.format(fold_val + 1))
        train_sampler = SubsetRandomSampler(train_idx)
        val_sampler = SubsetRandomSampler(val_idx)
        train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
        val_loader = DataLoader(dataset, batch_size=batch_size, sampler=val_sampler)
        model = Transformer(input_lab_dim=25, input_demo_dim=2, hidden_dim=32, output_dim=1, act_layer=nn.GELU, drop=0.).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
        criterion = predict_last_visit_mse_loss # get_bce_loss
        history = {'train_loss': [], 'val_loss': [],'val_mad':[], 'val_mse':[], 'val_mape':[]}
        # history = {}
        for epoch in range(num_epochs):
            train_loss=train_epoch(model, device, train_loader, criterion, optimizer)
            val_loss, val_evaluation_scores=val_epoch(model, device, val_loader, criterion)
            # save performance history on validation set
            print("Epoch:{}/{} AVG Training Loss:{:.3f} AVG Val Loss:{:.3f}".format(epoch + 1, num_epochs, train_loss, val_loss))
            history['train_loss'].append(train_loss)
            history['val_loss'].append(val_loss)
            history['val_mad'].append(val_evaluation_scores['mad'])
            history['val_mse'].append(val_evaluation_scores['mse'])
            history['val_mape'].append(val_evaluation_scores['mape'])
        all_history['test_fold_{}'.format(fold_test+1)]['fold{}'.format(fold_val+1)] = history
    
    # at the same epoch, calculate average auroc of each fold, set the best one as the best model
    best_epoch = 0
    best_performance = 0.0
    for step in range(num_epochs):
        mape_list = []
        for fold in range(num_folds-1):
            mape_list.append(all_history['test_fold_{}'.format(fold_test+1)]['fold{}'.format(fold+1)]['val_mape'][step])
        performance = np.array(mape_list).mean()
        if performance > best_performance:
            best_performance = performance
            best_epoch = step + 1
        print('Epoch:{}/{} AVG MAPE:{:.3f}'.format(step+1, num_epochs, performance))

    print(f"Best epoch: {best_epoch} Best performance: {best_performance}")
    print('Begin to evaluate the best model on test set')
    # train model again with train_and_val set (num_epochs = best_epoch), then test on test dataset 
    train_sampler = SubsetRandomSampler(train_and_val_idx)
    test_sampler = SubsetRandomSampler(test_idx)
    train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
    test_loader = DataLoader(dataset, batch_size=batch_size, sampler=test_sampler)
    model = Transformer(input_lab_dim=25, input_demo_dim=2, hidden_dim=32, output_dim=1, act_layer=nn.GELU, drop=0.).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = predict_last_visit_mse_loss # get_bce_loss
    for epoch in range(best_epoch):
        train_loss=train_epoch(model, device, train_loader, criterion, optimizer)
        print("Epoch:{}/{} AVG Training Loss:{:.3f}".format(epoch + 1, num_epochs, train_loss))
    test_loss, test_evaluation_scores = test_epoch(model, device, test_loader, criterion)
    test_performance['test_loss'].append(test_loss)
    test_performance['test_mad'].append(test_evaluation_scores['mad'])
    test_performance['test_mse'].append(test_evaluation_scores['mse'])
    test_performance['test_mape'].append(test_evaluation_scores['mape'])
    print(f"Performance on test set {fold_test+1}: MAD = {test_evaluation_scores['mape']}, MSE = {test_evaluation_scores['mse']}, MAPE = {test_evaluation_scores['mape']}")

    


Val Fold 1
Custom bins confusion matrix:
[[49  0  0  0]
 [ 2  0  0  0]
 [20  0  0  0]
 [ 1  0  0  0]]
Mean absolute deviation (MAD) = 22.11336326599121
Mean squared error (MSE) = 1433.1431884765625
Mean absolute percentage error (MAPE) = 148.52372407913208
Cohen kappa score = 0.0
Epoch:1/30 AVG Training Loss:1572.763 AVG Val Loss:1583.534
Custom bins confusion matrix:
[[49  0  0  0]
 [ 2  0  0  0]
 [20  0  0  0]
 [ 1  0  0  0]]
Mean absolute deviation (MAD) = 22.10232925415039
Mean squared error (MSE) = 1432.1876220703125
Mean absolute percentage error (MAPE) = 150.91769695281982
Cohen kappa score = 0.0
Epoch:2/30 AVG Training Loss:1532.718 AVG Val Loss:1347.440
Custom bins confusion matrix:
[[49  0  0  0]
 [ 2  0  0  0]
 [20  0  0  0]
 [ 1  0  0  0]]
Mean absolute deviation (MAD) = 22.091535568237305
Mean squared error (MSE) = 1431.23486328125
Mean absolute percentage error (MAPE) = 153.3866047859192
Cohen kappa score = 0.0
Epoch:3/30 AVG Training Loss:1534.662 AVG Val Loss:1559.885
C

KeyError: 'mad'

In [None]:
# Calculate average performance on 10-fold test set
print(test_performance)
test_mad_list = np.array(test_performance['test_mad'])
test_mse_list = np.array(test_performance['test_mse'])
test_mape_list = np.array(test_performance['test_mape'])
print('MAD: mean={:.3f}, std={:.3f}'.format(test_mad_list.mean(), test_mad_list.std()))
print('MSE: mean={:.3f}, std={:.3f}'.format(test_mse_list.mean(), test_mse_list.std()))
print('MAPE: mean={:.3f}, std={:.3f}'.format(test_mape_list.mean(), test_mape_list.std()))

{'test_loss': [2495.5590209960938, 2629.3311767578125, 1640.9576416015625], 'test_mad': [31.26084, 31.661581, 30.76814], 'test_mse': [2026.9153, 2069.3574, 1958.2748], 'test_mape': [179.81278896331787, 179.05292510986328, 177.4882435798645]}
MAD: mean=31.230, std=0.365
MSE: mean=2018.182, std=45.768
MAPE: mean=178.785, std=0.968
