In [None]:
import numpy as np
import argparse
import os
import imp
import re
import pickle
import datetime
import random
import math
import copy

from sklearn.model_selection import KFold, StratifiedKFold, StratifiedShuffleSplit
import torch
from torch import nn
import torch.nn.utils.rnn as rnn_utils
from torch.utils import data
from torch.autograd import Variable
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader,TensorDataset,random_split,SubsetRandomSampler, ConcatDataset, Subset

from utils import metrics


In [None]:
class MultiTaskLoss(nn.Module):
    """Combine a binary-outcome loss and a regression loss with learnable weights.

    Each task loss ``L_k`` is rescaled as ``exp(-s_k) * L_k + s_k`` where
    ``s_k = log_vars[k]`` is a learnable scalar initialised to zero, and the
    two rescaled terms are summed.

    Args:
        task_num: number of learnable ``log_vars`` entries to allocate. The
            forward pass reads entries 0 (outcome) and 1 (length of stay).
    """

    def __init__(self, task_num):
        super().__init__()
        self.task_num = task_num
        self.log_vars = nn.Parameter(torch.zeros(task_num))

    def forward(self, outcome_pred, outcome, los_pred, los):
        """Return the weighted sum of BCE(outcome) and MSE(length of stay).

        Args:
            outcome_pred: predicted probabilities, scored with ``BCELoss``.
            outcome: binary targets matching ``outcome_pred``.
            los_pred: regression predictions, scored with ``MSELoss``.
            los: regression targets matching ``los_pred``.

        Returns:
            Scalar tensor holding the combined loss.
        """
        raw_terms = (
            nn.BCELoss()(outcome_pred, outcome),
            nn.MSELoss()(los_pred, los),
        )
        # Task k contributes exp(-s_k) * L_k + s_k.
        weighted = [
            torch.exp(-self.log_vars[k]) * term + self.log_vars[k]
            for k, term in enumerate(raw_terms)
        ]
        return weighted[0] + weighted[1]

In [None]:
def get_multi_task_loss(y_outcome_pred, y_outcome_true, y_los_pred, y_los_true, x_lab_length, loss_module=None):
    """Average the multi-task loss over a batch, ignoring padded time steps.

    For every sample ``i`` only the first ``x_lab_length[i]`` steps of the
    predictions and targets are scored; the per-sample losses are summed and
    divided by the batch size.

    Args:
        y_outcome_pred: batched outcome probabilities, indexed ``[sample][step]``.
        y_outcome_true: batched outcome targets, same layout.
        y_los_pred: batched length-of-stay predictions, same layout.
        y_los_true: batched length-of-stay targets, same layout.
        x_lab_length: per-sample count of valid (non-padded) steps.
        loss_module: optional callable ``(outcome_pred, outcome, los_pred, los)
            -> scalar``. Pass a persistent ``MultiTaskLoss`` whose parameters
            are registered with the optimizer if the task weights should be
            learned. When omitted, a fresh ``MultiTaskLoss(2)`` is created on
            every call, so its ``log_vars`` stay at their zero initialisation.

    Returns:
        Scalar tensor: sum of per-sample losses divided by the batch size.
    """
    batch_size = len(y_outcome_pred)
    if loss_module is None:
        # Build the default module on the predictions' device so its
        # parameters never mix devices with the model outputs.
        loss_module = MultiTaskLoss(2).to(y_outcome_pred.device)

    losses = y_outcome_pred.new_zeros(())
    for i in range(batch_size):
        n_valid = int(x_lab_length[i])
        if n_valid <= 0:
            # An empty slice would make the mean-reduced BCE/MSE return NaN
            # and poison the whole batch loss; such samples contribute nothing.
            continue
        losses = losses + loss_module(
            y_outcome_pred[i][:n_valid],
            y_outcome_true[i][:n_valid],
            y_los_pred[i][:n_valid],
            y_los_true[i][:n_valid],
        )
    return losses / batch_size

In [None]:
class Dataset(data.Dataset):
    """Index-aligned container of features, targets and valid lengths.

    Args:
        x: per-sample features, indexable by sample position.
        y: per-sample targets; its length defines the dataset size.
        x_lab_length: per-sample number of valid steps in ``x``.
    """

    def __init__(self, x, y, x_lab_length):
        self.x, self.y, self.x_lab_length = x, y, x_lab_length

    def __getitem__(self, index):
        """Return ``(x, y, length)`` at ``index`` (tensors, per the original note)."""
        sample = (
            self.x[index],
            self.y[index],
            self.x_lab_length[index],
        )
        return sample

    def __len__(self):
        """Number of samples, taken from the targets."""
        n_samples = len(self.y)
        return n_samples

In [None]:
# --- Experiment configuration -------------------------------------------------
batch_size = 64
num_epochs = 100
num_folds = 10

# Model dimensions: the model projects (demo_dim + lab_dim) input features.
demo_dim = 2
lab_dim = 97
max_visits = 299
hidden_dim = 128

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# --- Data loading ---------------------------------------------------------------
data_path = "../datasets/hm/processed_data/"


def _load_pickle(path):
    """Load one pickled object and close the file handle afterwards.

    NOTE(review): pickle can execute arbitrary code while loading; only use
    this on files produced by the project's own preprocessing.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


x = _load_pickle(data_path + "x.pkl")
y = _load_pickle(data_path + "y.pkl")
x_lab_length = _load_pickle(data_path + "visits_length.pkl")
train_dataset = Dataset(x, y, x_lab_length)

# --- Reproducibility -------------------------------------------------------------
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)  # numpy
random.seed(RANDOM_SEED)  # python stdlib
torch.manual_seed(RANDOM_SEED)  # torch, cpu
torch.cuda.manual_seed(RANDOM_SEED)  # torch, gpu
torch.backends.cudnn.deterministic = True  # cudnn
np.set_printoptions(threshold=np.inf, precision=2, suppress=True)


In [None]:
class Transformer(nn.Module):
    """Per-visit outcome and length-of-stay predictor built on a Transformer encoder.

    Input features are linearly projected to ``hidden_dim``, passed through a
    single-layer Transformer encoder, and fed to two heads: a sigmoid head for
    the binary outcome and a linear head for length of stay.

    Args:
        lab_dim: number of lab features per visit.
        demo_dim: number of demographic features per visit.
        max_visits: maximum sequence length (only used to size ``self.bn``).
        hidden_dim: model width; must be divisible by the 4 attention heads.
        output_dim: output size of each prediction head.
        act_layer: accepted for interface compatibility; not used.
        drop: dropout probability inside the prediction heads.

    Note:
        ``self.bn`` and ``self.gru`` are not used in ``forward``; they are kept
        so existing checkpoints (state_dict keys) still load.
    """

    def __init__(self, lab_dim, demo_dim, max_visits, hidden_dim, output_dim, act_layer=nn.GELU, drop=0.):
        super(Transformer, self).__init__()

        # hyperparameters
        self.lab_dim = lab_dim
        self.demo_dim = demo_dim
        self.max_visits = max_visits
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim

        self.proj = nn.Linear(demo_dim + lab_dim, hidden_dim)
        self.bn = nn.BatchNorm1d(max_visits)  # unused in forward

        self.gru = nn.GRU(input_size=hidden_dim, hidden_size=hidden_dim, num_layers=1, batch_first=True)  # unused in forward

        # The encoder layer keeps its default sequence-first layout; forward()
        # transposes around it so callers can stay batch-first.
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=4, dim_feedforward=512, activation='gelu')
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=1)

        self.prediction_head_outcome = nn.Sequential(
            nn.Dropout(drop),
            nn.Linear(hidden_dim, output_dim),
            nn.Dropout(drop),
            nn.Sigmoid(),
        )

        self.prediction_head_los = nn.Sequential(
            nn.Dropout(drop),
            nn.Linear(hidden_dim, output_dim),
            nn.Dropout(drop),
        )

    def forward(self, x):
        """Predict outcome probability and length of stay for every visit.

        Args:
            x: float tensor of shape ``(batch, visits, demo_dim + lab_dim)``.

        Returns:
            ``(outcome, los)``, each of shape ``(batch, visits, output_dim)``;
            ``outcome`` is passed through a sigmoid.

        Note:
            No padding or causal mask is applied, so every visit attends to all
            visits of the same patient, including padded ones.
        """
        x = self.proj(x)

        # nn.TransformerEncoder expects (seq, batch, feature) by default.
        # Without this transpose attention would run across the patients of a
        # batch instead of across each patient's visits.
        x = self.transformer_encoder(x.transpose(0, 1)).transpose(0, 1)

        outcome = self.prediction_head_outcome(x)
        los = self.prediction_head_los(x)
        return outcome, los


In [None]:
def train_epoch(model, device, dataloader, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: module returning ``(outcome, los)`` for a batch of features.
        device: device the model lives on; inputs and targets are moved there.
        dataloader: iterable of ``(x, y, x_lab_length)`` batches, where
            ``y[..., 0]`` is the outcome target and ``y[..., 1]`` the
            length-of-stay target.
        loss_fn: callable ``(outcome_pred, outcome_true, los_pred, los_true,
            lengths) -> scalar tensor``.
        optimizer: optimizer updating ``model``'s parameters.

    Returns:
        Mean of the per-batch loss values.
    """
    train_loss = []
    model.train()
    for batch_x, batch_y, batch_x_lab_length in dataloader:
        # Inputs must sit on the same device as the model; the lengths are only
        # used as slice bounds, so they can stay where they are.
        batch_x = batch_x.float().to(device)
        batch_y = batch_y.float().to(device)
        batch_x_lab_length = batch_x_lab_length.float()

        batch_y_outcome = batch_y[:, :, 0].unsqueeze(-1)
        batch_y_los = batch_y[:, :, 1].unsqueeze(-1)

        optimizer.zero_grad()
        outcome, los = model(batch_x)
        loss = loss_fn(outcome, batch_y_outcome, los, batch_y_los, batch_x_lab_length)
        train_loss.append(loss.item())
        loss.backward()
        optimizer.step()
    return np.array(train_loss).mean()

def val_epoch(model, device, dataloader, loss_fn):
    """Evaluate ``model`` on a validation or test loader.

    Only the first ``x_lab_length[i]`` steps of every sample are collected for
    the metrics, so padded steps are excluded.

    Args:
        model: module returning ``(outcome, los)`` for a batch of features.
        device: device the model lives on; inputs and targets are moved there.
        dataloader: iterable of ``(x, y, x_lab_length)`` batches, where
            ``y[..., 0]`` is the outcome target and ``y[..., 1]`` the
            length-of-stay target.
        loss_fn: callable ``(outcome_pred, outcome_true, los_pred, los_true,
            lengths) -> scalar tensor``.

    Returns:
        ``(mean_loss, outcome_scores, los_scores)`` where the score objects are
        whatever ``metrics.print_metrics_binary`` and
        ``metrics.print_metrics_regression`` return (callers index them by
        metric name).
    """
    val_loss = []
    y_outcome_pred = []
    y_outcome_true = []
    y_los_pred = []
    y_los_true = []
    model.eval()
    with torch.no_grad():
        for batch_x, batch_y, batch_x_lab_length in dataloader:
            # Inputs must sit on the same device as the model.
            batch_x = batch_x.float().to(device)
            batch_y = batch_y.float().to(device)
            batch_x_lab_length = batch_x_lab_length.float()

            batch_y_outcome = batch_y[:, :, 0].unsqueeze(-1)
            batch_y_los = batch_y[:, :, 1].unsqueeze(-1)
            outcome, los = model(batch_x)
            loss = loss_fn(outcome, batch_y_outcome, los, batch_y_los, batch_x_lab_length)
            val_loss.append(loss.item())

            # Drop only the trailing feature axis. A bare squeeze() would also
            # remove the batch axis when a batch holds a single sample (or the
            # visit axis when there is one visit) and break the indexing below.
            los = los.squeeze(-1)
            batch_y_los = batch_y_los.squeeze(-1)
            for i in range(len(batch_y_outcome)):
                n_valid = batch_x_lab_length[i].long()
                y_outcome_pred.extend(outcome[i][:n_valid].tolist())
                y_outcome_true.extend(batch_y_outcome[i][:n_valid].tolist())
                y_los_pred.extend(los[i][:n_valid].tolist())
                y_los_true.extend(batch_y_los[i][:n_valid].tolist())
    y_outcome_true = np.array(y_outcome_true)
    y_outcome_pred = np.array(y_outcome_pred)
    # Two-column layout: [P(negative), P(positive)].
    y_outcome_pred = np.stack([1 - y_outcome_pred, y_outcome_pred], axis=1)
    outcome_evaluation_scores = metrics.print_metrics_binary(y_outcome_true, y_outcome_pred)
    los_evaluation_scores = metrics.print_metrics_regression(y_los_true, y_los_pred)
    return np.array(val_loss).mean(), outcome_evaluation_scores, los_evaluation_scores

In [None]:
# Smoke test: one training pass followed by an evaluation pass over the full
# dataset, to check that the model, loss and metrics run end to end.
model = Transformer(
    lab_dim=lab_dim,
    demo_dim=demo_dim,
    max_visits=max_visits,
    hidden_dim=hidden_dim,
    output_dim=1,
    act_layer=nn.GELU,
    drop=0.,
).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = get_multi_task_loss
train_loader = DataLoader(train_dataset, batch_size=128)

train_loss = train_epoch(model, device, train_loader, criterion, optimizer)
# Evaluation deliberately reuses the training loader here.
val_loss, outcome_evaluation_scores, los_evaluation_scores = val_epoch(
    model, device, train_loader, criterion
)
print('train_loss', train_loss, outcome_evaluation_scores, los_evaluation_scores)

In [None]:
# Train / validate / test on the first stratified fold.
# NOTE(review): only the first of the `num_folds` splits is consumed
# (`next(skf)`), so `test_performance` ends up with a single entry per metric.
all_history = {}
test_performance = {'test_loss': [], 'test_mad': [], 'test_mse': [], 'test_mape': [], 'test_accuracy': [], 'test_auroc': [], 'test_auprc': []}
dataset = train_dataset

# Outer split: hold out one fold as the test set, stratified on the outcome
# label read from the first time step of every sample.
kfold_test = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=RANDOM_SEED)
skf = kfold_test.split(np.arange(len(dataset)), dataset.y[:, 0, 0])
fold_test = 0
train_and_val_idx, test_idx = next(skf)

# Inner split: carve a validation set out of the remaining samples.
sss = StratifiedShuffleSplit(
    n_splits=1, test_size=1 / (num_folds - 1), random_state=RANDOM_SEED
)

test_sampler = SubsetRandomSampler(test_idx)
test_loader = DataLoader(dataset, batch_size=batch_size, sampler=test_sampler)
sub_dataset = Dataset(dataset.x[train_and_val_idx], dataset.y[train_and_val_idx], dataset.x_lab_length[train_and_val_idx])
all_history['test_fold_{}'.format(fold_test + 1)] = {}

train_idx, val_idx = next(
    sss.split(np.arange(len(train_and_val_idx)), sub_dataset.y[:, 0, 0])
)

# train_idx / val_idx are positions inside `sub_dataset`, so the loaders must
# draw from `sub_dataset`. Indexing the full `dataset` with them would select
# the wrong samples and could leak test samples into training.
train_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(val_idx)
train_loader = DataLoader(sub_dataset, batch_size=batch_size, sampler=train_sampler)
val_loader = DataLoader(sub_dataset, batch_size=batch_size, sampler=val_sampler)

model = Transformer(lab_dim=lab_dim, demo_dim=demo_dim, max_visits=max_visits, hidden_dim=hidden_dim, output_dim=1, act_layer=nn.GELU, drop=0.).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = get_multi_task_loss  # get_bce_loss

checkpoint_path = './checkpoints/transformer_multi_best_mad.pth'
# torch.save does not create missing directories.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

history = {'train_loss': [], 'val_loss': [], 'val_mad': [], 'val_mse': [], 'val_mape': []}
best_val_performance = 1e8
for epoch in range(num_epochs):
    train_loss = train_epoch(model, device, train_loader, criterion, optimizer)
    val_loss, val_outcome_evaluation_scores, val_los_evaluation_scores = val_epoch(model, device, val_loader, criterion)
    # save performance history on validation set
    print("Epoch:{}/{} AVG Training Loss:{:.3f} AVG Val Loss:{:.3f}".format(epoch + 1, num_epochs, train_loss, val_loss))
    history['train_loss'].append(train_loss)
    history['val_loss'].append(val_loss)
    history['val_mad'].append(val_los_evaluation_scores['mad'])
    history['val_mse'].append(val_los_evaluation_scores['mse'])
    history['val_mape'].append(val_los_evaluation_scores['mape'])

    # Keep the checkpoint with the lowest validation MAD; it is evaluated on
    # the test fold once training has finished.
    if val_los_evaluation_scores['mad'] < best_val_performance:
        best_val_performance = val_los_evaluation_scores['mad']
        torch.save(model.state_dict(), checkpoint_path)
all_history['test_fold_{}'.format(fold_test + 1)] = history
print(
    f"Best performance on val set {fold_test+1}:     MAE = {best_val_performance}"
)

# Reload the best checkpoint into a fresh model and score it on the test fold.
model = Transformer(lab_dim=lab_dim, demo_dim=demo_dim, max_visits=max_visits, hidden_dim=hidden_dim, output_dim=1, act_layer=nn.GELU, drop=0.).to(device)
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
test_loss, test_outcome_evaluation_scores, test_los_evaluation_scores = val_epoch(model, device, test_loader, criterion)
test_performance['test_loss'].append(test_loss)
test_performance['test_mad'].append(test_los_evaluation_scores['mad'])
test_performance['test_mse'].append(test_los_evaluation_scores['mse'])
test_performance['test_mape'].append(test_los_evaluation_scores['mape'])
test_performance['test_accuracy'].append(test_outcome_evaluation_scores['acc'])
test_performance['test_auroc'].append(test_outcome_evaluation_scores['auroc'])
test_performance['test_auprc'].append(test_outcome_evaluation_scores['auprc'])
# MAE is reported from the 'mad' score (the original printed 'mape' here).
print(f"Performance on test set {fold_test+1}: MAE = {test_los_evaluation_scores['mad']}, MSE = {test_los_evaluation_scores['mse']}, MAPE = {test_los_evaluation_scores['mape']}, ACC = {test_outcome_evaluation_scores['acc']}, AUROC = {test_outcome_evaluation_scores['auroc']}, AUPRC = {test_outcome_evaluation_scores['auprc']}")


In [None]:
# Summarise the test metrics as "mean (std)" over every fold recorded in
# `test_performance`.
test_mad_list = np.array(test_performance['test_mad'])
test_mse_list = np.array(test_performance['test_mse'])
test_mape_list = np.array(test_performance['test_mape'])
test_accuracy_list = np.array(test_performance['test_accuracy'])
test_auroc_list = np.array(test_performance['test_auroc'])
test_auprc_list = np.array(test_performance['test_auprc'])

# One output line per metric, in the original reporting order.
for metric_label, metric_values in (
    ("MAE", test_mad_list),
    ("MSE", test_mse_list),
    ("MAPE", test_mape_list),
    ("ACC", test_accuracy_list),
    ("AUROC", test_auroc_list),
    ("AUPRC", test_auprc_list),
):
    print("{}: {:.3f} ({:.3f})".format(metric_label, metric_values.mean(), metric_values.std()))