In [1]:
# Standard library
import math
import os
import sys
import time
from copy import deepcopy
from pathlib import Path
from typing import Dict

# Third-party
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import roc_auc_score, precision_recall_curve, confusion_matrix, auc
from torch.utils.data import DataLoader, TensorDataset
import tqdm
import wandb

cur_path = Path(".").resolve()
base_path = cur_path.parents[1]

print(f"Current File Path: {cur_path}")
print(f"Base Path: {base_path}")

os.chdir(str(base_path))
sys.path.append(str(base_path))

# Local modules
from mimic3benchmark.readers import InHospitalMortalityReader
from mimic3models import common_utils
from mimic3models.in_hospital_mortality import utils
from mimic3models.preprocessing import Discretizer, Normalizer

Current File Path: /home/jovyan/mimic3-sand/mimic3models/in_hospital_mortality
Base Path: /home/jovyan/mimic3-sand


In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Arch

In [3]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, seq_len = 10000) -> None:
        super(PositionalEncoding, self).__init__()
        self.d_model = d_model

        pe = torch.zeros(seq_len, d_model)

        for pos in range(seq_len):
            for i in range(0, d_model, 2):
                pe[pos, i] = math.sin(pos / (10000 ** ((2 * i) / d_model)))
                pe[pos, i+1] = math.cos(pos / (10000 ** ((2 * (i+1)) / d_model)))

        pe = pe.unsqueeze(0)
        self.register_buffer("pe", pe)

    def forward(self, x) -> torch.Tensor:
        seq_len = x.shape[1]
        x = math.sqrt(self.d_model) * x
        x = x + self.pe[:, :seq_len].requires_grad_(False)
        return x

class ResidualBlock(nn.Module):
    def __init__(self, layer: nn.Module, embed_dim: int, p=0.1) -> None:
        super(ResidualBlock, self).__init__()
        self.layer = layer
        self.dropout = nn.Dropout(p=p)
        self.norm = nn.LayerNorm(embed_dim)
        self.attn_weights = None
    
    def forward(self, x: torch.Tensor, pad_mask = None) -> torch.Tensor:
        """
        :param x: [N, seq_len, features]
        :return: [N, seq_len, features]
        """
        
        if isinstance(self.layer, nn.MultiheadAttention):
            BS, seq_len, _ = x.shape
            src = x.transpose(0, 1)     # [seq_len, N, features]
            output, self.attn_weights = self.layer(src, src, src, key_padding_mask=pad_mask)
            output = output.transpose(0, 1)     # [N, seq_len, features]
        else:
            output = self.layer(x)

        output = self.dropout(output)
        output = self.norm(x + output)
        return output

class PositionWiseFeedForward(nn.Module):
    def __init__(self, hidden_size: int) -> None:
        super(PositionWiseFeedForward, self).__init__()
        self.hidden_size = hidden_size

        self.conv = nn.Sequential(
            nn.Conv1d(hidden_size, hidden_size * 2, 1),
            nn.ReLU(),
            nn.Conv1d(hidden_size * 2, hidden_size, 1)
        )

    def forward(self, tensor: torch.Tensor) -> torch.Tensor:
        tensor = tensor.transpose(1, 2)
        tensor = self.conv(tensor)
        tensor = tensor.transpose(1, 2)

        return tensor

class EncoderBlock(nn.Module):
    def __init__(self, embed_dim: int, num_head: int, dropout_rate=0.1) -> None:
        super(EncoderBlock, self).__init__()
        self.attention = ResidualBlock(
            nn.MultiheadAttention(embed_dim, num_head, dropout=dropout_rate), embed_dim, p=dropout_rate
        )
        self.ffn = ResidualBlock(PositionWiseFeedForward(embed_dim), embed_dim, p=dropout_rate)

    def forward(self, x: torch.Tensor, pad_mask = None) -> torch.Tensor:
        x = self.attention(x, pad_mask)
        x = self.ffn(x)
        return x

class ClassificationModule(nn.Module):
    def __init__(self, d_model: int, num_class: int) -> None:
        super(ClassificationModule, self).__init__()
        self.d_model = d_model
        self.num_class = num_class

        self.fc = nn.Linear(d_model, num_class)

        nn.init.normal_(self.fc.weight, std=0.02)
        nn.init.normal_(self.fc.bias, 0)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.fc(x)
        return x


In [4]:
class EncoderLayerForSAnD(nn.Module):
    def __init__(self, input_features, n_heads, n_layers, d_model=128, dropout_rate=0.2) -> None:
        super(EncoderLayerForSAnD, self).__init__()
        self.d_model = d_model

        self.input_embedding = nn.Conv1d(input_features, d_model, 1)
        self.positional_encoding = PositionalEncoding(d_model)
        self.blocks = nn.ModuleList([
            EncoderBlock(d_model, n_heads, dropout_rate) for _ in range(n_layers)
        ])

        self.cls_token = nn.Parameter(torch.zeros(1, 1, d_model))
        nn.init.normal_(self.cls_token, std=0.02)

    def forward(self, x: torch.Tensor, pad_mask = None) -> torch.Tensor:
        BS, seq_len, emb_dim = x.shape
        
        x = x.transpose(1, 2)
        x = self.input_embedding(x)
        x = x.transpose(1, 2)
        
        cls = self.cls_token.expand(BS, -1, -1).to(device)  
        x = torch.cat([cls, x], dim=1)
        
        if pad_mask is not None:
            pad_mask = torch.cat(
                [torch.zeros((BS, 1), dtype=torch.bool, device=device), pad_mask], 
                dim=1
            )

        x = self.positional_encoding(x)

        for l in self.blocks:
            x = l(x, pad_mask)

        return x

class SAnD(nn.Module):
    """
    Simply Attend and Diagnose model

    The Thirty-Second AAAI Conference on Artificial Intelligence (AAAI-18)

    `Attend and Diagnose: Clinical Time Series Analysis Using Attention Models <https://arxiv.org/abs/1711.03905>`_
    Huan Song, Deepta Rajan, Jayaraman J. Thiagarajan, Andreas Spanias
    """
    def __init__(
            self, input_features: int, n_heads: int,
            n_class: int, n_layers: int, d_model: int = 128, dropout_rate: float = 0.2
    ) -> None:
        super(SAnD, self).__init__()
        
        self.hyperparams = {
             "input_features" : input_features,
            "n_heads" : n_heads, 
            "n_class" : n_class,
            "n_layers" : n_layers,
            "d_model" : d_model,
            "dropout_rate" : dropout_rate
        }
        
        self.d_model = d_model

        self.encoder = EncoderLayerForSAnD(input_features, n_heads, n_layers, d_model, dropout_rate)

        self.clf = ClassificationModule(d_model, n_class)

    def forward(self, x: torch.Tensor, pad_mask = None) -> torch.Tensor:
        x = self.encoder(x, pad_mask)
        
        x = x[:, 0, :]
        x = self.clf(x)
        return x


In [5]:
def evaluate_model(model, dataloader, criterion=None, device=device):
    model.eval()
    all_labels = []
    all_probs = []

    total_loss = 0.0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            logits = model(inputs)
            loss = criterion(logits, labels)
            total_loss += loss.item()

            # Handle logits: either shape [B] or [B, 2]
            if logits.dim() == 1 or logits.size(1) == 1:
                # Single-logit binary classifier
                probs_pos = torch.sigmoid(logits).unsqueeze(1)
                probs = torch.cat([1 - probs_pos, probs_pos], dim=1)
            else:
                # Two-logit classifier
                probs = torch.softmax(logits, dim=1)

            all_labels.append(labels.cpu())
            all_probs.append(probs.cpu())

    all_labels = torch.cat(all_labels).numpy()
    all_probs = torch.cat(all_probs).numpy()

    # Metrics
    preds = all_probs.argmax(axis=1)
    cf = confusion_matrix(all_labels, preds).astype(np.float32)

    acc = (cf[0][0] + cf[1][1]) / np.sum(cf)
    prec0 = cf[0][0] / (cf[0][0] + cf[1][0] + 1e-8)
    prec1 = cf[1][1] / (cf[1][1] + cf[0][1] + 1e-8)
    rec0 = cf[0][0] / (cf[0][0] + cf[0][1] + 1e-8)
    rec1 = cf[1][1] / (cf[1][1] + cf[1][0] + 1e-8)

    auroc = roc_auc_score(all_labels, all_probs[:, 1])

    precisions, recalls, _ = precision_recall_curve(all_labels, all_probs[:, 1])
    auprc = auc(recalls, precisions)
    minpse = np.max([min(p, r) for p, r in zip(precisions, recalls)])

    return {
        "loss": total_loss,
        "accuracy": acc,
        "precision_class0": prec0,
        "precision_class1": prec1,
        "recall_class0": rec0,
        "recall_class1": rec1,
        "AUROC": auroc,
        "AUPRC": auprc,
        "minpse": minpse
    }

In [6]:
class NeuralNetworkClassifier:
    def __init__(self, model, criterion, optimizer, optimizer_config: dict, experiment) -> None:
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = model.to(self.device)
        self.optimizer = optimizer(self.model.parameters(), **optimizer_config)
        self.criterion = criterion

        self.hyper_params = optimizer_config
        self._start_epoch = 0
        self.hyper_params["epochs"] = self._start_epoch
        self.__num_classes = None
        self._is_parallel = False
        
        self.run = wandb.init(
            project='sand-mimic3',
            config={
                "task" : "in-hospital-mortality",
                "hyperparams" : self.model.hyperparams
            }
        )

        if torch.cuda.device_count() > 1:
            self.model = nn.DataParallel(self.model)
            self._is_parallel = True

            notice = "Running on {} GPUs.".format(torch.cuda.device_count())
            print("\033[33m" + notice + "\033[0m")            

    def fit(self, loader: Dict[str, DataLoader], epochs: int, checkpoint_path: str = None, validation: bool = True) -> None:
        len_of_train_dataset = len(loader["train"].dataset)
        epochs = epochs + self._start_epoch

        self.hyper_params["epochs"] = epochs
        self.hyper_params["batch_size"] = loader["train"].batch_size
        self.hyper_params["train_ds_size"] = len_of_train_dataset
        
        best_model = None
        best_val_loss = float('inf')

        if validation:
            len_of_val_dataset = len(loader["val"].dataset)
            self.hyper_params["val_ds_size"] = len_of_val_dataset

        for epoch in range(self._start_epoch, epochs):
            if checkpoint_path is not None and epoch % 10 == 0:
                self.save_to_file(checkpoint_path)

            correct = 0.0
            total = 0.0

            self.model.train()
            pbar = tqdm.tqdm(total=len_of_train_dataset)
            total_loss = 0
            for x, y in loader["train"]:
                b_size = y.shape[0]
                total += y.shape[0]
                x = x.to(self.device) if isinstance(x, torch.Tensor) else [i.to(self.device) for i in x]
                y = y.to(self.device)

                pbar.set_description(
                    "\033[36m" + "Training" + "\033[0m" + " - Epochs: {:03d}/{:03d}".format(epoch+1, epochs)
                )
                pbar.update(b_size)

                self.optimizer.zero_grad()
                outputs = self.model(x)
                loss = self.criterion(outputs, y)
                loss.backward()
                self.optimizer.step()

                _, predicted = torch.max(outputs, 1)
                correct += (predicted == y).sum().float().cpu().item()

                total_loss += loss.cpu().item()

            total_loss = total_loss / len(loader['train'])

            if validation:
                with torch.no_grad():
                    self.model.eval()
                    eval_result = evaluate_model(self.model, loader['val'], criterion=self.criterion)

                    if eval_result['loss'] < best_val_loss:
                        best_val_loss = eval_result['loss']
                        state = self.model.module.state_dict() if self._is_parallel else self.model.state_dict()
                        best_model = deepcopy(state)

            if validation:
                self.run.log({
                    'train_loss' : total_loss, 
                    'train_accuracy' : float(correct / total), 
                    'val_loss' : eval_result['loss'], 
                    'val_AUROC' : eval_result['AUROC'],
                    "val_AUPRC" : eval_result['AUPRC'],
                    'val_minpse' : eval_result['minpse']
                })
            else:
                self.run.log({
                    'train_loss' : total_loss, 
                    'train_accuracy' : float(correct / total)
                })

            pbar.close()

        if best_model is not None:
            if self._is_parallel:
                self.model.module.load_state_dict(best_model)
            else:
                self.model.load_state_dict(best_model)
                
        if checkpoint_path is not None:
            self.save_to_file(checkpoint_path)
                
        

    def save_checkpoint(self) -> dict:
        """
        The method of saving trained PyTorch model.

        Note,  return value contains
            - the number of last epoch as `epochs`
            - optimizer state as `optimizer_state_dict`
            - model state as `model_state_dict`

        ::

            clf = NeuralNetworkClassifier(
                    Network(), nn.CrossEntropyLoss(),
                    optim.Adam, optimizer_config, experiment
                )

            clf.fit(train_loader, epochs=10)
            checkpoints = clf.save_checkpoint()

        :return: dict {'epoch', 'optimizer_state_dict', 'model_state_dict'}
        """

        checkpoints = {
            "epoch": deepcopy(self.hyper_params["epochs"]),
            "optimizer_state_dict": deepcopy(self.optimizer.state_dict())
        }

        if self._is_parallel:
            checkpoints["model_state_dict"] = deepcopy(self.model.module.state_dict())
        else:
            checkpoints["model_state_dict"] = deepcopy(self.model.state_dict())

        return checkpoints

    def save_to_file(self, path: str) -> str:
        """
        | The method of saving trained PyTorch model to file.
        | Those weights are uploaded to comet.ml as backup.
        | check "Asserts".

        Note, .pth file contains
            - the number of last epoch as `epochs`
            - optimizer state as `optimizer_state_dict`
            - model state as `model_state_dict`

        ::

            clf = NeuralNetworkClassifier(
                    Network(), nn.CrossEntropyLoss(),
                    optim.Adam, optimizer_config, experiment
                )

            clf.fit(train_loader, epochs=10)
            filename = clf.save_to_file('path/to/save/dir/')

        :param path: path to saving directory. : string
        :return: path to file : string
        """
        if not os.path.isdir(path):
            os.mkdir(path)

        file_name = "model_params-epochs_{}-{}.pth".format(
            self.hyper_params["epochs"], time.ctime().replace(" ", "_")
        )
        path = path + file_name

        checkpoints = self.save_checkpoint()

        torch.save(checkpoints, path)

        return path

    def restore_checkpoint(self, checkpoints: dict) -> None:
        """
        The method of loading trained PyTorch model.

        :param checkpoints: dictionary which contains {'epoch', 'optimizer_state_dict', 'model_state_dict'}
        :return: None
        """
        self._start_epoch = checkpoints["epoch"]
        if not isinstance(self._start_epoch, int):
            raise TypeError

        if self._is_parallel:
            self.model.module.load_state_dict(checkpoints["model_state_dict"])
        else:
            self.model.load_state_dict(checkpoints["model_state_dict"])

        self.optimizer.load_state_dict(checkpoints["optimizer_state_dict"])

    def restore_from_file(self, path: str, map_location: str = "cpu") -> None:
        """
        The method of loading trained PyTorch model from file.

        ::

            clf = NeuralNetworkClassifier(
                    Network(), nn.CrossEntropyLoss(),
                    optim.Adam, optimizer_config, experiment
                )
            clf.restore_from_file('path/to/trained/weights.pth')

        :param path: path to saved directory. : str
        :param map_location: default cpu: str
        :return: None
        """
        checkpoints = torch.load(path, map_location=map_location)
        self.restore_checkpoint(checkpoints)



# Loading Data

In [7]:
# Args Values (Hardcoded)
data_dir = "/search-data/evan/data/in-hospital-mortality/" # input Your Data Dir Here Pointing To /in-hospital-mortality
timestep = 1.0
normalizer_state = None
imputation = 'previous'

train_reader = InHospitalMortalityReader(
    dataset_dir=os.path.join(data_dir, 'train'),
    listfile=os.path.join(data_dir, 'train_listfile.csv'),
    period_length=48.0
)

val_reader = InHospitalMortalityReader(
    dataset_dir=os.path.join(data_dir, 'train'),
    listfile=os.path.join(data_dir, 'val_listfile.csv'),
    period_length=48.0
)

discretizer = Discretizer(
    timestep=float(timestep),
    store_masks=True,
    impute_strategy='previous',
    start_time='zero'
)

discretizer_header = discretizer.transform(train_reader.read_example(0)["X"])[1].split(',')
cont_channels = [i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1]

normalizer = Normalizer(fields=cont_channels)  # choose here which columns to standardize
normalizer_state = normalizer_state
if normalizer_state is None:
    normalizer_state = 'ihm_ts{}.input_str-{}.start_time-zero.normalizer'.format(timestep, imputation)
    normalizer_state = os.path.join(cur_path, normalizer_state)
    
normalizer.load_params(normalizer_state)

In [None]:
small_part = False

train_x, train_y = utils.load_data(train_reader, discretizer, normalizer, small_part)
val_x, val_y = utils.load_data(val_reader, discretizer, normalizer, small_part)

train_y = np.array(train_y)
val_y = np.array(val_y)

train_x = torch.tensor(train_x, dtype=torch.float32)
train_y = torch.tensor(train_y, dtype=torch.long)  # classification labels
val_x = torch.tensor(val_x, dtype=torch.float32)
val_y = torch.tensor(val_y, dtype=torch.long)

train_ds = TensorDataset(train_x, train_y)
val_ds = TensorDataset(val_x, val_y)

In [8]:
test_reader = InHospitalMortalityReader(
    dataset_dir=os.path.join(data_dir, 'test'),
    listfile=os.path.join(data_dir, 'test_listfile.csv'),
    period_length=48.0
)
test_x, test_y = utils.load_data(
    test_reader, 
    discretizer, 
    normalizer, 
    small_part
)

# Convert DataFrame to tensor (float)
test_x = torch.tensor(test_x, dtype=torch.float32)
test_y = torch.tensor(test_y, dtype=torch.long)

test_ds = TensorDataset(test_x, test_y)

NameError: name 'small_part' is not defined

In [None]:
torch.save(train_x, f"{data_dir}/in_hospitality_mortality_train_x.pt")
torch.save(train_y, f"{data_dir}/in_hospitality_mortality_train_y.pt")
torch.save(val_x,   f"{data_dir}/in_hospitality_mortality_val_x.pt")
torch.save(val_y,   f"{data_dir}/in_hospitality_mortality_val_y.pt")
torch.save(test_x, f"{data_dir}/in_hospitality_mortality_test_x.pt")
torch.save(test_y, f"{data_dir}/in_hospitality_mortality_test_y.pt")

# Model Training

In [24]:
# Define model parameters
in_feature = 76
n_heads = 8 # Number of heads for multi-head attention layer: Should be fixed at 8
num_class = 2 # Number of output class
num_layers = 2 # Number of multi-head attention layers (N): This depends on the task at hand
d_model = 128 # Original 256
dropout_rate = 0.4
optimizer_config = {
    'lr' : 0.0001,
    'betas' : (0.9, 0.98),
    'eps' : 1e-08,
}
num_epochs = 20
batch_size = 256 # Original 256



In [25]:
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False)

In [26]:
sand = SAnD(
    input_features = in_feature,
    n_heads = n_heads,
    n_class = num_class, 
    n_layers = num_layers, 
    d_model = d_model,
    dropout_rate = dropout_rate
)

# Build the model
model = NeuralNetworkClassifier(
    sand,
    nn.CrossEntropyLoss(),
    optim.Adam,
    optimizer_config=optimizer_config,
    experiment=None
)

In [27]:
model.fit(
    {"train": train_loader,
     "val": val_loader},
    epochs = num_epochs,
    checkpoint_path="/search-data/evan/in_hospital_mortality_model/"
)

[36mTraining[0m - Epochs: 001/020: 100%|██████████| 14681/14681 [00:08<00:00, 1795.01it/s]
[36mTraining[0m - Epochs: 002/020: 100%|██████████| 14681/14681 [00:08<00:00, 1827.80it/s]
[36mTraining[0m - Epochs: 003/020: 100%|██████████| 14681/14681 [00:08<00:00, 1775.96it/s]
[36mTraining[0m - Epochs: 004/020: 100%|██████████| 14681/14681 [00:08<00:00, 1798.30it/s]
[36mTraining[0m - Epochs: 005/020: 100%|██████████| 14681/14681 [00:08<00:00, 1785.36it/s]
[36mTraining[0m - Epochs: 006/020: 100%|██████████| 14681/14681 [00:08<00:00, 1834.45it/s]
[36mTraining[0m - Epochs: 007/020: 100%|██████████| 14681/14681 [00:08<00:00, 1816.05it/s]
[36mTraining[0m - Epochs: 008/020: 100%|██████████| 14681/14681 [00:08<00:00, 1818.82it/s]
[36mTraining[0m - Epochs: 009/020: 100%|██████████| 14681/14681 [00:08<00:00, 1796.85it/s]
[36mTraining[0m - Epochs: 010/020: 100%|██████████| 14681/14681 [00:08<00:00, 1804.73it/s]
[36mTraining[0m - Epochs: 011/020: 100%|██████████| 14681/14681 [00:

# Model Testing

In [28]:
test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)
test_result = evaluate_model(model.model, test_loader, model.criterion)

In [29]:
model.run.log({
    'test_AUROC' : test_result['AUROC'],
    "test_AUPRC" : test_result['AUPRC'],
    'test_minpse' : test_result['minpse']
})
model.run.finish()

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
test_AUPRC,▁
test_AUROC,▁
test_minpse,▁
train_accuracy,▁▂▃▃▄▄▄▄▅▅▆▆▆▆▆▇▇▇██
train_loss,█▅▅▄▄▄▃▃▃▃▃▃▂▂▂▂▁▁▁▁
val_AUPRC,▁▄▅▆▆▇▇▇████████████
val_AUROC,▁▄▅▆▆▆▆▆▇▇▇▇▇▇██▇███
val_loss,█▄▃▃▂▁▁▁▃▁▂▂▁▃▂▁▃▂▄▃
val_minpse,▁▄▅▆▆▇█▆███▇▇▆▆▇▆▇▆▆

0,1
test_AUPRC,0.44086
test_AUROC,0.82167
test_minpse,0.44652
train_accuracy,0.89783
train_loss,0.25677
val_AUPRC,0.49524
val_AUROC,0.83121
val_loss,4.14633
val_minpse,0.46101


In [30]:
model.save_to_file("/search-data/evan/in_hospital_mortality_model/")

'/search-data/evan/in_hospital_mortality_model/model_params-epochs_20-Tue_Nov_18_04:21:48_2025.pth'

# Save & Load (Run Separately) - To Skip Loading Data

In [7]:
data_dir = "/search-data/evan/data/in-hospital-mortality/"

In [8]:
train_x = torch.load(f"{data_dir}/in_hospitality_mortality_train_x.pt", mmap=True)
train_y = torch.load(f"{data_dir}/in_hospitality_mortality_train_y.pt", mmap=True)
val_x   = torch.load(f"{data_dir}/in_hospitality_mortality_val_x.pt", mmap=True)
val_y   = torch.load(f"{data_dir}/in_hospitality_mortality_val_y.pt", mmap=True)
test_x   = torch.load(f"{data_dir}/in_hospitality_mortality_test_x.pt", mmap=True)
test_y   = torch.load(f"{data_dir}/in_hospitality_mortality_test_y.pt", mmap=True)

In [9]:
train_ds = TensorDataset(train_x, train_y)
val_ds = TensorDataset(val_x, val_y)
test_ds = TensorDataset(test_x, test_y)

# Baseline

In [None]:
run = wandb.init(
    project='sand-mimic3',
    config={
        "task" : "in-hospital-mortality",
    },
    name='in-hospital-mortality-baseline'
)
run.log({
    'test_AUROC' : 0.857,
    "test_AUPRC" : 0.518,
    'test_minpse' : 0.5
})
run.finish()