# SAnD model predictions for length of stay

### 0. Read in libraries

In [1]:
# Standard library
import math
import os
import sys
import time
from copy import deepcopy
from pathlib import Path
from typing import Dict

# Third-party
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import confusion_matrix, cohen_kappa_score, mean_absolute_error, mean_squared_error
from torch.utils.data import DataLoader, TensorDataset, Dataset
import tqdm
import wandb

# Resolve the directory the notebook is running in and take the directory two
# levels above it as the project root (see the printed paths in the output).
cur_path = Path(".").resolve()
base_path = cur_path.parents[1]

print(f"Current File Path: {cur_path}")
print(f"Base Path: {base_path}")

# Switch the working directory to the project root and put it on sys.path.
# This runs before the "Local modules" imports below, presumably so those
# project packages resolve; data_dir is later derived from os.getcwd().
os.chdir(str(base_path))
sys.path.append(str(base_path))

# Local modules
from mimic3benchmark.readers import LengthOfStayReader
from mimic3models import common_utils, metrics
from mimic3models.length_of_stay import utils
from mimic3models.preprocessing import Discretizer, Normalizer

Current File Path: /home/jovyan/mimic3-sand/mimic3models/length_of_stay
Base Path: /home/jovyan/mimic3-sand


In [2]:
# Use the GPU when PyTorch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [5]:
# Length-of-stay data lives under the current working directory (the project
# root after the os.chdir call in the first cell).
data_dir = f"{os.getcwd()}/data/length_of_stay"

### 1. Define model parameters

In [11]:
# Model hyper-parameters
in_feature = 76      # Input channels per timestep (collate_fn pads x to [batch, max_len, 76])
n_heads = 8          # Number of heads for multi-head attention layer: Should be fixed at 8
num_class = 10       # Number of output class
num_layers = 3       # Number of multi-head attention layers (N): This depends on the task at hand
d_model = 256        # Original 256
dropout_rate = 0.4

# Keyword arguments forwarded to the optimizer constructor.
optimizer_config = dict(lr=0.001, betas=(0.9, 0.98), eps=1e-08)

# Training schedule
num_epochs = 1
batch_size = 32      # Original 128

num_chunks = 60

### 2. Define model architecture for length of stay

In [3]:
class PositionalEncoding(nn.Module):
    """Scale the input by ``sqrt(d_model)`` and add fixed sinusoidal position codes.

    The table is precomputed once for ``seq_len`` positions and stored as the
    non-trainable buffer ``pe`` of shape ``[1, seq_len, d_model]``. Even
    columns hold sines and odd columns hold cosines of
    ``pos / 10000 ** (2 * column / d_model)``.

    NOTE(review): the exponent uses ``2 * column`` for every column, which is
    not the ``2i / d_model`` (``i`` = pair index) schedule of "Attention Is
    All You Need". It is kept exactly as before so results stay comparable --
    confirm whether this is intended.

    :param d_model: embedding width; odd widths are supported.
    :param seq_len: number of positions to precompute (longest usable input).
    """

    def __init__(self, d_model, seq_len = 10000) -> None:
        super().__init__()
        self.d_model = d_model

        # Build the whole table with tensor ops instead of a Python double
        # loop. float64 mirrors the precision of the former math.sin/math.cos
        # computation before values are stored in the default-dtype table.
        positions = torch.arange(seq_len, dtype=torch.float64).unsqueeze(1)
        columns = torch.arange(d_model, dtype=torch.float64).unsqueeze(0)
        angles = positions / torch.pow(10000.0, 2.0 * columns / d_model)

        pe = torch.zeros(seq_len, d_model)
        pe[:, 0::2] = torch.sin(angles[:, 0::2])
        pe[:, 1::2] = torch.cos(angles[:, 1::2])

        # Leading batch axis so the table broadcasts over [N, seq_len, d_model].
        self.register_buffer("pe", pe.unsqueeze(0))

    def forward(self, x) -> torch.Tensor:
        """
        :param x: [N, seq_len, d_model]
        :return: [N, seq_len, d_model], ``sqrt(d_model) * x`` plus position codes
        """
        seq_len = x.shape[1]
        # Buffers never require grad, so the slice can be added directly.
        return math.sqrt(self.d_model) * x + self.pe[:, :seq_len]

class ResidualBlock(nn.Module):
    """Apply a sub-layer, then dropout, a skip connection and layer norm.

    The result is ``LayerNorm(x + Dropout(layer(x)))``. When the wrapped layer
    is an ``nn.MultiheadAttention`` it is run as self-attention and the
    attention weights of the latest call are kept on ``attn_weights``.
    """

    def __init__(self, layer: nn.Module, embed_dim: int, p=0.1) -> None:
        super().__init__()
        self.layer = layer
        self.dropout = nn.Dropout(p=p)
        self.norm = nn.LayerNorm(embed_dim)
        # Filled in by forward() only when `layer` is multi-head attention.
        self.attn_weights = None

    def forward(self, x: torch.Tensor, pad_mask = None) -> torch.Tensor:
        """
        :param x: [N, seq_len, features]
        :param pad_mask: forwarded as ``key_padding_mask`` to attention layers;
            ignored for any other layer type
        :return: [N, seq_len, features]
        """
        if not isinstance(self.layer, nn.MultiheadAttention):
            branch = self.layer(x)
        else:
            # Unpacking also enforces a rank-3 input.
            _batch, _steps, _width = x.shape
            # The attention layer is fed sequence-first: [seq_len, N, features].
            seq_first = x.transpose(0, 1)
            attended, weights = self.layer(
                seq_first, seq_first, seq_first, key_padding_mask=pad_mask
            )
            self.attn_weights = weights
            branch = attended.transpose(0, 1)     # back to [N, seq_len, features]

        return self.norm(x + self.dropout(branch))

class PositionWiseFeedForward(nn.Module):
    """Two kernel-size-1 convolutions with a ReLU in between.

    The first convolution doubles the channel count and the second restores
    it, so the module maps [N, seq_len, hidden_size] to the same shape.
    """

    def __init__(self, hidden_size: int) -> None:
        super().__init__()
        self.hidden_size = hidden_size

        widen = nn.Conv1d(hidden_size, hidden_size * 2, 1)
        narrow = nn.Conv1d(hidden_size * 2, hidden_size, 1)
        self.conv = nn.Sequential(widen, nn.ReLU(), narrow)

    def forward(self, tensor: torch.Tensor) -> torch.Tensor:
        # Conv1d expects channels on axis 1, so swap in and back out.
        channels_first = tensor.transpose(1, 2)
        return self.conv(channels_first).transpose(1, 2)

class EncoderBlock(nn.Module):
    """One encoder layer: self-attention followed by a position-wise FFN.

    Both sub-layers are wrapped in a ``ResidualBlock`` that uses the same
    dropout rate.
    """

    def __init__(self, embed_dim: int, num_head: int, dropout_rate=0.1) -> None:
        super().__init__()
        self_attention = nn.MultiheadAttention(embed_dim, num_head)
        self.attention = ResidualBlock(self_attention, embed_dim, p=dropout_rate)

        feed_forward = PositionWiseFeedForward(embed_dim)
        self.ffn = ResidualBlock(feed_forward, embed_dim, p=dropout_rate)

    def forward(self, x: torch.Tensor, pad_mask = None) -> torch.Tensor:
        # Only the attention sub-layer consumes the padding mask.
        attended = self.attention(x, pad_mask)
        return self.ffn(attended)

class ClassificationModule(nn.Module):
    """Linear classification head mapping ``d_model`` features to class logits.

    Weights are drawn from N(0, 0.02**2) and the bias starts at zero.

    :param d_model: width of the incoming feature vector
    :param num_class: number of output logits
    """

    def __init__(self, d_model: int, num_class: int) -> None:
        super().__init__()
        self.d_model = d_model
        self.num_class = num_class

        self.fc = nn.Linear(d_model, num_class)

        nn.init.normal_(self.fc.weight, std=0.02)
        # The previous ``nn.init.normal_(bias, 0)`` only set the *mean* to 0
        # and left std at its default of 1, giving unit-variance random biases
        # that swamp the std=0.02 weights. Zero is the intended start.
        nn.init.zeros_(self.fc.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        :param x: [..., d_model]
        :return: [..., num_class] unnormalised logits
        """
        return self.fc(x)


In [4]:
class EncoderLayerForSAnD(nn.Module):
    """Embed a multivariate time series and run it through the encoder stack.

    The input is projected to ``d_model`` channels with a kernel-size-1
    convolution, a learned classification token is prepended, positional
    encoding is added, and the result passes through ``n_layers`` encoder
    blocks.

    :param input_features: number of channels per timestep in the input
    :param n_heads: attention heads per encoder block
    :param n_layers: number of encoder blocks
    :param d_model: embedding width
    :param dropout_rate: dropout probability used inside every block
    """

    def __init__(self, input_features, n_heads, n_layers, d_model=128, dropout_rate=0.2) -> None:
        super().__init__()
        self.d_model = d_model

        self.input_embedding = nn.Conv1d(input_features, d_model, 1)
        self.positional_encoding = PositionalEncoding(d_model)
        self.blocks = nn.ModuleList([
            EncoderBlock(d_model, n_heads, dropout_rate) for _ in range(n_layers)
        ])

        self.cls_token = nn.Parameter(torch.zeros(1, 1, d_model))
        nn.init.normal_(self.cls_token, std=0.02)

    def forward(self, x: torch.Tensor, pad_mask = None) -> torch.Tensor:
        """
        :param x: [N, seq_len, input_features]
        :param pad_mask: optional bool mask [N, seq_len]; it is handed to the
            attention layers as ``key_padding_mask``
        :return: [N, seq_len + 1, d_model]; index 0 on axis 1 is the
            classification token
        """
        batch_size = x.shape[0]

        # Conv1d works on [N, channels, seq_len]; swap axes in and back out.
        x = self.input_embedding(x.transpose(1, 2)).transpose(1, 2)

        # The token is a parameter of this module, so it already lives on the
        # module's device. Relying on a notebook-level `device` global here
        # breaks DataParallel replicas and CPU inference on CUDA machines.
        cls = self.cls_token.expand(batch_size, -1, -1)
        x = torch.cat([cls, x], dim=1)

        if pad_mask is not None:
            # Extend the mask by one never-masked column for the new token.
            cls_column = torch.zeros(
                (batch_size, 1), dtype=torch.bool, device=pad_mask.device
            )
            pad_mask = torch.cat([cls_column, pad_mask], dim=1)

        x = self.positional_encoding(x)

        for block in self.blocks:
            x = block(x, pad_mask)

        return x

class SAnD(nn.Module):
    """
    Simply Attend and Diagnose model

    The Thirty-Second AAAI Conference on Artificial Intelligence (AAAI-18)

    `Attend and Diagnose: Clinical Time Series Analysis Using Attention Models <https://arxiv.org/abs/1711.03905>`_
    Huan Song, Deepta Rajan, Jayaraman J. Thiagarajan, Andreas Spanias

    The encoder output at sequence index 0 (the classification token the
    encoder prepends) is fed to a linear head that produces ``n_class`` logits.
    """
    def __init__(
            self, input_features: int, n_heads: int,
            n_class: int, n_layers: int, d_model: int = 128, dropout_rate: float = 0.2
    ) -> None:
        super().__init__()

        # Constructor arguments kept for experiment logging.
        self.hyperparams = dict(
            input_features=input_features,
            n_heads=n_heads,
            n_class=n_class,
            n_layers=n_layers,
            d_model=d_model,
            dropout_rate=dropout_rate,
        )

        self.d_model = d_model
        self.encoder = EncoderLayerForSAnD(input_features, n_heads, n_layers, d_model, dropout_rate)
        self.clf = ClassificationModule(d_model, n_class)

    def forward(self, x: torch.Tensor, pad_mask = None) -> torch.Tensor:
        encoded = self.encoder(x, pad_mask)
        # Classify from the first sequence position only.
        summary = encoded[:, 0, :]
        return self.clf(summary)


In [5]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    The denominator is ``y_true + 0.1`` so targets equal to zero do not
    divide by zero.

    :param y_true: array-like of ground-truth values
    :param y_pred: array-like of predictions with a broadcast-compatible shape
    :return: ``mean(|y_true - y_pred| / (y_true + 0.1)) * 100``
    """
    # Callers pass plain Python lists; coerce so the arithmetic is always
    # element-wise (list - list would raise TypeError).
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((y_true - y_pred) / (y_true + 0.1))) * 100

class CustomBins:
    """Bucket boundaries and per-bucket point estimates for length of stay.

    ``bins`` holds half-open ``(low, high)`` edges; ``get_bin_custom`` scales
    them by 24.0 before comparing, so they are presumably days applied to
    labels in hours. ``means[i]`` is the value ``get_estimate_custom``
    returns for bucket ``i``.
    """
    inf = 1e18  # finite stand-in for an unbounded edge

    # Consecutive edges; neighbouring pairs become the (low, high) buckets.
    _edges = [-inf, 1, 2, 3, 4, 5, 6, 7, 8, 14, +inf]
    bins = list(zip(_edges[:-1], _edges[1:]))
    del _edges

    nbins = len(bins)
    means = [
        11.450379,
        35.070846,
        59.206531,
        83.382723,
        107.487817,
        131.579534,
        155.643957,
        179.660558,
        254.306624,
        585.325890,
    ]


def get_bin_custom(x, nbins, one_hot=False):
    """Find the first of the first ``nbins`` CustomBins buckets containing ``x``.

    Bucket edges are multiplied by 24.0 before the half-open test
    ``low <= x < high``.

    :param x: value to bucket
    :param nbins: number of buckets to try, starting from the first
    :param one_hot: if true, return a length-``CustomBins.nbins`` array with a
        1 at the bucket index instead of the index itself
    :return: bucket index, one-hot array, or None when no bucket matches
    """
    for index in range(nbins):
        low, high = CustomBins.bins[index]
        lower = low * 24.0
        upper = high * 24.0
        if not (lower <= x < upper):
            continue
        if not one_hot:
            return index
        encoded = np.zeros((CustomBins.nbins,))
        encoded[index] = 1
        return encoded
    return None


def get_estimate_custom(prediction, nbins):
    """Return the CustomBins mean of the highest-scoring bucket.

    :param prediction: per-bucket scores; the argmax picks the bucket
    :param nbins: exclusive upper bound asserted on the chosen index
    :return: ``CustomBins.means`` entry for the chosen bucket
    """
    best = np.argmax(prediction)
    assert best >= 0 and best < nbins
    return CustomBins.means[best]

def evaluate_model(model, dataloader, criterion, device=device):
    """Run ``model`` over ``dataloader`` and compute loss and LOS metrics.

    Each batch must unpack as ``(inputs, labels, labels_binned, pad_mask)``.
    Class probabilities are the softmax of the model output. The predicted
    bucket is the argmax, and the regression estimate is that bucket's
    ``CustomBins`` mean.

    :param model: module called as ``model(inputs, pad_mask)`` returning logits
    :param dataloader: iterable of batches as described above
    :param criterion: loss called as ``criterion(logits, labels_binned)``
    :param device: device the batches are moved to (default is the
        notebook-level ``device`` captured when this function was defined)
    :return: dict with ``loss`` (mean per-batch loss, the same scale as the
        training loss reported by ``NeuralNetworkClassifier.fit``), ``MAD``,
        ``MSE``, ``MAPE`` and linearly weighted ``kappa``
    :raises ValueError: if ``dataloader`` yields no batches
    """
    model.eval()
    label_chunks = []
    binned_chunks = []
    prob_chunks = []

    loss_sum = 0.0
    num_batches = 0

    with torch.no_grad():
        for inputs, labels, labels_binned, pad_mask in dataloader:
            inputs = inputs.to(device)
            labels_binned = labels_binned.to(device)
            pad_mask = pad_mask.to(device)

            logits = model(inputs, pad_mask)

            loss_sum += criterion(logits, labels_binned).item()
            num_batches += 1

            prob_chunks.append(F.softmax(logits, dim=1).cpu())
            # The raw labels are only used for metrics on the host, so they
            # never need to visit the device.
            label_chunks.append(labels.cpu())
            binned_chunks.append(labels_binned.cpu())

    if num_batches == 0:
        raise ValueError("evaluate_model received an empty dataloader")

    y_true = torch.cat(label_chunks).numpy()
    y_true_binned = torch.cat(binned_chunks).numpy()
    y_pred = torch.cat(prob_chunks).numpy()
    y_pred_binned = np.argmax(y_pred, axis=1)

    kappa = cohen_kappa_score(y_true_binned, y_pred_binned, weights="linear")

    # Map each predicted bucket to its mean for the regression metrics.
    y_pred_means = [get_estimate_custom(pred_prob, CustomBins.nbins) for pred_prob in y_pred]
    mad = mean_absolute_error(y_true, y_pred_means)
    mse = mean_squared_error(y_true, y_pred_means)
    mape = mean_absolute_percentage_error(y_true, y_pred_means)

    return {
        # Averaged over batches; a plain sum would grow with the number of
        # batches and could not be compared with the mean training loss.
        "loss": loss_sum / num_batches,
        "MAD": mad,
        "MSE": mse,
        "MAPE": mape,
        "kappa": kappa
    }


In [6]:
class NeuralNetworkClassifier:
    """Training, validation-logging and checkpoint wrapper around a classifier.

    The wrapped model is moved to CUDA when available (and wrapped in
    ``nn.DataParallel`` when several GPUs are visible). Metrics are logged to
    a Weights & Biases run created in the constructor and exposed as ``run``.
    """

    def __init__(self, model, criterion, optimizer, optimizer_config: dict, experiment) -> None:
        """
        :param model: ``nn.Module`` exposing a ``hyperparams`` attribute
        :param criterion: loss called as ``criterion(outputs, targets)``
        :param optimizer: optimizer class, e.g. ``optim.Adam``
        :param optimizer_config: keyword arguments for the optimizer class
        :param experiment: unused; kept so existing call sites keep working
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = model.to(self.device)
        self.optimizer = optimizer(self.model.parameters(), **optimizer_config)
        self.criterion = criterion

        # Copy instead of aliasing: bookkeeping keys ("epochs", "batch_size",
        # ...) are added below and in fit(), and must not leak into the
        # caller's dict, which may be reused to build another optimizer.
        self.hyper_params = dict(optimizer_config)
        self._start_epoch = 0
        self.hyper_params["epochs"] = self._start_epoch
        self.__num_classes = None
        self._is_parallel = False

        self.run = wandb.init(
            project='sand-mimic3',
            config={
                "task" : "length_of_stay",
                "hyperparams" : self.model.hyperparams
            }
        )

        if torch.cuda.device_count() > 1:
            self.model = nn.DataParallel(self.model)
            self._is_parallel = True

            notice = "Running on {} GPUs.".format(torch.cuda.device_count())
            print("\033[33m" + notice + "\033[0m")

    def _unwrapped_model(self):
        """Return the underlying module, looking through ``nn.DataParallel``."""
        return self.model.module if self._is_parallel else self.model

    def fit(self, loader: Dict[str, DataLoader], epochs: int, checkpoint_path: str = None, validation: bool = True) -> None:
        """Train for ``epochs`` epochs and log metrics to the W&B run.

        Training batches must unpack as ``(x, y_reg, y, pad_mask)``; ``y`` is
        the classification target and ``y_reg`` is not used for training.
        With ``validation`` enabled, the weights with the lowest validation
        loss are restored into the model once training ends.

        :param loader: dict with a ``"train"`` DataLoader and, when
            ``validation`` is true, a ``"val"`` DataLoader
        :param epochs: number of epochs to run in this call
        :param checkpoint_path: directory for checkpoints, or None to skip;
            one is written before every 10th epoch (including the first)
            and once after training
        :param validation: evaluate on ``loader["val"]`` after every epoch
        :return: None
        """
        train_loader = loader["train"]
        len_of_train_dataset = len(train_loader.dataset)
        epochs = epochs + self._start_epoch

        self.hyper_params["epochs"] = epochs
        self.hyper_params["batch_size"] = train_loader.batch_size
        self.hyper_params["train_ds_size"] = len_of_train_dataset
        if validation:
            self.hyper_params["val_ds_size"] = len(loader["val"].dataset)

        best_model = None
        best_val_loss = float('inf')

        for epoch in range(self._start_epoch, epochs):
            if checkpoint_path is not None and epoch % 10 == 0:
                self.save_to_file(checkpoint_path)

            correct = 0.0
            total = 0.0
            total_loss = 0.0

            self.model.train()
            # The context manager closes the bar even if a batch raises.
            with tqdm.tqdm(total=len_of_train_dataset) as pbar:
                pbar.set_description(
                    "\033[36m" + "Training" + "\033[0m" + " - Epochs: {:03d}/{:03d}".format(epoch+1, epochs)
                )
                for x, _y_reg, y, pad_mask in train_loader:
                    b_size = y.shape[0]
                    total += b_size
                    x = x.to(self.device) if isinstance(x, torch.Tensor) else [i.to(self.device) for i in x]
                    y = y.to(self.device)
                    pad_mask = pad_mask.to(self.device)

                    self.optimizer.zero_grad()
                    outputs = self.model(x, pad_mask)
                    loss = self.criterion(outputs, y)
                    loss.backward()
                    self.optimizer.step()

                    _, predicted = torch.max(outputs, 1)
                    correct += (predicted == y).sum().item()
                    total_loss += loss.item()

                    pbar.update(b_size)

            total_loss = total_loss / len(train_loader)

            epoch_log = {
                'train_loss' : total_loss,
                'train_accuracy' : float(correct / total)
            }

            if validation:
                # evaluate_model switches to eval mode and disables gradients
                # itself. Pass our device so it matches where the model lives.
                eval_result = evaluate_model(
                    self.model, loader['val'], criterion=self.criterion, device=self.device
                )

                if eval_result['loss'] < best_val_loss:
                    best_val_loss = eval_result['loss']
                    best_model = deepcopy(self._unwrapped_model().state_dict())

                epoch_log.update({
                    'val_loss' : eval_result['loss'],
                    'val_kappa' : eval_result['kappa'],
                    "val_MSE" : eval_result['MSE'],
                    'val_MAPE' : eval_result['MAPE']
                })

            self.run.log(epoch_log)

        if best_model is not None:
            self._unwrapped_model().load_state_dict(best_model)

        if checkpoint_path is not None:
            self.save_to_file(checkpoint_path)

    def save_checkpoint(self) -> dict:
        """
        The method of saving trained PyTorch model.

        Note,  return value contains
            - the number of last epoch as `epoch`
            - optimizer state as `optimizer_state_dict`
            - model state as `model_state_dict`

        ::

            clf = NeuralNetworkClassifier(
                    Network(), nn.CrossEntropyLoss(),
                    optim.Adam, optimizer_config, experiment
                )

            clf.fit(train_loader, epochs=10)
            checkpoints = clf.save_checkpoint()

        :return: dict {'epoch', 'optimizer_state_dict', 'model_state_dict'}
        """
        return {
            "epoch": deepcopy(self.hyper_params["epochs"]),
            "optimizer_state_dict": deepcopy(self.optimizer.state_dict()),
            "model_state_dict": deepcopy(self._unwrapped_model().state_dict()),
        }

    def save_to_file(self, path: str) -> str:
        """
        | The method of saving trained PyTorch model to file.

        Note, .pth file contains
            - the number of last epoch as `epoch`
            - optimizer state as `optimizer_state_dict`
            - model state as `model_state_dict`

        ::

            clf = NeuralNetworkClassifier(
                    Network(), nn.CrossEntropyLoss(),
                    optim.Adam, optimizer_config, experiment
                )

            clf.fit(train_loader, epochs=10)
            filename = clf.save_to_file('path/to/save/dir/')

        :param path: path to saving directory (created if missing), with or
            without a trailing separator. : string
        :return: path to file : string
        """
        os.makedirs(path, exist_ok=True)

        file_name = "model_params-epochs_{}-{}.pth".format(
            self.hyper_params["epochs"], time.ctime().replace(" ", "_")
        )
        # Join rather than concatenate: without a trailing separator on
        # `path`, "dir" + "model_...pth" would create "dirmodel_...pth" next
        # to the directory instead of a file inside it.
        file_path = os.path.join(path, file_name)

        torch.save(self.save_checkpoint(), file_path)

        return file_path

    def restore_checkpoint(self, checkpoints: dict) -> None:
        """
        The method of loading trained PyTorch model.

        :param checkpoints: dictionary which contains {'epoch', 'optimizer_state_dict', 'model_state_dict'}
        :return: None
        :raises TypeError: if ``checkpoints['epoch']`` is not an int
        """
        start_epoch = checkpoints["epoch"]
        # Validate before touching any state so a bad checkpoint leaves the
        # classifier unchanged.
        if not isinstance(start_epoch, int):
            raise TypeError(
                "checkpoint 'epoch' must be an int, got {}".format(type(start_epoch).__name__)
            )
        self._start_epoch = start_epoch

        self._unwrapped_model().load_state_dict(checkpoints["model_state_dict"])
        self.optimizer.load_state_dict(checkpoints["optimizer_state_dict"])

    def restore_from_file(self, path: str, map_location: str = "cpu") -> None:
        """
        The method of loading trained PyTorch model from file.

        ::

            clf = NeuralNetworkClassifier(
                    Network(), nn.CrossEntropyLoss(),
                    optim.Adam, optimizer_config, experiment
                )
            clf.restore_from_file('path/to/trained/weights.pth')

        :param path: path to the saved checkpoint file. : str
        :param map_location: default cpu: str
        :return: None
        """
        # NOTE(review): torch.load deserialises pickled data; only load
        # checkpoint files from a trusted source.
        checkpoints = torch.load(path, map_location=map_location)
        self.restore_checkpoint(checkpoints)

### 3. Data load-in

In [7]:
# Args Values (Hardcoded)
timestep = 1.0
imputation = 'previous'
normalizer_state = None  # None selects the default state file next to the notebook

# Train and validation readers share the 'train' directory; only the listfile
# passed to each reader differs.
train_reader = LengthOfStayReader(
    listfile=os.path.join(data_dir, 'train_listfile.csv'),
    dataset_dir=os.path.join(data_dir, 'train'),
)
val_reader = LengthOfStayReader(
    listfile=os.path.join(data_dir, 'val_listfile.csv'),
    dataset_dir=os.path.join(data_dir, 'train'),
)

discretizer = Discretizer(
    start_time='zero',
    impute_strategy='previous',
    store_masks=True,
    timestep=timestep,
)

# Discretise one example to obtain the comma-separated header. Columns whose
# name contains "->" are excluded from the channels handed to the Normalizer.
discretizer_header = discretizer.transform(train_reader.read_example(0)["X"])[1].split(',')
cont_channels = [
    position
    for position, column in enumerate(discretizer_header)
    if "->" not in column
]

normalizer = Normalizer(fields=cont_channels)  # choose here which columns to standardize

if normalizer_state is None:
    normalizer_state = os.path.join(
        cur_path,
        'los_ts{}.input_str-previous.start_time-zero.n5e4.normalizer'.format(timestep),
    )
normalizer.load_params(normalizer_state)

train_reader.random_shuffle()
val_reader.random_shuffle()

In [8]:
def preprocess_chunk(data, ts, discretizer, normalizer=None):
    """Discretise every series up to its end time, then optionally normalise.

    :param data: iterable of raw series
    :param ts: iterable of end times, paired element-wise with ``data``
    :param discretizer: object whose ``transform(X, end=t)`` returns a
        sequence; only element 0 of the result is kept
    :param normalizer: optional object with ``transform(X)``; skipped if None
    :return: list of processed series, in input order
    """
    discretized = []
    for series, end_time in zip(data, ts):
        discretized.append(discretizer.transform(series, end=end_time)[0])

    if normalizer is None:
        return discretized
    return list(map(normalizer.transform, discretized))

def collate_fn(batch):
    """Collate variable-length samples into a padded batch plus a padding mask.

    :param batch: sequence of ``(x, y, y_binned, length)`` samples
    :return: ``(padded_x, ys, ys_binned, mask)`` where ``padded_x`` is
        ``[batch, max_len, ...]`` and ``mask`` is a bool ``[batch, max_len]``
        tensor that is True at positions at or beyond each sample's length
    """
    sequences, targets, binned_targets, seq_lengths = zip(*batch)

    # Pad every sequence to the longest one in the batch.
    padded_x = nn.utils.rnn.pad_sequence(sequences, batch_first=True)

    # Compare each time index against the per-sample length.
    lengths = torch.tensor(seq_lengths)
    time_index = torch.arange(padded_x.size(1))
    mask = time_index.unsqueeze(0) >= lengths.unsqueeze(1)

    return padded_x, torch.tensor(targets), torch.tensor(binned_targets), mask

class VarLenDataset(Dataset):
    """Dataset of variable-length series with a raw and a binned label each.

    Items are ``(x, y, y_binned, length)``: ``x`` is a float32 tensor and
    ``length`` is its size along axis 0.
    """

    def __init__(self, data, labels, labels_binned):
        self.data, self.labels, self.labels_binned = data, labels, labels_binned

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sequence = torch.tensor(self.data[idx], dtype=torch.float32)  # convert to tensor
        target = torch.tensor(self.labels[idx])
        binned_target = torch.tensor(self.labels_binned[idx])
        return sequence, target, binned_target, sequence.size(0)

def load_data(reader, discretizer, normalizer, chunk_size=None):
    """Read a chunk from ``reader`` and wrap it in a ``VarLenDataset``.

    :param reader: source handed to ``common_utils.read_chunk``
    :param discretizer: passed through to ``preprocess_chunk``
    :param normalizer: passed through to ``preprocess_chunk``
    :param chunk_size: number of examples to read; None reads
        ``reader.get_number_of_examples()`` examples
    :return: VarLenDataset of processed series, raw labels and 10-bin labels
    """
    n_examples = reader.get_number_of_examples() if chunk_size is None else chunk_size

    chunk = common_utils.read_chunk(reader, n_examples)
    raw_series, end_times, ys = chunk["X"], chunk["t"], chunk["y"]
    _names = chunk["name"]  # read as before, but not used below

    series = preprocess_chunk(raw_series, end_times, discretizer, normalizer)

    # Bucket every raw label with the 10 custom bins.
    binned = [metrics.get_bin_custom(value, 10) for value in ys]

    return VarLenDataset(series, np.array(ys), np.array(binned))


In [9]:
# Reader over the 'test' directory, driven by the test listfile.
test_reader = LengthOfStayReader(
    listfile=os.path.join(data_dir, 'test_listfile.csv'),
    dataset_dir=os.path.join(data_dir, 'test'),
)

### 4. Train SAnD model

In [12]:
# Instantiate the network from the hyper-parameters defined in section 1.
sand = SAnD(
    in_feature,
    n_heads,
    num_class,
    num_layers,
    d_model=d_model,
    dropout_rate=dropout_rate,
)

# Build the model
model = NeuralNetworkClassifier(
    model=sand,
    criterion=nn.CrossEntropyLoss(),
    optimizer=optim.Adam,
    optimizer_config=optimizer_config,
    experiment=None,
)

[34m[1mwandb[0m: [32m[41mERROR[0m Failed to detect the name of this notebook. You can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmillikanevan[0m ([33mmillikanevan-personal[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [12]:
# Train in 20 rounds. Each round asks load_data for 20000 training examples
# (presumably the next chunk from the shuffled reader -- confirm against
# common_utils.read_chunk) and runs model.fit on them.
#
# Validation is currently off. The validation chunk is only loaded when the
# flag is on, so a disabled run no longer reads and preprocesses 1000
# examples per round just to discard them.
use_validation = False

for _round in range(20):
    train_ds = load_data(train_reader, discretizer, normalizer, 20000)
    train_loader = DataLoader(
        train_ds, batch_size=batch_size, shuffle=True, collate_fn=collate_fn, num_workers=2
    )
    loaders = {"train": train_loader}

    if use_validation:
        val_ds = load_data(val_reader, discretizer, normalizer, 1000)
        loaders["val"] = DataLoader(
            val_ds, batch_size=batch_size, shuffle=False, collate_fn=collate_fn, num_workers=2
        )

    model.fit(
        loaders,
        epochs=num_epochs,
        validation=use_validation,
        # data_dir has no trailing separator; add one so checkpoint files are
        # written inside the directory rather than next to it with the
        # directory name glued onto the file name.
        checkpoint_path=os.path.join(data_dir, ''),
    )

[36mTraining[0m - Epochs: 001/001: 100%|██████████| 20000/20000 [26:50<00:00, 12.42it/s]
[36mTraining[0m - Epochs: 001/001: 100%|██████████| 20000/20000 [24:17<00:00, 13.72it/s] 
[36mTraining[0m - Epochs: 001/001: 100%|██████████| 20000/20000 [26:54<00:00, 12.39it/s]
[36mTraining[0m - Epochs: 001/001: 100%|██████████| 20000/20000 [25:12<00:00, 13.23it/s]
[36mTraining[0m - Epochs: 001/001: 100%|██████████| 20000/20000 [24:09<00:00, 13.80it/s]
[36mTraining[0m - Epochs: 001/001: 100%|██████████| 20000/20000 [23:05<00:00, 14.44it/s]
[36mTraining[0m - Epochs: 001/001: 100%|██████████| 20000/20000 [24:51<00:00, 13.41it/s]
[36mTraining[0m - Epochs: 001/001: 100%|██████████| 20000/20000 [25:37<00:00, 13.01it/s] 
[36mTraining[0m - Epochs: 001/001: 100%|██████████| 20000/20000 [24:54<00:00, 13.38it/s]
[36mTraining[0m - Epochs: 001/001: 100%|██████████| 20000/20000 [25:27<00:00, 13.09it/s]
[36mTraining[0m - Epochs: 001/001: 100%|██████████| 20000/20000 [24:19<00:00, 13.70it/

### 5. Test SAnD model predictions

In [14]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    The denominator is ``y_true + 0.1`` so targets equal to zero do not
    divide by zero.

    :param y_true: array-like of ground-truth values
    :param y_pred: array-like of predictions with a broadcast-compatible shape
    :return: ``mean(|y_true - y_pred| / (y_true + 0.1)) * 100``
    """
    # The test cell passes y_pred as a plain list; coerce both sides so the
    # arithmetic is always element-wise (list - list would raise TypeError).
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((y_true - y_pred) / (y_true + 0.1))) * 100

class CustomBins:
    """Bucket boundaries and per-bucket point estimates for length of stay.

    ``bins`` holds half-open ``(low, high)`` edges; ``get_bin_custom`` scales
    them by 24.0 before comparing, so they are presumably days applied to
    labels in hours. ``means[i]`` is the value ``get_estimate_custom``
    returns for bucket ``i``.
    """
    inf = 1e18  # finite stand-in for an unbounded edge

    # Consecutive edges; neighbouring pairs become the (low, high) buckets.
    _edges = [-inf, 1, 2, 3, 4, 5, 6, 7, 8, 14, +inf]
    bins = list(zip(_edges[:-1], _edges[1:]))
    del _edges

    nbins = len(bins)
    means = [
        11.450379,
        35.070846,
        59.206531,
        83.382723,
        107.487817,
        131.579534,
        155.643957,
        179.660558,
        254.306624,
        585.325890,
    ]


def get_bin_custom(x, nbins, one_hot=False):
    """Find the first of the first ``nbins`` CustomBins buckets containing ``x``.

    Bucket edges are multiplied by 24.0 before the half-open test
    ``low <= x < high``.

    :param x: value to bucket
    :param nbins: number of buckets to try, starting from the first
    :param one_hot: if true, return a length-``CustomBins.nbins`` array with a
        1 at the bucket index instead of the index itself
    :return: bucket index, one-hot array, or None when no bucket matches
    """
    for index in range(nbins):
        low, high = CustomBins.bins[index]
        lower = low * 24.0
        upper = high * 24.0
        if not (lower <= x < upper):
            continue
        if not one_hot:
            return index
        encoded = np.zeros((CustomBins.nbins,))
        encoded[index] = 1
        return encoded
    return None


def get_estimate_custom(prediction, nbins):
    """Return the CustomBins mean of the highest-scoring bucket.

    :param prediction: per-bucket scores; the argmax picks the bucket
    :param nbins: exclusive upper bound asserted on the chosen index
    :return: ``CustomBins.means`` entry for the chosen bucket
    """
    best = np.argmax(prediction)
    assert best >= 0 and best < nbins
    return CustomBins.means[best]

def custom_evaluate_model(model, dataloader, criterion, device=device):
    """Run ``model`` over ``dataloader`` and return labels and predictions.

    Each batch must unpack as ``(inputs, labels, labels_binned, pad_mask)``.

    :param model: module called as ``model(inputs, pad_mask)`` returning logits
    :param dataloader: iterable of batches as described above
    :param criterion: unused; kept so existing call sites keep working (a
        per-batch loss used to be computed here and then discarded)
    :param device: device the inputs and mask are moved to (default is the
        notebook-level ``device`` captured when this function was defined)
    :return: ``(y_true, y_true_binned, y_pred, y_pred_binned)`` as numpy
        arrays; ``y_pred`` holds softmax probabilities and ``y_pred_binned``
        their argmax along axis 1
    """
    model.eval()
    label_chunks = []
    binned_chunks = []
    prob_chunks = []

    with torch.no_grad():
        for inputs, labels, labels_binned, pad_mask in dataloader:
            logits = model(inputs.to(device), pad_mask.to(device))
            prob_chunks.append(F.softmax(logits, dim=1).cpu())

            # Labels are only collected for host-side metrics, so they are
            # never moved to the device.
            label_chunks.append(labels.cpu())
            binned_chunks.append(labels_binned.cpu())

    y_true = torch.cat(label_chunks).numpy()
    y_true_binned = torch.cat(binned_chunks).numpy()
    y_pred = torch.cat(prob_chunks).numpy()
    y_pred_binned = np.argmax(y_pred, axis=1)

    return y_true, y_true_binned, y_pred, y_pred_binned

In [15]:
# Evaluate on the test set in equally sized chunks: N // chunk_size rounds of
# chunk_size examples each (525912 == 24 * 21913, so no remainder is dropped).
N = 525912
chunk_size = 21913
y_true = []
y_true_binned = []
y_pred = []
y_pred_binned = []
for _chunk_idx in tqdm.tqdm(range(N // chunk_size)):
    test_ds = load_data(test_reader, discretizer, normalizer, chunk_size)
    test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, collate_fn=collate_fn, num_workers=2)
    _y_true, _y_true_binned, _y_pred, _y_pred_binned = custom_evaluate_model(model.model, test_loader, model.criterion)
    y_true.append(_y_true)
    y_true_binned.append(_y_true_binned)
    y_pred.append(_y_pred)
    y_pred_binned.append(_y_pred_binned)

# np.concatenate works on every NumPy release; the np.concat alias only
# exists from NumPy 2.0 onwards.
y_true = np.concatenate(y_true)
y_true_binned = np.concatenate(y_true_binned)
y_pred = np.concatenate(y_pred)
y_pred_binned = np.concatenate(y_pred_binned)

# Agreement between true and predicted buckets.
kappa = cohen_kappa_score(y_true_binned, y_pred_binned, weights="linear")

# Map each predicted bucket to its mean for the regression metrics.
y_pred_means = [get_estimate_custom(pred_prob, CustomBins.nbins) for pred_prob in y_pred]
# regression metrics (same as print_metrics_regression)
mad = mean_absolute_error(y_true, y_pred_means)
mse = mean_squared_error(y_true, y_pred_means)
mape = mean_absolute_percentage_error(y_true, y_pred_means)
    

100%|██████████| 24/24 [1:20:44<00:00, 201.86s/it]


In [16]:
# Record the test metrics on the training run, then close the run.
test_metrics = {
    'test_kappa' : kappa,
    "test_MSE" : mse,
    'test_MAPE' : mape,
}
model.run.log(test_metrics)
model.run.finish()

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
test_MAPE,▁
test_MSE,▁
test_kappa,▁

0,1
test_MAPE,118.25639
test_MSE,57298.02809
test_kappa,0.0


In [17]:
# data_dir has no trailing separator; add one so the checkpoint is written
# inside the directory rather than next to it with the directory name glued
# onto the file name.
model.save_to_file(os.path.join(data_dir, ''))

'/search-data/evan/length_of_stay_model/model_params-epochs_0-Thu_Nov_20_16:18:51_2025.pth'

### 6. Compare with paper baseline

In [None]:
# Log the baseline figures as their own run in the same project, so they can
# be viewed next to the test metrics logged above.
run = wandb.init(
    project='sand-mimic3',
    name='length-of-stay-baseline',
    config={"task" : "length_of_stay"},
)
baseline_metrics = {
    'test_kappa' : 0.429,
    "test_MSE" : 40373,
    'test_MAPE' : 167.3,
}
run.log(baseline_metrics)
run.finish()