# SAnD model predictions for Phenotyping

### 0. Read in libraries

In [1]:
# Standard library
import math
import os
import sys
import time
from copy import deepcopy
from pathlib import Path
from typing import Dict

# Third-party
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import confusion_matrix, roc_auc_score
from sklearn.preprocessing import label_binarize
from torch.utils.data import DataLoader, TensorDataset, Dataset
import tqdm
import wandb

cur_path = Path(".").resolve()
base_path = cur_path.parents[1]

print(f"Current File Path: {cur_path}")
print(f"Base Path: {base_path}")

os.chdir(str(base_path))
sys.path.append(str(base_path))

# Local modules
from mimic3benchmark.readers import PhenotypingReader
from mimic3models import common_utils
from mimic3models.phenotyping import utils
from mimic3models.preprocessing import Discretizer, Normalizer

Current File Path: /home/jovyan/mimic3-sand/mimic3models/phenotyping
Base Path: /home/jovyan/mimic3-sand


In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [5]:
data_dir = os.getcwd() + '/data/phenotyping'

### 1. Define model parameters

In [13]:
# Define model parameters
in_feature = 76
seq_len = 300
n_heads = 8 # Number of heads for multi-head attention layer: Should be fixed at 8
factor = 120 # Dense interpolation factor (M): This depends on the task at hand
num_class = 25 # Number of output class
num_layers = 2 # Number of multi-head attention layers (N): This depends on the task at hand
d_model = 256 # Original 256
dropout_rate = 0.4
lookback = 96
optimizer_config = {
    'lr' : 0.0005,
    'betas' : (0.9, 0.98),
    'eps' : 1e-08,
}
num_epochs = 40
batch_size = 128 # Original 128

### 2. Define model architecture

In [3]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, seq_len) -> None:
        super(PositionalEncoding, self).__init__()
        self.d_model = d_model

        pe = torch.zeros(seq_len, d_model)

        for pos in range(seq_len):
            for i in range(0, d_model, 2):
                pe[pos, i] = math.sin(pos / (10000 ** ((2 * i) / d_model)))
                pe[pos, i+1] = math.cos(pos / (10000 ** ((2 * (i+1)) / d_model)))

        pe = pe.unsqueeze(0)
        self.register_buffer("pe", pe)

    def forward(self, x) -> torch.Tensor:
        seq_len = x.shape[1]
        x = math.sqrt(self.d_model) * x
        x = x + self.pe[:, :seq_len].requires_grad_(False)
        return x

class ResidualBlock(nn.Module):
    def __init__(self, layer: nn.Module, embed_dim: int, seq_len, lookback=None, p=0.1) -> None:
        super(ResidualBlock, self).__init__()
        self.layer = layer
        self.dropout = nn.Dropout(p=p)
        self.norm = nn.LayerNorm(embed_dim)
        self.attn_weights = None
        self.seq_len = seq_len
        self.lookback = lookback

        # Build Attention Window Causal Mask
        with torch.no_grad():
            idx = torch.arange(seq_len)
            i = idx.unsqueeze(1)
            j = idx.unsqueeze(0)
            
            future_mask = j > i
            if lookback is None:
                invalid_attention_mask = torch.zeros_like(future_mask)
            else:
                invalid_attention_mask = j < (i - lookback)
            
            # Set mask to 1 for all values that we shouldn't be paying attention to
            combined_mask = future_mask | invalid_attention_mask
            combined_mask = combined_mask.to(torch.bool)
            
        self.register_buffer("attn_mask", combined_mask)

    def forward(self, x: torch.Tensor, pad_mask = None) -> torch.Tensor:
        """
        :param x: [N, seq_len, features]
        :return: [N, seq_len, features]
        """
        if isinstance(self.layer, nn.MultiheadAttention):
            src = x.transpose(0, 1)     # [seq_len, N, features]
            
            # Combine the padding mask and lookback mask into a single attention mask.
            # In some cases, a query may be completely masked out by the combination of lookback and padding,
            # which would result in all -inf attention scores and produce NaNs after softmax.
            # To prevent this, we always allow each query to attend to itself by unmasking the diagonal.
            mask = self.attn_mask.to(device)
            mask = mask.unsqueeze(0).expand(x.shape[0] * self.layer.num_heads, -1, -1)
            pad_mask = pad_mask.repeat_interleave(self.layer.num_heads, dim=0)
            final_mask = mask + pad_mask.unsqueeze(1)
            diagonal = torch.arange(self.seq_len, device=device)
            final_mask[:, diagonal, diagonal] = False

            output, self.attn_weights = self.layer(src, src, src, attn_mask=final_mask)
            output = output.transpose(0, 1)     # [N, seq_len, features]

        else:
            output = self.layer(x)

        output = self.dropout(output)
        output = self.norm(x + output)
        return output

class PositionWiseFeedForward(nn.Module):
    def __init__(self, hidden_size: int) -> None:
        super(PositionWiseFeedForward, self).__init__()
        self.hidden_size = hidden_size

        self.conv = nn.Sequential(
            nn.Conv1d(hidden_size, hidden_size * 2, 1),
            nn.ReLU(),
            nn.Conv1d(hidden_size * 2, hidden_size, 1)
        )

    def forward(self, tensor: torch.Tensor) -> torch.Tensor:
        tensor = tensor.transpose(1, 2)
        tensor = self.conv(tensor)
        tensor = tensor.transpose(1, 2)

        return tensor

class EncoderBlock(nn.Module):
    def __init__(self, embed_dim: int, num_head: int, seq_len, lookback=None, dropout_rate=0.1) -> None:
        super(EncoderBlock, self).__init__()
        self.attention = ResidualBlock(
            nn.MultiheadAttention(embed_dim, num_head), embed_dim, seq_len, lookback, p=dropout_rate
        )
        self.ffn = ResidualBlock(PositionWiseFeedForward(embed_dim), embed_dim, seq_len, lookback, p=dropout_rate)

    def forward(self, x: torch.Tensor, pad_mask) -> torch.Tensor:
        x = self.attention(x, pad_mask)
        x = self.ffn(x)
        return x
    
class DenseInterpolation(nn.Module):
    def __init__(self, seq_len: int, factor: int) -> None:
        """
        :param seq_len: sequence length
        :param factor: factor M
        """
        super(DenseInterpolation, self).__init__()

        s = torch.linspace(factor / seq_len, factor, steps=seq_len)
        m = torch.arange(1, factor + 1).unsqueeze(1)
        
        tmp = 1 - torch.abs(s - m) / factor
        w = tmp.clamp(min=0).pow(2)

        W = w.unsqueeze(0)
        self.register_buffer("W", W)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        w = self.W.expand(x.size(0), -1, -1)
        u = torch.bmm(w, x)
        return u.transpose(1, 2)

class ClassificationModule(nn.Module):
    def __init__(self, d_model: int, factor: int, num_class: int) -> None:
        super(ClassificationModule, self).__init__()
        self.d_model = d_model
        self.factor = factor
        self.num_class = num_class

        self.fc = nn.Linear(int(d_model * factor), num_class)

        nn.init.normal_(self.fc.weight, std=0.02)
        nn.init.normal_(self.fc.bias, 0)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = x.contiguous().view(-1, int(self.factor * self.d_model))
        x = self.fc(x)
        return x

class RegressionModule(nn.Module):
    def __init__(self, d_model: int, factor: int, output_size: int) -> None:
        super(RegressionModule, self).__init__()
        self.d_model = d_model
        self.factor = factor
        self.output_size = output_size
        self.fc = nn.Linear(int(d_model * factor), output_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = x.contiguous().view(-1, int(self.factor * self.d_model))
        x = self.fc(x)
        return x


In [4]:
class EncoderLayerForSAnD(nn.Module):
    def __init__(self, input_features, seq_len, n_heads, n_layers, d_model=128, lookback=None, dropout_rate=0.2) -> None:
        super(EncoderLayerForSAnD, self).__init__()
        self.d_model = d_model

        self.input_embedding = nn.Conv1d(input_features, d_model, 1)
        self.positional_encoding = PositionalEncoding(d_model, seq_len)
        self.blocks = nn.ModuleList([
            EncoderBlock(d_model, n_heads, seq_len, lookback, dropout_rate) for _ in range(n_layers)
        ])

    def forward(self, x: torch.Tensor, pad_mask) -> torch.Tensor:
        x = x.transpose(1, 2)
        x = self.input_embedding(x)
        x = x.transpose(1, 2)

        x = self.positional_encoding(x)

        for l in self.blocks:
            x = l(x, pad_mask)

        return x

class SAnD(nn.Module):
    """
    Simply Attend and Diagnose model

    The Thirty-Second AAAI Conference on Artificial Intelligence (AAAI-18)

    `Attend and Diagnose: Clinical Time Series Analysis Using Attention Models <https://arxiv.org/abs/1711.03905>`_
    Huan Song, Deepta Rajan, Jayaraman J. Thiagarajan, Andreas Spanias
    """
    def __init__(
            self, input_features: int, seq_len: int, n_heads: int, factor: int,
            n_class: int, n_layers: int, d_model: int = 128, lookback=None, dropout_rate: float = 0.2
    ) -> None:
        super(SAnD, self).__init__()
        
        self.hyperparams = {
             "input_features" : input_features,
            "seq_len" : seq_len,
            "n_heads" : n_heads, 
            "factor" : factor,
            "n_class" : n_class,
            "n_layers" : n_layers,
            "d_model" : d_model,
            "lookback" : lookback,
            "dropout_rate" : dropout_rate
        }
        
        self.encoder = EncoderLayerForSAnD(input_features, seq_len, n_heads, n_layers, d_model, lookback, dropout_rate)
        self.dense_interpolation = DenseInterpolation(seq_len, factor)
        self.clf = ClassificationModule(d_model, factor, n_class)

    def forward(self, x: torch.Tensor, pad_mask) -> torch.Tensor:
        x = self.encoder(x, pad_mask)
        x = self.dense_interpolation(x)
        x = self.clf(x)
        return x


In [5]:
def evaluate_model(model, dataloader, criterion=None, device=device, num_classes=25):
    model.eval()
    all_labels = []
    all_probs = []

    total_loss = 0.0
    with torch.no_grad():
        for inputs, labels, pad_mask in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            logits = model(inputs, pad_mask)  # shape: [batch_size, num_classes]
            loss = criterion(logits, labels)
            
            probs = F.softmax(logits, dim=1)  # probabilities for all classes

            all_labels.append(labels.cpu())
            all_probs.append(probs.cpu())
            
            total_loss += loss.cpu().item()

    all_labels = torch.cat(all_labels).numpy()
    all_probs = torch.cat(all_probs).numpy()

    # Multi-class classification
    all_labels_bin = label_binarize(all_labels, classes=list(range(num_classes)))
    auc_micro = roc_auc_score(all_labels_bin, all_probs, average='micro', multi_class='ovr')
    auc_macro = roc_auc_score(all_labels_bin, all_probs, average='macro', multi_class='ovr')
    auc_weighted = roc_auc_score(all_labels_bin, all_probs, average='weighted', multi_class='ovr')
    return {
        "loss" : total_loss,
        "AUROC_micro": auc_micro,
        "AUROC_macro": auc_macro,
        "AUROC_weighted": auc_weighted
    }

In [6]:
class NeuralNetworkClassifier:
    def __init__(self, model, criterion, optimizer, optimizer_config: dict, experiment) -> None:
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = model.to(self.device)
        self.optimizer = optimizer(self.model.parameters(), **optimizer_config)
        self.criterion = criterion

        self.hyper_params = optimizer_config
        self._start_epoch = 0
        self.hyper_params["epochs"] = self._start_epoch
        self.__num_classes = None
        self._is_parallel = False
        
        self.run = wandb.init(
            project='sand-mimic3',
            config={
                "task" : "phenotype",
                "hyperparams" : self.model.hyperparams
            }
        )

        if torch.cuda.device_count() > 1:
            self.model = nn.DataParallel(self.model)
            self._is_parallel = True

            notice = "Running on {} GPUs.".format(torch.cuda.device_count())
            print("\033[33m" + notice + "\033[0m")
            
            

    def fit(self, loader: Dict[str, DataLoader], epochs: int, checkpoint_path: str = None, validation: bool = True) -> None:
        len_of_train_dataset = len(loader["train"].dataset)
        epochs = epochs + self._start_epoch

        self.hyper_params["epochs"] = epochs
        self.hyper_params["batch_size"] = loader["train"].batch_size
        self.hyper_params["train_ds_size"] = len_of_train_dataset
        
        best_model = None
        best_val_loss = float('inf')

        if validation:
            len_of_val_dataset = len(loader["val"].dataset)
            self.hyper_params["val_ds_size"] = len_of_val_dataset

        for epoch in range(self._start_epoch, epochs):
            if checkpoint_path is not None and epoch % 10 == 0:
                self.save_to_file(checkpoint_path)

            correct = 0.0
            total = 0.0

            self.model.train()
            pbar = tqdm.tqdm(total=len_of_train_dataset)
            total_loss = 0
            for x, y, pad_mask in loader["train"]:
                b_size = y.shape[0]
                total += y.shape[0]
                x = x.to(self.device) if isinstance(x, torch.Tensor) else [i.to(self.device) for i in x]
                y = y.to(self.device)
                pad_mask = pad_mask.to(self.device)

                pbar.set_description(
                    "\033[36m" + "Training" + "\033[0m" + " - Epochs: {:03d}/{:03d}".format(epoch+1, epochs)
                )
                pbar.update(b_size)

                self.optimizer.zero_grad()
                outputs = self.model(x, pad_mask)
                loss = self.criterion(outputs, y)
                loss.backward()
                self.optimizer.step()

                _, predicted = torch.max(outputs, 1)
                correct += (predicted == y).sum().float().cpu().item()

                total_loss += loss.cpu().item()

            total_loss = total_loss / len(loader['train'])

            if validation:
                with torch.no_grad():
                    self.model.eval()
                    eval_result = evaluate_model(self.model, loader['val'], criterion=self.criterion)

                    if eval_result['loss'] < best_val_loss:
                        best_val_loss = eval_result['loss']
                        state = self.model.module.state_dict() if self._is_parallel else self.model.state_dict()
                        best_model = deepcopy(state)

            if validation:
                self.run.log({
                    'train_loss' : total_loss, 
                    'train_accuracy' : float(correct / total), 
                    'val_loss' : eval_result['loss'], 
                    'val_AUROC_micro' : eval_result['AUROC_micro'],
                    "val_AUROC_macro" : eval_result['AUROC_macro'],
                    'val_AUROC_weighted' : eval_result['AUROC_weighted']
                })
            else:
                self.run.log({
                    'train_loss' : total_loss, 
                    'train_accuracy' : float(correct / total)
                })

            pbar.close()

        if best_model is not None:
            if self._is_parallel:
                self.model.module.load_state_dict(best_model)
            else:
                self.model.load_state_dict(best_model)
        
        if checkpoint_path is not None:
            self.save_to_file(checkpoint_path)

    def save_checkpoint(self) -> dict:
        """
        The method of saving trained PyTorch model.

        Note,  return value contains
            - the number of last epoch as `epochs`
            - optimizer state as `optimizer_state_dict`
            - model state as `model_state_dict`

        ::

            clf = NeuralNetworkClassifier(
                    Network(), nn.CrossEntropyLoss(),
                    optim.Adam, optimizer_config, experiment
                )

            clf.fit(train_loader, epochs=10)
            checkpoints = clf.save_checkpoint()

        :return: dict {'epoch', 'optimizer_state_dict', 'model_state_dict'}
        """

        checkpoints = {
            "epoch": deepcopy(self.hyper_params["epochs"]),
            "optimizer_state_dict": deepcopy(self.optimizer.state_dict())
        }

        if self._is_parallel:
            checkpoints["model_state_dict"] = deepcopy(self.model.module.state_dict())
        else:
            checkpoints["model_state_dict"] = deepcopy(self.model.state_dict())

        return checkpoints

    def save_to_file(self, path: str) -> str:
        """
        | The method of saving trained PyTorch model to file.
        | Those weights are uploaded to comet.ml as backup.
        | check "Asserts".

        Note, .pth file contains
            - the number of last epoch as `epochs`
            - optimizer state as `optimizer_state_dict`
            - model state as `model_state_dict`

        ::

            clf = NeuralNetworkClassifier(
                    Network(), nn.CrossEntropyLoss(),
                    optim.Adam, optimizer_config, experiment
                )

            clf.fit(train_loader, epochs=10)
            filename = clf.save_to_file('path/to/save/dir/')

        :param path: path to saving directory. : string
        :return: path to file : string
        """
        if not os.path.isdir(path):
            os.mkdir(path)

        file_name = "model_params-epochs_{}-{}.pth".format(
            self.hyper_params["epochs"], time.ctime().replace(" ", "_")
        )
        path = path + file_name

        checkpoints = self.save_checkpoint()

        torch.save(checkpoints, path)

        return path

    def restore_checkpoint(self, checkpoints: dict) -> None:
        """
        The method of loading trained PyTorch model.

        :param checkpoints: dictionary which contains {'epoch', 'optimizer_state_dict', 'model_state_dict'}
        :return: None
        """
        self._start_epoch = checkpoints["epoch"]
        if not isinstance(self._start_epoch, int):
            raise TypeError

        if self._is_parallel:
            self.model.module.load_state_dict(checkpoints["model_state_dict"])
        else:
            self.model.load_state_dict(checkpoints["model_state_dict"])

        self.optimizer.load_state_dict(checkpoints["optimizer_state_dict"])

    def restore_from_file(self, path: str, map_location: str = "cpu") -> None:
        """
        The method of loading trained PyTorch model from file.

        ::

            clf = NeuralNetworkClassifier(
                    Network(), nn.CrossEntropyLoss(),
                    optim.Adam, optimizer_config, experiment
                )
            clf.restore_from_file('path/to/trained/weights.pth')

        :param path: path to saved directory. : str
        :param map_location: default cpu: str
        :return: None
        """
        checkpoints = torch.load(path, map_location=map_location)
        self.restore_checkpoint(checkpoints)



### 3. Data load-in

In [7]:
# Args Values (Hardcoded)
timestep = 1.0
normalizer_state = None
imputation = 'previous'

train_reader = PhenotypingReader(
    dataset_dir=os.path.join(data_dir, 'train'),
    listfile=os.path.join(data_dir, 'train_listfile.csv'),
)

val_reader = PhenotypingReader(
    dataset_dir=os.path.join(data_dir, 'train'),
    listfile=os.path.join(data_dir, 'val_listfile.csv'),
)

discretizer = Discretizer(
    timestep=float(timestep),
    store_masks=True,
    impute_strategy='previous',
    start_time='zero'
)

discretizer_header = discretizer.transform(train_reader.read_example(0)["X"])[1].split(',')
cont_channels = [i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1]

normalizer = Normalizer(fields=cont_channels)  # choose here which columns to standardize
normalizer_state = normalizer_state
if normalizer_state is None:
    normalizer_state = 'ph_ts{}.input_str-previous.start_time-zero.normalizer'.format(timestep)
    normalizer_state = os.path.join(cur_path, normalizer_state)
normalizer.load_params(normalizer_state)


In [8]:
def load_data(reader, discretizer, normalizer, max_len, small_part=False):
    N = reader.get_number_of_examples()
    if small_part:
        N = 1000

    ret = common_utils.read_chunk(reader, N)
    data = ret["X"]
    ts = ret["t"]
    ys = ret["y"]
    names = ret["name"]

    # Apply discretizer and normalizer
    data = [discretizer.transform(X, end=t)[0] for (X, t) in zip(data, ts)]
    if normalizer is not None:
        data = [normalizer.transform(X) for X in data]

    # Pad sequences so they all have the same length
    in_feat = data[0].shape[1]
    data_padded = np.zeros((len(data), max_len, in_feat), dtype=np.float32)
    mask = np.zeros((len(data), max_len), dtype=np.bool_)
    for i, x in enumerate(data):
        data_padded[i, :x.shape[0], :] = x[-max_len:].astype(np.float32)
        mask[i, x.shape[0]:] = True

    # Convert labels to array
    ys = np.array(ys, dtype=np.int32)
    ys_int = np.argmax(ys, axis=1)
    
    X = torch.tensor(data_padded, dtype=torch.float32)
    y = torch.tensor(ys_int, dtype=torch.long)
    mask = torch.tensor(mask, dtype=torch.bool)
    

    return (X, y, mask)

In [9]:
small_part = False

train_x, train_y, train_mask = load_data(train_reader, discretizer, normalizer, max_len=300, small_part=small_part)
val_x, val_y, val_mask = load_data(val_reader, discretizer, normalizer, max_len=300, small_part=small_part)

train_ds = TensorDataset(train_x, train_y, train_mask)
val_ds = TensorDataset(val_x, val_y, val_mask)

In [10]:
test_reader = PhenotypingReader(
    dataset_dir=os.path.join(data_dir, 'test'),
    listfile=os.path.join(data_dir, 'test_listfile.csv')
)
test_x, test_y, test_mask = load_data(
    test_reader, 
    discretizer, 
    normalizer, 
    max_len=300,
    small_part=small_part
)

test_ds = TensorDataset(test_x, test_y, test_mask)


In [11]:
torch.save(train_x, f"{data_dir}/phenotype_train_x.pt")
torch.save(train_y, f"{data_dir}/phenotype_train_y.pt")
torch.save(train_mask, f"{data_dir}/phenotype_train_mask.pt")
torch.save(val_x,   f"{data_dir}/phenotype_val_x.pt")
torch.save(val_y,   f"{data_dir}/phenotype_val_y.pt")
torch.save(val_mask,   f"{data_dir}/phenotype_val_mask.pt")
torch.save(test_x, f"{data_dir}/phenotype_test_x.pt")
torch.save(test_y, f"{data_dir}/phenotype_test_y.pt")
torch.save(test_mask, f"{data_dir}/phenotype_test_mask.pt")

### 4. Train SAnD model

In [13]:
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=4)

In [14]:
sand = SAnD(
    input_features = in_feature, 
    seq_len = seq_len, 
    n_heads = n_heads, 
    factor = factor,        
    n_class = num_class,
    n_layers = num_layers, 
    d_model = d_model, 
    lookback = lookback, 
    dropout_rate = dropout_rate
)

# Build the model
model = NeuralNetworkClassifier(
    sand,
    nn.CrossEntropyLoss(),
    optim.Adam,
    optimizer_config=optimizer_config,
    experiment=None
)

[34m[1mwandb[0m: [32m[41mERROR[0m Failed to detect the name of this notebook. You can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmillikanevan[0m ([33mmillikanevan-personal[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
model.fit(
    {"train": train_loader,
     "val": val_loader},
    epochs = num_epochs,
    checkpoint_path=data_dir
)


[36mTraining[0m - Epochs: 001/040: 100%|██████████| 29250/29250 [07:38<00:00, 63.76it/s]
[36mTraining[0m - Epochs: 002/040: 100%|██████████| 29250/29250 [07:10<00:00, 67.89it/s]
[36mTraining[0m - Epochs: 003/040: 100%|██████████| 29250/29250 [07:05<00:00, 68.73it/s]
[36mTraining[0m - Epochs: 004/040: 100%|██████████| 29250/29250 [07:07<00:00, 68.42it/s]
[36mTraining[0m - Epochs: 005/040: 100%|██████████| 29250/29250 [07:17<00:00, 66.89it/s]
[36mTraining[0m - Epochs: 006/040: 100%|██████████| 29250/29250 [07:21<00:00, 66.24it/s]
[36mTraining[0m - Epochs: 007/040: 100%|██████████| 29250/29250 [07:20<00:00, 66.44it/s]
[36mTraining[0m - Epochs: 008/040: 100%|██████████| 29250/29250 [07:20<00:00, 66.36it/s]
[36mTraining[0m - Epochs: 009/040: 100%|██████████| 29250/29250 [07:21<00:00, 66.31it/s]
[36mTraining[0m - Epochs: 010/040: 100%|██████████| 29250/29250 [07:18<00:00, 66.77it/s]
[36mTraining[0m - Epochs: 011/040: 100%|██████████| 29250/29250 [07:20<00:00, 66.47it/s]

### 5. Test SAnD model predictions

In [None]:
test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)
test_result = evaluate_model(model.model, test_loader, model.criterion)

In [None]:
model.run.log({
    'test_AUROC_micro' : test_result['AUROC_micro'],
    "test_AUROC_macro" : test_result['AUROC_macro'],
    'test_AUROC_weighted' : test_result['AUROC_weighted']
})
model.run.finish()

In [None]:
model.save_to_file(data_dir)

### 6. Compare with paper baseline

In [16]:
run = wandb.init(
    project='sand-mimic3',
    config={
        "task" : "phenotype",
    },
    name='phenotyping-baseline'
)
run.log({
    'test_AUROC_micro' : 0.816,
    "test_AUROC_macro" : 0.766,
    'test_AUROC_weighted' : 0.754
})
run.finish()

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
train_accuracy,▁▁▁▂▂▂▂▂▃▃▃▄▅▅▆▆▇▇██
train_loss,█▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_AUROC_macro,▁▁▁▂▁▄▄▄▃▄▃▂▄▃▅▅▃▄█▆
val_AUROC_micro,▁▅▆▇▇▇█▅▆▇▅▃▃▅▇▃▄██▇
val_AUROC_weighted,▁▂▃▃▄▅▅▄▃▄▃▁▃▄▅▅▃▆█▆
val_loss,█▅▄▃▃▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁

0,1
train_accuracy,0.31682
train_loss,2.44181
val_AUROC_macro,0.60217
val_AUROC_micro,0.73259
val_AUROC_weighted,0.60625
val_loss,286.17506


0,1
test_AUROC_macro,▁
test_AUROC_micro,▁
test_AUROC_weighted,▁

0,1
test_AUROC_macro,0.766
test_AUROC_micro,0.816
test_AUROC_weighted,0.754
