# Healthcare No Show Modeling

- Train an ML model to predict appointment no-shows
- Model analysis

In [1]:
import sys
sys.path.append("../..")  # add src to environment path so that custom modules can be found

import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

## Load dataset

In [2]:
# Index of the dataset file to load (presumably the cross-validation split -- confirm).
idx_cross_val = 0

# NOTE(review): weights_only=False makes torch.load unpickle arbitrary Python objects,
# so this file must come from a trusted source.
full_dataset = torch.load(
    f"../../data/healthcare_no_show/healthcare_datasets_{idx_cross_val}.pt",
    weights_only=False,
)
train_dataset, val_dataset = full_dataset["train_dataset"], full_dataset["val_dataset"]

# Validation batches keep a fixed order; training batches are reshuffled on every pass.
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)

## Define model

In [None]:
class TransformerClassifier(nn.Module):
    """A transformer-based classifier for fixed-size feature vectors.

    Each sample is a single feature vector. It is linearly projected to ``d_model``
    dimensions, layer-normalised, passed through a transformer encoder as a sequence
    of length one, and finally mapped to ``num_classes`` logits.

    Args:
        input_dim (int): Dimension of input features
        num_classes (int): Number of output classes
        d_model (int, optional): Dimension of transformer model. Defaults to 512.
        nhead (int, optional): Number of attention heads. Defaults to 8.
        num_encoder_layers (int, optional): Number of transformer encoder layers. Defaults to 3.
        dim_feedforward (int, optional): Dimension of feedforward network. Defaults to 2048.
        dropout (float, optional): Dropout rate. Defaults to 0.1.
    """

    def __init__(
            self,
            input_dim: int,
            num_classes: int,
            d_model: int = 512,
            nhead: int = 8,
            num_encoder_layers: int = 3,
            dim_feedforward: int = 2048,
            dropout: float = 0.1
    ) -> None:
        super().__init__()

        # Input projection layer
        self.input_projection = nn.Linear(input_dim, d_model)
        # NOTE: despite the "bn" prefix this is a LayerNorm. The attribute name is kept
        # because it is part of the state_dict keys of already saved checkpoints.
        self.bn_input = nn.LayerNorm(d_model)

        # Transformer encoder
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layer,
            num_layers=num_encoder_layers
        )

        # Output classifier
        self.classifier = nn.Linear(d_model, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Forward pass of the model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, input_dim)

        Returns:
            torch.Tensor: Logits of shape (batch_size, num_classes)
        """
        # Project input to d_model dimensions
        x = self.input_projection(x)
        x = self.bn_input(x)

        # Apply transformer encoder on a length-one sequence: (batch, 1, d_model)
        x = self.transformer_encoder(x.unsqueeze(1))

        # Drop only the sequence axis. A bare squeeze() would also remove the batch
        # axis for a batch of size one and break the (batch, num_classes) contract.
        output = self.classifier(x.squeeze(1))
        return output

## Hyperparameters and functions initialization

In [10]:
# Binary cross-entropy computed directly on raw logits.
criterion = nn.BCEWithLogitsLoss()
# Run on the GPU when PyTorch can see one, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [11]:
# Experiment id; it is embedded in the TensorBoard run directory name together
# with the dataset index.
exp = 3
log_dir = f"../../runs/healthcare_no_show_exp{exp}_data{idx_cross_val}"
writer = SummaryWriter(log_dir)

In [12]:
# Seed the RNG before the model is constructed.
torch.manual_seed(1234)

# A single output logit; the input width is read off the first training sample.
n_classes = 1
n_features = train_dataset[0][0].shape[0]

model = TransformerClassifier(
    input_dim=n_features,
    num_classes=n_classes,
    num_encoder_layers=3,
)
model = model.to(device)

In [13]:
# Experiment id -> (optimizer class, weight decay). Every experiment uses lr=1e-3.
optimizer_setups = {
    0: (torch.optim.Adam, 1e-2),   # exp0
    1: (torch.optim.Adam, 1e-4),   # exp1
    2: (torch.optim.AdamW, 1e-2),  # exp2
    3: (torch.optim.AdamW, 1e-4),  # exp3
}
if exp not in optimizer_setups:
    raise ValueError(f"Unknown experiment {exp}")

optimizer_cls, weight_decay = optimizer_setups[exp]
optimizer = optimizer_cls(model.parameters(), lr=1e-3, weight_decay=weight_decay)

# Scale the learning rate by 0.1 at scheduler steps 30, 60 and 90.
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30, 60, 90], gamma=0.1)

## Train model

In [14]:
num_epochs = 100
batches_per_epoch = len(train_loader)

for epoch in range(num_epochs):
    # ---- training pass: one optimizer step per batch ----
    model.train()
    running_loss = 0.0
    # Start the counter at this epoch's first global iteration so the TensorBoard
    # x-axis for "Loss/train" keeps increasing across epochs.
    for global_step, (batch_x, batch_y) in enumerate(train_loader, start=epoch * batches_per_epoch):
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        logits = model(batch_x)
        # Labels get a trailing axis so they line up with the model output.
        batch_loss = criterion(logits, batch_y.unsqueeze(1))
        batch_loss.backward()
        optimizer.step()

        loss_value = batch_loss.item()
        running_loss += loss_value
        writer.add_scalar("Loss/train", loss_value, global_step)

    # The learning-rate schedule advances once per epoch.
    scheduler.step()

    # ---- validation pass: no gradients, summed per-batch mean losses ----
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for batch_x, batch_y in val_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            logits = model(batch_x)
            val_loss += criterion(logits, batch_y.unsqueeze(1)).item()

    # Validation loss is logged per epoch (not per iteration).
    writer.add_scalar("Loss/val", val_loss / len(val_loader), epoch)

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Val Loss: {val_loss/len(val_loader):.4f}")

Epoch [1/100], Loss: 0.5037, Val Loss: 0.4748
Epoch [2/100], Loss: 0.4589, Val Loss: 0.4854
Epoch [3/100], Loss: 0.4602, Val Loss: 0.4598
Epoch [4/100], Loss: 0.4692, Val Loss: 0.4695
Epoch [5/100], Loss: 0.4641, Val Loss: 0.4723
Epoch [6/100], Loss: 0.4706, Val Loss: 0.4756
Epoch [7/100], Loss: 0.4658, Val Loss: 0.4704
Epoch [8/100], Loss: 0.4658, Val Loss: 0.4791
Epoch [9/100], Loss: 0.4646, Val Loss: 0.4627
Epoch [10/100], Loss: 0.4584, Val Loss: 0.4664
Epoch [11/100], Loss: 0.4598, Val Loss: 0.4690
Epoch [12/100], Loss: 0.4664, Val Loss: 0.4693
Epoch [13/100], Loss: 0.4917, Val Loss: 0.4960
Epoch [14/100], Loss: 0.4927, Val Loss: 0.4964
Epoch [15/100], Loss: 0.4703, Val Loss: 0.4764
Epoch [16/100], Loss: 0.4672, Val Loss: 0.4708
Epoch [17/100], Loss: 0.4672, Val Loss: 0.4709
Epoch [18/100], Loss: 0.4672, Val Loss: 0.4702
Epoch [19/100], Loss: 0.4672, Val Loss: 0.4709
Epoch [20/100], Loss: 0.4671, Val Loss: 0.4705
Epoch [21/100], Loss: 0.4670, Val Loss: 0.4700
Epoch [22/100], Loss: 

## Compute accuracy

In [15]:
# Collect, per validation batch, a boolean array marking which predictions match the labels.
accuracies = []
model.eval()
with torch.no_grad():
    for features, labels in val_loader:
        features = features.to(device)
        outputs = model(features)
        # Flatten to one probability per sample (the model emits a single logit per
        # sample, n_classes = 1). An explicit reshape keeps the sample axis even for
        # a batch of size one, unlike a bare squeeze().
        predictions = torch.sigmoid(outputs.reshape(-1)).cpu().numpy()
        # Threshold at 0.5 and compare with the labels, which are read on the CPU.
        accuracies.append((predictions > 0.5) == labels.numpy())

# np.concatenate exists in every NumPy release; the np.concat alias was only added in NumPy 2.0.
print(f"Validation Accuracy: {np.concatenate(accuracies, axis=0).mean():.4f}")

Validation Accuracy: 0.7960


Results (accN is the validation accuracy on dataset N, i.e. with `idx_cross_val = N`; `?` marks runs not yet done):
| exp | opt type | weight decay | acc0 | acc1 | acc2 | avg acc |
|----|----|----|----|----|----|----|
| 0 | Adam | 1e-2 | 0.7955 | ? | ? | ? |
| 1 | Adam | 1e-4 | 0.7960 | ? | ? | ? |
| 2 | AdamW | 1e-2 | 0.7960 | ? | ? | ? |
| 3 | AdamW | 1e-4 | 0.7960 | ? | ? | ? |

## Store model

In [None]:
import os

# Persist only the model weights (state_dict), tagged with the experiment id and dataset index.
model_path = f"../../models/healthcare_no_show/transformer_classifier_exp{exp}_data{idx_cross_val}.pth"
# torch.save does not create missing parent directories; create the target folder first
# so a finished training run is not lost on a fresh checkout.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
torch.save(model.state_dict(), model_path)