# Healthcare No Show Modeling

- Train a ML model to forecast no show
- Model analysis

In [17]:
import sys
sys.path.append("../..")  # add src to environment path so that custom modules can be found

import numpy as np
from sklearn.metrics import precision_recall_fscore_support

import torch
from torch import nn
from torch.utils.data import DataLoader, WeightedRandomSampler
from torch.utils.tensorboard import SummaryWriter

from src.models.logistic_regression import LogisticRegressionClassifier
from src.models.mlp import MLPClassifier
from src.models.fttransformer import FTTransformerClassifier

## Load dataset

In [4]:
idx_cross_val = 0
full_dataset = torch.load(
    f"../../data/healthcare_no_show/healthcare_datasets_base_{idx_cross_val}.pt",
    weights_only=False
)
train_dataset = full_dataset["train_dataset"]
val_dataset = full_dataset["val_dataset"]
feature_sizes = full_dataset["feature_sizes"]
n_classes = full_dataset["class_size"] if full_dataset["class_size"] > 2 else 1
sampler = WeightedRandomSampler(torch.DoubleTensor(full_dataset["sample_weights"]), int(full_dataset["total_samples"]))
train_loader = DataLoader(train_dataset, batch_size=128, sampler=sampler)
val_loader = DataLoader(val_dataset, batch_size=64)

## Hyperparameters, functions, and model initialization

In [None]:
# For tensorboard
writer = SummaryWriter(f"../../runs/healthcare_no_show_data{idx_cross_val}")

In [5]:
# Initializations that don't change with experiments
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.BCEWithLogitsLoss()

In [6]:
# Reset random seed
torch.manual_seed(1234)

<torch._C.Generator at 0x7ced3c2ebb30>

In [None]:
model = LogisticRegressionClassifier(
    feature_cats=feature_sizes,
    num_classes=n_classes,
).to(device)

In [None]:
model = MLPClassifier(
    feature_cats=feature_sizes,
    num_classes=n_classes,
    num_hidden_neurons=3,
    num_hidden_layers=256
).to(device)

In [7]:
model = FTTransformerClassifier(
    feature_cats=feature_sizes,
    num_classes=n_classes,
    d_model=256,
    num_encoder_layers=3,
    dim_feedforward=384,
    dropout=0.1
).to(device)

In [8]:
params_set1 = []
params_set2 = []
for name, param in model.named_parameters():
    if "cls_token" not in name and "embeddings" not in name and "bias" not in name and "norm" not in name:
        params_set1.append(param)
    else:
        params_set2.append(param)

In [None]:
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-2)

In [None]:
# Used by FT-Transformer
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-5)

In [9]:
# Separate optimizer for bias, norm and inputs with weight decay = 0
optimizer1 = torch.optim.AdamW(params_set1, lr=1e-4, weight_decay=1e-5)
optimizer2 = torch.optim.AdamW(params_set2, lr=1e-4, weight_decay=0)
optimizer = [optimizer1, optimizer2]

In [None]:
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
# optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, weight_decay=1e-2)
# optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9, weight_decay=1e-4)

In [None]:
# Schedule learning rate reduction
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30, 60, 90], gamma=0.1)

## Train model

In [10]:
num_epochs = 100
for epoch in range(num_epochs):
    running_loss = 0.0
    model.train()
    for iter_idx, (features, labels) in enumerate(train_loader):
        features, labels = features.to(device), labels.to(device)

        if isinstance(optimizer, list):
            [x.zero_grad() for x in optimizer]
        else:
            optimizer.zero_grad()

        outputs = model(features)
        loss = criterion(outputs, labels.unsqueeze(1))
        loss.backward()

        if isinstance(optimizer, list):
            [x.step() for x in optimizer]
        else:
            optimizer.step()

        running_loss += loss.item()

        if "writer" in globals():
            writer.add_scalar("Loss/train", loss.item(), epoch * len(train_loader) + iter_idx)
    
    if "scheduler" in globals():
        scheduler.step()
    
    model.eval()
    with torch.no_grad():
        val_loss = 0.0
        for features, labels in val_loader:
            features, labels = features.to(device), labels.to(device)
            outputs = model(features)
            val_loss += criterion(outputs, labels.unsqueeze(1)).item()

        if "writer" in globals():
            writer.add_scalar("Loss/val", val_loss / len(val_loader), epoch)

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Val Loss: {val_loss/len(val_loader):.4f}")

Epoch [1/100], Loss: 0.6547, Val Loss: 0.6562
Epoch [2/100], Loss: 0.6259, Val Loss: 0.5820
Epoch [3/100], Loss: 0.6178, Val Loss: 0.6155
Epoch [4/100], Loss: 0.6161, Val Loss: 0.6005
Epoch [5/100], Loss: 0.6083, Val Loss: 0.5864
Epoch [6/100], Loss: 0.6042, Val Loss: 0.6064
Epoch [7/100], Loss: 0.6082, Val Loss: 0.5694
Epoch [8/100], Loss: 0.6032, Val Loss: 0.5943
Epoch [9/100], Loss: 0.6002, Val Loss: 0.6596
Epoch [10/100], Loss: 0.5962, Val Loss: 0.6197
Epoch [11/100], Loss: 0.5995, Val Loss: 0.6089
Epoch [12/100], Loss: 0.5969, Val Loss: 0.6150
Epoch [13/100], Loss: 0.5998, Val Loss: 0.5965
Epoch [14/100], Loss: 0.5945, Val Loss: 0.5987
Epoch [15/100], Loss: 0.5948, Val Loss: 0.5947
Epoch [16/100], Loss: 0.5927, Val Loss: 0.6112
Epoch [17/100], Loss: 0.5974, Val Loss: 0.5713
Epoch [18/100], Loss: 0.5931, Val Loss: 0.6152
Epoch [19/100], Loss: 0.5966, Val Loss: 0.6117
Epoch [20/100], Loss: 0.5945, Val Loss: 0.6105
Epoch [21/100], Loss: 0.5924, Val Loss: 0.6002
Epoch [22/100], Loss: 

## Compute accuracy

In [None]:
threshold = 0.9
preds = []
trues = []
model.eval()
with torch.no_grad():
    for features, labels in val_loader:
        features = features.to(device)
        outputs = model(features)
        predictions = torch.sigmoid(outputs.squeeze()).cpu().numpy()
        preds.append(predictions)
        trues.append(labels.numpy())

preds = np.concat(preds, axis=0) > threshold
trues = np.concat(trues, axis=0)
precision, recall, fscore, _ = precision_recall_fscore_support(preds, trues, average='weighted')
print(f"Validation Accuracy: {(preds == trues).mean():.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F-score: {fscore:.4f}")

Validation Accuracy: 0.7948
Precision: 0.9946201071156904
Recall: 0.7947975571137752
F-score: 0.8824003890680735


Notes:
* acc1 refers to accuracy of dataset 1

Results of MLP:

| exp | opt type | LR | weight decay | acc0 | acc1 | acc2 | avg acc |
|----|----|----|----|----|----|----|----|
| ?* | SGD | 1e-2 | 1e-4 | 0.7961 | ? | ? | ? |
| ? | SGD | 1e-3 | 1e-2 | 0.7961 | ? | ? | ? |
| ?* | Adam | 1e-3 | 0 | ? | ? | ? | ? |
| ?* | Adam | 1e-3 | 1e-5 | ? | ? | ? | ? |
| ? | Adam | 1e-3 | 1e-2 | ? | ? | ? | ? |
| ? | Adam | 1e-3 | 1e-4 | ? | ? | ? | ? |
| ?* | AdamW | 1e-4 | 1e-2 | ? | ? | ? | ? |
| ?* | AdamW | 1e-3 | 1e-2 | ? | ? | ? | ? |
| ? | AdamW | 1e-3 | 1e-4 | ? | ? | ? | ? |

LR: 0.7915

smaller lr no scheduler: 0.7937

MLP: 0.7944

TFTrans: 0.7944

smaller lr no scheduler: 0.7951

## Store model

In [None]:
torch.save(model.state_dict(), f"../../models/healthcare_no_show/transformer_classifier_data{idx_cross_val}.pth")