In [1]:
import time
import sys
import os
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import nn
import torch.optim as optim
import torchvision
from torchvision.models.efficientnet import MBConvConfig, FusedMBConvConfig

import optuna

sys.path.append("/jet/home/azhang19/stat 214/stat-214-lab2-group6/code/modeling")
from preprocessing import to_NCHW, pad_to_384x384, standardize_images
from autoencoder import EfficientNetEncoder, EfficientNetDecoder, AutoencoderConfig

# Runtime configuration shared by the cells below.
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Allow float32 matrix multiplications to use PyTorch's faster "high" precision mode.
torch.set_float32_matmul_precision('high')
# Let cuDNN benchmark convolution algorithms and keep the fastest one.
torch.backends.cudnn.benchmark = True

# Mixed-precision switch; none of the cells visible here read it.
use_amp = True

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the raw arrays from the .npz archive, one name per array.
data = np.load("/jet/home/azhang19/stat 214/stat-214-lab2-group6/data/array_data.npz")
unlabeled_images = data["unlabeled_images"]
unlabeled_masks = data["unlabeled_masks"]
labeled_images = data["labeled_images"]
labeled_masks = data["labeled_masks"]
labels = data["labels"]

# Unlabeled set: images go to NCHW layout before padding, masks are padded as-is,
# then both become tensors on the selected device.
unlabeled_images = torch.tensor(
    pad_to_384x384(to_NCHW(unlabeled_images)), dtype=torch.float32
).to(device)  # [161, 8, 384, 384]
unlabeled_masks = torch.tensor(
    pad_to_384x384(unlabeled_masks), dtype=torch.bool
).to(device)  # [161, 384, 384]

# Labeled set: same treatment, plus the per-pixel labels stored as integers.
labeled_images = torch.tensor(
    pad_to_384x384(to_NCHW(labeled_images)), dtype=torch.float32
).to(device)  # [3, 8, 384, 384]
labeled_masks = torch.tensor(
    pad_to_384x384(labeled_masks), dtype=torch.bool
).to(device)  # [3, 384, 384]
labels = torch.tensor(
    pad_to_384x384(labels), dtype=torch.long
).to(device)  # [3, 384, 384]

# Standardize images: the statistics returned for the unlabeled set are passed
# back in for the labeled set, so both sets share the same std/mean.
unlabeled_images, std_channel, mean_channel = standardize_images(
    unlabeled_images, unlabeled_masks
)
labeled_images, _, _ = standardize_images(
    labeled_images, labeled_masks, std_channel, mean_channel
)

In [3]:
# Encoder stage layout. The trailing comments give the feature-map size each
# stage is expected to produce (carried over from the original notebook).
encoder_config = [
    FusedMBConvConfig(1, 3, 1, 16, 16, 1),  # 384x384x8 -> 384x384x16
    FusedMBConvConfig(4, 3, 2, 16, 32, 1),  # 384x384x16 -> 192x192x32
    MBConvConfig(4, 3, 2, 32, 64, 1),       # 192x192x32 -> 96x96x64
]

# Build encoder and decoder
encoder = EfficientNetEncoder(
    inverted_residual_setting=encoder_config,
    dropout=0.1,
    input_channels=8,
    last_channel=64,
)

decoder = EfficientNetDecoder()

autoencoder = nn.Sequential(encoder, decoder).train().to(device)

# Restore the pretrained autoencoder weights.
# map_location=device remaps the stored tensors onto the device chosen above, so
# a checkpoint written from a GPU still loads when the notebook falls back to CPU.
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
autoencoder.load_state_dict(
    torch.load(
        "/jet/home/azhang19/stat 214/stat-214-lab2-group6/code/modeling/ckpt/AutoencoderConfig([1, 1, 1], flip=True, rotate=True)/autoencoder_12800.pth",
        map_location=device,
    )
)

# Only the encoder is used from here on: switch it to eval mode and compute the
# features of the labeled images once, without autograd tracking.
encoder = encoder.eval()
with torch.inference_mode():
    feature = encoder(labeled_images)
    # Resize the encoder output back to 384x384 so it lines up with the labels.
    feature = nn.functional.interpolate(feature, size=384, mode="bicubic", antialias=True)

In [4]:
def l1_reg(model):
    """Return the summed L1 norm of every parameter whose name lacks "bias".

    Args:
        model: Object exposing ``named_parameters()`` that yields
            ``(name, tensor)`` pairs (e.g. a ``torch.nn.Module``).

    Returns:
        The accumulated penalty as a tensor, or the integer ``0`` when no
        parameter qualifies.
    """
    penalties = (
        torch.norm(weight, p=1)
        for label, weight in model.named_parameters()
        if "bias" not in label
    )
    return sum(penalties, 0)

In [5]:
def accuracy(pred_class, target):
    """Return the fraction of positions where ``pred_class`` equals ``target``.

    The result is a 0-dim float tensor (the mean of the element-wise matches).
    """
    hits = torch.eq(pred_class, target)
    return hits.to(torch.float32).mean()

def marco_f1_score(pred_class, target):
    """Macro-averaged F1 over the two classes of a 0/1 labelling.

    Both arguments hold 0 (negative class, i.e. original label -1 mapped to 0)
    or 1 (positive class). The F1 of each class is computed from the confusion
    counts and the two values are averaged. A small epsilon in the denominator
    keeps the ratio defined when a class has no hits and no errors.

    The function keeps its original "marco" spelling because other cells call
    it by that name.
    """
    eps = 1e-8

    said_pos, said_neg = pred_class == 1, pred_class == 0
    is_pos, is_neg = target == 1, target == 0

    # The four confusion counts; one class's false positives are the other
    # class's false negatives, so each count serves both F1 scores.
    hit_pos = (said_pos & is_pos).sum().float()
    hit_neg = (said_neg & is_neg).sum().float()
    false_alarm = (said_pos & is_neg).sum().float()
    missed = (said_neg & is_pos).sum().float()

    f1_pos = 2 * hit_pos / (2 * hit_pos + false_alarm + missed + eps)
    f1_neg = 2 * hit_neg / (2 * hit_neg + missed + false_alarm + eps)

    return (f1_pos + f1_neg) / 2

In [6]:
def masked_bce_loss_acc(pred, label):
    """Binary cross-entropy, accuracy and macro-F1 over the labelled pixels only.

    Args:
        pred: Logits, (N, 1, H, W).
        label: Per-pixel labels, (N, H, W), with -1 = negative, 1 = positive
            and 0 = masked out (ignored).

    Returns:
        Tuple ``(loss, accuracy, macro_f1)`` computed on the pixels whose label
        is non-zero.
    """
    flat_label = label.flatten()
    keep = flat_label != 0  # valid (unmasked) positions

    logits = pred.flatten()[keep]
    # Map the -1/1 labels onto the 0/1 targets expected by BCE.
    targets = (flat_label[keep] + 1) / 2

    loss = torch.nn.functional.binary_cross_entropy_with_logits(logits, targets)

    # A positive logit corresponds to a probability above 0.5.
    hard_pred = (logits > 0).float()

    return loss, accuracy(hard_pred, targets), marco_f1_score(hard_pred, targets)

In [7]:
def objective(trial, train_val_idx=(0, 1)):
    """Optuna objective: cross-validated macro-F1 of a single-conv classifier.

    For every image index in ``train_val_idx`` a fresh ``nn.Conv2d(64, 1)``
    classifier is trained on the remaining indices (leave-one-out) and scored
    on the held-out one. The mean validation F1 over the folds is returned.

    Relies on the module-level names ``feature``, ``labels``, ``device``,
    ``l1_reg`` and ``masked_bce_loss_acc``.

    Args:
        trial: Optuna trial used to sample ``epochs``, ``lr``,
            ``weight_decay``, ``optimizer``, ``kernel_size``, ``loss_fn`` and
            ``l1``.
        train_val_idx: Indices into the first dimension of ``feature`` /
            ``labels`` that take part in cross-validation. Defaults to
            ``(0, 1)``, the split the notebook originally hard-coded.

    Returns:
        Mean validation macro-F1 across folds, as a Python float.

    Raises:
        ValueError: If fewer than two indices are given, since every fold
            needs at least one training image and one validation image.
    """
    fold_indices = list(train_val_idx)
    if len(fold_indices) < 2:
        raise ValueError(
            "train_val_idx needs at least two indices: one to train on and one to validate on"
        )

    # Suggest hyperparameters.
    epochs = trial.suggest_int("epochs", 200, 600)
    lr = trial.suggest_float("lr", 1e-4, 1e-1, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-5, 1, log=True)
    optimizer_name = trial.suggest_categorical("optimizer", ["SGD", "AdamW"])
    kernel_size = trial.suggest_categorical("kernel_size", [1, 2, 3])
    # Only "bce" is offered, so the sampled value is not needed for dispatch;
    # the call stays so that "loss_fn" is still recorded in the trial's params.
    trial.suggest_categorical("loss_fn", ["bce"])
    l1 = trial.suggest_float("l1", 1e-5, 1e-1, log=True)

    # Map string to actual optimizer class.
    optimizer_class = torch.optim.SGD if optimizer_name == "SGD" else torch.optim.AdamW

    loss_fn = masked_bce_loss_acc

    # One validation F1 per fold, stored by fold position. Indexing by the
    # image index instead would write out of bounds for e.g. indices (1, 2).
    fold_scores = torch.zeros(len(fold_indices))

    for fold, held_out in enumerate(fold_indices):
        # Leave-one-out split over the cross-validation indices.
        train_idx = [j for j in fold_indices if j != held_out]
        val_idx = [held_out]

        # Fresh classifier for this fold with the sampled kernel size.
        classifier = nn.Conv2d(64, 1, kernel_size=kernel_size,
                               padding="same", padding_mode="replicate").to(device)
        classifier.train()

        classifier_optimizer = optimizer_class(
            classifier.parameters(), lr=lr, weight_decay=weight_decay
        )

        train = feature[train_idx]
        train_labels = labels[train_idx]
        val = feature[val_idx]
        val_labels = labels[val_idx]

        # Full-batch training: one optimizer step per epoch.
        for _ in range(epochs):
            classifier_optimizer.zero_grad(set_to_none=True)
            pred = classifier(train)
            loss, _, _ = loss_fn(pred, train_labels)
            # Add l1 regularization on the non-bias weights.
            loss = loss + l1 * l1_reg(classifier)
            loss.backward()
            classifier_optimizer.step()

        # Score the held-out image once, after training has finished.
        classifier.eval()
        with torch.inference_mode():
            _, _, val_f1 = loss_fn(classifier(val), val_labels)

        fold_scores[fold] = float(val_f1)

    # Average the per-fold validation F1.
    return fold_scores.mean().item()

In [8]:
# Number of hyperparameter configurations to evaluate.
trial_budget = 100

# In-memory study that searches for the highest value returned by `objective`.
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=trial_budget)

[I 2025-03-08 03:39:19,531] A new study created in memory with name: no-name-d7b24191-189b-4594-b581-a398451f7f93


[I 2025-03-08 03:39:20,842] Trial 0 finished with value: 0.6958315968513489 and parameters: {'epochs': 236, 'lr': 0.0003109685945826442, 'weight_decay': 0.02201885603338423, 'optimizer': 'AdamW', 'kernel_size': 1, 'loss_fn': 'bce', 'l1': 0.01021019434109759}. Best is trial 0 with value: 0.6958315968513489.
[I 2025-03-08 03:39:23,400] Trial 1 finished with value: 0.7348808646202087 and parameters: {'epochs': 561, 'lr': 0.0031996421692630397, 'weight_decay': 0.005877214620126896, 'optimizer': 'AdamW', 'kernel_size': 1, 'loss_fn': 'bce', 'l1': 0.060368648473350627}. Best is trial 1 with value: 0.7348808646202087.
[I 2025-03-08 03:39:25,899] Trial 2 finished with value: 0.7406752109527588 and parameters: {'epochs': 474, 'lr': 0.00027993568979523437, 'weight_decay': 0.00023106337374583777, 'optimizer': 'AdamW', 'kernel_size': 3, 'loss_fn': 'bce', 'l1': 7.407924939164565e-05}. Best is trial 2 with value: 0.7406752109527588.
[I 2025-03-08 03:39:27,573] Trial 3 finished with value: 0.769132554

In [9]:
# Report the winning trial: its score first, then one line per hyperparameter.
print("Best trial:")
best_trial = study.best_trial
print(f"  Best Validation F1 Score: {best_trial.value:.4f}")
print("  Hyperparameters:")
for param_name, param_value in best_trial.params.items():
    print(f"    {param_name}: {param_value}")

Best trial:
  Best Validation F1 Score: 0.8262
  Hyperparameters:
    epochs: 325
    lr: 0.0003618543621266028
    weight_decay: 1.979201053287094e-05
    optimizer: SGD
    kernel_size: 3
    loss_fn: bce
    l1: 0.0047670779365748895
