In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
desired_cpu_cores = "0-12"
pid = os.getpid()
os.system(f"taskset -p -c {desired_cpu_cores} {pid}")

import torch
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from tqdm.auto import tqdm
import aim

from library.dataset import TrainDataset, TestDataset, ImageDataset
from library.model import VectorQuantizer, VQVAE, EnhancedVQVAE
from library.trainer import AdvancedTrainer
from library.threshold import ThresholdOptimizer
from library.evaluator import Evaluator

def run_experiment(
    optimizer_class=torch.optim.AdamW,
    optimizer_kwargs=None,
    model_class=EnhancedVQVAE,
    epochs=5,
    fine_tune_epochs=1,
    batch_size=512,
    use_perceptual=True,
    image_size=128,
):
    # Set up Aim run and log hyperparameters
    run = aim.Run()
    if optimizer_kwargs is None:
        optimizer_kwargs = {'lr': 1e-4, 'weight_decay': 1e-5}
    hparams = {
        "optimizer_class": optimizer_class.__name__,
        **optimizer_kwargs,
        "model_class": model_class.__name__,
        "epochs": epochs,
        "fine_tune_epochs": fine_tune_epochs,
        "batch_size": batch_size,
        "use_perceptual": use_perceptual,
        "image_size": image_size,
    }
    run["hparams"] = hparams

    # Device setup
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Define transforms
    transform = transforms.Compose([
        transforms.Resize((image_size, image_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])

    # Load datasets
    train_dir = "dataset/train"
    proliv_dir = "dataset/proliv"
    test_dir = "dataset/test/imgs"
    annotation_path = "dataset/test/test_annotation.txt"

    train_dataset = TrainDataset(train_dir, transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
    proliv_dataset = ImageDataset(proliv_dir, transform=transform)
    proliv_loader = DataLoader(proliv_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

    # Split datasets
    normal_train, normal_val = torch.utils.data.random_split(train_dataset, [0.8, 0.2])
    val_dataset = torch.utils.data.ConcatDataset([normal_val, proliv_dataset])
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

    # Initialize model and optimizer
    model = model_class().to(DEVICE)
    optimizer = optimizer_class(model.parameters(), **optimizer_kwargs)

    # Initialize trainer
    trainer = AdvancedTrainer(
        model=model,
        train_loader=train_loader,
        optimizer=optimizer,
        device=DEVICE,
        val_loader=val_loader,
        use_perceptual=use_perceptual,
        run=run
    )

    # Train the model
    trainer.train(epochs, fine_tune_epochs=fine_tune_epochs)

    # Load the best model
    model.load_state_dict(torch.load('final_model.pth')['model_state_dict'])

    # Threshold optimization
    threshold_optimizer = ThresholdOptimizer(
        model,
        DataLoader(normal_val, batch_size=batch_size),
        DataLoader(proliv_dataset, batch_size=batch_size, num_workers=0),
        DEVICE
    )
    optimal_threshold = threshold_optimizer.find_optimal_threshold()
    run.track(optimal_threshold, name='optimal_threshold')

    # Evaluation
    test_dataset = TestDataset(test_dir, annotation_path, transform=transform)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

    evaluator = Evaluator(model, DEVICE)
    tpr, tnr = evaluator.evaluate(test_loader, optimal_threshold)
    train_errors = evaluator.compute_errors(DataLoader(train_dataset, batch_size=batch_size, num_workers=0))
    threshold = evaluator.determine_threshold(train_errors, 95)
    print(f"Final TPR: {tpr}, Final TNR: {tnr}")
    run.track(threshold, name='percentile_threshold')
    run.track(tpr, name='test_tpr')
    run.track(tnr, name='test_tnr')

    # Plot reconstructions
    # model.eval()
    # with torch.no_grad():
    #     test_images, _, _ = next(iter(test_loader))
    #     test_images = test_images[:10].to(DEVICE)
    #     reconstructions, _ = model(test_images)

    # plt.figure(figsize=(16, 4))
    # for i in range(10):
    #     plt.subplot(2, 10, i + 1)
    #     img = test_images[i].cpu().permute(1, 2, 0).numpy() * 0.5 + 0.5
    #     plt.imshow(img)
    #     plt.axis('off')
    #     plt.subplot(2, 10, i + 11)
    #     recon = reconstructions[i].cpu().permute(1, 2, 0).numpy() * 0.5 + 0.5
    #     plt.imshow(recon)
    #     plt.axis('off')
    # plt.tight_layout()
    # plt.show()

    return {
        "optimal_threshold": optimal_threshold,
        "percentile_threshold": threshold,
        "test_tpr": tpr,
        "test_tnr": tnr,
    }



pid 1379189's current affinity list: 0-79
pid 1379189's new affinity list: 0-12


#### SGD Optimizer

In [2]:
results = run_experiment(
    optimizer_class=torch.optim.SGD,
    optimizer_kwargs={'lr': 0.01, 'momentum': 0.9},
    epochs=10,
    batch_size=256,
    use_perceptual=False
)



  0%|          | 0/40 [00:00<?, ?it/s]


Saved new best model with val loss 0.0119

Epoch 1/10
Train Loss: 0.0132 | Val Loss: 0.0119


  0%|          | 0/40 [00:00<?, ?it/s]


Saved new best model with val loss 0.0116

Epoch 2/10
Train Loss: 0.0095 | Val Loss: 0.0116


  0%|          | 0/40 [00:00<?, ?it/s]


Saved new best model with val loss 0.0116

Epoch 3/10
Train Loss: 0.0095 | Val Loss: 0.0116


  0%|          | 0/40 [00:00<?, ?it/s]


Saved new best model with val loss 0.0115

Epoch 4/10
Train Loss: 0.0094 | Val Loss: 0.0115


  0%|          | 0/40 [00:00<?, ?it/s]


Saved new best model with val loss 0.0115

Epoch 5/10
Train Loss: 0.0094 | Val Loss: 0.0115


  0%|          | 0/40 [00:00<?, ?it/s]


Saved new best model with val loss 0.0115

Epoch 6/10
Train Loss: 0.0094 | Val Loss: 0.0115


  0%|          | 0/40 [00:00<?, ?it/s]


Saved new best model with val loss 0.0115

Epoch 7/10
Train Loss: 0.0094 | Val Loss: 0.0115


  0%|          | 0/40 [00:00<?, ?it/s]


Saved new best model with val loss 0.0115

Epoch 8/10
Train Loss: 0.0094 | Val Loss: 0.0115


  0%|          | 0/40 [00:00<?, ?it/s]


Saved new best model with val loss 0.0115

Epoch 9/10
Train Loss: 0.0094 | Val Loss: 0.0115


  0%|          | 0/40 [00:00<?, ?it/s]


Saved new best model with val loss 0.0115

Epoch 10/10
Train Loss: 0.0094 | Val Loss: 0.0115

Starting fine-tuning on full dataset
Loaded best checkpoint with val loss 0.0115


  checkpoint = torch.load(self.checkpoint_path)


  0%|          | 0/48 [00:00<?, ?it/s]


Fine-tune Epoch 1/1
Train Loss: 0.0096
Final model saved to final_model.pth


  model.load_state_dict(torch.load('final_model.pth')['model_state_dict'])


0.032467532467532464 0.992
Final TPR: 0.9302325581395349, Final TNR: 0.82537517053206


#### SGD optimizer with perceptual

In [3]:
results = run_experiment(
    optimizer_class=torch.optim.SGD,
    optimizer_kwargs={'lr': 0.01, 'momentum': 0.9},
    epochs=10,
    batch_size=256,
    use_perceptual=True
)



  0%|          | 0/40 [00:00<?, ?it/s]


Saved new best model with val loss 0.0515

Epoch 1/10
Train Loss: 0.0510 | Val Loss: 0.0515


  0%|          | 0/40 [00:00<?, ?it/s]


Saved new best model with val loss 0.0512

Epoch 2/10
Train Loss: 0.0474 | Val Loss: 0.0512


  0%|          | 0/40 [00:00<?, ?it/s]


Saved new best model with val loss 0.0510

Epoch 3/10
Train Loss: 0.0473 | Val Loss: 0.0510


  0%|          | 0/40 [00:00<?, ?it/s]


Saved new best model with val loss 0.0510

Epoch 4/10
Train Loss: 0.0472 | Val Loss: 0.0510


  0%|          | 0/40 [00:00<?, ?it/s]


Saved new best model with val loss 0.0509

Epoch 5/10
Train Loss: 0.0472 | Val Loss: 0.0509


  0%|          | 0/40 [00:00<?, ?it/s]


Saved new best model with val loss 0.0509

Epoch 6/10
Train Loss: 0.0470 | Val Loss: 0.0509


  0%|          | 0/40 [00:00<?, ?it/s]


Saved new best model with val loss 0.0508

Epoch 7/10
Train Loss: 0.0470 | Val Loss: 0.0508


  0%|          | 0/40 [00:00<?, ?it/s]


Saved new best model with val loss 0.0508

Epoch 8/10
Train Loss: 0.0469 | Val Loss: 0.0508


  0%|          | 0/40 [00:00<?, ?it/s]


Saved new best model with val loss 0.0507

Epoch 9/10
Train Loss: 0.0469 | Val Loss: 0.0507


  0%|          | 0/40 [00:00<?, ?it/s]


Saved new best model with val loss 0.0506

Epoch 10/10
Train Loss: 0.0469 | Val Loss: 0.0506

Starting fine-tuning on full dataset
Loaded best checkpoint with val loss 0.0506


  0%|          | 0/48 [00:00<?, ?it/s]


Fine-tune Epoch 1/1
Train Loss: 0.0470
Final model saved to final_model.pth


  model.load_state_dict(torch.load('final_model.pth')['model_state_dict'])


0.045454545454545456 0.9925
Final TPR: 0.9302325581395349, Final TNR: 0.7923601637107777


#### AdamW optimizer without perceptual

In [4]:
results = run_experiment(
    optimizer_class=torch.optim.AdamW,
    optimizer_kwargs={'lr':1e-4, 'weight_decay': 1e-5},
    epochs=5,
    batch_size=256,
    use_perceptual=False
)


  0%|          | 0/40 [00:00<?, ?it/s]


Saved new best model with val loss 0.0117

Epoch 1/5
Train Loss: 0.0126 | Val Loss: 0.0117


  0%|          | 0/40 [00:00<?, ?it/s]


Epoch 2/5
Train Loss: 0.0186 | Val Loss: 0.0496


  0%|          | 0/40 [00:00<?, ?it/s]


Epoch 3/5
Train Loss: 0.0527 | Val Loss: 0.0726


  0%|          | 0/40 [00:00<?, ?it/s]


Epoch 4/5
Train Loss: 0.0421 | Val Loss: 0.0531


  0%|          | 0/40 [00:00<?, ?it/s]


Epoch 5/5
Train Loss: 0.0356 | Val Loss: 0.0532

Starting fine-tuning on full dataset
Loaded best checkpoint with val loss 0.0117


  0%|          | 0/48 [00:00<?, ?it/s]


Fine-tune Epoch 1/1
Train Loss: 0.0096
Final model saved to final_model.pth


  model.load_state_dict(torch.load('final_model.pth')['model_state_dict'])


0.025974025974025976 0.991
Final TPR: 0.9147286821705426, Final TNR: 0.8343792633015007


#### AdamW optimizer with perceptual

In [5]:
results = run_experiment(
    optimizer_class=torch.optim.AdamW,
    optimizer_kwargs={'lr':1e-4, 'weight_decay': 1e-5},
    epochs=5,
    batch_size=256,
    use_perceptual=True
)


  0%|          | 0/40 [00:00<?, ?it/s]


Saved new best model with val loss 0.0499

Epoch 1/5
Train Loss: 0.0516 | Val Loss: 0.0499


  0%|          | 0/40 [00:00<?, ?it/s]


Saved new best model with val loss 0.0472

Epoch 2/5
Train Loss: 0.0444 | Val Loss: 0.0472


  0%|          | 0/40 [00:00<?, ?it/s]


Saved new best model with val loss 0.0471

Epoch 3/5
Train Loss: 0.0434 | Val Loss: 0.0471


  0%|          | 0/40 [00:00<?, ?it/s]


Epoch 4/5
Train Loss: 0.0433 | Val Loss: 0.0477


  0%|          | 0/40 [00:00<?, ?it/s]


Epoch 5/5
Train Loss: 0.0567 | Val Loss: 0.1144

Starting fine-tuning on full dataset
Loaded best checkpoint with val loss 0.0471


  0%|          | 0/48 [00:00<?, ?it/s]


Fine-tune Epoch 1/1
Train Loss: 0.0436
Final model saved to final_model.pth


  model.load_state_dict(torch.load('final_model.pth')['model_state_dict'])


0.045454545454545456 0.9925
Final TPR: 0.9302325581395349, Final TNR: 0.8057298772169168


In [6]:
!ls exps

adamw_perc.png	adamw_simple.png  sgd_perc.png	sgd_simple.png
