# Test with pretrained models

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from sklearn.utils.class_weight import compute_class_weight
import random
import numpy as np
import os
import cpuinfo
from tqdm import tqdm
from torchinfo import summary
from PIL import Image

In [2]:
# Control randomness
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True

### File paths

In [3]:
train_path = "../data/Train"       # paths for your training and testing dataset
#train_path = "../data/aug_train"    
test_path = "../data/Test"
input_parameter = ""                # paths for import and export custom model trainable parameters
output_parameter = ""

### Device of use

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#device = "cpu"
device_name = ""

print(f"CPU count: {os.cpu_count()}")
num_workers = min(4, os.cpu_count() // 2)  # Dynamically set num_workers

if device == torch.device("cuda"):
    device_name = torch.cuda.get_device_name(device)
    print(f"Using GPU: {device_name}")
else:
    cpu_info = cpuinfo.get_cpu_info()
    device_name = cpu_info['brand_raw']
    print(f"Using CPU: {device_name}")

CPU count: 16
Using GPU: NVIDIA GeForce RTX 3080


### Pretrain Model of use from torchvision

In [5]:
from torchvision.models import efficientnet_b0
model = efficientnet_b0(weights='DEFAULT')
#model = efficientnet_v2_s(weights='DEFAULT')
print(f"Using model {type(model).__name__}")

epochs = 30
batch_size = 8 # adjust to your memory
optimizer = optim.Adam(
    model.parameters(),
    lr=3e-4,            # learning rate
    weight_decay=3e-5,  # L2 regularization
    betas=(0.9, 0.999), # Adam beta parameters
    )
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.3,
    patience=3,
    min_lr=1e-6,
    cooldown=1
    )

Using model EfficientNet


### Data Loader and normalization

In [6]:
# Transform for training and testing datasets
img_size = 224 # adjust input image size for model
transform_train = transforms.Compose([          # on training dataset
    transforms.Resize(450),                         # Resize to 450x450
    transforms.RandomRotation(45, expand=False),
    transforms.CenterCrop(318),                     # Center crop to 450x450
    transforms.Resize((224, 224)),                  # Resize to 224x224
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),

    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

transform_test = transforms.Compose([   # on test dataset
    transforms.Resize(224),
    transforms.CenterCrop((224, 224)),                     # Center crop to 450x450
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

# Load datasets
train_dataset = datasets.ImageFolder(root=train_path, transform=transform_train)
test_dataset = datasets.ImageFolder(root=test_path, transform=transform_test)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
train_data = [
    (images.to(device), labels.to(device))
    for images, labels in tqdm(train_loader, desc=f"Preloading Train Data to {device_name}", leave=False)
]
val_data = [
    (images.to(device), labels.to(device))
    for images, labels in tqdm(test_loader, desc=f"Preloading Val Data to {device_name}", leave=False)
]

class_counts = [0] * len(train_dataset.classes)
for _, label in train_dataset.samples:
    class_counts[label] += 1

print(f"Class counts: {class_counts}")
print(f"Classes: {train_dataset.classes}")

                                                                                                   

Class counts: [114, 376, 95, 438, 357, 462, 77, 181, 139]
Classes: ['actinic keratosis', 'basal cell carcinoma', 'dermatofibroma', 'melanoma', 'nevus', 'pigmented benign keratosis', 'seborrheic keratosis', 'squamous cell carcinoma', 'vascular lesion']




### Weighted Cross Entropy Loss

In [7]:
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.arange(len(train_dataset.classes)),
    y=[label for _, label in train_dataset.samples]
)

class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)

normalized_class_weights = class_weights / class_weights.sum()

loss_function = nn.CrossEntropyLoss(weight=normalized_class_weights, label_smoothing=0.1)

print(f"Class weights: {normalized_class_weights}")

Class weights: tensor([0.1597, 0.0484, 0.1917, 0.0416, 0.0510, 0.0394, 0.2365, 0.1006, 0.1310],
       device='cuda:0')


In [8]:
# Edit the output layer of the model
num_classes = len(train_dataset.classes)
num_features = model.classifier[1].in_features
print(f"Classifier - Input features: {num_features}, Output classes: {num_classes}")
model.classifier = nn.Sequential(
    nn.Dropout(0.3),
    nn.Linear(num_features, num_classes),
)

Classifier - Input features: 1280, Output classes: 9


### Configure model parameters

In [9]:
# IF NEEDED
# Load custom weight and optimizer states
# if os.path.exists(input_parameter):
#     checkpoint = torch.load("test_weights.pth", map_location=device)
#     model.load_state_dict(checkpoint['model_state_dict'])
#     optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

In [10]:
# # Selective layer freezing
# # change base on your model
# # "Early layers are often already well-optimized" by GPT-4o
# for _, param in model.named_parameters():
#     param.requires_grad = True          # Unfreeze all layers first
# for name, param in model.named_parameters():
#     if "classifier" not in name:
#         param.requires_grad = False     # Freeze everything except the last classifier layer

In [11]:
# Move model to device
model.to(device)
print(f"Model is on {next(model.parameters()).device}")

Model is on cuda:0


In [12]:
# Print model architecture
print(summary(model, (batch_size, 3, 224, 224)))

Layer (type:depth-idx)                                  Output Shape              Param #
EfficientNet                                            [8, 9]                    --
├─Sequential: 1-1                                       [8, 1280, 7, 7]           --
│    └─Conv2dNormActivation: 2-1                        [8, 32, 112, 112]         --
│    │    └─Conv2d: 3-1                                 [8, 32, 112, 112]         864
│    │    └─BatchNorm2d: 3-2                            [8, 32, 112, 112]         64
│    │    └─SiLU: 3-3                                   [8, 32, 112, 112]         --
│    └─Sequential: 2-2                                  [8, 16, 112, 112]         --
│    │    └─MBConv: 3-4                                 [8, 16, 112, 112]         1,448
│    └─Sequential: 2-3                                  [8, 24, 56, 56]           --
│    │    └─MBConv: 3-5                                 [8, 24, 56, 56]           6,004
│    │    └─MBConv: 3-6                              

In [13]:
# # DEBUG
# print(f"Model device: {next(model.parameters()).device}")
# for images, labels in train_data:
#     print(f"Input device: {images.device}")
#     break
# print(f"Device: {device}")

### Training Epochs

In [14]:
%%time
previous_loss = float('inf')
for epoch in range(epochs):
    # Training phase
    model.train()
    current_lr = scheduler.get_last_lr()[0]
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    
    train_bar = tqdm(train_data, desc=f"Epoch {epoch + 1}/{epochs} [Train]", leave=False)
    for images, labels in train_bar:
        # Forward pass
        outputs = model(images)
        loss = loss_function(outputs, labels)

        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Calculate statistics
        running_loss += loss.item() * images.size(0)
        _, predicted = torch.max(outputs.data, 1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

        # Update progress bar
        train_bar.set_postfix({
            'loss': f"{running_loss / total_train:.4f}",
            'acc': f"{100. * correct_train / total_train:.2f}%"
        })

    # Validation phase
    model.eval()
    val_loss = 0.0
    correct_val = 0
    total_val = 0

    val_bar = tqdm(val_data, desc=f"Epoch {epoch + 1}/{epochs} [Val]", leave=False)
    with torch.no_grad():
        for images, labels in val_bar:
            outputs = model(images)
            loss = loss_function(outputs, labels)

            val_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

            # Update progress bar
            val_bar.set_postfix({
                'loss': f"{val_loss / total_val:.4f}",
                'acc': f"{100. * correct_val / total_val:.2f}%"
            })

    # Update learning rate
    scheduler.step(val_loss / total_val)

    # Print epoch summary
    print(f"Epoch {epoch + 1}/{epochs} - "
          f"Learning Rate: {current_lr:.7f} | "
          f"Train Loss: {running_loss / total_train:.4f}, Train Acc: {100. * correct_train / total_train:.2f}% | "
          f"Val Loss: {val_loss / total_val:.4f}, Val Acc: {100. * correct_val / total_val:.2f}%")

    # Augment the dataset for every 3 epochs
    if (epoch + 1) % 3 == 0 and epoch != (epochs - 1):
        print(f"Augmenting dataset after epoch {epoch + 1}...")
        del train_data
        torch.cuda.empty_cache()
        train_dataset = datasets.ImageFolder(root=train_path, transform=transform_train)
        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
        train_data = [
            (images.to(device), labels.to(device))
            for images, labels in tqdm(train_loader, desc=f"Preloading Train Data to {device_name}", leave=False)
        ]
    
    # # Augment the dataset if validation loss is not improving
    # if val_loss > previous_loss:
    #     times = 0
    #     print(f"Val loss did not improve from {previous_loss / total_val:.4f} at epoch {previous_epoch}. Augmenting dataset...")
    #     del train_data
    #     torch.cuda.empty_cache()
    #     train_dataset = datasets.ImageFolder(root=train_path, transform=transform_train)
    #     train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    #     train_data = [
    #         (images.to(device), labels.to(device))
    #         for images, labels in tqdm(train_loader, desc=f"Preloading Train Data to {device_name}", leave=False)
    #     ]
    # if val_loss < previous_loss:
    #     previous_loss = val_loss
    #     previous_epoch = epoch
    

                                                                                              

Epoch 1/30 - Learning Rate: 0.0003000 | Train Loss: 1.7922, Train Acc: 45.73% | Val Loss: 1.8648, Val Acc: 50.00%


                                                                                              

Epoch 2/30 - Learning Rate: 0.0003000 | Train Loss: 1.3354, Train Acc: 67.71% | Val Loss: 1.9010, Val Acc: 48.31%


                                                                                              

Epoch 3/30 - Learning Rate: 0.0003000 | Train Loss: 1.0569, Train Acc: 82.76% | Val Loss: 1.9729, Val Acc: 47.46%
Augmenting dataset after epoch 3...


                                                                                                   

Epoch 4/30 - Learning Rate: 0.0003000 | Train Loss: 1.5141, Train Acc: 59.36% | Val Loss: 1.7275, Val Acc: 55.08%


                                                                                              

Epoch 5/30 - Learning Rate: 0.0003000 | Train Loss: 1.1096, Train Acc: 78.16% | Val Loss: 1.7141, Val Acc: 59.32%


                                                                                              

Epoch 6/30 - Learning Rate: 0.0003000 | Train Loss: 0.9182, Train Acc: 90.49% | Val Loss: 1.8017, Val Acc: 59.32%
Augmenting dataset after epoch 6...


                                                                                                   

Epoch 7/30 - Learning Rate: 0.0003000 | Train Loss: 1.4023, Train Acc: 65.07% | Val Loss: 1.8367, Val Acc: 55.08%


                                                                                              

Epoch 8/30 - Learning Rate: 0.0003000 | Train Loss: 1.0521, Train Acc: 82.18% | Val Loss: 1.8419, Val Acc: 58.47%


                                                                                              

Epoch 9/30 - Learning Rate: 0.0003000 | Train Loss: 0.8847, Train Acc: 93.21% | Val Loss: 1.7560, Val Acc: 61.02%
Augmenting dataset after epoch 9...


                                                                                                   

Epoch 10/30 - Learning Rate: 0.0000900 | Train Loss: 1.2480, Train Acc: 72.18% | Val Loss: 1.6586, Val Acc: 59.32%


                                                                                               

Epoch 11/30 - Learning Rate: 0.0000900 | Train Loss: 0.9581, Train Acc: 88.79% | Val Loss: 1.6770, Val Acc: 61.86%


                                                                                               

Epoch 12/30 - Learning Rate: 0.0000900 | Train Loss: 0.8621, Train Acc: 95.00% | Val Loss: 1.6547, Val Acc: 61.86%
Augmenting dataset after epoch 12...


                                                                                                   

Epoch 13/30 - Learning Rate: 0.0000900 | Train Loss: 1.2033, Train Acc: 76.82% | Val Loss: 1.6332, Val Acc: 63.56%


                                                                                               

Epoch 14/30 - Learning Rate: 0.0000900 | Train Loss: 0.9260, Train Acc: 90.00% | Val Loss: 1.6837, Val Acc: 65.25%


                                                                                               

Epoch 15/30 - Learning Rate: 0.0000900 | Train Loss: 0.8393, Train Acc: 95.40% | Val Loss: 1.7078, Val Acc: 65.25%
Augmenting dataset after epoch 15...


                                                                                                   

Epoch 16/30 - Learning Rate: 0.0000900 | Train Loss: 1.1571, Train Acc: 78.47% | Val Loss: 1.7372, Val Acc: 62.71%


                                                                                               

Epoch 17/30 - Learning Rate: 0.0000900 | Train Loss: 0.9105, Train Acc: 90.49% | Val Loss: 1.7143, Val Acc: 62.71%


                                                                                               

Epoch 18/30 - Learning Rate: 0.0000270 | Train Loss: 0.8206, Train Acc: 96.43% | Val Loss: 1.7287, Val Acc: 64.41%
Augmenting dataset after epoch 18...


                                                                                                   

Epoch 19/30 - Learning Rate: 0.0000270 | Train Loss: 1.0946, Train Acc: 81.87% | Val Loss: 1.6905, Val Acc: 66.10%


                                                                                               

Epoch 20/30 - Learning Rate: 0.0000270 | Train Loss: 0.9652, Train Acc: 87.58% | Val Loss: 1.7018, Val Acc: 65.25%


                                                                                               

Epoch 21/30 - Learning Rate: 0.0000270 | Train Loss: 0.9017, Train Acc: 91.38% | Val Loss: 1.7230, Val Acc: 64.41%
Augmenting dataset after epoch 21...


                                                                                                   

Epoch 22/30 - Learning Rate: 0.0000270 | Train Loss: 1.0822, Train Acc: 82.36% | Val Loss: 1.6147, Val Acc: 68.64%


                                                                                               

Epoch 23/30 - Learning Rate: 0.0000270 | Train Loss: 0.9653, Train Acc: 87.94% | Val Loss: 1.6162, Val Acc: 68.64%


                                                                                               

Epoch 24/30 - Learning Rate: 0.0000270 | Train Loss: 0.9042, Train Acc: 91.25% | Val Loss: 1.6363, Val Acc: 71.19%
Augmenting dataset after epoch 24...


                                                                                                   

Epoch 25/30 - Learning Rate: 0.0000270 | Train Loss: 1.0792, Train Acc: 82.27% | Val Loss: 1.6642, Val Acc: 67.80%


                                                                                               

Epoch 26/30 - Learning Rate: 0.0000270 | Train Loss: 0.9643, Train Acc: 87.85% | Val Loss: 1.6830, Val Acc: 65.25%


                                                                                               

Epoch 27/30 - Learning Rate: 0.0000081 | Train Loss: 0.9027, Train Acc: 91.34% | Val Loss: 1.6960, Val Acc: 63.56%
Augmenting dataset after epoch 27...


                                                                                                   

Epoch 28/30 - Learning Rate: 0.0000081 | Train Loss: 1.0279, Train Acc: 84.28% | Val Loss: 1.7767, Val Acc: 63.56%


                                                                                               

Epoch 29/30 - Learning Rate: 0.0000081 | Train Loss: 0.9901, Train Acc: 85.93% | Val Loss: 1.7575, Val Acc: 64.41%


                                                                                               

Epoch 30/30 - Learning Rate: 0.0000081 | Train Loss: 0.9633, Train Acc: 86.91% | Val Loss: 1.7673, Val Acc: 63.56%
CPU times: total: 5min 38s
Wall time: 8min 48s




### Training log and data export

In [None]:
# Code here TODO

In [None]:
# iteration 5 template 2
# Updates:
# 1. Manual seed to reduce randomness
# 2. Use ReduceLROnPlateau as lr scheduler
# 3. Improved transformation to account black paddings due to rotation
# 4. Augmentation now only happen for every 3 epoch instead checking for validation loss