In [1]:
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
from torch import nn
import torch
import random
import pandas as pd
import numpy as np
import scipy.io as scp
import torch.optim as optim
import torchvision.models as models
from torchvision.transforms import v2
from torchvision.transforms import functional as F
import math
from dataset import train_dataset, test_dataset, val_dataset

#### The code in this notebook is referenced from https://pytorch.org/vision/master/auto_examples/transforms/plot_cutmix_mixup.html

# CutMix

> **Cut-and-Paste Data Augmentation**: CutMix combines two or more images by cutting a rectangular portion from one image and pasting it onto another. The pixel values of the pasted region are a combination of the original image and the selected portion from another image.
<br>
>
> **Label Mixing**: The labels of the pasted region are also combined based on the area. This means that if 60% of the region comes from image A and 40% from image B, the label for that region is mixed accordingly.
<br>
>
> **Smooth Regularization**: CutMix acts as a regularization technique to prevent overfitting. It encourages the model to make predictions on the mixed regions, which can lead to improved generalization.
<br>
>
> **Benefits**: CutMix can improve model robustness, make the model less sensitive to input perturbations, and lead to better generalization. It is especially useful when training on smaller datasets.
<br>
>
# MixUp

> **Linear Interpolation**: MixUp operates by linearly interpolating between pairs of input samples. Given two input samples and their corresponding labels, MixUp creates new training examples by taking a weighted sum of the two samples. The labels are also linearly interpolated.
<br>
>
> **Smooth Labeling**: MixUp effectively "softens" the labels by blending them. For example, if you mix two images with labels "cat" and "dog" with a mixing factor of 0.7, the new image's label will be a soft label with 70% "cat" and 30% "dog."
<br>
>
> **Benefits**: MixUp encourages the model to make predictions that are linear combinations of the original data points. It helps the model learn a more generalized decision boundary and reduce the risk of overfitting. It also aids in handling class imbalance.
<br>
>

In [3]:
def cutmix(data, target, alpha=1.0):
    indices = torch.randperm(data.size(0))
    lam = np.random.beta(alpha, alpha)
    data = data * lam + data[indices] * (1 - lam)
    target = target * lam + target[indices] * (1 - lam)
    return data, target

def mixup(data, target, alpha=1.0):
    lam = np.random.beta(alpha, alpha)
    indices = torch.randperm(data.size(0))
    data = data * lam + data[indices] * (1 - lam)
    target = target * lam + target[indices] * (1 - lam)
    return data, target

In [5]:

NUM_CLASSES=102
cutmix = v2.CutMix(num_classes=NUM_CLASSES)
mixup = v2.MixUp(num_classes=NUM_CLASSES)
cutmix_or_mixup = v2.RandomChoice([cutmix, mixup])

def train(train_loader, model, criterion, optimizer, device, cutmix_prob=0.5, mixup_prob=0.5):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        inputs, targets = cutmix_or_mixup(inputs, targets)
        _, targets = targets.max(1)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

    return running_loss / len(train_loader), correct / total

In [6]:
from train import eval
from model import mobilenet

In [8]:
from common_utils import EarlyStopper

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
NUM_EPOCH = 100
NUM_CLASSES = 5
EARLY_STOP_THRESHOLD = 3
early_stopper = EarlyStopper(patience=EARLY_STOP_THRESHOLD)

# HYPERPARAMS TO TUNE
BATCH_SIZE = 128
EARLY_STOP_THRESHOLD = 3
LR = 0.001
loss_list = []
accuracy_list = []
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
model,optimizer,criterion = mobilenet()
model.to(DEVICE)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
best_acc = 0
early_stop_count = 0

In [9]:
for epoch in range(1, NUM_EPOCH+1):
    train_loss = train(train_loader, model, criterion, optimizer, DEVICE)
    accuracy, val_loss = eval(val_loader, model, criterion, DEVICE)
    print(f'Epoch {epoch}, Train Loss: {train_loss}, Val Accuracy: {accuracy}')
    if early_stopper.early_stop(val_loss):
        print("Early Stopping...")
        break
    scheduler.step()
test_accuracy, _ = eval(test_loader, model, criterion, DEVICE)
print(f'Test Accuracy: {test_accuracy}')

Epoch 1, Train Loss: (4.790738523006439, 0.008823529411764706), Val Accuracy: 0.09411764705882353
Epoch 2, Train Loss: (4.339960753917694, 0.07450980392156863), Val Accuracy: 0.24803921568627452
Epoch 3, Train Loss: (4.066323310136795, 0.15588235294117647), Val Accuracy: 0.46568627450980393
Epoch 4, Train Loss: (3.719339430332184, 0.27058823529411763), Val Accuracy: 0.5696078431372549
Epoch 5, Train Loss: (3.5453310310840607, 0.28921568627450983), Val Accuracy: 0.6460784313725491
Epoch 6, Train Loss: (3.2798059284687042, 0.39705882352941174), Val Accuracy: 0.6647058823529411
Epoch 7, Train Loss: (3.233849823474884, 0.4088235294117647), Val Accuracy: 0.6627450980392157
Epoch 8, Train Loss: (3.1141715943813324, 0.43333333333333335), Val Accuracy: 0.6892156862745098
Epoch 9, Train Loss: (3.0263059735298157, 0.4627450980392157), Val Accuracy: 0.7186274509803922
Epoch 10, Train Loss: (2.8191276490688324, 0.5196078431372549), Val Accuracy: 0.711764705882353
Epoch 11, Train Loss: (3.004432380