In [2]:
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
from torch import nn
import torch
import random
import pandas as pd
import numpy as np
import scipy.io as scp
import torch.optim as optim
import torchvision.models as models
from torchvision.transforms import v2
from torchvision.transforms import functional as F
import math

In [3]:
# train_transform = v2.Compose([
#     v2.PILToTensor(),
#     v2.RandomRotation(30),
#     v2.RandomResizedCrop(size=(224, 224), antialias=True),
#     v2.RandomHorizontalFlip(p=0.5),
#     v2.ToDtype(torch.float32, scale=True),  # to float32 in [0, 1]
#     v2.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
# ])

# testval_transform = v2.Compose([
#     v2.PILToTensor(),
#     v2.Resize(256),
#     v2.CenterCrop(224),
#     v2.ToDtype(torch.float32, scale=True),  # to float32 in [0, 1]
#     v2.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
# ])

train_transform = transforms.Compose([
    transforms.RandomRotation(30), # random rotation of images
    transforms.RandomResizedCrop(224), # sample random 224x224 patch of images
    transforms.RandomHorizontalFlip(), # random horizontal flip of images
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], 
                        [0.229, 0.224, 0.225])
    # transforms.Normalize([0.51796, 0.41106, 0.32971], 
    #                     [0.29697, 0.24955, 0.28531])
    ])

testval_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], 
                        [0.229, 0.224, 0.225])
    # transforms.Normalize([0.51796, 0.41106, 0.32971], 
    #                     [0.29697, 0.24955, 0.28531])
])

train_dataset = torchvision.datasets.Flowers102(root='./data', split='train', download=True, transform=train_transform)
val_dataset = torchvision.datasets.Flowers102(root='./data', split='val', download=True, transform=testval_transform)
test_dataset = torchvision.datasets.Flowers102(root='./data', split='test', download=True, transform=testval_transform)

# CutMix

> **Cut-and-Paste Data Augmentation**: CutMix combines two or more images by cutting a rectangular portion from one image and pasting it onto another. The pixel values of the pasted region are a combination of the original image and the selected portion from another image.
<br>
>
> **Label Mixing**: The labels of the pasted region are also combined based on the area. This means that if 60% of the region comes from image A and 40% from image B, the label for that region is mixed accordingly.
<br>
>
> **Smooth Regularization**: CutMix acts as a regularization technique to prevent overfitting. It encourages the model to make predictions on the mixed regions, which can lead to improved generalization.
<br>
>
> **Benefits**: CutMix can improve model robustness, make the model less sensitive to input perturbations, and lead to better generalization. It is especially useful when training on smaller datasets.
<br>
>
# MixUp

> **Linear Interpolation**: MixUp operates by linearly interpolating between pairs of input samples. Given two input samples and their corresponding labels, MixUp creates new training examples by taking a weighted sum of the two samples. The labels are also linearly interpolated.
<br>
>
> **Smooth Labeling**: MixUp effectively "softens" the labels by blending them. For example, if you mix two images with labels "cat" and "dog" with a mixing factor of 0.7, the new image's label will be a soft label with 70% "cat" and 30% "dog."
<br>
>
> **Benefits**: MixUp encourages the model to make predictions that are linear combinations of the original data points. It helps the model learn a more generalized decision boundary and reduce the risk of overfitting. It also aids in handling class imbalance.
<br>
>

In [4]:
def cutmix(data, target, alpha=1.0):
    indices = torch.randperm(data.size(0))
    lam = np.random.beta(alpha, alpha)
    data = data * lam + data[indices] * (1 - lam)
    target = target * lam + target[indices] * (1 - lam)
    return data, target

def mixup(data, target, alpha=1.0):
    lam = np.random.beta(alpha, alpha)
    indices = torch.randperm(data.size(0))
    data = data * lam + data[indices] * (1 - lam)
    target = target * lam + target[indices] * (1 - lam)
    return data, target

In [5]:
# def train(train_loader, model, criterion, optimizer, device, cutmix_prob=0.5, mixup_prob=0.5):
#     model.train()
#     running_loss = 0.0
#     correct = 0
#     total = 0

#     for inputs, targets in train_loader:
#         inputs, targets = inputs.to(device), targets.to(device)
#         print(targets)
#         print(targets.dtype)
#         print(inputs.dtype)
        
#         if random.random() < cutmix_prob:
#             inputs, targets = cutmix(inputs, targets)
#         elif random.random() < mixup_prob:
#             inputs, targets = mixup(inputs, targets)
        
#         targets.to(torch.int64)
#         print(targets.dtype)
#         print(targets)
#         print(inputs.dtype)
#         # targets = targets.long()
#         # inputs = inputs.long()
#         optimizer.zero_grad()
#         outputs = model(inputs)
#         print(outputs)
#         loss = criterion(outputs, targets)
#         loss.backward()
#         optimizer.step()

#         running_loss += loss.item()
#         _, predicted = outputs.max(1)
#         total += targets.size(0)
#         correct += predicted.eq(targets).sum().item()

#     return running_loss / len(train_loader), correct / total

In [28]:

NUM_CLASSES=102
cutmix = v2.CutMix(num_classes=NUM_CLASSES)
mixup = v2.MixUp(num_classes=NUM_CLASSES)
cutmix_or_mixup = v2.RandomChoice([cutmix, mixup])

def train(train_loader, model, criterion, optimizer, device, cutmix_prob=0.5, mixup_prob=0.5):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        # print(targets)
        # print(f"Before CutMix/MixUp: {inputs.shape = }, {targets.shape = }")
        inputs, targets = cutmix_or_mixup(inputs, targets)
        # print(f"After CutMix/MixUp: {inputs.shape = }, {targets.shape = }")
        # print(targets)
        _, targets = targets.max(1)
        # print(targets)

        optimizer.zero_grad()
        outputs = model(inputs)
        # print(outputs.shape)
        # print(targets.shape)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

    return running_loss / len(train_loader), correct / total

In [7]:
from train import eval
from model import mobilenet

In [8]:
class SoftCrossEntropyLoss(nn.Module):
    def __init__(self):
        super(SoftCrossEntropyLoss, self).__init__()

    def forward(self, input, target):
        # Calculate the cross-entropy loss with soft labels
        loss = -torch.sum(target * torch.log(input + 1e-8)) / input.size(0)
        return loss

In [29]:
from common_utils import EarlyStopper

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
NUM_EPOCH = 100
NUM_CLASSES = 5
EARLY_STOP_THRESHOLD = 3
early_stopper = EarlyStopper(patience=EARLY_STOP_THRESHOLD)

# HYPERPARAMS TO TUNE
NUM_HIDDEN = 128
NUM_LAYERS = 1
BATCH_SIZE = 128
EARLY_STOP_THRESHOLD = 3
LR = 0.001
loss_list = []
accuracy_list = []
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
model,optimizer,criterion = mobilenet()
model.to(DEVICE)
# criterion = nn.MultiLabelSoftMarginLoss()
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
best_acc = 0
early_stop_count = 0

In [30]:
for epoch in range(1, NUM_EPOCH+1):
    train_loss = train(train_loader, model, criterion, optimizer, DEVICE)
    accuracy, val_loss = eval(val_loader, model, criterion, DEVICE)
    print(f'Epoch {epoch}, Train Loss: {train_loss}, Val Accuracy: {accuracy}')
    if early_stopper.early_stop(val_loss):
        print("Early Stopping...")
        break
    scheduler.step()
test_accuracy, _ = eval(test_loader, model, criterion, DEVICE)
print(f'Test Accuracy: {test_accuracy}')

Epoch 1, Train Loss: (4.820556700229645, 0.00784313725490196), Val Accuracy: 0.08823529411764706
Epoch 2, Train Loss: (4.40290379524231, 0.052941176470588235), Val Accuracy: 0.22156862745098038
Epoch 3, Train Loss: (4.088704615831375, 0.12745098039215685), Val Accuracy: 0.45784313725490194
Epoch 4, Train Loss: (3.685787260532379, 0.28137254901960784), Val Accuracy: 0.5529411764705883
Epoch 5, Train Loss: (3.544148772954941, 0.3235294117647059), Val Accuracy: 0.6333333333333333
Epoch 6, Train Loss: (3.2715260088443756, 0.3990196078431373), Val Accuracy: 0.6509803921568628
Epoch 7, Train Loss: (3.196895331144333, 0.40980392156862744), Val Accuracy: 0.6872549019607843
Epoch 8, Train Loss: (3.417306274175644, 0.3235294117647059), Val Accuracy: 0.7098039215686275
Epoch 9, Train Loss: (3.0151030719280243, 0.45294117647058824), Val Accuracy: 0.7225490196078431
Epoch 10, Train Loss: (3.294542580842972, 0.3607843137254902), Val Accuracy: 0.7147058823529412
Epoch 11, Train Loss: (2.9511909484863