## LAB 2.2 A2: Transfer Learning & Finetuning with Pytorch

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms, models
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings("ignore")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

data_dir = "data/dogs_cats_pandas/animals"
img_size = 224
batch_size = 64

transform = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225]),
])

dataset = datasets.ImageFolder(data_dir, transform=transform)
num_classes = len(dataset.classes)
train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - val_size
train_ds, val_ds, test_ds = random_split(dataset, [train_size, val_size, test_size])

train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=2)
test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=2)

In [2]:
def train_model(model, criterion, optimizer, epochs=3):
    model.to(device)
    for epoch in range(epochs):
        model.train()
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (preds == labels).sum().item()
    return correct / total

def get_model():
    model = models.efficientnet_b0(pretrained=True)
    for param in model.parameters():
        param.requires_grad = False
    model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)
    return model

results = {}

In [3]:
# Run 1: train only classifier, Adam lr=1e-3
m1 = get_model()
opt1 = optim.Adam(m1.classifier.parameters(), lr=1e-3)
results["run1"] = train_model(m1, nn.CrossEntropyLoss(), opt1, epochs=3)

# Run 2: unfreeze last block, Adam lr=1e-4
m2 = get_model()
for param in list(m2.features[-1].parameters()):
    param.requires_grad = True
opt2 = optim.Adam(filter(lambda p: p.requires_grad, m2.parameters()), lr=1e-4)
results["run2"] = train_model(m2, nn.CrossEntropyLoss(), opt2, epochs=3)

# Run 3: train all layers, Adam lr=1e-5
m3 = get_model()
for param in m3.parameters():
    param.requires_grad = True
opt3 = optim.Adam(m3.parameters(), lr=1e-5)
results["run3"] = train_model(m3, nn.CrossEntropyLoss(), opt3, epochs=3)

# Run 4: classifier only, SGD lr=0.01
m4 = get_model()
opt4 = optim.SGD(m4.classifier.parameters(), lr=0.01, momentum=0.9)
results["run4"] = train_model(m4, nn.CrossEntropyLoss(), opt4, epochs=3)

# Run 5: unfreeze half, AdamW lr=1e-4
m5 = get_model()
for param in list(m5.features[-3:].parameters()):
    param.requires_grad = True
opt5 = optim.AdamW(filter(lambda p: p.requires_grad, m5.parameters()), lr=1e-4)
results["run5"] = train_model(m5, nn.CrossEntropyLoss(), opt5, epochs=3)

print("Validation accuracies:")
for k,v in results.items():
    print(k, v)

Validation accuracies:
run1 0.9633333333333334
run2 0.9533333333333334
run3 0.9266666666666666
run4 0.9566666666666667
run5 0.98


| Run   | Accuracy |
|-------|----------|
| Run 1 | 0.9633   |
| Run 2 | 0.9533   |
| Run 3 | 0.9267   |
| Run 4 | 0.9567   |
| Run 5 | 0.9800   |