In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
import zipfile
import os
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
from torch.utils.data import Dataset
import torch
from torch.optim import lr_scheduler

In [None]:
# Mount Google Drive so files under "/content/drive/My Drive" are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive/')
# File the training loop writes with torch.save and the resume logic reads with torch.load.
checkpoint_path = "/content/drive/My Drive/resnet_01.pth"
# JSON file that save_metrics appends one entry to per epoch.
metrics_path = "/content/drive/My Drive/training_metrics_resnet_01.json"


In [None]:
data_dir = '/content/drive/My Drive/data/classification/dataset_CIFAR10.zip'
!mkdir /content/dataset/
!unzip "/content/drive/My Drive/data/classification/dataset_CIFAR10.zip" -d "/content/dataset/"

In [None]:
def _resize_and_crop():
    """Return fresh Resize(256) + CenterCrop(224) steps, used at the start of both pipelines."""
    return [transforms.Resize(256), transforms.CenterCrop(224)]


def _to_normalized_tensor():
    """Return fresh ToTensor + Normalize steps, used at the end of both pipelines."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]


# Random augmentations applied to training images only, in this order.
_train_augmentations = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
]

# Training pipeline: resize/crop -> augmentations -> tensor conversion and normalization.
train_transform = transforms.Compose(
    _resize_and_crop() + _train_augmentations + _to_normalized_tensor()
)

# Evaluation pipeline: same resize/crop and normalization, without any random augmentation.
test_transform = transforms.Compose(_resize_and_crop() + _to_normalized_tensor())

# Each dataset is an ImageFolder over its own directory with its own transform.
train_dataset = datasets.ImageFolder(
    root="/content/dataset/train",
    transform=train_transform,
)
test_dataset = datasets.ImageFolder(
    root="/content/dataset/validation",
    transform=test_transform,
)


In [None]:
# Both loaders use the same batch size; only the training loader shuffles.
batch_size = 128
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Show the class names exposed by the training ImageFolder.
print(train_dataset.classes)

# Loss function passed to both the training and the evaluation helpers.
criterion = nn.CrossEntropyLoss()

In [None]:
def train_iteration(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over every batch of ``dataloader``.

    Args:
        dataloader: iterable yielding ``(images, labels)`` batches.
        model: network to train; it is switched to training mode here.
        loss_fn: callable returning the loss for ``(predictions, labels)``.
        optimizer: optimizer whose gradients are reset and whose ``step`` is
            called once per batch.

    Returns:
        None. The current batch loss is printed every 100 batches.
    """
    model.train()
    for i, (images, labels) in enumerate(dataloader):
        # Move the batch to the notebook-level ``device`` before the forward pass.
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        predictions = model(images)
        loss = loss_fn(predictions, labels)
        loss.backward()
        optimizer.step()

        if i % 100 == 0:
            # ``i`` is the batch index within this pass, not an epoch number.
            print('Train Batch: {}, Loss: {:.6f}'.format(i, loss.item()))

    return None

def test_iteration(dataloader, model, loss_fn):
    """Evaluate ``model`` on every batch of ``dataloader`` with gradients disabled.

    Args:
        dataloader: loader yielding ``(images, labels)`` batches; must expose
            ``dataset`` and support ``len``.
        model: network to evaluate; it is switched to evaluation mode here.
        loss_fn: callable returning the loss for ``(predictions, labels)``.

    Returns:
        dict with ``'average_loss'`` (sum of the per-batch losses divided by the
        number of batches) and ``'average_accuracy'`` (number of correct
        predictions divided by ``len(dataloader.dataset)``).
    """
    sample_count = len(dataloader.dataset)
    batch_count = len(dataloader)
    model.eval()

    loss_total = 0
    correct_total = 0
    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            loss_total += loss_fn(outputs, labels).item()
            # The predicted class is the index of the largest output along dim 1.
            hits = outputs.argmax(1) == labels
            correct_total += hits.type(torch.float).sum().item()

    return {
        'average_loss': loss_total / batch_count,
        'average_accuracy': correct_total / sample_count,
    }

For the initial 30 epochs the network is used mainly as a frozen feature extractor: only the 3 residual blocks of `layer4` and the classifier (`fc`) are updated.

In [None]:
# Start from the pretrained ResNet-50 and replace its classifier with one sized for
# the classes of the training dataset.
model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
model.fc = nn.Linear(model.fc.in_features, len(train_dataset.classes))

# Freeze everything, then re-enable gradients only for layer4 and the new classifier.
for param in model.parameters():
    param.requires_grad = False

for param in model.layer4.parameters():
    param.requires_grad = True

for param in model.fc.parameters():
    param.requires_grad = True

model = model.to(device)

# The optimizer is given every parameter, including the currently frozen ones, so
# parameters that are unfrozen later are already registered with it.
optimizer_ft = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0001)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

try:
    # map_location remaps the stored tensors onto the current device, so a checkpoint
    # written on a GPU runtime can still be restored on a CPU-only runtime.
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer_ft.load_state_dict(checkpoint['optimizer_state_dict'])
    exp_lr_scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
    # The checkpoint stores the number of completed epochs, i.e. the next epoch index.
    start_epoch = checkpoint['epoch']
    print(f"Resuming training from epoch {start_epoch}")
except FileNotFoundError:
    print("No checkpoint found. Starting training from scratch.")
    start_epoch = 0

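After the initial 30 epochs, manually return here and run the next cell to unfreeze all layers. It only takes effect once the resumed checkpoint reports `start_epoch >= 30`.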

In [None]:
# Takes effect only when training resumes from a checkpoint at epoch 30 or later.
if 30 <= start_epoch:
    # Module.requires_grad_ sets requires_grad on every parameter of the model.
    model.requires_grad_(True)
    print('Unfreezing all layers')

In [None]:
import json

def save_metrics(epoch, train_results, test_results, filename):
    """Append one epoch's metrics to the JSON list stored in ``filename``.

    Args:
        epoch: zero-based epoch index; it is stored as ``epoch + 1``.
        train_results: dict with ``'average_loss'`` and ``'average_accuracy'``
            measured on the training data.
        test_results: dict with the same two keys measured on the evaluation data.
        filename: path of the JSON file; it is created when it does not exist.

    Raises:
        json.JSONDecodeError: if the file has content that is not valid JSON.
    """
    try:
        with open(filename, 'r') as f:
            existing = f.read()
    except FileNotFoundError:
        existing = ''

    # An empty (or whitespace-only) file holds no history yet. json would reject it,
    # which would abort the epoch before its checkpoint is written.
    metrics = json.loads(existing) if existing.strip() else []

    metrics.append({
        'epoch': epoch + 1,
        'train_loss': train_results['average_loss'],
        'train_accuracy': train_results['average_accuracy'],
        'test_loss': test_results['average_loss'],
        'test_accuracy': test_results['average_accuracy']
    })

    with open(filename, 'w') as f:
        json.dump(metrics, f, indent=4)

In [None]:
# Total number of epochs; a resumed run continues from start_epoch up to this value.
num_epochs = 60
for epoch in range(start_epoch, num_epochs):
    print(f"Epoch {epoch+1}\n-------------------------------")
    _ = train_iteration(train_loader, model, criterion, optimizer_ft)
    # Advance the learning-rate schedule once per completed training epoch. Without
    # this call the scheduler is saved and restored but never changes the learning rate.
    # NOTE(review): the schedule is configured where exp_lr_scheduler is created --
    # confirm its step size and decay factor suit a run of num_epochs epochs.
    exp_lr_scheduler.step()
    # Both metric sets come from the evaluation helper, so the training figures are
    # measured in evaluation mode after the epoch's updates.
    train_results = test_iteration(train_loader, model, criterion)
    test_results = test_iteration(test_loader, model, criterion)
    save_metrics(epoch, train_results, test_results, metrics_path)
    print(f"Metrics saved to {metrics_path}")
    print(f"Train Loss: {train_results['average_loss']}, Train Accuracy: {train_results['average_accuracy']}")
    print(f"Test Loss: {test_results['average_loss']}, Test Accuracy: {test_results['average_accuracy']}")
    # 'epoch' stores the number of completed epochs, which is the index to resume from.
    checkpoint = {
        'epoch': epoch + 1,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer_ft.state_dict(),
        'scheduler_state_dict': exp_lr_scheduler.state_dict(),
    }
    torch.save(checkpoint, checkpoint_path)
    print(f"Checkpoint saved at {checkpoint_path}")

print("Finished Training")