# CS167: Day23
## Kaggle & Fine Tuning CNN Models

#### CS167: Machine Learning, Fall 2025

## __Put the Model in GPU mode__

We want to accelerate the training process using a graphics processing unit (GPU). You need to enable it first (click Settings --> Accelerator --> GPU T4 x2).

In [None]:
import torch

# Choose the compute device once, up front. Kaggle only exposes CUDA when a
# GPU accelerator has been enabled in the notebook settings; without one we
# fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

In [None]:
import random

import numpy as np
import torch

# Fix the state of every random number generator used in this notebook so
# that repeated runs start from the same point.
seed = 42  # any integer works; a different value gives a different (repeatable) run
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Seed the CUDA generators too: first the current GPU, then all GPUs
# (relevant when a multi-GPU accelerator is selected).
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

## __Putting Everything Together for AlexNet__

__Putting Everything Together using our AlexNet Network on our 4-class image recognition Dataset__

In [None]:
# ============================================
# Step 1: imports and device (Kaggle version)
# ============================================
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms, datasets, models
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import time
import os
# ===========================================

In [None]:

# ============================================
# Step 2: dataset paths (Kaggle version)
# ============================================
# Kaggle exposes uploaded datasets under /kaggle/input.
# For example, a dataset named "bike-cat-dog-person" shows up at:
#   /kaggle/input/bike-cat-dog-person/...

# If the files cannot be found, uncomment the walk below to list everything
# that is available under /kaggle/input:
# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

base_dir = "/kaggle/input/bike-cat-dog-person"  # <-- change this name to match your Kaggle dataset
train_dir, test_dir = (
    os.path.join(base_dir, split) for split in ("bcdp_v1/train", "bcdp_v1/test")
)

# More debugging aids: uncomment to check the resolved folders and their contents.
#print("Train dir:", train_dir)
#print("Test dir:", test_dir)
#print("Train subfolders:", os.listdir(train_dir))
#print("Test subfolders:", os.listdir(test_dir))

# Preprocessing for AlexNet: resize every image to 227x227, convert it to a
# tensor, then normalize each channel with the ImageNet mean / std.
transform = transforms.Compose([
    transforms.Resize(size=(227, 227)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
    ),
])

# Both splits go through exactly the same preprocessing pipeline.
train_dataset, test_dataset = (
    datasets.ImageFolder(folder, transform=transform)
    for folder in (train_dir, test_dir)
)

# The class list reported by the training split defines the label set used
# by the rest of the notebook.
dataset_labels = train_dataset.classes
number_of_classes = len(dataset_labels)
print("Classes:", dataset_labels)


In [None]:

# ============================================
# Step 3: define Neural Network model here
# ============================================
class AlexNet(nn.Module):
    """AlexNet backbone with a small replacement classification head.

    The ``features`` and ``avgpool`` stages are taken from torchvision's
    AlexNet. Its original classifier is not used; a two-layer MLP producing
    ``num_classes`` outputs takes its place.

    Args:
        num_classes: Number of outputs of the new classification head.
        pretrained: When true, the backbone is built with the
            ``IMAGENET1K_V1`` weights; otherwise no weights are requested.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()

        weights = models.AlexNet_Weights.IMAGENET1K_V1 if pretrained else None
        backbone = models.alexnet(weights=weights)

        # Keep the convolutional / pooling stages of the backbone as they are.
        self.features = backbone.features
        self.avgpool = backbone.avgpool

        # New head for our own classes; its input size is 256 * 6 * 6.
        flattened_dim = 256 * 6 * 6
        hidden_dim = 128
        self.classifier = nn.Sequential(
            nn.Linear(flattened_dim, hidden_dim),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(hidden_dim, num_classes),
        )

    def forward(self, x):
        """Run features -> avgpool -> flatten -> classifier and return the result."""
        pooled = self.avgpool(self.features(x))
        flat = torch.flatten(pooled, 1)  # keep dim 0 (batch), merge the rest
        return self.classifier(flat)


In [None]:
# ============================================
# Step 4: training / testing loops 
# ============================================
def train_loop(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one full pass over ``dataloader``.

    Args:
        dataloader: Iterable of ``(inputs, targets)`` batches that also
            exposes a sized ``dataset`` attribute.
        model: The ``torch.nn.Module`` to optimise; it is updated in place.
        loss_fn: Callable mapping ``(predictions, targets)`` to a scalar
            loss tensor.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        A tuple ``(avg_loss, accuracy)``: the mean of the per-batch losses
        and the fraction of samples whose arg-max prediction equals the
        target.
    """
    model.train()

    # Use the device the model actually lives on instead of depending on a
    # notebook-level global, so the function also works when cells are run
    # out of order or the model is moved elsewhere.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    size = len(dataloader.dataset)
    running_loss = 0.0
    correct = 0

    for X, y in dataloader:
        X, y = X.to(target_device), y.to(target_device)

        # Clear gradients *before* back-propagation: anything left over from
        # an earlier (e.g. interrupted) step must not leak into this update.
        optimizer.zero_grad()

        # forward + loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # backward + update
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        correct += (pred.argmax(1) == y).sum().item()

    avg_loss = running_loss / len(dataloader)
    accuracy = correct / size
    return avg_loss, accuracy

def test_loop(dataloader, model, loss_fn):
    """Evaluate ``model`` on every batch of ``dataloader`` without training.

    Args:
        dataloader: Iterable of ``(inputs, targets)`` batches that also
            exposes a sized ``dataset`` attribute.
        model: The ``torch.nn.Module`` to evaluate; its outputs are indexed
            as ``(batch, class)``.
        loss_fn: Callable mapping ``(predictions, targets)`` to a scalar
            loss tensor.

    Returns:
        A tuple ``(avg_loss, accuracy, conf_matrix)``: the mean of the
        per-batch losses, the fraction of correctly classified samples, and
        the confusion matrix with one row/column per model output class.
    """
    model.eval()

    # Use the device the model actually lives on instead of depending on a
    # notebook-level global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    size = len(dataloader.dataset)
    running_loss = 0.0
    correct = 0
    num_classes = None

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(target_device), y.to(target_device)
            pred = model(X)
            loss = loss_fn(pred, y)
            batch_preds = pred.argmax(1)
            num_classes = pred.shape[1]

            running_loss += loss.item()
            correct += (batch_preds == y).sum().item()

            all_preds.append(batch_preds.cpu())
            all_labels.append(y.cpu())

    avg_loss = running_loss / len(dataloader)
    accuracy = correct / size

    all_preds = torch.cat(all_preds)
    all_labels = torch.cat(all_labels)
    # Pin the label set to the model's output classes. Without it the matrix
    # only covers classes that happen to occur in this pass, so it can end up
    # smaller than the list of display labels.
    conf_matrix = confusion_matrix(
        all_labels.numpy(),
        all_preds.numpy(),
        labels=list(range(num_classes)),
    )

    return avg_loss, accuracy, conf_matrix

In [None]:
# ============================================
# Step 5: your fine-tuning block (Kaggle-ready)
# ============================================
# Build the network and move it to the selected device.
cnn_model = AlexNet(number_of_classes)
cnn_model.to(device)
print(cnn_model)

# Hyper-parameters and training objects.
learning_rate = 1e-4
batch_size_val = 32
epochs = 10
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(cnn_model.parameters(), lr=learning_rate)
softmax = nn.Softmax(dim=1)

# Both loaders share every setting except shuffling. Memory is pinned only
# when running on CUDA; two workers are used to speed up loading on Kaggle.
use_pinned_memory = device == "cuda"
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size_val,
    shuffle=True,
    num_workers=2,
    pin_memory=use_pinned_memory,
)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=batch_size_val,
    shuffle=False,
    num_workers=2,
    pin_memory=use_pinned_memory,
)

# Per-epoch history, used for plotting afterwards.
train_losses, test_losses = [], []
train_accuracies, test_accuracies = [], []

start_time = time.time()
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")

    # One pass over the training data, then one evaluation pass on the test data.
    avg_train_loss, train_accuracy = train_loop(train_dataloader, cnn_model, loss_fn, optimizer)
    avg_test_loss, test_accuracy, conf_matrix_test = test_loop(test_dataloader, cnn_model, loss_fn)

    train_losses.append(avg_train_loss)
    train_accuracies.append(train_accuracy)
    test_losses.append(avg_test_loss)
    test_accuracies.append(test_accuracy)

    print(f"Train loss: {avg_train_loss:.4f}, Train acc: {train_accuracy:.4f}")
    print(f"Test  loss: {avg_test_loss:.4f}, Test  acc: {test_accuracy:.4f}")

print("AlexNet model has been fine-tuned!")
total_time_sec = time.time() - start_time
print(f"Total fine-tuning time: {total_time_sec:.3f} sec")
print(f"Total fine-tuning time: {total_time_sec / 3600.0:.3f} hrs")


In [None]:
# Plot train and test accuracy against the (1-based) epoch number.
epoch_numbers = range(1, epochs + 1)
plt.plot(epoch_numbers, train_accuracies, label='train')
plt.plot(epoch_numbers, test_accuracies, label='test')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Model accuracies after each epoch')
plt.legend()
plt.show()

In [None]:
# Draw the confusion matrix returned by the last evaluation pass (i.e. the
# final epoch), labelling rows and columns with the dataset's class names.
disp = ConfusionMatrixDisplay(conf_matrix_test, display_labels=dataset_labels)
disp.plot(cmap="Blues", xticks_rotation=45)
plt.tight_layout()
plt.show()


### Now, let's see how the newly trained model performs on 10 sample images from the test set

In [None]:
# ChatGPT generated code
# Prompt to ChatGPT: https://chatgpt.com/share/6915f698-2ca8-8001-9ba6-38a7314e8b82

import random
import matplotlib.pyplot as plt
import torch

# helper to unnormalize images for display (reverse the ImageNet normalization)
def unnormalize(img_tensor):
    """Undo the ImageNet mean/std normalization so an image can be displayed.

    Args:
        img_tensor: Normalized image tensor whose channel dimension has size
            3 and is third from the end, e.g. ``(3, H, W)`` or
            ``(N, 3, H, W)``.

    Returns:
        A tensor of the same shape, on the same device as ``img_tensor``,
        with values clamped to ``[0, 1]``.
    """
    # Create the statistics on the input's device so that CUDA tensors can be
    # un-normalized without first being moved to the CPU.
    mean = torch.tensor([0.485, 0.456, 0.406], device=img_tensor.device).view(3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225], device=img_tensor.device).view(3, 1, 1)
    # Floating-point round-off can leave values marginally outside [0, 1];
    # clamp so the result is a valid float image for plotting.
    return (img_tensor * std + mean).clamp(0.0, 1.0)

# get class names from the dataset object
class_names = test_dataset.classes
print("Class names:", class_names)

# make sure model is in eval mode
cnn_model.eval()

# Randomly pick up to 10 indices from the test set. random.sample raises
# ValueError when asked for more items than the population holds, so cap the
# request at the dataset size.
num_samples = min(10, len(test_dataset))
indices = random.sample(range(len(test_dataset)), num_samples)

plt.figure(figsize=(15, 10))

with torch.no_grad():
    for i, idx in enumerate(indices):
        img, label = test_dataset[idx]

        # Prepare batch of size 1
        x = img.unsqueeze(0).to(device)

        # Model prediction. The functional softmax keeps this cell independent
        # of the `softmax` object created in the fine-tuning cell.
        pred = cnn_model(x)
        pred_prob = torch.softmax(pred, dim=1)
        pred_label = torch.argmax(pred_prob, dim=1).item()

        # check if correct
        is_correct = (pred_label == label)

        # unnormalize for display and reorder (C, H, W) -> (H, W, C) for imshow
        disp_img = unnormalize(img).permute(1,2,0).cpu().numpy()

        # plot on a 2 x 5 grid
        plt.subplot(2, 5, i+1)
        plt.imshow(disp_img)
        plt.axis("off")

        # green title for a correct prediction, red for a wrong one
        title_color = "green" if is_correct else "red"
        plt.title(f"Pred: {class_names[pred_label]}\nTrue: {class_names[label]}",
                  color=title_color)

plt.tight_layout()
plt.show()