# Environment and Data Setup
Consolidate imports, device configuration, and dataset preparation using Oxford Flowers-102 with augmentation and normalization pipelines.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

from torchvision import datasets, models, transforms

import numpy as np
import random
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
# One seed shared by every random number generator the notebook touches.
SEED = 42

# Seed the Python and NumPy global generators first ...
for _seed_fn in (random.seed, np.random.seed):
    _seed_fn(SEED)

# ... then PyTorch's global generator (kept last so the cell still displays it).
torch.manual_seed(SEED)

<torch._C.Generator at 0x7e3d2420e150>

In [None]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print("Using device:", device)

Using device: cuda


In [None]:
import tensorflow_datasets as tfds

# Load Oxford Flowers-102 through TensorFlow Datasets, keeping the metadata object
# that `with_info=True` returns alongside the data.
# NOTE(review): nothing else in the visible notebook reads `dataset` or `dataset_info`
# (the loaders below are built from torchvision's Flowers102) — confirm whether this
# extra download is still needed.
dataset, dataset_info = tfds.load("oxford_flowers102", with_info=True, as_supervised=True)



Downloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to /root/tensorflow_datasets/oxford_flowers102/2.1.1...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating train examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/oxford_flowers102/incomplete.HBRPOI_2.1.1/oxford_flowers102-train.tfrecord…

Generating test examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/oxford_flowers102/incomplete.HBRPOI_2.1.1/oxford_flowers102-test.tfrecord*…

Generating validation examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/oxford_flowers102/incomplete.HBRPOI_2.1.1/oxford_flowers102-validation.tfr…

Dataset oxford_flowers102 downloaded and prepared to /root/tensorflow_datasets/oxford_flowers102/2.1.1. Subsequent calls will reuse this data.


In [None]:
# Input resolution, number of labels and mini-batch size used throughout the notebook.
IMAGE_SIZE, NUM_CLASSES, BATCH_SIZE = 224, 102, 32

# Per-channel normalisation statistics shared by the training and evaluation pipelines.
# NOTE(review): these look like the usual ImageNet statistics — confirm.
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: random geometric and colour augmentation, then tensor conversion
# and normalisation.
_train_steps = [
    transforms.RandomResizedCrop(IMAGE_SIZE, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(0.5),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.05),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
]
train_transform = transforms.Compose(_train_steps)

# Evaluation pipeline: deterministic resize to a square input, no augmentation.
_eval_steps = [
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
]
eval_transform = transforms.Compose(_eval_steps)

In [None]:
# All three splits are stored under the same root directory.
DATA_ROOT = "./data"

# (split name, transform) pairs, in the order train / val / test. Only the training
# split receives the augmenting transform.
_split_transforms = (
    ("train", train_transform),
    ("val", eval_transform),
    ("test", eval_transform),
)

# Build one Flowers102 dataset per split, downloading the files if they are missing.
train_ds, val_ds, test_ds = [
    datasets.Flowers102(
        root=DATA_ROOT,
        split=split_name,
        download=True,
        transform=split_transform,
    )
    for split_name, split_transform in _split_transforms
]

100%|██████████| 345M/345M [00:17<00:00, 19.6MB/s]
100%|██████████| 502/502 [00:00<00:00, 2.29MB/s]
100%|██████████| 15.0k/15.0k [00:00<00:00, 47.8MB/s]


In [None]:
# Settings common to every loader; only the dataset and the shuffle flag differ.
_loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=2, pin_memory=True)

# Training batches are reshuffled; validation and test batches keep a fixed order.
train_loader = DataLoader(train_ds, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_ds, shuffle=False, **_loader_kwargs)
test_loader = DataLoader(test_ds, shuffle=False, **_loader_kwargs)

# Convolutional Block Attention Module (CBAM)
Define the CBAM class to implement sequential channel and spatial attention for enhanced feature localization.

In [None]:
class CBAM(nn.Module):
    """Channel attention followed by spatial attention over a 4-D feature map.

    Args:
        channels: Number of channels of the feature map passed to ``forward``.
        reduction: Divisor applied to ``channels`` to size the hidden layer of the
            shared MLP. The hidden width is clamped to at least 1, so inputs with
            fewer channels than ``reduction`` still get a usable bottleneck.
    """

    def __init__(self, channels, reduction=16):
        super().__init__()

        # Without the clamp, channels < reduction yields a zero-width hidden layer:
        # the MLP then always outputs zeros and the channel gate is a constant 0.5.
        hidden = max(channels // reduction, 1)

        # Channel attention: one bias-free MLP shared by the avg- and max-pooled vectors
        self.mlp = nn.Sequential(
            nn.Linear(channels, hidden, bias=False),
            nn.ReLU(),
            nn.Linear(hidden, channels, bias=False)
        )

        # Spatial attention: 7x7 conv over the stacked [mean, max] channel maps;
        # padding=3 keeps the spatial size unchanged.
        self.spatial = nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False)

    def forward(self, x):
        """Return ``x`` rescaled by the channel gate and then by the spatial gate.

        Args:
            x: Tensor with four dimensions, unpacked as (batch, channels, _, _).

        Returns:
            Tensor with the same shape as ``x``.
        """
        b, c, _, _ = x.size()

        # Channel attention: pool each channel to a scalar, score both pooled
        # vectors with the shared MLP, and squash the sum into a (0, 1) gate.
        avg = F.adaptive_avg_pool2d(x, 1).view(b, c)
        mx  = F.adaptive_max_pool2d(x, 1).view(b, c)
        ca = torch.sigmoid(self.mlp(avg) + self.mlp(mx)).view(b, c, 1, 1)
        x = x * ca

        # Spatial attention: summarise the channel axis by mean and max, then
        # convolve the two maps into a single-channel (0, 1) gate.
        avg = torch.mean(x, dim=1, keepdim=True)
        mx, _ = torch.max(x, dim=1, keepdim=True)
        sa = torch.sigmoid(self.spatial(torch.cat([avg, mx], dim=1)))
        return x * sa

# Attention-Based ResNet-50 Architecture
Construct a custom ResNet-50 variant by integrating CBAM modules after each residual stage for fine-grained feature extraction.

In [None]:
class AttentionResNet50(nn.Module):
    """ResNet-50 backbone with a CBAM block applied after each residual stage.

    Args:
        num_classes: Output size of the final linear classifier.
        pretrained: When True (the default, matching the previous behaviour), the
            backbone starts from torchvision's pretrained ResNet-50 weights. Pass
            False to skip that download, e.g. when a saved state dict will be
            loaded into the model right afterwards.
    """

    def __init__(self, num_classes=102, pretrained=True):
        super().__init__()

        base = self._build_backbone(pretrained)

        # Stem: initial conv / batch-norm / ReLU / max-pool taken from the backbone.
        self.stem = nn.Sequential(
            base.conv1,
            base.bn1,
            base.relu,
            base.maxpool
        )

        # Each residual stage is paired with a CBAM sized to that stage's channel count.
        self.layer1 = base.layer1
        self.cbam1  = CBAM(256)

        self.layer2 = base.layer2
        self.cbam2  = CBAM(512)

        self.layer3 = base.layer3
        self.cbam3  = CBAM(1024)

        self.layer4 = base.layer4
        self.cbam4  = CBAM(2048)

        # Reuse the backbone's pooling; the classifier head is new and untrained.
        self.pool = base.avgpool
        self.fc   = nn.Linear(2048, num_classes)

    @staticmethod
    def _build_backbone(pretrained):
        """Create the torchvision ResNet-50, preferring the non-deprecated weights API."""
        weights_enum = getattr(models, "ResNet50_Weights", None)
        if weights_enum is None:
            # Older torchvision releases only understand the boolean flag.
            return models.resnet50(pretrained=pretrained)
        # IMAGENET1K_V1 is the checkpoint that `pretrained=True` used to select.
        return models.resnet50(weights=weights_enum.IMAGENET1K_V1 if pretrained else None)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for an image batch ``x``."""
        x = self.stem(x)

        # Residual stage -> attention, four times.
        x = self.cbam1(self.layer1(x))
        x = self.cbam2(self.layer2(x))
        x = self.cbam3(self.layer3(x))
        x = self.cbam4(self.layer4(x))

        x = self.pool(x)
        x = torch.flatten(x, 1)
        return self.fc(x)

# Model Initialization and Selective Fine-Tuning
Initialize the Attention-ResNet-50 model and selectively unfreeze layers 2, 3, and 4 to allow adaptation to the floral dataset.

In [None]:
model = AttentionResNet50(NUM_CLASSES).to(device)

# Freeze everything first so that only the modules listed below are updated.
for p in model.parameters():
    p.requires_grad = False

# Fine-tune the deeper residual stages and the classifier head, plus the CBAM
# blocks: their weights are freshly initialised (not pretrained), so leaving them
# frozen would pin the attention gates at their random starting values.
trainable_modules = [
    model.layer2, model.layer3, model.layer4, model.fc,
    model.cbam1, model.cbam2, model.cbam3, model.cbam4,
]
for module in trainable_modules:
    for p in module.parameters():
        p.requires_grad = True



Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


100%|██████████| 97.8M/97.8M [00:00<00:00, 211MB/s]


# Loss Function and Optimizer Setup
Set up the Cross-Entropy loss function and the Adam optimizer to update only the trainable attention and residual parameters.

In [None]:
# Multi-class classification loss applied to the raw logits.
criterion = nn.CrossEntropyLoss()

# Hand Adam only the parameters that are still marked trainable.
trainable_params = [param for param in model.parameters() if param.requires_grad]
optimizer = optim.Adam(trainable_params, lr=1e-4)

# Training and Evaluation Logic
Define standard functions to perform the training pass per epoch and calculate accuracy on validation or test sets.

In [None]:
def train_one_epoch(model, loader):
    """Run one optimisation pass over ``loader`` and return the training accuracy.

    Uses the notebook-level ``optimizer``, ``criterion`` and ``device`` objects.

    Args:
        model: Network to train; it is switched to training mode first.
        loader: Iterable yielding ``(imgs, labels)`` batches.

    Returns:
        Fraction of samples whose arg-max prediction matched the label, measured
        on the outputs produced during this pass (i.e. before each batch's
        weight update). ``0.0`` when the loader yields no samples.
    """
    model.train()
    correct, total = 0, 0

    for imgs, labels in loader:
        imgs, labels = imgs.to(device), labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate running accuracy counts for this batch.
        preds = outputs.argmax(1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    # Guard against an empty loader, which would otherwise raise ZeroDivisionError.
    return correct / total if total else 0.0


def evaluate(model, loader):
    """Return the classification accuracy of ``model`` over ``loader``.

    Uses the notebook-level ``device`` object. Gradients are disabled and the
    model is switched to evaluation mode for the duration of the pass.

    Args:
        model: Network to evaluate.
        loader: Iterable yielding ``(imgs, labels)`` batches.

    Returns:
        Fraction of samples whose arg-max prediction matched the label, or
        ``0.0`` when the loader yields no samples.
    """
    model.eval()
    correct, total = 0, 0

    with torch.no_grad():
        for imgs, labels in loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)
            preds = outputs.argmax(1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    # Guard against an empty loader, which would otherwise raise ZeroDivisionError.
    return correct / total if total else 0.0

# Model Training and Results
Execute the training loop for the attention-based model and output final accuracy results across training, validation, and test splits.
(Note: the model was originally trained for 15 epochs, not 3. The weights from that earlier run were still held in memory when this cell was re-run, which is why the training accuracy in the output below already starts at about 99%.)

In [None]:
# Number of passes over the training split in this run.
EPOCHS = 3

for epoch in range(EPOCHS):
    # One optimisation pass, then a validation pass with the updated weights.
    train_acc = train_one_epoch(model, train_loader)
    val_acc = evaluate(model, val_loader)

    epoch_report = (
        f"Epoch [{epoch+1}/{EPOCHS}] | "
        f"Train Acc: {train_acc:.4f} | "
        f"Val Acc: {val_acc:.4f}"
    )
    print(epoch_report)

# The held-out test split is scored once, after the last epoch.
test_acc = evaluate(model, test_loader)
print("Test Accuracy:", test_acc)

Epoch [1/3] | Train Acc: 0.9990 | Val Acc: 0.9118
Epoch [2/3] | Train Acc: 0.9951 | Val Acc: 0.9147
Epoch [3/3] | Train Acc: 0.9980 | Val Acc: 0.9147
Test Accuracy: 0.8882745161814929


# Model Export
Save the final state dictionary of the Level 3 attention model to a specified local directory.

In [None]:
# Destination file for the trained weights.
# NOTE(review): this is an absolute path under /content; confirm that directory
# exists wherever the notebook is run.
MODEL_PATH = "/content/level3_attention_resnet50_cbam.pth"

# Persist only the state dict (parameters and buffers), not the module object itself.
final_weights = model.state_dict()
torch.save(final_weights, MODEL_PATH)

print("Model saved at:", MODEL_PATH)

Model saved at: /content/level3_attention_resnet50_cbam.pth
