In [None]:
import os
import zipfile
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset
from google.colab import drive
from PIL import Image

# --- CONFIGURATION ---
# Every value a reader may want to tune for the data pipeline lives here.
SEED = 42          # seeds torch's global RNG below and the split generator later in this cell
BATCH_SIZE = 32
IMG_SIZE = 224     # images are resized to IMG_SIZE x IMG_SIZE by both transform pipelines
DATA_PATH = '/content/drive/MyDrive/DL_PROJECT_DATASET.zip'
EXTRACT_PATH = '/content/dataset'

# 1. SETUP ENV
torch.manual_seed(SEED)
# Remove PIL's pixel-count ceiling so very large images do not raise a
# decompression-bomb error when they are opened.
Image.MAX_IMAGE_PIXELS = None

# Mount Google Drive only when the mount point is not there yet.
drive_is_mounted = os.path.exists('/content/drive')
if not drive_is_mounted:
    drive.mount('/content/drive')

# 2. EXTRACT DATA
# The archive is unpacked once; an existing EXTRACT_PATH is taken to mean the
# extraction already happened.
if os.path.exists(EXTRACT_PATH):
    print("Dataset already extracted.")
else:
    print("Extracting dataset...")
    with zipfile.ZipFile(DATA_PATH, 'r') as archive:
        archive.extractall(EXTRACT_PATH)
# 3. AUTO-DETECT CORRECT ROOT (Fixes Nested Folder Issue)
def find_classes_root(start_path, num_classes=5):
    """Return the first directory under ``start_path`` holding exactly ``num_classes`` subfolders.

    The archive may wrap the class folders in one or more parent directories, so
    the tree is searched top-down until a directory with the expected number of
    immediate subdirectories is found.

    Args:
        start_path: directory at which the search starts (it is itself a candidate).
        num_classes: number of immediate subdirectories the dataset root must
            contain. Defaults to 5, the value this notebook was written for.

    Returns:
        Path of the first matching directory, as produced by ``os.walk``.

    Raises:
        FileNotFoundError: if no directory in the tree has exactly
            ``num_classes`` subdirectories (this includes a missing ``start_path``).
    """
    for root, dirs, _files in os.walk(start_path):
        # os.walk reports entries in filesystem order; sorting in place fixes the
        # order in which subdirectories are visited, so the "first match" is the
        # same on every machine.
        dirs.sort()
        if len(dirs) == num_classes:
            return root
    raise FileNotFoundError(
        f"Could not find a directory with exactly {num_classes} class subfolders."
    )

TRUE_ROOT = find_classes_root(EXTRACT_PATH)
print(f"Dataset Root Found: {TRUE_ROOT}")

# 4. DEFINE TRANSFORMS
# Both pipelines resize to the same square size and normalise with the same
# per-channel statistics; only the training pipeline adds random augmentation.
TARGET_HW = (IMG_SIZE, IMG_SIZE)
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Train: Augmentation (random flip + small random rotation)
train_transform = transforms.Compose([
    transforms.Resize(TARGET_HW),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

# Val/Test: Clean (Resize + Normalize only)
clean_transform = transforms.Compose([
    transforms.Resize(TARGET_HW),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

# 5. CREATE DATASETS & SPLIT
# Two ImageFolder views over the same files: they index the same samples but
# apply different transforms, so one index list can be split across them.
full_data_train = datasets.ImageFolder(root=TRUE_ROOT, transform=train_transform)
full_data_clean = datasets.ImageFolder(root=TRUE_ROOT, transform=clean_transform)

# Calculate Sizes (70/15/15); the test split absorbs the rounding remainder.
total_size = len(full_data_train)
train_size = int(0.70 * total_size)
val_size = int(0.15 * total_size)
test_size = total_size - train_size - val_size

# Generate Reproducible Indices from a dedicated, seeded generator.
generator = torch.Generator().manual_seed(SEED)
indices = torch.randperm(total_size, generator=generator).tolist()

val_end = train_size + val_size
train_idx = indices[:train_size]
val_idx = indices[train_size:val_end]
test_idx = indices[val_end:]

# Create Subsets (Train gets augs, Val/Test get clean)
train_data = Subset(full_data_train, train_idx)
val_data = Subset(full_data_clean, val_idx)
test_data = Subset(full_data_clean, test_idx)

# 6. DATALOADERS
# Only the training loader shuffles; evaluation order stays fixed.
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
val_loader, test_loader = (
    DataLoader(split, batch_size=BATCH_SIZE, shuffle=False)
    for split in (val_data, test_data)
)

# 7. VERIFICATION
first_images, _first_labels = next(iter(train_loader))
print("-" * 30)
print(f"Classes: {full_data_train.classes}")
print(f"Split: Train({len(train_data)}) | Val({len(val_data)}) | Test({len(test_data)})")
print(f"Batch Shape: {first_images.shape}")

Mounted at /content/drive
Extracting dataset...
Dataset Root Found: /content/dataset/DL_PROJECT_DATASET/Final_Dataset
------------------------------
Classes: ['building', 'car', 'lab', 'person', 'tree']
Split: Train(1654) | Val(354) | Test(355)
Batch Shape: torch.Size([32, 3, 224, 224])


In [None]:
import os
from PIL import Image
from tqdm import tqdm

# Allow loading the huge images one last time to resize them
Image.MAX_IMAGE_PIXELS = None

TARGET_SIZE = 500  # Reasonable size (larger than 224, but small enough to load fast)
dataset_root = '/content/dataset/DL_PROJECT_DATASET/Final_Dataset'
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')

print(f"Sanitizing dataset at: {dataset_root}")

# Counters
resized_count = 0
corrupt_count = 0
total_count = 0

# Walk through all files (one progress bar per directory)
for root, dirs, files in os.walk(dataset_root):
    for file in tqdm(files, desc="Processing Images"):
        if not file.lower().endswith(IMAGE_EXTENSIONS):
            continue

        total_count += 1
        file_path = os.path.join(root, file)

        # --- Phase 1: decode -------------------------------------------------
        # Only a failure to open/decode the image counts as corruption.
        shrunk = None
        try:
            with Image.open(file_path) as img:
                # Check if it needs resizing (if either side is huge)
                if img.width > TARGET_SIZE or img.height > TARGET_SIZE:
                    # Convert to RGB (fixes PNG transparency issues if any)
                    shrunk = img.convert('RGB')
                    # Resize maintaining aspect ratio
                    shrunk.thumbnail((TARGET_SIZE, TARGET_SIZE))
        except Exception as e:
            print(f"Corrupt image found and removed: {file_path} ({e})")
            os.remove(file_path)
            corrupt_count += 1
            continue

        if shrunk is None:
            continue

        # --- Phase 2: write --------------------------------------------------
        # Write next to the original and swap it in afterwards, so a failed
        # save (full disk, permissions, ...) never truncates or deletes a
        # readable source image. The temporary name keeps the original
        # extension so PIL picks the same output format as before.
        tmp_path = os.path.join(root, f".resizing_{file}")
        try:
            shrunk.save(tmp_path)
            os.replace(tmp_path, file_path)
            resized_count += 1
        except Exception as e:
            print(f"Could not save resized image, original kept: {file_path} ({e})")
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

print("\n" + "="*30)
print(f"Total Images Scanned: {total_count}")
print(f"Resized Huge Images: {resized_count}")
print(f"Deleted Corrupt Images: {corrupt_count}")
print("="*30)

Sanitizing dataset at: /content/dataset/DL_PROJECT_DATASET/Final_Dataset


Processing Images: 0it [00:00, ?it/s]
Processing Images: 100%|██████████| 573/573 [00:43<00:00, 13.17it/s]
Processing Images: 100%|██████████| 447/447 [01:35<00:00,  4.66it/s]
Processing Images: 100%|██████████| 419/419 [00:55<00:00,  7.54it/s]
Processing Images: 100%|██████████| 368/368 [00:28<00:00, 12.77it/s]
Processing Images: 100%|██████████| 556/556 [00:45<00:00, 12.23it/s]


Total Images Scanned: 2363
Resized Huge Images: 2362
Deleted Corrupt Images: 0





In [None]:
import time
import copy
import torch.nn as nn
from torchvision.models import mobilenet_v2, MobileNet_V2_Weights

# --- CONFIGURATION ---
use_cuda = torch.cuda.is_available()
DEVICE = torch.device("cuda" if use_cuda else "cpu")
LEARNING_RATE = 0.001
EPOCHS = 10
SAVE_PATH = '/content/drive/MyDrive/baseline_mobilenet_v2.pth'

# 1. BUILD MODEL
num_classes = len(full_data_train.classes)
print(f"Building MobileNetV2 for {num_classes} classes...")
model = mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT)

# Freeze the feature extractor: its parameters receive no gradient updates.
for frozen_param in model.features.parameters():
    frozen_param.requires_grad = False

# Swap the classification head. The classifier is a Sequential block whose
# element 1 is the Linear layer; it is replaced by one sized for this dataset.
in_features = model.classifier[1].in_features
model.classifier[1] = nn.Linear(in_features, num_classes)

model = model.to(DEVICE)

# 2. OPTIMIZER & LOSS
# Only the classifier's parameters are handed to the optimizer.
criterion = nn.CrossEntropyLoss()
trainable_params = model.classifier.parameters()
optimizer = torch.optim.Adam(trainable_params, lr=LEARNING_RATE)

# 3. TRAINING FUNCTION
def train_model(model, train_loader, val_loader, epochs,
                optim=None, loss_fn=None, save_path=None, device=None):
    """Train ``model``, checkpoint its best validation epoch, and return it with those weights.

    Args:
        model: network to train (already placed on the target device).
        train_loader: iterable of ``(inputs, labels)`` training batches.
        val_loader: iterable of ``(inputs, labels)`` validation batches.
        epochs: number of passes over ``train_loader``.
        optim: optimizer to step. Defaults to the module-level ``optimizer``.
            Pass it explicitly when other cells may have rebound that global,
            otherwise the wrong model's parameters would be stepped.
        loss_fn: loss callable. Defaults to the module-level ``criterion``.
        save_path: checkpoint destination. Defaults to the module-level ``SAVE_PATH``.
        device: device batches are moved to. Defaults to the module-level ``DEVICE``.

    Returns:
        The same ``model`` object, with the weights of the best validation
        epoch loaded back in.
    """
    # Resolve optional dependencies; the globals are only looked up when needed.
    optim = optimizer if optim is None else optim
    loss_fn = criterion if loss_fn is None else loss_fn
    save_path = SAVE_PATH if save_path is None else save_path
    device = DEVICE if device is None else device

    best_acc = 0.0
    best_weights = copy.deepcopy(model.state_dict())

    print(f"Training on {device}...")

    for epoch in range(epochs):
        start = time.time()

        # --- TRAIN ---
        model.train()
        train_loss, train_correct, train_total = 0, 0, 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optim.zero_grad()
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optim.step()

            # Weight the batch loss by batch size so the epoch average is per-sample.
            train_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            train_correct += (preds == labels).sum().item()
            train_total += labels.size(0)

        train_acc = train_correct / train_total if train_total else 0.0
        avg_train_loss = train_loss / train_total if train_total else 0.0

        # --- VALIDATE ---
        model.eval()
        val_correct, val_total = 0, 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                val_correct += (preds == labels).sum().item()
                val_total += labels.size(0)

        val_acc = val_correct / val_total if val_total else 0.0

        # --- SAVE BEST ---
        # The first epoch is always checkpointed, so the file at save_path is
        # guaranteed to come from this run rather than from an earlier one.
        if epoch == 0 or val_acc > best_acc:
            best_acc = val_acc
            best_weights = copy.deepcopy(model.state_dict())
            torch.save(model.state_dict(), save_path)

        print(f"Epoch {epoch+1}/{epochs} | "
              f"Train Loss: {avg_train_loss:.4f} | "
              f"Train Acc: {train_acc:.4f} | "
              f"Val Acc: {val_acc:.4f} | "
              f"Time: {time.time() - start:.1f}s")

    print(f"\nBest Val Acc: {best_acc:.4f}")
    print(f"Saved to: {save_path}")

    model.load_state_dict(best_weights)
    return model

# 4. RUN
# train_model returns the model it was given, with the weights of the best
# validation epoch loaded back in.
baseline_model = train_model(model, train_loader, val_loader, EPOCHS)

Building MobileNetV2 for 5 classes...
Training on cuda...
Epoch 1/10 | Train Acc: 0.7944 | Val Acc: 0.8842 | Time: 11.5s
Epoch 2/10 | Train Acc: 0.9160 | Val Acc: 0.9011 | Time: 11.5s
Epoch 3/10 | Train Acc: 0.9160 | Val Acc: 0.9096 | Time: 11.4s
Epoch 4/10 | Train Acc: 0.9274 | Val Acc: 0.9096 | Time: 10.7s
Epoch 5/10 | Train Acc: 0.9359 | Val Acc: 0.9181 | Time: 11.3s
Epoch 6/10 | Train Acc: 0.9389 | Val Acc: 0.9153 | Time: 11.3s
Epoch 7/10 | Train Acc: 0.9541 | Val Acc: 0.9294 | Time: 11.5s
Epoch 8/10 | Train Acc: 0.9377 | Val Acc: 0.9266 | Time: 11.3s
Epoch 9/10 | Train Acc: 0.9438 | Val Acc: 0.9237 | Time: 11.3s
Epoch 10/10 | Train Acc: 0.9480 | Val Acc: 0.9266 | Time: 10.8s

Best Val Acc: 0.9294
Saved to: /content/drive/MyDrive/baseline_mobilenet_v2.pth


In [None]:
from sklearn.metrics import classification_report
import numpy as np

def evaluate_baseline(model, loader):
    """Load the best baseline checkpoint into ``model`` and report metrics on ``loader``.

    Args:
        model: network whose architecture matches the state dict stored at ``SAVE_PATH``.
        loader: iterable of ``(inputs, labels)`` batches to score.

    Returns:
        Overall accuracy as a fraction between 0 and 1.
    """
    # Load Best Saved Weights. map_location places the tensors on the active
    # device, so a checkpoint written on a GPU runtime also loads on CPU.
    model.load_state_dict(torch.load(SAVE_PATH, map_location=DEVICE))
    print(f"Loaded weights from {SAVE_PATH}")
    model.eval()

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(DEVICE)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            # .tolist() yields plain ints and works whichever device holds the tensor.
            all_preds.extend(preds.tolist())
            all_labels.extend(labels.tolist())

    # Metrics
    print("\n" + "="*40)
    print("BASELINE MOBILENET V2 - TEST RESULTS")
    print("="*40)
    print(classification_report(all_labels, all_preds,
                                target_names=full_data_train.classes,
                                digits=4))

    # Calculate Overall Accuracy
    correct = sum(int(p == l) for p, l in zip(all_preds, all_labels))
    acc = correct / len(all_labels)
    print(f"Final Test Accuracy: {acc*100:.2f}%")
    return acc

# Score the baseline on the test loader; the returned value is the overall
# accuracy as a fraction (the function prints it as a percentage).
baseline_acc = evaluate_baseline(baseline_model, test_loader)

Loaded weights from /content/drive/MyDrive/baseline_mobilenet_v2.pth

BASELINE MOBILENET V2 - TEST RESULTS
              precision    recall  f1-score   support

    building     0.9130    0.9130    0.9130        46
         car     0.9605    0.9359    0.9481        78
         lab     0.8451    1.0000    0.9160        60
      person     1.0000    0.8472    0.9173        72
        tree     0.9406    0.9596    0.9500        99

    accuracy                         0.9324       355
   macro avg     0.9318    0.9312    0.9289       355
weighted avg     0.9373    0.9324    0.9324       355

Final Test Accuracy: 93.24%


In [None]:
import torch
import torch.nn as nn
from torchvision.models import mobilenet_v2, MobileNet_V2_Weights
import copy
import time

# --- 1. DEFINE CBAM BLOCKS ---
class ChannelAttention(nn.Module):
    """Channel gate of CBAM: one sigmoid weight per channel.

    Global average- and max-pooled descriptors are passed through the same
    two-layer 1x1-conv bottleneck, summed, and squashed with a sigmoid.

    Args:
        in_planes: number of channels of the incoming feature map.
        ratio: bottleneck reduction factor. The hidden width is
            ``in_planes // ratio`` but never less than 1, so small channel
            counts (``in_planes < ratio``) still get a usable bottleneck
            instead of a zero-channel convolution.
    """

    def __init__(self, in_planes, ratio=16):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        hidden_planes = max(1, in_planes // ratio)
        self.fc1 = nn.Conv2d(in_planes, hidden_planes, 1, bias=False)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Conv2d(hidden_planes, in_planes, 1, bias=False)
        self.sigmoid = nn.Sigmoid()

    def _shared_mlp(self, pooled):
        """Apply the bottleneck (fc1 -> ReLU -> fc2) shared by both pooled branches."""
        return self.fc2(self.relu1(self.fc1(pooled)))

    def forward(self, x):
        """Return per-channel attention weights of shape ``(N, C, 1, 1)``."""
        avg_out = self._shared_mlp(self.avg_pool(x))
        max_out = self._shared_mlp(self.max_pool(x))
        return self.sigmoid(avg_out + max_out)

class SpatialAttention(nn.Module):
    """Spatial gate of CBAM: one sigmoid weight per spatial location.

    The channel axis is collapsed into two single-channel maps (mean and max),
    which are stacked and fed through a single ``kernel_size`` convolution.
    """

    def __init__(self, kernel_size=7):
        super().__init__()
        # Half-kernel padding keeps H x W unchanged for odd kernel sizes.
        half_kernel = kernel_size // 2
        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=half_kernel, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return a single-channel attention map computed from ``x``."""
        channel_mean = x.mean(dim=1, keepdim=True)
        channel_max = x.max(dim=1, keepdim=True).values
        stacked = torch.cat((channel_mean, channel_max), dim=1)
        return self.sigmoid(self.conv1(stacked))

class CBAM(nn.Module):
    """Convolutional Block Attention Module: channel gate followed by spatial gate.

    The spatial gate is computed from the channel-refined tensor, not from the
    raw input, so the two stages are strictly sequential.
    """

    def __init__(self, planes):
        super().__init__()
        self.ca = ChannelAttention(planes)
        self.sa = SpatialAttention()

    def forward(self, x):
        """Return ``x`` re-weighted first per channel, then per spatial location."""
        channel_refined = x * self.ca(x)
        return channel_refined * self.sa(channel_refined)

# --- 2. ASSEMBLE MODEL ---
class MobileNetV2_CBAM(nn.Module):
    """MobileNetV2 feature extractor (frozen) + CBAM + a fresh linear classifier.

    Args:
        num_classes: size of the classifier output.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Reuse the feature stack of torchvision's MobileNetV2 with its default
        # pretrained weights, and stop gradients from flowing into it.
        pretrained = mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT)
        self.features = pretrained.features
        for frozen_param in self.features.parameters():
            frozen_param.requires_grad = False

        # Attention over the final feature map
        # (1280 is the output channel size of MobileNetV2).
        self.cbam = CBAM(1280)

        # Classifier head: dropout followed by a single linear layer.
        head_layers = [nn.Dropout(0.2), nn.Linear(1280, num_classes)]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """features -> CBAM -> global average pool -> flatten -> classifier."""
        attended = self.cbam(self.features(x))          # <--- The Novelty
        pooled = nn.functional.adaptive_avg_pool2d(attended, (1, 1))
        return self.classifier(pooled.flatten(1))

# --- 3. INIT & TRAIN ---
print("Building MobileNetV2 + CBAM...")
n_classes = len(full_data_train.classes)
cbam_model = MobileNetV2_CBAM(num_classes=n_classes).to(DEVICE)

# The backbone is frozen, so the optimizer only receives the CBAM block and
# the classifier head (in that order).
params_to_update = [
    *cbam_model.cbam.parameters(),
    *cbam_model.classifier.parameters(),
]

# NOTE: this rebinds the global `optimizer` that the training functions read.
optimizer = torch.optim.Adam(params_to_update, lr=0.001)
# The loss (`criterion`) defined for the baseline is reused as-is.
SAVE_PATH_CBAM = '/content/drive/MyDrive/mobilenet_v2_cbam.pth'

# train_model() checkpoints to SAVE_PATH, so a dedicated loop that writes to
# SAVE_PATH_CBAM is defined below instead of reusing it.
def train_cbam(model, epochs):
    """Train ``model`` and checkpoint its best validation epoch to ``SAVE_PATH_CBAM``.

    Reads the module-level ``train_loader``, ``val_loader``, ``optimizer``,
    ``criterion`` and ``DEVICE``.
    NOTE(review): other cells rebind ``optimizer``; if they have run since the
    CBAM optimizer was created, re-create it before calling this again,
    otherwise this model's parameters are never updated.

    Args:
        model: the CBAM network to train (already on ``DEVICE``).
        epochs: number of passes over ``train_loader``.

    Returns:
        None. The best weights are written to ``SAVE_PATH_CBAM``.
    """
    best_acc = 0.0
    print(f"Training CBAM Model on {DEVICE}...")

    for epoch in range(epochs):
        # --- Train ---
        model.train()
        train_correct, train_total = 0, 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            _, preds = torch.max(outputs, 1)
            train_correct += (preds == labels).sum().item()
            train_total += labels.size(0)

        train_acc = train_correct / train_total if train_total else 0.0

        # --- Val ---
        model.eval()
        val_correct, val_total = 0, 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                val_correct += (preds == labels).sum().item()
                val_total += labels.size(0)

        val_acc = val_correct / val_total
        # Training accuracy was already being accumulated; report it alongside
        # validation accuracy so over/under-fitting is visible.
        print(f"Epoch {epoch+1} | Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f}")

        # Always checkpoint the first epoch so the file at SAVE_PATH_CBAM is
        # guaranteed to belong to this run, then only on improvement.
        if epoch == 0 or val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), SAVE_PATH_CBAM)

    print(f"Best CBAM Val Acc: {best_acc:.4f}")

# RUN
# train_cbam returns nothing; the best checkpoint is written to SAVE_PATH_CBAM.
train_cbam(cbam_model, epochs=10)

Building MobileNetV2 + CBAM...
Training CBAM Model on cuda...
Epoch 1 | Val Acc: 0.9011
Epoch 2 | Val Acc: 0.9096
Epoch 3 | Val Acc: 0.9011
Epoch 4 | Val Acc: 0.9237
Epoch 5 | Val Acc: 0.9237
Epoch 6 | Val Acc: 0.9379
Epoch 7 | Val Acc: 0.9350
Epoch 8 | Val Acc: 0.9350
Epoch 9 | Val Acc: 0.9350
Epoch 10 | Val Acc: 0.9294
Best CBAM Val Acc: 0.9379


In [None]:
# Reuse the evaluation logic but for the CBAM model
def evaluate_cbam(model, loader):
    """Load the best CBAM checkpoint into ``model`` and report metrics on ``loader``.

    Args:
        model: network whose architecture matches the state dict stored at ``SAVE_PATH_CBAM``.
        loader: iterable of ``(inputs, labels)`` batches to score.

    Returns:
        Overall accuracy as a fraction between 0 and 1.
    """
    # Load Best Saved Weights. map_location places the tensors on the active
    # device, so a checkpoint written on a GPU runtime also loads on CPU.
    model.load_state_dict(torch.load(SAVE_PATH_CBAM, map_location=DEVICE))
    print(f"Loaded weights from {SAVE_PATH_CBAM}")
    model.eval()

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(DEVICE)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            # .tolist() yields plain ints and works whichever device holds the tensor.
            all_preds.extend(preds.tolist())
            all_labels.extend(labels.tolist())

    # Metrics
    print("\n" + "="*40)
    print("NOVELTY A (CBAM) - TEST RESULTS")
    print("="*40)
    print(classification_report(all_labels, all_preds,
                                target_names=full_data_train.classes,
                                digits=4))

    correct = sum(int(p == l) for p, l in zip(all_preds, all_labels))
    acc = correct / len(all_labels)
    print(f"Final CBAM Test Accuracy: {acc*100:.2f}%")
    return acc

# Score the CBAM model on the test loader; the returned value is the overall
# accuracy as a fraction (the function prints it as a percentage).
cbam_test_acc = evaluate_cbam(cbam_model, test_loader)

Loaded weights from /content/drive/MyDrive/mobilenet_v2_cbam.pth

NOVELTY A (CBAM) - TEST RESULTS
              precision    recall  f1-score   support

    building     0.8958    0.9348    0.9149        46
         car     0.9605    0.9359    0.9481        78
         lab     0.8955    1.0000    0.9449        60
      person     0.9844    0.8750    0.9265        72
        tree     0.9600    0.9697    0.9648        99

    accuracy                         0.9437       355
   macro avg     0.9393    0.9431    0.9398       355
weighted avg     0.9458    0.9437    0.9435       355

Final CBAM Test Accuracy: 94.37%


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models import mobilenet_v2, MobileNet_V2_Weights

# --- 1. DEFINE COORDINATE ATTENTION ---
class CoordAtt(nn.Module):
    """Coordinate attention: gates a feature map with per-row and per-column weights.

    Args:
        inp: channels of the incoming feature map.
        oup: channels of the two gates. They are multiplied with the input, so
            this has to be broadcast-compatible with ``inp``.
        reduction: bottleneck reduction factor; the hidden width is
            ``max(8, inp // reduction)``.
    """

    def __init__(self, inp, oup, reduction=32):
        super().__init__()
        # Directional pooling: average across width (keeps H) and across height (keeps W).
        self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
        self.pool_w = nn.AdaptiveAvgPool2d((1, None))

        mip = max(8, inp // reduction)

        # Shared 1x1 encoder applied to the concatenated directional descriptors.
        self.conv1 = nn.Conv2d(inp, mip, kernel_size=1)
        self.bn1 = nn.BatchNorm2d(mip)
        self.act = nn.Hardswish()

        # One 1x1 projection per direction, mapping back to `oup` channels.
        self.conv_h = nn.Conv2d(mip, oup, kernel_size=1)
        self.conv_w = nn.Conv2d(mip, oup, kernel_size=1)

    def forward(self, x):
        """Return ``x`` scaled by the width gate and then by the height gate."""
        _, _, height, width = x.shape

        # 1. Coordinate pooling; the width descriptor is transposed so both
        #    descriptors can be stacked along dim 2.
        rows = self.pool_h(x)
        cols = self.pool_w(x).transpose(2, 3)

        # 2. Encode both directions with the shared bottleneck.
        encoded = self.act(self.bn1(self.conv1(torch.cat((rows, cols), dim=2))))

        # 3. Split back into the two directions and undo the transpose.
        rows, cols = encoded.split((height, width), dim=2)
        cols = cols.transpose(2, 3)

        # 4. Per-direction sigmoid gates.
        gate_h = torch.sigmoid(self.conv_h(rows))
        gate_w = torch.sigmoid(self.conv_w(cols))

        # 5. Apply
        return x * gate_w * gate_h

# --- 2. ASSEMBLE MODEL (CA) ---
class MobileNetV2_CA(nn.Module):
    """MobileNetV2 feature extractor (frozen) + coordinate attention + linear classifier.

    Args:
        num_classes: size of the classifier output.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Feature stack of torchvision's MobileNetV2 with its default
        # pretrained weights; gradients are disabled for all of it.
        pretrained = mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT)
        self.features = pretrained.features
        for frozen_param in self.features.parameters():
            frozen_param.requires_grad = False

        # Coordinate attention over the 1280-channel feature map.
        self.ca = CoordAtt(1280, 1280)

        # Classifier head: dropout followed by a single linear layer.
        head_layers = [nn.Dropout(0.2), nn.Linear(1280, num_classes)]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """features -> coordinate attention -> global average pool -> flatten -> classifier."""
        attended = self.ca(self.features(x))          # <--- The Novelty B
        pooled = nn.functional.adaptive_avg_pool2d(attended, (1, 1))
        return self.classifier(pooled.flatten(1))

# --- 3. INIT & TRAIN ---
print("Building MobileNetV2 + Coordinate Attention...")
n_classes = len(full_data_train.classes)
ca_model = MobileNetV2_CA(num_classes=n_classes).to(DEVICE)

# The backbone is frozen, so the optimizer only receives the attention block
# and the classifier head (in that order).
params_to_update = [
    *ca_model.ca.parameters(),
    *ca_model.classifier.parameters(),
]
# NOTE: this rebinds the global `optimizer` that the training functions read.
optimizer = torch.optim.Adam(params_to_update, lr=0.001)

SAVE_PATH_CA = '/content/drive/MyDrive/mobilenet_v2_ca.pth'

# Custom Train Loop for CA
def train_ca(model, epochs):
    """Train ``model`` and checkpoint its best validation epoch to ``SAVE_PATH_CA``.

    Reads the module-level ``train_loader``, ``val_loader``, ``optimizer``,
    ``criterion`` and ``DEVICE``.
    NOTE(review): other cells rebind ``optimizer``; if they have run since the
    CA optimizer was created, re-create it before calling this again,
    otherwise this model's parameters are never updated.

    Args:
        model: the coordinate-attention network to train (already on ``DEVICE``).
        epochs: number of passes over ``train_loader``.

    Returns:
        None. The best weights are written to ``SAVE_PATH_CA``.
    """
    best_acc = 0.0
    print(f"Training CA Model on {DEVICE}...")

    for epoch in range(epochs):
        # --- Train (no training metrics are tracked here) ---
        model.train()
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # --- Val ---
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)

        val_acc = correct / total
        print(f"Epoch {epoch+1} | Val Acc: {val_acc:.4f}")

        # Always checkpoint the first epoch so the file at SAVE_PATH_CA is
        # guaranteed to belong to this run, then only on improvement.
        if epoch == 0 or val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), SAVE_PATH_CA)

    print(f"Best CA Val Acc: {best_acc:.4f}")

# RUN
# train_ca returns nothing; the best checkpoint is written to SAVE_PATH_CA.
train_ca(ca_model, epochs=10)

Building MobileNetV2 + Coordinate Attention...
Training CA Model on cuda...
Epoch 1 | Val Acc: 0.9040
Epoch 2 | Val Acc: 0.9209
Epoch 3 | Val Acc: 0.9209
Epoch 4 | Val Acc: 0.9322
Epoch 5 | Val Acc: 0.9407
Epoch 6 | Val Acc: 0.9350
Epoch 7 | Val Acc: 0.9350
Epoch 8 | Val Acc: 0.9407
Epoch 9 | Val Acc: 0.9322
Epoch 10 | Val Acc: 0.9435
Best CA Val Acc: 0.9435


In [None]:
from sklearn.metrics import classification_report

def evaluate_ca_fixed(model, loader):
    """Load the best CA checkpoint into ``model`` and report metrics on ``loader``.

    Class names are taken from the loader's dataset when available, so the
    function does not depend on a particular global dataset variable.

    Args:
        model: network whose architecture matches the state dict stored at ``SAVE_PATH_CA``.
        loader: DataLoader yielding ``(inputs, labels)`` batches.

    Returns:
        Overall accuracy as a fraction between 0 and 1.
    """
    # 1. Resolve class names: directly on the dataset, or one level down when
    #    the dataset wraps another one (e.g. a Subset).
    dataset = loader.dataset
    wrapped = getattr(dataset, 'dataset', None)
    if hasattr(dataset, 'classes'):
        class_names = dataset.classes
    elif wrapped is not None and hasattr(wrapped, 'classes'):
        class_names = wrapped.classes
    else:
        # Fallback if all else fails
        class_names = ['building', 'car', 'lab', 'person', 'tree']

    print(f"Evaluated on classes: {class_names}")

    # 2. Load weights. map_location places the tensors on the active device,
    #    so a checkpoint written on a GPU runtime also loads on CPU.
    model.load_state_dict(torch.load(SAVE_PATH_CA, map_location=DEVICE))
    model.eval()

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(DEVICE)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            # .tolist() yields plain ints and works whichever device holds the tensor.
            all_preds.extend(preds.tolist())
            all_labels.extend(labels.tolist())

    # Metrics
    print("\n" + "="*40)
    print("NOVELTY B (COORD ATTENTION) - TEST RESULTS")
    print("="*40)
    print(classification_report(all_labels, all_preds,
                                target_names=class_names,
                                digits=4))

    correct = sum(int(p == l) for p, l in zip(all_preds, all_labels))
    acc = correct / len(all_labels)
    print(f"Final CA Test Accuracy: {acc*100:.2f}%")
    return acc

# Score the coordinate-attention model on the test loader; the returned value
# is the overall accuracy as a fraction (the function prints it as a percentage).
ca_test_acc = evaluate_ca_fixed(ca_model, test_loader)

Evaluated on classes: ['building', 'car', 'lab', 'person', 'tree']

NOVELTY B (COORD ATTENTION) - TEST RESULTS
              precision    recall  f1-score   support

    building     0.8958    0.9348    0.9149        46
         car     0.9868    0.9615    0.9740        78
         lab     0.9677    1.0000    0.9836        60
      person     0.9861    0.9861    0.9861        72
        tree     0.9794    0.9596    0.9694        99

    accuracy                         0.9690       355
   macro avg     0.9632    0.9684    0.9656       355
weighted avg     0.9696    0.9690    0.9691       355

Final CA Test Accuracy: 96.90%


In [None]:
import torch
import torch.nn as nn
from torchvision.models import mobilenet_v3_large, MobileNet_V3_Large_Weights

# --- 1. CONFIGURATION ---
# NOTE: DEVICE, LEARNING_RATE and EPOCHS are assigned again here, replacing
# any values set by earlier cells.
use_cuda = torch.cuda.is_available()
DEVICE = torch.device("cuda" if use_cuda else "cpu")
LEARNING_RATE = 0.001
EPOCHS = 10
SAVE_PATH_V3 = '/content/drive/MyDrive/mobilenet_v3_ref.pth'
NUM_CLASSES = 5  # Hardcoded for stability

# --- 2. BUILD MODEL ---
print("Building MobileNetV3 (Large) Reference...")
v3_model = mobilenet_v3_large(weights=MobileNet_V3_Large_Weights.DEFAULT)

# Freeze the feature extractor: its parameters receive no gradient updates.
for frozen_param in v3_model.features.parameters():
    frozen_param.requires_grad = False

# Replace the last classifier layer with one sized for NUM_CLASSES outputs.
in_features = v3_model.classifier[-1].in_features
v3_model.classifier[-1] = nn.Linear(in_features, NUM_CLASSES)

v3_model = v3_model.to(DEVICE)

# --- 3. TRAIN ---
# Only the classifier's parameters are optimised. This rebinds the global
# `optimizer` and `criterion` names.
trainable_params = v3_model.classifier.parameters()
optimizer = torch.optim.Adam(trainable_params, lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss()

def _v3_accuracy(loader):
    """Return the fraction of samples in ``loader`` that ``v3_model`` classifies correctly."""
    v3_model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            outputs = v3_model(inputs)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    return correct / total


def train_and_evaluate_v3():
    """Train the MobileNetV3 reference, then score its best checkpoint on the test set.

    Reads the module-level ``v3_model``, ``optimizer``, ``criterion``,
    ``train_loader``, ``val_loader``, ``test_loader``, ``EPOCHS``, ``DEVICE``
    and ``SAVE_PATH_V3``.

    Returns:
        Test accuracy of the best-validation checkpoint, as a fraction
        between 0 and 1.
    """
    best_acc = 0.0
    print(f"Training MobileNetV3 on {DEVICE}...")

    for epoch in range(EPOCHS):
        # TRAIN LOOP
        v3_model.train()
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            outputs = v3_model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # VAL LOOP
        val_acc = _v3_accuracy(val_loader)
        print(f"Epoch {epoch+1} | Val Acc: {val_acc:.4f}")

        # Always checkpoint the first epoch so the file loaded below is
        # guaranteed to come from this run, then only on improvement.
        if epoch == 0 or val_acc > best_acc:
            best_acc = val_acc
            torch.save(v3_model.state_dict(), SAVE_PATH_V3)

    print(f"Best V3 Val Acc: {best_acc:.4f}")

    # TEST LOOP (Immediate)
    print("\nEvaluating V3 on Test Set...")
    # Load best; map_location keeps the tensors on the active device.
    v3_model.load_state_dict(torch.load(SAVE_PATH_V3, map_location=DEVICE))

    final_acc = _v3_accuracy(test_loader)
    print(f"Final MobileNetV3 Test Accuracy: {final_acc*100:.2f}%")
    return final_acc

# RUN
# Trains and tests in one call; the returned value is the test accuracy as a fraction.
v3_test_acc = train_and_evaluate_v3()

Building MobileNetV3 (Large) Reference...
Training MobileNetV3 on cuda...
Epoch 1 | Val Acc: 0.8672
Epoch 2 | Val Acc: 0.9266
Epoch 3 | Val Acc: 0.9322
Epoch 4 | Val Acc: 0.9463
Epoch 5 | Val Acc: 0.9350
Epoch 6 | Val Acc: 0.9463
Epoch 7 | Val Acc: 0.9350
Epoch 8 | Val Acc: 0.9350
Epoch 9 | Val Acc: 0.9350
Epoch 10 | Val Acc: 0.9266
Best V3 Val Acc: 0.9463

Evaluating V3 on Test Set...
Final MobileNetV3 Test Accuracy: 96.34%


In [None]:
import torch
import time
import numpy as np
import torch.nn as nn
from torchvision.models import mobilenet_v2, mobilenet_v3_large

# --- CONFIGURATION ---
# NOTE: DEVICE and NUM_CLASSES are assigned again here, replacing any values
# set by earlier cells.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Output size used when rebuilding the classifier heads below; it has to match
# the checkpoints being loaded.
NUM_CLASSES = 5
DUMMY_INPUT = torch.randn(1, 3, 224, 224).to(DEVICE) # Simulates 1 image
# Checkpoint file per benchmarked model. The keys are the display names printed
# in the results table and are also matched by substring in load_model().
PATHS = {
    'Baseline (V2)': '/content/drive/MyDrive/baseline_mobilenet_v2.pth',
    'Novelty A (CBAM)': '/content/drive/MyDrive/mobilenet_v2_cbam.pth',
    'Novelty B (CA)': '/content/drive/MyDrive/mobilenet_v2_ca.pth',
    'Reference (V3)': '/content/drive/MyDrive/mobilenet_v3_ref.pth'
}

# --- RE-DEFINE ARCHITECTURES (Required for loading) ---
# 1. CBAM Components
class ChannelAttention(nn.Module):
    """Channel gate of CBAM: one sigmoid weight per channel.

    Global average- and max-pooled descriptors go through the same two-layer
    1x1-conv bottleneck, are summed, and squashed with a sigmoid.

    Args:
        in_planes: number of channels of the incoming feature map.
        ratio: bottleneck reduction factor. The hidden width is
            ``in_planes // ratio`` but never less than 1, so small channel
            counts (``in_planes < ratio``) do not produce a zero-channel
            convolution.
    """
    def __init__(self, in_planes, ratio=16):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        hidden_planes = max(1, in_planes // ratio)
        self.fc1 = nn.Conv2d(in_planes, hidden_planes, 1, bias=False)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Conv2d(hidden_planes, in_planes, 1, bias=False)
        self.sigmoid = nn.Sigmoid()

    def _shared_mlp(self, pooled):
        """Apply the bottleneck (fc1 -> ReLU -> fc2) shared by both pooled branches."""
        return self.fc2(self.relu1(self.fc1(pooled)))

    def forward(self, x):
        """Return per-channel attention weights of shape ``(N, C, 1, 1)``."""
        return self.sigmoid(self._shared_mlp(self.avg_pool(x)) +
                            self._shared_mlp(self.max_pool(x)))

class SpatialAttention(nn.Module):
    """Spatial gate of CBAM: a sigmoid map built from channel-wise mean and max."""
    def __init__(self, kernel_size=7):
        super().__init__()
        # Half-kernel padding keeps H x W unchanged for odd kernel sizes.
        pad = kernel_size // 2
        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=pad, bias=False)
        self.sigmoid = nn.Sigmoid()
    def forward(self, x):
        """Return a single-channel attention map computed from ``x``."""
        mean_map = x.mean(dim=1, keepdim=True)
        max_map, _ = x.max(dim=1, keepdim=True)
        logits = self.conv1(torch.cat((mean_map, max_map), dim=1))
        return self.sigmoid(logits)

class CBAM(nn.Module):
    """Convolutional Block Attention Module: channel gate followed by spatial gate.

    The spatial gate must be computed from the channel-refined tensor. That is
    how the CBAM class used for training in this notebook computes it, so a
    checkpoint loaded into this class only reproduces the trained model when
    the two stages are applied sequentially.
    """
    def __init__(self, planes):
        super(CBAM, self).__init__()
        self.ca = ChannelAttention(planes)
        self.sa = SpatialAttention()
    def forward(self, x):
        """Return ``x`` re-weighted first per channel, then per spatial location."""
        x = x * self.ca(x)
        # Spatial attention sees the channel-refined features, not the raw input.
        return x * self.sa(x)

class MobileNetV2_CBAM(nn.Module):
    """MobileNetV2 features + CBAM + linear head, rebuilt for checkpoint loading.

    No pretrained weights are requested here; the parameters are expected to
    come from a saved state dict. The head size is read from the module-level
    ``NUM_CLASSES``.
    """
    def __init__(self):
        super().__init__()
        self.features = mobilenet_v2().features
        self.cbam = CBAM(1280)
        head_layers = [nn.Dropout(0.2), nn.Linear(1280, NUM_CLASSES)]
        self.classifier = nn.Sequential(*head_layers)
    def forward(self, x):
        """features -> CBAM -> global average pool -> flatten -> classifier."""
        attended = self.cbam(self.features(x))
        pooled = nn.functional.adaptive_avg_pool2d(attended, (1, 1))
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)

# 2. CoordAtt Components
class CoordAtt(nn.Module):
    """Coordinate attention: gates a feature map with per-row and per-column weights.

    The hidden width of the shared encoder is ``max(8, inp // reduction)``.
    """
    def __init__(self, inp, oup, reduction=32):
        super().__init__()
        # Directional pooling: average across width (keeps H) and across height (keeps W).
        self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
        self.pool_w = nn.AdaptiveAvgPool2d((1, None))
        mip = max(8, inp // reduction)
        self.conv1 = nn.Conv2d(inp, mip, kernel_size=1)
        self.bn1 = nn.BatchNorm2d(mip)
        self.act = nn.Hardswish()
        self.conv_h = nn.Conv2d(mip, oup, kernel_size=1)
        self.conv_w = nn.Conv2d(mip, oup, kernel_size=1)
    def forward(self, x):
        """Return ``x`` scaled by the height gate and then by the width gate."""
        _, _, height, width = x.shape
        rows = self.pool_h(x)
        # Transposed so both descriptors can be stacked along dim 2.
        cols = self.pool_w(x).transpose(2, 3)
        stacked = torch.cat((rows, cols), dim=2)
        encoded = self.act(self.bn1(self.conv1(stacked)))
        rows, cols = encoded.split((height, width), dim=2)
        gate_h = torch.sigmoid(self.conv_h(rows))
        gate_w = torch.sigmoid(self.conv_w(cols.transpose(2, 3)))
        return x * gate_h * gate_w

class MobileNetV2_CA(nn.Module):
    """MobileNetV2 features + coordinate attention + linear head, rebuilt for checkpoint loading.

    No pretrained weights are requested here; the parameters are expected to
    come from a saved state dict. The head size is read from the module-level
    ``NUM_CLASSES``.
    """
    def __init__(self):
        super().__init__()
        self.features = mobilenet_v2().features
        self.ca = CoordAtt(1280, 1280)
        head_layers = [nn.Dropout(0.2), nn.Linear(1280, NUM_CLASSES)]
        self.classifier = nn.Sequential(*head_layers)
    def forward(self, x):
        """features -> coordinate attention -> global average pool -> flatten -> classifier."""
        attended = self.ca(self.features(x))
        pooled = nn.functional.adaptive_avg_pool2d(attended, (1, 1))
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)

# --- HELPER TO BUILD AND LOAD ---
def load_model(name):
    """Build the architecture selected by `name` and load its checkpoint from PATHS.

    The architecture is chosen by substring: 'Baseline', 'CBAM', 'CA' or 'V3'
    (tested in that order).

    Args:
        name: display name of the model; also the key looked up in PATHS.

    Returns:
        The model moved to DEVICE and switched to eval mode, or None when the
        checkpoint file does not exist (a warning is printed in that case).

    Raises:
        ValueError: if `name` matches none of the known architectures.
    """
    if 'Baseline' in name:
        m = mobilenet_v2()
        m.classifier[1] = nn.Linear(m.classifier[1].in_features, NUM_CLASSES)
    elif 'CBAM' in name:
        m = MobileNetV2_CBAM()
    elif 'CA' in name:
        m = MobileNetV2_CA()
    elif 'V3' in name:
        m = mobilenet_v3_large()
        m.classifier[-1] = nn.Linear(m.classifier[-1].in_features, NUM_CLASSES)
    else:
        # Previously this fell through and crashed later with an unbound-variable error.
        raise ValueError(
            f"Unknown model name {name!r}: expected it to contain "
            "'Baseline', 'CBAM', 'CA' or 'V3'"
        )

    # Only the file read is guarded; a key/shape mismatch in load_state_dict
    # is a real error and is allowed to propagate.
    # NOTE: torch.load unpickles the file, so only use it on trusted checkpoints.
    try:
        state_dict = torch.load(PATHS[name], map_location=DEVICE)
    except FileNotFoundError:
        print(f"Warning: Could not find weights for {name}")
        return None

    m.load_state_dict(state_dict)
    m.to(DEVICE)
    m.eval()
    return m

# --- SPEED TEST ---
# Single-input latency benchmark for every checkpoint listed in PATHS.
# CUDA kernels are launched asynchronously, so the device is synchronised
# before the clock is started and again before it is stopped; otherwise the
# loop would mostly measure kernel-launch overhead rather than execution time.
N_WARMUP = 20   # untimed passes so lazy initialisation does not pollute the timing
N_TIMED = 100   # timed passes averaged into the reported latency

print(f"{'Model Name':<20} | {'Parameters':<10} | {'Inference (ms)':<15} | {'FPS':<10}")
print("-" * 65)

for name in PATHS:
    model = load_model(name)
    if model is None:
        continue  # checkpoint missing; load_model has already printed a warning

    # 1. Count Params
    params = sum(p.numel() for p in model.parameters())

    # Both warm-up and timed passes run without autograd bookkeeping.
    with torch.no_grad():
        # 2. Warmup (GPU needs this)
        for _ in range(N_WARMUP):
            model(DUMMY_INPUT)
        if torch.cuda.is_available():
            torch.cuda.synchronize()

        # 3. Timed Run (perf_counter is monotonic and high-resolution)
        start = time.perf_counter()
        for _ in range(N_TIMED):
            model(DUMMY_INPUT)
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        end = time.perf_counter()

    total_time = end - start
    avg_time_ms = (total_time / N_TIMED) * 1000
    fps = N_TIMED / total_time

    print(f"{name:<20} | {params/1e6:.2f}M       | {avg_time_ms:.2f} ms        | {fps:.0f}")

Model Name           | Parameters | Inference (ms)  | FPS       
-----------------------------------------------------------------
Baseline (V2)        | 2.23M       | 11.43 ms        | 87
Novelty A (CBAM)     | 2.44M       | 5.94 ms        | 168
Novelty B (CA)       | 2.39M       | 7.08 ms        | 141
Reference (V3)       | 4.21M       | 8.48 ms        | 118


In [None]:
import torch
import torch.nn as nn
import time
import numpy as np
from torchvision.models import mobilenet_v2, mobilenet_v3_large

# --- CONFIGURATION ---
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Number of outputs of every classifier head built in this cell.
NUM_CLASSES = 5

# Display name -> checkpoint file. Insertion order is the row order of the report.
PATHS = dict([
    ('Baseline (V2)', '/content/drive/MyDrive/baseline_mobilenet_v2.pth'),
    ('Novelty A (CBAM)', '/content/drive/MyDrive/mobilenet_v2_cbam.pth'),
    ('Novelty B (CA)', '/content/drive/MyDrive/mobilenet_v2_ca.pth'),
    ('Reference (V3)', '/content/drive/MyDrive/mobilenet_v3_ref.pth'),
])

# --- ARCHITECTURE DEFINITIONS (Needed to load weights) ---
class CBAM(nn.Module):
    """Placeholder CBAM module whose forward pass is the identity.

    The `ca` and `sa` sub-modules are still registered, so the parameter names
    in the state dict are unchanged, but `forward` does not apply them.
    `MobileNetV2_CBAM` in this cell is built from `CBAM_Block`, not from this
    class.

    NOTE(review): an earlier cell constructs its model with a class named
    `CBAM`; re-running that cell after this one would pick up this placeholder
    instead. Confirm whether this definition can be removed.

    Args:
        planes: number of input channels; the channel branch squeezes to
            ``planes // 16`` channels.
    """
    def __init__(self, planes):
        super(CBAM, self).__init__()
        self.ca = nn.Sequential(nn.AdaptiveAvgPool2d(1), nn.Conv2d(planes, planes//16, 1), nn.ReLU(), nn.Conv2d(planes//16, planes, 1), nn.Sigmoid())
        self.sa = nn.Sequential(nn.Conv2d(2, 1, 7, padding=3), nn.Sigmoid())
    def forward(self, x):
        """Return `x` unchanged.

        The previous body evaluated the channel branch and then discarded the
        result; that wasted computation has been removed.
        """
        return x

# REDEFINE EXACT CLASSES USED IN TRAINING
class ChannelAttention(nn.Module):
    """Per-channel sigmoid gate built from globally avg- and max-pooled features.

    Both pooled descriptors go through the same two bias-free 1x1 convolutions
    (``fc1`` -> ReLU -> ``fc2``); their sum is passed through a sigmoid.

    Args:
        in_planes: number of input channels.
        ratio: squeeze factor; the hidden width is ``in_planes // ratio``.
    """

    def __init__(self, in_planes, ratio=16):
        super().__init__()
        hidden = in_planes // ratio
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.fc1 = nn.Conv2d(in_planes, hidden, 1, bias=False)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Conv2d(hidden, in_planes, 1, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the channel gate computed from `x` (one value per channel)."""
        avg_branch = self.fc2(self.relu1(self.fc1(self.avg_pool(x))))
        max_branch = self.fc2(self.relu1(self.fc1(self.max_pool(x))))
        return self.sigmoid(avg_branch + max_branch)

class SpatialAttention(nn.Module):
    """Single-channel spatial sigmoid gate.

    The channel-wise mean and channel-wise max of the input are stacked into a
    two-channel map, convolved by one bias-free conv, and passed through a sigmoid.

    Args:
        kernel_size: side of the convolution kernel; padding is
            ``kernel_size // 2``.
    """

    def __init__(self, kernel_size=7):
        super().__init__()
        pad = kernel_size // 2
        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=pad, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the spatial gate computed from `x`."""
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True).values
        stacked = torch.cat([mean_map, max_map], dim=1)
        return self.sigmoid(self.conv1(stacked))

class CBAM_Block(nn.Module):
    """Rescale the input by a channel gate and a spatial gate.

    Both gates are computed from the same, unmodified input tensor and then
    multiplied onto it.

    Args:
        planes: number of input channels, forwarded to ChannelAttention.
    """

    def __init__(self, planes):
        super().__init__()
        self.ca = ChannelAttention(planes)
        self.sa = SpatialAttention()

    def forward(self, x):
        """Return ``x * ca(x) * sa(x)``."""
        channel_scaled = x * self.ca(x)
        # The spatial gate is derived from the original x, not from channel_scaled.
        return channel_scaled * self.sa(x)

class MobileNetV2_CBAM(nn.Module):
    """MobileNetV2 feature trunk followed by a CBAM_Block and a linear head."""

    def __init__(self):
        super().__init__()
        # Keep only the `features` part of a freshly constructed MobileNetV2.
        self.features = mobilenet_v2().features
        self.cbam = CBAM_Block(1280)
        head_dropout = nn.Dropout(0.2)
        head_linear = nn.Linear(1280, NUM_CLASSES)
        self.classifier = nn.Sequential(head_dropout, head_linear)

    def forward(self, x):
        """Run features -> CBAM_Block -> global average pool -> classifier."""
        attended = self.cbam(self.features(x))
        pooled = nn.functional.adaptive_avg_pool2d(attended, (1, 1))
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)

class CoordAtt(nn.Module):
    """Coordinate-attention gate.

    The input is average-pooled along its width and along its height, the two
    pooled strips are pushed through a shared 1x1 conv + batch norm + Hardswish,
    and each strip then produces a sigmoid gate that rescales the input.

    Args:
        inp: number of input channels.
        oup: number of channels produced by the two gate convolutions.
        reduction: divisor for the bottleneck width, which is
            ``max(8, inp // reduction)``.
    """

    def __init__(self, inp, oup, reduction=32):
        super().__init__()
        # (None, 1) keeps the height and pools the width away; (1, None) does the opposite.
        self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
        self.pool_w = nn.AdaptiveAvgPool2d((1, None))
        mip = max(8, inp // reduction)
        # All three convolutions are pointwise (1x1, stride 1, no padding).
        self.conv1 = nn.Conv2d(inp, mip, 1)
        self.bn1 = nn.BatchNorm2d(mip)
        self.act = nn.Hardswish()
        self.conv_h = nn.Conv2d(mip, oup, 1)
        self.conv_w = nn.Conv2d(mip, oup, 1)

    def forward(self, x):
        """Return ``x`` multiplied by its height-wise and width-wise sigmoid gates."""
        _, _, height, width = x.size()
        strip_h = self.pool_h(x)
        # Swap the last two axes so both strips can be stacked along dim 2.
        strip_w = self.pool_w(x).permute(0, 1, 3, 2)
        mixed = torch.cat((strip_h, strip_w), dim=2)
        mixed = self.conv1(mixed)
        mixed = self.bn1(mixed)
        mixed = self.act(mixed)
        # Undo the stacking: first `height` rows belong to the h-strip, the rest to the w-strip.
        part_h, part_w = torch.split(mixed, [height, width], dim=2)
        gate_h = self.conv_h(part_h).sigmoid()
        gate_w = self.conv_w(part_w.permute(0, 1, 3, 2)).sigmoid()
        return x * gate_h * gate_w

class MobileNetV2_CA(nn.Module):
    """MobileNetV2 feature trunk followed by a CoordAtt block and a linear head."""

    def __init__(self):
        super().__init__()
        # Keep only the `features` part of a freshly constructed MobileNetV2.
        self.features = mobilenet_v2().features
        self.ca = CoordAtt(1280, 1280)
        head_dropout = nn.Dropout(0.2)
        head_linear = nn.Linear(1280, NUM_CLASSES)
        self.classifier = nn.Sequential(head_dropout, head_linear)

    def forward(self, x):
        """Run features -> CoordAtt -> global average pool -> classifier."""
        attended = self.ca(self.features(x))
        pooled = nn.functional.adaptive_avg_pool2d(attended, (1, 1))
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)

# --- METRIC GATHERING ---
def _build_model(name):
    """Return a freshly constructed architecture chosen by substring of `name`, or None."""
    if 'Baseline' in name:
        model = mobilenet_v2()
        model.classifier[1] = nn.Linear(model.classifier[1].in_features, NUM_CLASSES)
    elif 'CBAM' in name:
        model = MobileNetV2_CBAM()
    elif 'CA' in name:
        model = MobileNetV2_CA()
    elif 'V3' in name:
        model = mobilenet_v3_large()
        model.classifier[-1] = nn.Linear(model.classifier[-1].in_features, NUM_CLASSES)
    else:
        return None
    return model


def _accuracy_percent(model, loader):
    """Return top-1 accuracy in percent over `loader`; NaN when the loader yields nothing."""
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    # Guard against an empty loader instead of dividing by zero.
    return 100 * correct / total if total else float("nan")


def _time_single_input(model, warmup_runs, timed_runs):
    """Return (average latency in ms, passes per second) for one 1x3x224x224 input."""
    dummy = torch.randn(1, 3, 224, 224).to(DEVICE)
    with torch.no_grad():
        # Warm-up passes are not timed.
        for _ in range(warmup_runs):
            model(dummy)
        # CUDA work is asynchronous: drain the queue before starting and before
        # stopping the clock so the measurement covers actual execution.
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        start = time.perf_counter()
        for _ in range(timed_runs):
            model(dummy)
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        elapsed = time.perf_counter() - start
    return (elapsed / timed_runs) * 1000, timed_runs / elapsed


def get_metrics(name, path, loader, warmup_runs=50, timed_runs=200):
    """Load one checkpoint and measure its size, accuracy and inference speed.

    Args:
        name: display name; the architecture is chosen by the substrings
            'Baseline', 'CBAM', 'CA' or 'V3' (tested in that order).
        path: checkpoint file passed to ``torch.load``.
        loader: iterable of ``(inputs, labels)`` batches used for accuracy.
        warmup_runs: untimed forward passes before timing starts.
        timed_runs: timed forward passes; must be positive.

    Returns:
        ``(params, acc, avg_ms, fps)`` where ``params`` is the parameter count
        in millions, ``acc`` the accuracy in percent (NaN for an empty loader),
        ``avg_ms`` the mean latency per forward pass and ``fps`` the passes per
        second. Returns None, after printing a warning, when the name is
        unknown or the weights cannot be loaded.
    """
    # 1. Load Model
    model = _build_model(name)
    if model is None:
        print(f"Warning: no known architecture matches '{name}'; skipping")
        return None

    # Loading stays best-effort (callers skip None results), but the reason is
    # now reported instead of being swallowed by a bare `except`.
    # NOTE: torch.load unpickles the file, so only use it on trusted checkpoints.
    try:
        model.load_state_dict(torch.load(path, map_location=DEVICE))
    except FileNotFoundError:
        print(f"Warning: Could not find weights for {name}")
        return None
    except Exception as exc:
        print(f"Warning: could not load weights for {name}: {exc}")
        return None

    model.to(DEVICE)
    model.eval()

    # 2. Parameters (in millions)
    params = sum(p.numel() for p in model.parameters()) / 1e6

    # 3. Accuracy
    acc = _accuracy_percent(model, loader)

    # 4. Inference Speed
    avg_ms, fps = _time_single_input(model, warmup_runs, timed_runs)

    return params, acc, avg_ms, fps

# --- MAIN EXECUTION ---
# Print one table row per checkpoint in PATHS; entries for which get_metrics
# returns nothing are left out of the table.
print(f"{'Model':<20} | {'Params (M)':<10} | {'Test Acc (%)':<12} | {'Time (ms)':<10} | {'FPS':<5}")
print("-" * 70)

for name, path in PATHS.items():
    res = get_metrics(name, path, test_loader)
    if not res:
        continue
    params, acc, ms, fps = res
    print(f"{name:<20} | {params:<10.2f} | {acc:<12.2f} | {ms:<10.2f} | {fps:<5.0f}")

Model                | Params (M) | Test Acc (%) | Time (ms)  | FPS  
----------------------------------------------------------------------
Baseline (V2)        | 2.23       | 93.24        | 6.73       | 149  
Novelty A (CBAM)     | 2.44       | 94.37        | 5.26       | 190  
Novelty B (CA)       | 2.39       | 96.90        | 5.35       | 187  
Reference (V3)       | 4.21       | 96.34        | 6.01       | 166  
