# Unified-OneHead Multi-Task Challenge

In [1]:
# Upload File
from google.colab import files
uploaded = files.upload()


Saving hw2.zip to hw2.zip


In [2]:
# Unzip File
import zipfile
with zipfile.ZipFile('hw2.zip', 'r') as zip_ref:
    zip_ref.extractall('./hw2_data')


In [3]:
# Import Libraries
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import ImageFolder
from PIL import Image
from pycocotools.coco import COCO
import numpy as np
from sklearn.metrics import accuracy_score
import timm
import time

In [4]:
# Remapping COCO Dataset
COCO_CLASSES = {
    1: 0, 2: 1, 3: 2, 4: 3, 5: 4,
    6: 5, 7: 6, 8: 7, 9: 8, 10: 9
    }


In [5]:
# Classification Transforms
classify_transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225])
])

# Classification Dataset Loader
def get_classification_datasets(train_dir, val_dir, transform):
    train_dataset = ImageFolder(root=train_dir, transform=transform)
    val_dataset = ImageFolder(root=val_dir, transform=transform)
    return train_dataset, val_dataset

# Segmentation Dataset
class VOCDataset(Dataset):
    def __init__(self, image_dir, mask_dir, transform=None, target_size=(224, 224)):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.target_size = target_size
        self.images = sorted([f for f in os.listdir(image_dir) if f.endswith('.jpg')])

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.images[idx])
        mask_path = os.path.join(self.mask_dir, self.images[idx].replace('.jpg', '.png'))

        img = Image.open(img_path).convert('RGB')
        mask = Image.open(mask_path)

        img = img.resize(self.target_size)
        mask = mask.resize(self.target_size, Image.NEAREST)

        if self.transform:
            img = self.transform(img)
        mask = torch.tensor(np.array(mask), dtype=torch.long)

        return img, mask

# Detection Dataset
class COCODataset(Dataset):
    def __init__(self, img_dir, ann_path, transform=None):
        self.img_dir = img_dir
        self.coco = COCO(ann_path)
        self.img_ids = list(self.coco.imgs.keys())
        self.transform = transform

    def __len__(self):
        return len(self.img_ids)

    def __getitem__(self, idx):
        img_id = self.img_ids[idx]
        ann_ids = self.coco.getAnnIds(imgIds=img_id)
        anns = self.coco.loadAnns(ann_ids)

        path = self.coco.loadImgs(img_id)[0]['file_name']
        img = Image.open(os.path.join(self.img_dir, path)).convert('RGB')

        boxes = []
        labels = []
        for ann in anns:
            label = COCO_CLASSES.get(ann['category_id'], None)
            if label is None:
                continue
            bbox = ann['bbox']
            scale = 224.0 / 512.0
            bbox = [b * scale for b in bbox]

            boxes.append(bbox)
            labels.append(label)

        if self.transform:
            img = self.transform(img)

        boxes = torch.tensor(boxes, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.long)

        return img, {'boxes': boxes, 'labels': labels}



In [6]:
# Setup Dataset Paths
train_path = './hw2_data/hw2/imagenette_160/train'
val_path = './hw2_data/hw2/imagenette_160/val'

seg_image_dir = './hw2_data/hw2/mini_voc_seg/train/images'
seg_mask_dir = './hw2_data/hw2/mini_voc_seg/train/masks'

det_image_dir = './hw2_data/hw2/mini_coco_det/train'
det_ann_path = './hw2_data/hw2/mini_coco_det/train/instances_train.json'

# Transforms for segmentation & detection
seg_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

det_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

def detection_collate_fn(batch):
    images = []
    targets = []
    for img, target in batch:
        images.append(img)
        targets.append(target)
    return torch.stack(images, 0), targets

# Load Datasets
train_set_cls, val_set_cls = get_classification_datasets(train_path, val_path, classify_transform)
train_loader_cls = DataLoader(train_set_cls, batch_size=8, shuffle=True, num_workers=2)
val_loader_cls = DataLoader(val_set_cls, batch_size=8, shuffle=False, num_workers=2)

seg_dataset = VOCDataset(seg_image_dir, seg_mask_dir, transform=seg_transform)
seg_loader = DataLoader(seg_dataset, batch_size=4, shuffle=True, num_workers=2)

det_dataset = COCODataset(det_image_dir, det_ann_path, transform=det_transform)
det_loader = DataLoader(det_dataset, batch_size=16, shuffle=True, collate_fn=detection_collate_fn)

# Checking Dataset
print(f"Classification Classes: {train_set_cls.classes}")
print(f"Segmentation Dataset Size: {len(seg_dataset)}")
print(f"Detection Dataset Size: {len(det_dataset)}")


loading annotations into memory...
Done (t=0.05s)
creating index...
index created!
Classification Classes: ['n01440764', 'n02102040', 'n02979186', 'n03000684', 'n03028079', 'n03394916', 'n03417042', 'n03425413', 'n03445777', 'n03888257']
Segmentation Dataset Size: 240
Detection Dataset Size: 240


In [7]:
# Model Building
class UnifiedHead(nn.Module):
    def __init__(self, in_ch, num_classes=10, seg_classes=21, det_classes=10):
        super().__init__()
        self.shared = nn.Sequential(
            nn.Conv2d(in_ch, 128, 3, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, 3, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True)
        )
        self.det_ch = 5 + det_classes
        self.seg_ch = seg_classes
        self.cls_ch = num_classes
        self.out_channels = self.det_ch + self.seg_ch + self.cls_ch
        self.output = nn.Conv2d(128, self.out_channels, 1)

    def forward(self, x):
        x = self.shared(x)
        out = self.output(x)

        det_out = out[:, :self.det_ch, :, :]
        seg_out = out[:, self.det_ch:self.det_ch + self.seg_ch, :, :]
        cls_map = out[:, self.det_ch + self.seg_ch:, :, :]

        # Classification logits by global average pooling spatially
        cls_out = cls_map.mean(dim=(2, 3))

        return seg_out, det_out, cls_out


class UnifiedModel(nn.Module):
    def __init__(self, num_classes=10, seg_classes=21, det_classes=10):
        super().__init__()
        self.backbone = timm.create_model('mobilenetv3_small_100', pretrained=True, features_only=True)
        ch = self.backbone.feature_info.channels()[-1]
        self.head = UnifiedHead(in_ch=ch, num_classes=num_classes, seg_classes=seg_classes, det_classes=det_classes)

    def forward(self, x):
        feat = self.backbone(x)[-1]
        return self.head(feat)


In [8]:
# Loss Detection
def yolo_detection_loss(pred, targets, C=10, lambda_coord=5.0, lambda_noobj=0.5):
    B, _, S, _ = pred.shape
    pred = pred.permute(0, 2, 3, 1)
    total_loss = 0.0

    for i in range(B):
        pred_grid = pred[i]
        target = targets[i]
        boxes = target['boxes']
        labels = target['labels']

        obj_mask = torch.zeros((S, S), dtype=torch.bool)
        coord_loss, conf_loss, cls_loss, noobj_loss = 0.0, 0.0, 0.0, 0.0

        for j in range(len(boxes)):
            cx, cy, w, h = boxes[j]
            cx_idx = int(cx / 224 * S)
            cy_idx = int(cy / 224 * S)

            if not (0 <= cx_idx < S and 0 <= cy_idx < S):
                continue

            obj_mask[cy_idx, cx_idx] = True

            # Predicted values
            cell = pred_grid[cy_idx, cx_idx]
            pred_box = cell[:4]
            pred_conf = cell[4]
            pred_cls = cell[5:]

            # Ground truth relative to cell center
            cell_cx = ((cx / 224) * S) - cx_idx
            cell_cy = ((cy / 224) * S) - cy_idx
            gt_box = torch.tensor([cell_cx, cell_cy, w / 224, h / 224], device=pred.device)

            # Losses
            coord_loss += F.mse_loss(pred_box, gt_box)
            conf_loss += F.binary_cross_entropy_with_logits(pred_conf, torch.tensor(1.0).to(pred_conf.device))
            cls_loss += F.cross_entropy(pred_cls.unsqueeze(0), labels[j].unsqueeze(0))

        # No-object confidence loss
        for y in range(S):
            for x in range(S):
                if obj_mask[y, x]:
                    continue
                pred_conf = torch.sigmoid(pred_grid[y, x, 4])
                noobj_loss += F.binary_cross_entropy(pred_conf, torch.tensor(0.0).to(pred_conf.device))

        total_loss += lambda_coord * coord_loss + cls_loss + conf_loss + lambda_noobj * noobj_loss

    return total_loss / B


In [9]:
# Define Elastic Weight Consolidation
class EWC:
    def __init__(self, model):
        self.model = model
        self.params = {n: p.clone().detach() for n, p in model.named_parameters() if p.requires_grad}
        self.fisher = {n: torch.zeros_like(p) for n, p in model.named_parameters() if p.requires_grad}

    def compute_fisher(self, dataloader, task, device):
        self.model.eval()
        for img, target in dataloader:
            img = img.to(device)
            self.model.zero_grad()
            seg_out, det_out, cls_out = self.model(img)

            if task == 'seg':
                target = target.to(device)
                seg_out = F.interpolate(seg_out, size=target.shape[1:], mode='bilinear', align_corners=False)
                loss = F.cross_entropy(seg_out, target, ignore_index=255)
            elif task == 'cls':
                target = target.to(device)
                loss = F.cross_entropy(cls_out, target)
            else:
                loss = yolo_detection_loss(det_out, target)

            loss.backward()
            for n, p in self.model.named_parameters():
                if p.grad is not None:
                    self.fisher[n] += p.grad.data ** 2
        for n in self.fisher:
            self.fisher[n] /= len(dataloader)

    def penalty(self, model):
        loss = 0
        for n, p in model.named_parameters():
            if n in self.fisher:
                loss += (self.fisher[n] * (p - self.params[n]) ** 2).sum()
        return loss


In [12]:
# Parameters Count
model = UnifiedModel().to('cpu')

def count_parameters(model):
    return sum(p.numel() for p in model.parameters())

def count_trainable_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print("Total parameters:", count_parameters(model))
print("Trainable parameters:", count_trainable_parameters(model))




Total parameters: 1744462
Trainable parameters: 1744462


In [15]:
# Inference Time
model.eval()
dummy_input = torch.randn(1, 3, 512, 512).to('cpu')
with torch.no_grad():
    for _ in range(10):
        _ = model(dummy_input)

# Timing
start = time.time()
with torch.no_grad():
    for _ in range(30):
        _ = model(dummy_input)
end = time.time()

avg_inference_time = (end - start) / 30 * 1000
print(f"Average inference time: {avg_inference_time:.2f} ms")


Average inference time: 55.09 ms


In [10]:
# Training Setup
def train(model, dataloader, task, optimizer, device, epochs=30, ewc=None, lambda_ewc=1.0 or 2.0):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        preds_cls, labels_cls = [], []
        miou_sum = 0
        n_samples = 0

        for batch in dataloader:
            optimizer.zero_grad()

            if task == 'cls':
                inputs, targets = batch
                inputs, targets = inputs.to(device), targets.to(device)
                seg_out, det_out, cls_out = model(inputs)
                loss = F.cross_entropy(cls_out, targets)
                preds_cls.append(cls_out.argmax(dim=1).cpu())
                labels_cls.append(targets.cpu())

            elif task == 'seg':
                inputs, targets = batch
                inputs, targets = inputs.to(device), targets.to(device)
                seg_out, det_out, cls_out = model(inputs)
                seg_out = F.interpolate(seg_out, size=targets.shape[1:], mode='bilinear', align_corners=False)
                loss = F.cross_entropy(seg_out, targets, ignore_index=255)
                # Calculate mIoU on CPU
                miou_sum += compute_miou(seg_out.detach().cpu(), targets.cpu())
                n_samples += 1

            elif task == 'det':
                inputs, targets = batch
                inputs = inputs.to(device)
                seg_out, det_out, cls_out = model(inputs)
                # Placeholder loss, detection target processing needed
                loss = yolo_detection_loss(det_out, targets)


            else:
                raise ValueError("Task must be one of 'cls', 'seg', 'det'")

            if ewc is not None:
                ewc_loss = ewc.penalty(model)
                loss += lambda_ewc * ewc_loss

            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        avg_loss = total_loss / len(dataloader)
        print(f"Epoch {epoch+1}/{epochs}, Task: {task}, Loss: {avg_loss:.4f}")

        if task == 'cls':
            acc = accuracy_score(torch.cat(labels_cls), torch.cat(preds_cls))
            print(f"Accuracy: {acc:.4f}")
        elif task == 'seg':
            print(f"mIoU (batch average): {miou_sum / n_samples:.4f}")

def compute_miou(pred_logits, true_masks):
    preds = pred_logits.argmax(dim=1)
    num_classes = pred_logits.shape[1]
    ious = []
    for cls in range(num_classes):
        pred_cls = (preds == cls)
        true_cls = (true_masks == cls)
        intersection = (pred_cls & true_cls).sum().item()
        union = (pred_cls | true_cls).sum().item()
        if union == 0:
            continue
        ious.append(intersection / union)
    if len(ious) == 0:
        return 0
    return sum(ious) / len(ious)


In [11]:
# Baseline Training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = UnifiedModel().to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
print("\n=== Training Classification Baseline ===")
train(model, train_loader_cls, task='cls', optimizer=optimizer, device=device, epochs=30)
torch.save(model.state_dict(), "baseline_cls.pth")

optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
print("\n=== Training Segmentation Baseline ===")
train(model, seg_loader, task='seg', optimizer=optimizer, device=device, epochs=30)
torch.save(model.state_dict(), "baseline_seg.pth")

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
print("\n=== Training Detection Baseline ===")
train(model, det_loader, task='det', optimizer=optimizer, device=device, epochs=15)
torch.save(model.state_dict(), "baseline_det.pth")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/10.2M [00:00<?, ?B/s]




=== Training Classification Baseline ===
Epoch 1/30, Task: cls, Loss: 1.8286
Accuracy: 0.4417
Epoch 2/30, Task: cls, Loss: 1.2939
Accuracy: 0.6125
Epoch 3/30, Task: cls, Loss: 1.0838
Accuracy: 0.6917
Epoch 4/30, Task: cls, Loss: 0.8709
Accuracy: 0.7875
Epoch 5/30, Task: cls, Loss: 0.6360
Accuracy: 0.8583
Epoch 6/30, Task: cls, Loss: 0.5624
Accuracy: 0.8708
Epoch 7/30, Task: cls, Loss: 0.5900
Accuracy: 0.8625
Epoch 8/30, Task: cls, Loss: 0.5283
Accuracy: 0.8667
Epoch 9/30, Task: cls, Loss: 0.4820
Accuracy: 0.8958
Epoch 10/30, Task: cls, Loss: 0.4123
Accuracy: 0.9042
Epoch 11/30, Task: cls, Loss: 0.3950
Accuracy: 0.8917
Epoch 12/30, Task: cls, Loss: 0.3903
Accuracy: 0.8833
Epoch 13/30, Task: cls, Loss: 0.3761
Accuracy: 0.8958
Epoch 14/30, Task: cls, Loss: 0.3995
Accuracy: 0.8792
Epoch 15/30, Task: cls, Loss: 0.4517
Accuracy: 0.8833
Epoch 16/30, Task: cls, Loss: 0.2703
Accuracy: 0.9375
Epoch 17/30, Task: cls, Loss: 0.2333
Accuracy: 0.9542
Epoch 18/30, Task: cls, Loss: 0.2365
Accuracy: 0.

In [12]:
# Training with EWC
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

model = UnifiedModel().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)

print("\n=== Training Classification Task (with EWC) ===")
train(model, train_loader_cls, task='cls', optimizer=optimizer, device=device, epochs=30)
ewc = EWC(model)
ewc.compute_fisher(train_loader_cls, task='cls', device=device)

optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
print("\n=== Training Segmentation Task (with EWC) ===")
train(model, seg_loader, task='seg', optimizer=optimizer, device=device, epochs=30, ewc=ewc, lambda_ewc=3.0)
ewc.compute_fisher(seg_loader, task='seg', device=device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
print("\n=== Training Detection Task (with EWC, Placeholder) ===")
train(model, det_loader, task='det', optimizer=optimizer, device=device, epochs=15, ewc=ewc, lambda_ewc=5.0)

save_path = "./unified_model.pth"
torch.save(model.state_dict(), save_path)
print(f"Model saved to {save_path}")


Using device: cpu





=== Training Classification Task (with EWC) ===
Epoch 1/30, Task: cls, Loss: 1.8433
Accuracy: 0.4125
Epoch 2/30, Task: cls, Loss: 1.3190
Accuracy: 0.6208
Epoch 3/30, Task: cls, Loss: 1.1305
Accuracy: 0.6833
Epoch 4/30, Task: cls, Loss: 0.8393
Accuracy: 0.8125
Epoch 5/30, Task: cls, Loss: 0.6797
Accuracy: 0.8333
Epoch 6/30, Task: cls, Loss: 0.5445
Accuracy: 0.8833
Epoch 7/30, Task: cls, Loss: 0.5569
Accuracy: 0.8792
Epoch 8/30, Task: cls, Loss: 0.5548
Accuracy: 0.8625
Epoch 9/30, Task: cls, Loss: 0.5768
Accuracy: 0.8500
Epoch 10/30, Task: cls, Loss: 0.6628
Accuracy: 0.8167
Epoch 11/30, Task: cls, Loss: 0.4077
Accuracy: 0.8875
Epoch 12/30, Task: cls, Loss: 0.3819
Accuracy: 0.9083
Epoch 13/30, Task: cls, Loss: 0.3447
Accuracy: 0.9083
Epoch 14/30, Task: cls, Loss: 0.3139
Accuracy: 0.9167
Epoch 15/30, Task: cls, Loss: 0.3378
Accuracy: 0.9125
Epoch 16/30, Task: cls, Loss: 0.2426
Accuracy: 0.9625
Epoch 17/30, Task: cls, Loss: 0.1962
Accuracy: 0.9625
Epoch 18/30, Task: cls, Loss: 0.1251
Accur

In [13]:
# Prediction Decoder
def decode_predictions(det_out, conf_thresh=0.01, S=7, num_classes=10):
    B, _, H, W = det_out.shape
    assert H == W == S
    pred = det_out.permute(0, 2, 3, 1)  # [B, S, S, 5+C]
    decoded = []

    for b in range(B):
        boxes = []
        scores = []
        labels = []
        for i in range(S):
            for j in range(S):
                cell = pred[b, i, j]
                cx, cy, w, h = cell[:4]
                obj_logit = cell[4]
                class_scores = F.softmax(cell[5:], dim=0)

                # Apply activations
                obj_conf = torch.sigmoid(obj_logit)
                cx = torch.sigmoid(cx)
                cy = torch.sigmoid(cy)
                w = torch.exp(w) if w < 10 else torch.tensor(1.0)  # safety
                h = torch.exp(h) if h < 10 else torch.tensor(1.0)

                # Compute box center in image space
                cx_abs = (j + cx.item()) / S * 224
                cy_abs = (i + cy.item()) / S * 224
                w_abs = w.item() * 224
                h_abs = h.item() * 224

                x1 = cx_abs - w_abs / 2
                y1 = cy_abs - h_abs / 2
                x2 = cx_abs + w_abs / 2
                y2 = cy_abs + h_abs / 2

                score, cls = class_scores.max(0)
                final_score = (score * obj_conf).item()

                if final_score > conf_thresh:
                    boxes.append([x1, y1, x2, y2])
                    scores.append(final_score)
                    labels.append(cls.item())

        decoded.append({
            'boxes': torch.tensor(boxes),
            'scores': torch.tensor(scores),
            'labels': torch.tensor(labels)
        })

    return decoded


In [14]:
# IoU Computation
def compute_iou(box1, box2):
    # box: [x1, y1, x2, y2]
    xA = max(box1[0], box2[0])
    yA = max(box1[1], box2[1])
    xB = min(box1[2], box2[2])
    yB = min(box1[3], box2[3])

    inter_area = max(0, xB - xA) * max(0, yB - yA)
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])

    union = box1_area + box2_area - inter_area
    return inter_area / union if union > 0 else 0.0
def evaluate_map(preds, targets, iou_thresh=0.3):
    all_true = 0
    all_pred = 0
    true_positives = 0

    for pred, target in zip(preds, targets):
        pred_boxes = pred['boxes']
        pred_labels = pred['labels']
        gt_boxes = target['boxes']
        gt_labels = target['labels']

        matched = set()
        for i, p_box in enumerate(pred_boxes):
            p_label = pred_labels[i]
            found_match = False
            for j, gt_box in enumerate(gt_boxes):
                if j in matched:
                    continue
                if p_label != gt_labels[j]:
                    continue
                iou = compute_iou(p_box.tolist(), gt_box.tolist())
                if iou >= iou_thresh:
                    true_positives += 1
                    matched.add(j)
                    found_match = True
                    break
        all_true += len(gt_boxes)
        all_pred += len(pred_boxes)

    precision = true_positives / all_pred if all_pred > 0 else 0
    recall = true_positives / all_true if all_true > 0 else 0
    return precision, recall, precision


In [15]:
# Detection Validation Dataset
det_val_image_dir = './hw2_data/hw2/mini_coco_det/val'
det_val_ann_path = './hw2_data/hw2/mini_coco_det/val/instances_val.json'

det_val_dataset = COCODataset(det_val_image_dir, det_val_ann_path, transform=det_transform)
val_loader_det = DataLoader(det_val_dataset, batch_size=8, shuffle=False, collate_fn=detection_collate_fn)

# Segmentation Validation Dataset
seg_val_image_dir = './hw2_data/hw2/mini_voc_seg/val/images'
seg_val_mask_dir = './hw2_data/hw2/mini_voc_seg/val/masks'

val_seg_dataset = VOCDataset(seg_val_image_dir, seg_val_mask_dir, transform=seg_transform)
val_loader_seg = DataLoader(val_seg_dataset, batch_size=4, shuffle=False, num_workers=2)

loading annotations into memory...
Done (t=0.02s)
creating index...
index created!


In [19]:
# Classification Evaluation Function
def evaluate_classification(model_path, val_loader, device):
    model = UnifiedModel().to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    preds, labels = [], []
    with torch.no_grad():
        for x, y in val_loader:
            x, y = x.to(device), y.to(device)
            _, _, cls_out = model(x)
            preds.append(cls_out.argmax(1).cpu())
            labels.append(y.cpu())

    acc = accuracy_score(torch.cat(labels), torch.cat(preds))
    print(f"[{os.path.basename(model_path)}] Top-1 Accuracy: {acc:.4f}")
    return acc


In [20]:
# Segmentation Evaluation Function
def evaluate_segmentation(model_path, val_loader, device):
    model = UnifiedModel().to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    total_miou = 0
    count = 0
    with torch.no_grad():
        for x, y in val_loader:
            x, y = x.to(device), y.to(device)
            seg_out, _, _ = model(x)
            seg_out = F.interpolate(seg_out, size=y.shape[1:], mode='bilinear', align_corners=False)
            total_miou += compute_miou(seg_out.cpu(), y.cpu())
            count += 1
    miou = total_miou / count
    print(f"[{os.path.basename(model_path)}] mIoU: {miou:.4f}")
    return miou


In [17]:
# Detection Evaluation Function
def evaluate_detection_map(model_path, dataloader, device):
    model = UnifiedModel(num_classes=10, seg_classes=21, det_classes=10).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    all_preds, all_targets = [], []
    with torch.no_grad():
        for imgs, targets in dataloader:
            imgs = imgs.to(device)
            _, det_out, _ = model(imgs)
            preds = decode_predictions(det_out.cpu())
            all_preds.extend(preds)
            all_targets.extend(targets)

    precision, recall, approx_map = evaluate_map(all_preds, all_targets)
    print(f"[{os.path.basename(model_path)}] mAP@0.3 (approx): {approx_map:.4f}")
    return approx_map


In [22]:
# Classification
acc_base = evaluate_classification("baseline_cls.pth", val_loader_cls, device)
acc_final = evaluate_classification("unified_model.pth", val_loader_cls, device)
acc_drop = acc_base - acc_final
print(f"Top-1 drop: {acc_drop:.4f} ({(acc_drop / acc_base) * 100:.2f}%)")

# Segmentation
miou_base = evaluate_segmentation("baseline_seg.pth", val_loader_seg, device)
miou_final = evaluate_segmentation("unified_model.pth", val_loader_seg, device)
miou_drop = miou_base - miou_final
print(f"mIoU drop: {miou_drop:.4f} ({(miou_drop / miou_base) * 100:.2f}%)")




[baseline_cls.pth] Top-1 Accuracy: 0.5833




[unified_model.pth] Top-1 Accuracy: 0.1833
Top-1 drop: 0.4000 (68.57%)




[baseline_seg.pth] mIoU: 0.1004
[unified_model.pth] mIoU: 0.0988
mIoU drop: 0.0016 (1.60%)


In [18]:
# mAP Calculation
baseline_map = evaluate_detection_map("baseline_det.pth", val_loader_det, device)
final_map = evaluate_detection_map("unified_model.pth", val_loader_det, device)

drop = baseline_map - final_map
print(f"mAP drop: {drop:.4f} ({(drop / baseline_map) * 100:.2f}%)")




[baseline_det.pth] mAP@0.3 (approx): 0.0014
[unified_model.pth] mAP@0.3 (approx): 0.0017
mAP drop: -0.0004 (-26.38%)
