**Prepare data**

In [None]:
from google.colab import drive

# Expose Google Drive inside the Colab VM; the dataset paths used later in
# this notebook all live under this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


**Load data**

In [None]:
import torch
import numpy as np
from torchvision import transforms
from PIL import Image
import os
import json
from torch.utils.data import Dataset

class SegmentationDataset(Dataset):
    """Image / RGB-mask pairs for semantic segmentation.

    Every file found in ``image_dir`` is one sample. Its mask is read from
    ``mask_dir`` under the same file name, with ``.jpg`` replaced by ``.png``.
    Masks are colour-coded; each colour is translated to an integer class id
    using the colormap JSON.

    Args:
        image_dir: Directory containing the input images.
        mask_dir: Directory containing the RGB masks.
        colormap_file: Path to a JSON object whose values are RGB triples.
            The class id of a colour is its position in that object.
        transform: Optional callable applied to the PIL image only.
        verbose: When True, print per-sample debug information. Off by
            default because the output is very noisy, especially with
            multi-worker ``DataLoader``s.
    """

    def __init__(self, image_dir, mask_dir, colormap_file, transform=None, verbose=False):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.verbose = verbose
        with open(colormap_file) as f:
            self.colormap = json.load(f)
        # Class id == insertion order of the colour in the JSON object.
        self.color2label = {tuple(color): idx for idx, color in enumerate(self.colormap.values())}
        # os.listdir returns entries in arbitrary order; sort so that
        # dataset[i] refers to the same file on every run.
        self.images = sorted(os.listdir(image_dir))
        print(f"Found {len(self.images)} images.")

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            ``(image, mask)`` where ``image`` is the RGB image after
            ``transform`` (or the raw PIL image when no transform is set)
            and ``mask`` is always a 2-D ``torch.long`` tensor of class ids.
        """
        img_name = self.images[idx]
        img_path = os.path.join(self.image_dir, img_name)
        mask_path = os.path.join(self.mask_dir, img_name.replace('.jpg', '.png'))  # Adjust if necessary
        image = Image.open(img_path).convert("RGB")
        mask = Image.open(mask_path).convert("RGB")

        # Convert the mask unconditionally: previously this only happened
        # when a transform was supplied, leaving a NumPy array otherwise.
        mask = torch.from_numpy(self.rgb_to_mask(mask)).long()

        if self.transform is not None:
            image = self.transform(image)

        if self.verbose:
            print(f"Loading image: {img_name}")
            # A PIL image (no transform) has no .shape; fall back to its size.
            print(f"Image shape: {image.shape if hasattr(image, 'shape') else image.size}")
            print(f"Mask unique values: {torch.unique(mask)}")

        return image, mask

    def rgb_to_mask(self, mask):
        """Convert a RGB mask to a class map mask.

        Args:
            mask: PIL image or array whose last axis holds the colour channels.

        Returns:
            ``np.int32`` array with the mask's first two dimensions. Pixels
            whose colour is not in the colormap keep the initial value 0.
        """
        mask = np.array(mask)
        class_map = np.zeros(mask.shape[:2], dtype=np.int32)

        for rgb, class_id in self.color2label.items():
            # A pixel belongs to the class only if all channels match.
            class_map[np.all(np.equal(mask, rgb), axis=-1)] = class_id

        if self.verbose:
            print(f"Unique classes in mask: {np.unique(class_map)}")

        return class_map

# The image pipeline consists of a single ToTensor step.
transform = transforms.Compose([transforms.ToTensor()])

# Locations of the images, masks and colormap on the mounted Drive.
image_dir = '/content/drive/MyDrive/AIP Assignment 2/Question 2/dataset/images'
mask_dir = '/content/drive/MyDrive/AIP Assignment 2/Question 2/dataset/masks'
colormap_file = '/content/drive/MyDrive/AIP Assignment 2/Question 2/dataset/label2cmap.json'

# Build a dataset over every file in image_dir.
dataset = SegmentationDataset(
    image_dir=image_dir,
    mask_dir=mask_dir,
    colormap_file=colormap_file,
    transform=transform,
)

# Fetch the first sample so it can be inspected.
sample = dataset[0]
image, mask = sample

Found 298 images.
Unique classes in mask: [0 1 5 6 7]
Loading image: 2022-08-24 (185).png
Image shape: torch.Size([3, 1080, 1920])
Mask unique values: tensor([0, 1, 5, 6, 7])


**Split training and testing data**

In [None]:

split_file_path = '/content/drive/MyDrive/AIP Assignment 2/Question 2/dataset/train_test_split.json'

# The split file is a JSON object with a 'train' and a 'test' entry.
with open(split_file_path) as split_file:
    split_data = json.load(split_file)

train_images = split_data['train']
test_images = split_data['test']

# Report the size of each partition.
print(f"Total training images: {len(train_images)}")
print(f"Total testing images: {len(test_images)}")


Total training images: 248
Total testing images: 50


In [None]:
class SegmentationDataset(Dataset):
    """Image / RGB-mask pairs for one partition ('train' or 'test').

    Only files of ``image_dir`` whose names are listed for the requested
    partition become samples. The mask of a sample is read from ``mask_dir``
    under the same file name, with ``.jpg`` replaced by ``.png``, and its
    colours are translated to integer class ids using the colormap JSON.

    Args:
        image_dir: Directory containing the input images.
        mask_dir: Directory containing the RGB masks.
        colormap_file: Path to a JSON object whose values are RGB triples.
            The class id of a colour is its position in that object.
        subset: ``'train'`` or ``'test'``.
        transform: Optional callable applied to the PIL image only.
        split: Optional mapping with ``'train'`` / ``'test'`` entries, each an
            iterable of file names. When omitted, the notebook-level
            ``train_images`` / ``test_images`` variables are used (the
            original behaviour), so they must already be defined.
        verbose: When True, print per-sample debug information. Off by
            default because the output is very noisy, especially with
            multi-worker ``DataLoader``s.

    Raises:
        ValueError: If ``subset`` is neither ``'train'`` nor ``'test'``.
    """

    def __init__(self, image_dir, mask_dir, colormap_file, subset, transform=None,
                 split=None, verbose=False):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.verbose = verbose
        with open(colormap_file) as f:
            self.colormap = json.load(f)
        # Class id == insertion order of the colour in the JSON object.
        self.color2label = {tuple(color): idx for idx, color in enumerate(self.colormap.values())}

        # Use only the subset of images specified ('train' or 'test')
        if subset not in ('train', 'test'):
            raise ValueError("Subset must be either 'train' or 'test'")
        if split is not None:
            names = split[subset]
        else:
            # Backward-compatible fallback to the notebook globals.
            names = train_images if subset == 'train' else test_images
        allowed = set(names)  # constant-time membership instead of list scans
        # os.listdir returns entries in arbitrary order; sort so that
        # dataset[i] refers to the same file on every run.
        self.images = sorted(img for img in os.listdir(image_dir) if img in allowed)
        print(f"Found {len(self.images)} {subset} images.")

    def __len__(self):
        """Return the number of samples in this partition."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            ``(image, mask)`` where ``image`` is the RGB image after
            ``transform`` (or the raw PIL image when no transform is set)
            and ``mask`` is always a 2-D ``torch.long`` tensor of class ids.
        """
        img_name = self.images[idx]
        img_path = os.path.join(self.image_dir, img_name)
        mask_path = os.path.join(self.mask_dir, img_name.replace('.jpg', '.png'))  # Adjust if necessary
        image = Image.open(img_path).convert("RGB")
        mask = Image.open(mask_path).convert("RGB")

        # Convert the mask unconditionally: previously this only happened
        # when a transform was supplied, leaving a NumPy array otherwise.
        mask = torch.from_numpy(self.rgb_to_mask(mask)).long()

        if self.transform is not None:
            image = self.transform(image)

        if self.verbose:
            print(f"Loading image: {img_name}")
            # A PIL image (no transform) has no .shape; fall back to its size.
            print(f"Image shape: {image.shape if hasattr(image, 'shape') else image.size}")
            print(f"Mask unique values: {torch.unique(mask)}")

        return image, mask

    def rgb_to_mask(self, mask):
        """Convert a RGB mask to a class map mask.

        Args:
            mask: PIL image or array whose last axis holds the colour channels.

        Returns:
            ``np.int32`` array with the mask's first two dimensions. Pixels
            whose colour is not in the colormap keep the initial value 0.
        """
        mask = np.array(mask)
        class_map = np.zeros(mask.shape[:2], dtype=np.int32)

        for rgb, class_id in self.color2label.items():
            # A pixel belongs to the class only if all channels match.
            class_map[np.all(np.equal(mask, rgb), axis=-1)] = class_id

        if self.verbose:
            print(f"Unique classes in mask: {np.unique(class_map)}")

        return class_map

In [None]:
# One dataset per partition; both share the same directories, colormap and
# image transform and differ only in the requested subset.
train_dataset = SegmentationDataset(
    image_dir, mask_dir, colormap_file, subset='train', transform=transform
)
test_dataset = SegmentationDataset(
    image_dir, mask_dir, colormap_file, subset='test', transform=transform
)

Found 248 train images.
Found 50 test images.


**Data loaders**

In [None]:
from torch.utils.data import DataLoader

# Settings shared by both loaders.
batch_size = 16
loader_options = dict(batch_size=batch_size, num_workers=2)

# Training batches are reshuffled every epoch; test batches keep dataset order.
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

# Sanity check: number of batches per loader.
print(f"Total train batches: {len(train_loader)}")
print(f"Total test batches: {len(test_loader)}")

Total train batches: 16
Total test batches: 4


**Model - without skip connections**

In [None]:
import torch
import torchvision.models as models
from torch.nn import functional as F

# Pre-trained ResNet-18 used as a frozen feature extractor.
resnet18 = models.resnet18(pretrained=True)
for param in resnet18.parameters():
    param.requires_grad = False

# Drop the last two children (average pooling and fully connected layers).
modules = list(resnet18.children())[:-2]
resnet18_backbone = torch.nn.Sequential(*modules)

# Number of output channels of the segmentation head.
num_classes = 9  # Change this to your actual number of classes


def _upsample_stage(in_channels, out_channels):
    """Return one decoder stage as a flat list of layers.

    A stage is a stride-2 transposed convolution followed by a 3x3
    convolution, each followed by batch normalisation and ReLU.
    """
    return [
        torch.nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2, padding=0, bias=False),
        torch.nn.BatchNorm2d(out_channels),
        torch.nn.ReLU(inplace=True),
        torch.nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False),
        torch.nn.BatchNorm2d(out_channels),
        torch.nn.ReLU(inplace=True),
    ]


# Three upsampling stages (512 -> 256 -> 128 -> 64 channels) ...
upsample_layers = []
for stage_in, stage_out in ((512, 256), (256, 128), (128, 64)):
    upsample_layers.extend(_upsample_stage(stage_in, stage_out))
# ... followed by a 1x1 convolution that maps to the number of classes.
upsample_layers.append(torch.nn.Conv2d(64, num_classes, kernel_size=1))

# Full model: backbone children followed by the decoder layers, flattened
# into a single Sequential.
resnet18_segmentation = torch.nn.Sequential(*resnet18_backbone, *upsample_layers)


def init_weights(m):
    """Xavier-initialise (transposed) convolution weights and zero their biases."""
    if not isinstance(m, (torch.nn.ConvTranspose2d, torch.nn.Conv2d)):
        return
    torch.nn.init.xavier_uniform_(m.weight)
    if m.bias is not None:
        torch.nn.init.zeros_(m.bias)


# Re-initialise the decoder only; the backbone weights are left untouched.
for layer in upsample_layers:
    layer.apply(init_weights)

# Show the assembled architecture.
print(resnet18_segmentation)

Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Con

**Training**

In [None]:
import torch.optim as optim

# Cross-entropy over the class dimension of the model output.
criterion = torch.nn.CrossEntropyLoss()

# Adam is handed only the parameters that still require gradients.
trainable_params = [p for p in resnet18_segmentation.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=0.001)

In [None]:
def train_model(model, criterion, optimizer, train_loader, num_epochs=25):
    """Train ``model`` and print the mean per-sample loss after every epoch.

    NOTE: a later cell of this notebook defines ``train_model`` again; once
    that cell has run, the later definition is the one in effect.

    Args:
        model: Network mapping an image batch to per-class logits of shape
            (N, C, h, w).
        criterion: Loss called as ``criterion(outputs, masks)``.
        optimizer: Optimizer over the parameters to update.
        train_loader: Iterable of ``(images, masks)`` batches that also
            exposes ``.dataset`` (used to average the loss).
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        None. Progress is reported with ``print``.
    """
    # Use the model's own device instead of a notebook-level `device`
    # variable that may not exist yet when this function is called.
    device = next(model.parameters()).device
    model.train()  # Set model to training mode

    for epoch in range(num_epochs):
        running_loss = 0.0

        for images, masks in train_loader:
            images, masks = images.to(device), masks.to(device)

            optimizer.zero_grad()
            outputs = model(images)

            # The logits may be coarser than the masks. Bring the masks to
            # the logits' spatial size so the loss shapes agree; 'nearest'
            # keeps the values valid class ids.
            if outputs.shape[-2:] != masks.shape[-2:]:
                masks = torch.nn.functional.interpolate(
                    masks.unsqueeze(1).float(),
                    size=outputs.shape[-2:],
                    mode='nearest',
                ).squeeze(1).long()

            loss = criterion(outputs, masks)

            loss.backward()
            optimizer.step()

            # Weight by batch size so the epoch value is a per-sample mean
            # even when the last batch is smaller.
            running_loss += loss.item() * images.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)

        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')

    print('Training complete')

# Prefer the first CUDA device when one is available, otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Move the segmentation model onto the selected device.
resnet18_segmentation = resnet18_segmentation.to(device)

In [None]:
from torch.nn.functional import interpolate

def train_model(model, criterion, optimizer, train_loader, num_epochs=1):
    """Train ``model`` against masks resized to the model's output resolution.

    The masks of every batch are resampled (nearest neighbour, so values stay
    valid class ids) to the spatial size of the logits before the loss is
    computed. The mean per-sample loss is printed after every epoch.

    Args:
        model: Network mapping an image batch to per-class logits of shape
            (N, C, h, w).
        criterion: Loss called as ``criterion(outputs, masks_resized)``.
        optimizer: Optimizer over the parameters to update.
        train_loader: Iterable of ``(images, masks)`` batches that also
            exposes ``.dataset`` (used to average the loss).
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        None. Progress is reported with ``print``.
    """
    # Use the model's own device rather than a notebook-level global.
    device = next(model.parameters()).device
    model.train()  # Set model to training mode

    for epoch in range(num_epochs):
        running_loss = 0.0

        for images, masks in train_loader:
            images, masks = images.to(device), masks.to(device)

            optimizer.zero_grad()
            outputs = model(images)

            # Take the target size from the logits instead of hard-coding
            # (272, 480), which only holds for 1080x1920 inputs.
            masks_resized = interpolate(masks.unsqueeze(1).float(),
                                        size=outputs.shape[-2:],
                                        mode='nearest').squeeze(1).long()

            loss = criterion(outputs, masks_resized)

            loss.backward()
            optimizer.step()

            # Weight by batch size so the epoch value is a per-sample mean
            # even when the last batch is smaller.
            running_loss += loss.item() * images.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)

        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')

    print('Training complete')

# Run a single training epoch over the training loader.
train_model(
    model=resnet18_segmentation,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_loader,
    num_epochs=1,
)

Unique classes in mask: [0 1 5 6]
Unique classes in mask: [1 2 3 4 5 7 8]
Loading image: 2022-08-24 (331).pngLoading image: 2022-08-24 (53).png
Image shape: torch.Size([3, 1080, 1920])
Image shape: torch.Size([3, 1080, 1920])

Mask unique values: tensor([0, 1, 5, 6])
Mask unique values: tensor([1, 2, 3, 4, 5, 7, 8])
Unique classes in mask: [0 1 2 3 4 5 8]
Unique classes in mask: [0 1 6 7]
Loading image: 2022-08-24 (348).png
Image shape: torch.Size([3, 1080, 1920])
Loading image: 2022-08-24 (209).png
Image shape: torch.Size([3, 1080, 1920])
Mask unique values: tensor([0, 1, 2, 3, 4, 5, 8])
Mask unique values: tensor([0, 1, 6, 7])
Unique classes in mask: [0 1 6 7]
Loading image: 2022-08-24 (216).png
Unique classes in mask: [1 2 3 4 5 7 8]
Image shape: torch.Size([3, 1080, 1920])
Mask unique values: tensor([0, 1, 6, 7])
Loading image: 2022-08-24 (71).png
Image shape: torch.Size([3, 1080, 1920])
Mask unique values: tensor([1, 2, 3, 4, 5, 7, 8])
Unique classes in mask: [0 1 6 7]
Loading ima

**Testing accuracy**

In [None]:
import numpy as np
import torch

def fast_hist(a, b, n):
    """Return the n x n joint histogram of the label arrays ``a`` and ``b``.

    Entry ``[i, j]`` counts the positions where ``a == i`` and ``b == j``.
    Positions where ``a`` lies outside ``[0, n)`` are skipped.
    """
    valid = np.logical_and(a >= 0, a < n)
    # Encode every (a, b) pair as one flat index so a single bincount suffices.
    flat_index = n * a[valid].astype(int) + b[valid]
    counts = np.bincount(flat_index, minlength=n ** 2)
    return counts.reshape(n, n)

def compute_metrics(hist):
    """Compute pixel-wise accuracy and mean IoU from a square count matrix.

    Args:
        hist: (n, n) array of counts, one axis indexing the reference class
            and the other the predicted class.

    Returns:
        ``(accuracy, mean_iou)``. ``accuracy`` is the diagonal sum divided by
        the total count. ``mean_iou`` averages the per-class IoU, ignoring
        classes whose IoU is undefined (no pixels in either row or column).
    """
    diagonal = np.diag(hist)
    # 0/0 occurs for classes that never appear and is handled below by
    # nanmean; silence only the warnings NumPy would emit for it.
    with np.errstate(divide='ignore', invalid='ignore'):
        # Pixel-wise accuracy
        accuracy = diagonal.sum() / hist.sum()
        # Per-class IoU: intersection / (row total + column total - intersection)
        iou = diagonal / (hist.sum(axis=1) + hist.sum(axis=0) - diagonal)
    # Mean IoU over the classes with a defined IoU
    mean_iou = np.nanmean(iou)
    return accuracy, mean_iou

def evaluate_model(model, data_loader, num_classes):
    """Evaluate ``model`` on ``data_loader``.

    Puts the model in evaluation mode, predicts the highest-scoring class per
    output location, resizes the prediction (nearest neighbour) to the mask
    size and accumulates a ``num_classes`` x ``num_classes`` histogram of
    true versus predicted labels.

    Returns:
        The ``(accuracy, mean_iou)`` pair produced by ``compute_metrics``.
    """
    model.eval()  # Set the model to evaluation mode
    device = next(model.parameters()).device
    hist = np.zeros((num_classes, num_classes))

    with torch.no_grad():  # No need to track gradients
        for images, masks in data_loader:
            images = images.to(device)
            masks = masks.to(device)

            # Index of the highest-scoring class at every output location.
            logits = model(images)
            predicted = torch.max(logits, 1)[1]

            # Bring the prediction to the mask's spatial size.
            target_size = masks.size()[1:]
            upsampled = torch.nn.functional.interpolate(
                predicted.unsqueeze(1).float(), size=target_size, mode='nearest'
            )
            predicted_resized = upsampled.squeeze(1).long()

            # Accumulate true-vs-predicted label counts.
            true_flat = masks.cpu().numpy().flatten()
            pred_flat = predicted_resized.cpu().numpy().flatten()
            hist += fast_hist(true_flat, pred_flat, num_classes)

    return compute_metrics(hist)

num_classes = 9

# Score the trained model on the held-out test loader.
accuracy, mean_iou = evaluate_model(
    model=resnet18_segmentation,
    data_loader=test_loader,
    num_classes=num_classes,
)
print(f'Test Pixel-wise Accuracy: {accuracy:.4f}')
print(f'Test Mean IoU: {mean_iou:.4f}')

Unique classes in mask: [0 1 6 7]
Unique classes in mask: [0 1 2 3 4 5 6 7 8]
Loading image: 2022-08-24 (160).png
Image shape: torch.Size([3, 1080, 1920])Loading image: 2022-08-24 (197).png

Image shape: torch.Size([3, 1080, 1920])
Mask unique values: tensor([0, 1, 6, 7])
Mask unique values: tensor([0, 1, 2, 3, 4, 5, 6, 7, 8])
Unique classes in mask: [0 1 3 4 6 7 8]
Unique classes in mask: [1 2 3 4 5 7 8]
Loading image: 2022-08-24 (280).png
Image shape: torch.Size([3, 1080, 1920])
Mask unique values: tensor([0, 1, 3, 4, 6, 7, 8])
Loading image: 2022-08-24 (310).png
Image shape: torch.Size([3, 1080, 1920])
Mask unique values: tensor([1, 2, 3, 4, 5, 7, 8])
Unique classes in mask: [1 2 3 4 5 8]
Loading image: 2022-08-24 (111).png
Image shape: torch.Size([3, 1080, 1920])
Mask unique values: tensor([1, 2, 3, 4, 5, 8])
Unique classes in mask: [1 2 3 4 5 8]
Loading image: 2022-08-24 (124).png
Image shape: torch.Size([3, 1080, 1920])
Mask unique values: tensor([1, 2, 3, 4, 5, 8])
Unique classe