In [None]:
# Define ground truth set in training
import os
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.models as models
import ijson


# Preprocessing pipeline applied to every image patch before it reaches the CNN
IMAGE_SIZE = (128, 128)  # target size each patch is resized to

# Per-channel statistics handed to Normalize, i.e. (x - 0.5) / 0.5 per channel.
# NOTE(review): the classifier in this notebook loads ImageNet-pretrained weights,
# which are usually paired with ImageNet mean/std — confirm 0.5/0.5 is intentional.
_CHANNEL_MEAN = [0.5, 0.5, 0.5]
_CHANNEL_STD = [0.5, 0.5, 0.5]

_preprocess_steps = [
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
]
transform = transforms.Compose(_preprocess_steps)

def standardize_filename(path_or_name):
    """Return the base name of a path with its final extension removed.

    Directory components are dropped first; then only the last extension is
    stripped, so "dir/abcd123.jpg" -> "abcd123" and "abcd123.jpg.pt" -> "abcd123.jpg".
    """
    stem, _extension = os.path.splitext(os.path.basename(path_or_name))
    return stem

def extract_first_n_labels(json_file_path, n):
    """Stream the first ``n`` records of a top-level JSON array, keeping only box labels.

    The file is parsed incrementally with ``ijson`` so it is never loaded whole.

    Args:
        json_file_path: Path to a JSON file whose root is an array of records.
        n: Maximum number of records to return.

    Returns:
        A list of dicts with keys "name", "timestamp" and "labels"; "labels"
        holds one {"category", "box2d"} dict per source label that has a
        "box2d" key.
    """
    def _box_labels(record):
        # Keep only labels that carry a 2D box, reduced to the two fields used downstream.
        return [
            {"category": entry.get("category"), "box2d": entry.get("box2d")}
            for entry in record.get("labels", [])
            if "box2d" in entry
        ]

    records = []
    with open(json_file_path, 'rb') as handle:
        for position, record in enumerate(ijson.items(handle, 'item')):
            if position >= n:
                break
            records.append({
                "name": record.get("name"),
                "timestamp": record.get("timestamp"),
                "labels": _box_labels(record),
            })
    return records

class GroundTruthDataset(Dataset):
    """Dataset yielding, per image, the ground-truth object crops and their class ids.

    Images are discovered in ``image_dir`` (jpg / jpeg / png, sorted by path) and
    matched to label records by file name without extension. Each item is a pair
    ``(crops, labels)`` where ``crops`` is the stack of transformed patches for
    that image and ``labels`` is a ``torch.long`` tensor with one id per patch.

    Relies on the module-level ``transform`` and ``name_to_id`` objects.
    """

    def __init__(self, image_dir, labels):
        """
        Args:
            image_dir: Directory containing the image files.
            labels: Iterable of label records, each with a "name" key and
                (optionally) a "labels" list of {"category", "box2d"} dicts.
        """
        self.image_dir = image_dir
        self.image_files = sorted(
            os.path.join(image_dir, f)
            for f in os.listdir(image_dir)
            if f.lower().endswith(('.jpg', '.png', '.jpeg'))
        )
        # Keyed by extension-less file name so "x.jpg" on disk matches "x.jpg" in the JSON.
        self.label_dict = {standardize_filename(item["name"]): item for item in labels}

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``(crops, labels)`` for the image at position ``idx``.

        Boxes are truncated to integers and clamped to the image bounds; boxes
        that end up with zero (or negative) width or height are skipped, because
        an empty crop cannot be resized. If no usable box remains, the whole
        image is returned as a single patch with label 0.
        """
        image_path = self.image_files[idx]
        # convert() returns an independent image, so the file handle can be closed right away.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')
        image_width, image_height = image.size
        base_key = standardize_filename(image_path)

        matched = self.label_dict.get(base_key, None)
        crops, labels = [], []

        objects = (matched.get("labels") or []) if matched else []
        for obj in objects:
            b2d = obj.get("box2d")
            if not b2d:
                continue
            y1, x1, y2, x2 = map(int, [b2d["y1"], b2d["x1"], b2d["y2"], b2d["x2"]])

            # Ensure the box is within image boundaries
            y1, x1 = max(0, y1), max(0, x1)
            y2, x2 = min(image_height, y2), min(image_width, x2)

            # Sub-pixel or fully out-of-frame boxes collapse to an empty region
            # after truncation/clamping; cropping them would yield a 0-sized patch.
            if x2 <= x1 or y2 <= y1:
                continue

            # Crop and resize object patch
            patch = transform(image.crop((x1, y1, x2, y2)))

            crops.append(patch)
            # NOTE(review): unknown categories fall back to id 0, which is also
            # "traffic light" in name_to_id — confirm this fallback is intended.
            labels.append(name_to_id.get(obj["category"], 0))  # Convert category name to ID

        if not crops:  # If no objects found, return whole image as background
            crops.append(transform(image))
            # NOTE(review): "background" shares id 0 with "traffic light"; a dedicated
            # background id would need num_classes to grow by one.
            labels.append(0)

        return torch.stack(crops), torch.tensor(labels, dtype=torch.long)

# Category name -> integer class id. Ids follow the order listed below and must be
# the same mapping at training and inference time.
name_to_id = {
    category: class_id
    for class_id, category in enumerate((
        "traffic light",
        "traffic sign",
        "car",
        "person",
        "bus",
        "truck",
        "rider",
        "bike",
        "motor",
        "train",
    ))
}

def custom_collate_fn(batch):
    """Collate ``(patches, labels)`` samples without stacking them.

    Images contribute different numbers of patches, so the per-image tensors
    cannot be stacked into one batch tensor; they are returned as two parallel
    lists instead.

    Returns:
        ``(patch_groups, label_groups)`` — one entry per sample, in batch order.
    """
    samples = list(batch)
    patch_groups = [patches for patches, _ in samples]
    label_groups = [labels for _, labels in samples]
    return patch_groups, label_groups


In [None]:
# Model definition with Dropout + freeze
class ObjectClassifier_new(nn.Module):
    """ResNet-18 patch classifier with a dropout + linear head.

    Args:
        num_classes: Number of output logits.
        dropout_p: Dropout probability applied before the final linear layer.
        freeze_backbone: If True, every pretrained parameter is frozen except
            those in ``layer4``; the new head is always trainable.
    """

    def __init__(self, num_classes=10, dropout_p=0.5, freeze_backbone=True):
        super().__init__()
        # `pretrained=True` is deprecated in torchvision; IMAGENET1K_V1 is the
        # checkpoint that flag used to load.
        self.model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

        # Optionally freeze early layers, leaving only the last residual stage trainable
        if freeze_backbone:
            for param in self.model.parameters():
                param.requires_grad = False
            for param in self.model.layer4.parameters():
                param.requires_grad = True

        # Replace the classification head with dropout + linear.
        # Freshly created layers are trainable by default, so the head needs no
        # explicit unfreezing.
        in_features = self.model.fc.in_features
        self.model.fc = nn.Sequential(
            nn.Dropout(p=dropout_p),
            nn.Linear(in_features, num_classes)
        )

    def forward(self, x):
        """Return class logits for a batch of image patches ``x``."""
        return self.model(x)

In [None]:
# Load dataset (replace `ground_truth_labels` with your actual dataset labels)
json_file_path = 'bdd100k_labels_images_train.json'

# Extract labels from JSON (adjust number as desired)
ground_truth_labels = extract_first_n_labels(json_file_path, 40000)

# Defined before first use so every loader in the notebook shares one value
batch_size = 4
# Fixed seed so the train/val/test membership is identical on every run
SPLIT_SEED = 42

dataset = GroundTruthDataset(image_dir="trainA_original_2000", labels=ground_truth_labels)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate_fn)

# Split using random_split (70% train, 15% val, 15% test); the test split takes
# the remainder so the three sizes always sum to len(dataset)
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

In [None]:
# Contrast stretch on training data set
import numpy as np
def contrast_stretch(image, low_percentile=10, high_percentile=90):
    """Linearly rescale an image tensor between two of its percentiles.

    The percentiles are computed over all elements of ``image`` (all channels
    together). Values are mapped so the low percentile becomes 0 and the high
    percentile becomes 1, then clipped to [0, 1].

    Args:
        image: Image tensor, e.g. (C, H, W).
        low_percentile: Percentile mapped to 0.
        high_percentile: Percentile mapped to 1.

    Returns:
        A new tensor with the same shape, dtype and device as ``image``. If the
        two percentiles are closer than 1e-6, ``image`` itself is returned
        unchanged to avoid dividing by (almost) zero.
    """
    pixels = image.cpu().numpy()

    lower, upper = (np.percentile(pixels, q) for q in (low_percentile, high_percentile))
    span = upper - lower

    if span < 1e-6:
        return image

    rescaled = np.clip((pixels - lower) / (span + 1e-8), 0, 1)
    return torch.tensor(rescaled, dtype=image.dtype, device=image.device)

def enhance_contrast_in_dataset(dataset, low_percentile=10, high_percentile=90):
    """Eagerly contrast-stretch every crop of every sample in ``dataset``.

    Args:
        dataset: Indexable collection whose items are ``(crops, labels)`` pairs,
            with ``crops`` iterable over individual patch tensors.
        low_percentile: Forwarded to ``contrast_stretch``.
        high_percentile: Forwarded to ``contrast_stretch``.

    Returns:
        A list of ``(stretched_crops, labels)`` pairs in dataset order; labels
        are passed through untouched.
    """
    def _stretch_all(crops):
        # Stretch each patch independently, then re-stack along the patch axis.
        return torch.stack(
            [contrast_stretch(patch, low_percentile, high_percentile) for patch in crops]
        )

    enhanced_samples = []
    for index in range(len(dataset)):
        crops, labels = dataset[index]
        enhanced_samples.append((_stretch_all(crops), labels))
    return enhanced_samples

# Materialise the contrast-stretched training split up front: every sample of
# train_dataset is read and processed once here, and the result is a plain
# in-memory list of (crops, labels) pairs.
enhanced_train_dataset = enhance_contrast_in_dataset(train_dataset)

In [None]:
# The training split is contrast-stretched (values in [0, 1]); validation and
# test samples must go through the same preprocessing, otherwise the model is
# evaluated on inputs with a different value range than it was trained on.
# Like the training split, these are materialised in memory once.
enhanced_val_dataset = enhance_contrast_in_dataset(val_dataset)
enhanced_test_dataset = enhance_contrast_in_dataset(test_dataset)

# Settings shared by all three loaders.
loader_kwargs = dict(
    batch_size=batch_size,
    collate_fn=custom_collate_fn,
    num_workers=os.cpu_count() or 0,  # os.cpu_count() may return None
    pin_memory=torch.cuda.is_available(),  # pinning only helps host-to-GPU copies
)

train_loader = torch.utils.data.DataLoader(enhanced_train_dataset, shuffle=True, **loader_kwargs)
val_loader = torch.utils.data.DataLoader(enhanced_val_dataset, shuffle=False, **loader_kwargs)
test_loader = torch.utils.data.DataLoader(enhanced_test_dataset, shuffle=False, **loader_kwargs)

print(f"Train size: {len(train_loader.dataset)}")
print(f"Val size: {len(val_loader.dataset)}")
print(f"Test size: {len(test_loader.dataset)}")

In [None]:
# Initialize model, loss function, and optimizer
import torch
import os
import matplotlib.pyplot as plt

# For tracking loss curves (one entry per completed epoch)
train_losses = []
val_losses = []

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ObjectClassifier_new(num_classes=len(name_to_id)).to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Number of CPU threads used by torch ops (os.cpu_count() may return None)
torch.set_num_threads(os.cpu_count() or 1)

# Mixed precision is only enabled on CUDA; on CPU both the scaler and autocast
# are disabled and behave as no-ops. GradScaler only has an effect when the
# forward pass runs under autocast, so the two are switched together.
use_amp = device.type == "cuda"
scaler = torch.amp.GradScaler(device.type, enabled=use_amp)

# Training loop
num_epochs = 20

# `verbose` is deprecated on LR schedulers; LR changes are printed explicitly below.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.1, patience=2
)

# Best validation loss seen so far (init high so the first epoch always saves)
best_val_loss = float('inf')
early_stop_patience = 10
patience_counter = 0

for epoch in range(num_epochs):
    model.train()
    total_train_loss = 0.0
    train_steps = 0

    # Each batch is a list of per-image patch tensors; one optimizer step per image
    for batch_patches, batch_labels in train_loader:
        for patches, labels in zip(batch_patches, batch_labels):
            patches = patches.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            optimizer.zero_grad()
            with torch.autocast(device_type=device.type, enabled=use_amp):
                outputs = model(patches)
                loss = criterion(outputs, labels)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            total_train_loss += loss.item()
            train_steps += 1

    # Losses are accumulated once per image, so average over images, not batches
    avg_train_loss = total_train_loss / max(train_steps, 1)
    train_losses.append(avg_train_loss)

    # validation: eval mode disables dropout and freezes BatchNorm statistics
    model.eval()
    total_val_loss = 0.0
    val_steps = 0

    with torch.no_grad():
        for val_patches, val_labels in val_loader:
            for patches, labels in zip(val_patches, val_labels):
                patches = patches.to(device, non_blocking=True)
                labels = labels.to(device, non_blocking=True)

                with torch.autocast(device_type=device.type, enabled=use_amp):
                    outputs = model(patches)
                    loss = criterion(outputs, labels)

                total_val_loss += loss.item()
                val_steps += 1

    avg_val_loss = total_val_loss / max(val_steps, 1)
    val_losses.append(avg_val_loss)

    # scheduler step + early stopping
    lr_before = optimizer.param_groups[0]["lr"]
    scheduler.step(avg_val_loss)
    lr_after = optimizer.param_groups[0]["lr"]
    if lr_after != lr_before:
        print(f"Learning rate reduced: {lr_before:.2e} -> {lr_after:.2e}")

    stop_training = False
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), "best_model.pth")
        print("Saved best model!")
    else:
        patience_counter += 1
        if patience_counter >= early_stop_patience:
            print("Early stopping triggered")
            stop_training = True

    ### === EPOCH SUMMARY === ###
    # Printed before leaving the loop so the final epoch is reported too
    print(f"Epoch [{epoch+1}/{num_epochs}]")
    print(f"Train Loss: {avg_train_loss:.4f}")
    print(f"Val Loss: {avg_val_loss:.4f}")

    if stop_training:
        break

In [None]:
# Plot the per-epoch training and validation loss curves on one set of axes
fig, ax = plt.subplots(figsize=(8, 5))
for loss_history, legend_name in (
    (train_losses, 'Training Loss'),
    (val_losses, 'Validation Loss'),
):
    ax.plot(loss_history, label=legend_name)
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Training vs Validation Loss')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# precision calculation on training and validation data
from sklearn.metrics import precision_score

model_test = ObjectClassifier_new(num_classes=len(name_to_id)).to(device)
# weights_only=True restricts unpickling to tensors and plain containers, which
# is all a state_dict needs and avoids executing arbitrary pickled code.
model_test.load_state_dict(torch.load("best_model.pth", map_location=device, weights_only=True))

model_test.eval()  # Set model to evaluation mode


def report_macro_precision(model, loader, split_name, preview_batches=2, preview_samples=10):
    """Run ``model`` over ``loader`` and print/return macro-averaged precision.

    Args:
        model: Classifier returning per-class logits; should already be in eval mode.
        loader: DataLoader yielding ``(list_of_patch_tensors, list_of_label_tensors)``.
        split_name: Name used in the printed headings (e.g. "Train").
        preview_batches: Number of leading batches whose predictions are printed.
        preview_samples: Maximum number of predictions printed per previewed batch.

    Returns:
        Macro-averaged precision as a float in [0, 1].
    """
    print(f"🔍 Sample Predictions on {split_name} Data:\n")

    model_device = next(model.parameters()).device
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for i, (patch_groups, label_groups) in enumerate(loader):
            # Flatten the per-image patch lists into one batch for a single forward pass
            patches = torch.cat(patch_groups, dim=0).to(model_device)
            labels = torch.cat(label_groups, dim=0)  # only needed on CPU

            predicted = model(patches).argmax(dim=1).cpu()

            # Save for precision calculation
            all_preds.extend(predicted.tolist())
            all_labels.extend(labels.tolist())

            if i < preview_batches:
                for j in range(min(len(predicted), preview_samples)):
                    print(f"Sample {j+1}: Predicted = {predicted[j].item()}, Actual = {labels[j].item()}")

    # Calculate precision (macro-averaged across all classes)
    precision = precision_score(all_labels, all_preds, average='macro', zero_division=0)
    print(f"\n🎯 {split_name} Precision (Macro): {precision * 100:.2f}%")
    return precision


train_precision = report_macro_precision(model_test, train_loader, "Train")
val_precision = report_macro_precision(model_test, val_loader, "Validation")

In [None]:
# Macro-averaged precision of the reloaded best model on the held-out test split
from sklearn.metrics import precision_score


print("🔍 Sample Predictions on Test Data:\n")

all_preds, all_labels = [], []

with torch.no_grad():
    for batch_index, (test_patches, test_labels) in enumerate(test_loader):
        # Merge the per-image patch/label lists into single tensors for one forward pass
        test_patches_tensor = torch.cat(test_patches, dim=0).to(device)
        test_labels_tensor = torch.cat(test_labels, dim=0).to(device)

        outputs = model_test(test_patches_tensor)
        predicted = torch.max(outputs, 1)[1]  # index of the highest logit per patch

        # Accumulate predictions and targets for the final precision score
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(test_labels_tensor.cpu().numpy())

        # Show up to ten predictions from each of the first two batches
        if batch_index < 2:
            preview = zip(predicted[:10], test_labels_tensor[:10])
            for sample_no, (pred, actual) in enumerate(preview, start=1):
                print(f"Sample {sample_no}: Predicted = {pred.item()}, Actual = {actual.item()}")

# Macro average: precision is computed per class, then averaged with equal weight
precision = precision_score(all_labels, all_preds, average='macro', zero_division=0)

print(f"\n🎯 Test Precision (Macro): {precision * 100:.2f}%")