In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/kitti-dataset/data_object_calib/training/calib/005063.txt
/kaggle/input/kitti-dataset/data_object_calib/training/calib/000637.txt
/kaggle/input/kitti-dataset/data_object_calib/training/calib/002604.txt
/kaggle/input/kitti-dataset/data_object_calib/training/calib/004229.txt
/kaggle/input/kitti-dataset/data_object_calib/training/calib/000360.txt
/kaggle/input/kitti-dataset/data_object_calib/training/calib/006439.txt
/kaggle/input/kitti-dataset/data_object_calib/training/calib/002979.txt
/kaggle/input/kitti-dataset/data_object_calib/training/calib/005938.txt
/kaggle/input/kitti-dataset/data_object_calib/training/calib/007127.txt
/kaggle/input/kitti-dataset/data_object_calib/training/calib/000870.txt
/kaggle/input/kitti-dataset/data_object_calib/training/calib/007419.txt
/kaggle/input/kitti-dataset/data_object_calib/training/calib/006003.txt
/kaggle/input/kitti-dataset/data_object_calib/training/calib/004343.txt
/kaggle/input/kitti-dataset/data_object_calib/training/calib/007

In [2]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights
from torchvision.models.detection import retinanet_resnet50_fpn, RetinaNet_ResNet50_FPN_Weights
from torchvision.models.detection.retinanet import RetinaNetClassificationHead
from torchvision.models.detection import ssd300_vgg16, SSD300_VGG16_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, random_split
import cv2
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
import pickle
import random
from sklearn.metrics import average_precision_score
from torch.amp import autocast, GradScaler 

In [3]:
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


In [5]:
import os
import torch
from torch.utils.data import Dataset
from PIL import Image

class KITTIDataset(Dataset):
    def __init__(self, root_dir, split='train', transform=None):
        """
        Args:
            root_dir: Path to KITTI dataset parent directory
            split: 'train', 'val', or 'test'
            transform: Optional transforms to apply
        """
        self.root_dir = root_dir
        self.split = split
        self.transform = transform
        
        # Define class names for KITTI
        self.classes = ['Car', 'Van', 'Truck', 'Pedestrian', 'Person_sitting', 
                        'Cyclist', 'Tram', 'Misc', 'DontCare']
        
        # Create class to index mapping
        self.class_to_idx = {cls_name: i + 1 for i, cls_name in enumerate(self.classes)}
        self.class_to_idx['Background'] = 0
        
        # Load image paths and labels
        self.image_paths = []
        self.labels = []

        # Corrected image and label paths based on KITTI structure
        if split in ['train', 'training', 'val']:
            image_dir = os.path.join(root_dir, 'data_object_image_2', 'training', 'image_2')
            label_dir = os.path.join(root_dir, 'data_object_label_2', 'training', 'label_2')
        else:  # Testing
            image_dir = os.path.join(root_dir, 'data_object_image_2', 'testing', 'image_2')
            label_dir = None  # No labels for test set
        
        # Debug: Print resolved image directory
        print(f"[INFO] Loading images from: {image_dir}")
        
        # Get all image files
        self.image_files = sorted([f for f in os.listdir(image_dir) if f.endswith('.png')])
        
        print(f"[INFO] Found {len(self.image_files)} image(s) for split '{split}'")

        # Load all image paths and annotations
        for img_file in self.image_files:
            img_path = os.path.join(image_dir, img_file)
            self.image_paths.append(img_path)
            
            boxes = []
            labels = []
            
            if label_dir and split in ['train', 'training', 'val']:
                label_file = os.path.join(label_dir, img_file.replace('.png', '.txt'))
                
                if os.path.exists(label_file):
                    with open(label_file, 'r') as f:
                        for line in f:
                            parts = line.strip().split()
                            if len(parts) < 8:
                                continue
                                
                            cls_name = parts[0]
                            if cls_name not in self.class_to_idx:
                                continue
                            
                            try:
                                x1 = float(parts[4])
                                y1 = float(parts[5])
                                x2 = float(parts[6])
                                y2 = float(parts[7])
                                
                                boxes.append([x1, y1, x2, y2])
                                labels.append(self.class_to_idx[cls_name])
                            except (IndexError, ValueError) as e:
                                print(f"Warning: Could not parse line in {label_file}: {line}")
                                print(f"Error: {e}")
                                continue
            
            if len(boxes) == 0:
                boxes = torch.zeros((0, 4), dtype=torch.float32)
                labels = torch.zeros(0, dtype=torch.int64)
            else:
                boxes = torch.tensor(boxes, dtype=torch.float32)
                labels = torch.tensor(labels, dtype=torch.int64)
            
            target = {
                'boxes': boxes,
                'labels': labels
            }
            
            self.labels.append(target)
    
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        target = self.labels[idx]
        
        img = Image.open(img_path).convert("RGB")
        
        if self.transform:
            img = self.transform(img)
        
        return img, target


In [6]:
data_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [7]:
def collate_fn(batch):
    return tuple(zip(*batch))



In [8]:
def create_datasets(kitti_root):
    # Create full training dataset
    full_train_dataset = KITTIDataset(root_dir=kitti_root, split='training', transform=data_transform)
    
    # Split into train and validation
    train_size = int(0.8 * len(full_train_dataset))
    val_size = len(full_train_dataset) - train_size
    
    # Use random_split to create train and validation datasets
    train_dataset, val_dataset = random_split(
        full_train_dataset, 
        [train_size, val_size],
        generator=torch.Generator().manual_seed(42)  # For reproducibility
    )
    
    print(f"Train dataset size: {len(train_dataset)}")
    print(f"Val dataset size: {len(val_dataset)}")
    
    # Create data loaders
    train_loader = DataLoader(
        train_dataset,
        batch_size=16,  # Adjust based on GPU memory
        shuffle=True,
        collate_fn=collate_fn,
        num_workers=2
    )
    
    val_loader = DataLoader(
        val_dataset,
        batch_size=16,
        shuffle=False,
        collate_fn=collate_fn,
        num_workers=2
    )
    
    # Create a test dataset
    test_dataset = KITTIDataset(root_dir=kitti_root, split='testing', transform=data_transform)
    print(f"Test dataset size: {len(test_dataset)}")
    
    test_loader = DataLoader(
        test_dataset,
        batch_size=16,
        shuffle=False,
        collate_fn=collate_fn,
        num_workers=2
    )
    
    # Get class mapping from the full training dataset
    class_to_idx = full_train_dataset.class_to_idx
    
    return train_loader, val_loader, test_loader, class_to_idx

In [9]:
def get_faster_rcnn_model(num_classes):
    # Load pre-trained Faster R-CNN model
    weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
    model = fasterrcnn_resnet50_fpn(weights=weights)
    
    # Freeze backbone layers (early feature extraction layers)
    for name, param in model.backbone.named_parameters():
        # Freeze all backbone layers except the last block (layer4)
        if 'layer4' not in name:
            param.requires_grad = False
    
    # Replace the classifier with a new one for our number of classes
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    
    return model

In [10]:
def get_retinanet_model(num_classes):
    # Load pre-trained RetinaNet model
    weights = RetinaNet_ResNet50_FPN_Weights.DEFAULT
    model = retinanet_resnet50_fpn(weights=weights)
    
    # Freeze backbone layers
    for name, param in model.backbone.named_parameters():
        # Freeze all backbone layers except the last block
        if 'layer4' not in name:
            param.requires_grad = False
    
    # Get number of input features and anchors
    in_features = model.head.classification_head.cls_logits.in_channels
    num_anchors = model.head.classification_head.num_anchors
    
    # Replace the classification head with a new one
    model.head.classification_head = RetinaNetClassificationHead(
        in_channels=in_features,
        num_anchors=num_anchors,
        num_classes=num_classes
    )
    return model



In [11]:
def get_ssd_model(num_classes):
    # Load pre-trained SSD model
    weights = SSD300_VGG16_Weights.DEFAULT
    model = ssd300_vgg16(weights=weights)
    
    # Freeze early layers of the VGG backbone
    # SSD uses a modified VGG16, so the structure is different
    for name, param in model.backbone.named_parameters():
        # Freeze the first 4 convolutional blocks (out of 5)
        if 'features.0.' in name or 'features.1.' in name or 'features.2.' in name or 'features.3.' in name:
            param.requires_grad = False
    
    # Replace the classifier
    model.head.classification_head.num_classes = num_classes
    
    return model

In [12]:
def train_model(model, train_loader, val_loader, num_epochs=10, lr=0.001):
    # Move model to device
    model.to(device)

    # Define optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(params, lr=lr, momentum=0.9, weight_decay=0.0005)

    # Learning rate scheduler
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

    # Training loop
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0

        print(f"Epoch {epoch+1}/{num_epochs}")
        progress_bar = tqdm(train_loader)

        for images, targets in progress_bar:
            # Move to device
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # Forward pass
            loss_dict = model(images, targets)

            # Calculate total loss
            losses = sum(loss for loss in loss_dict.values())

            # Backward pass and optimization
            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

            # Update progress bar
            train_loss += losses.item()
            progress_bar.set_description(f"Loss: {losses.item():.4f}")

        # Update learning rate
        lr_scheduler.step()

        # Validation phase
        model.eval()
        val_loss = 0
        val_batches_counted = 0

        with torch.no_grad():
            for images, targets in tqdm(val_loader):
                images = list(image.to(device) for image in images)
                targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

                # For validation, we need to force the model to compute losses
                # by switching temporarily to training mode (but without updating weights)
                model.train()
                loss_dict = model(images, targets)
                model.eval()  # Switch back to eval mode

                # Calculate total loss
                losses = sum(loss for loss in loss_dict.values())
                val_loss += losses.item()
                val_batches_counted += 1

        # Calculate average loss
        avg_train_loss = train_loss / len(train_loader)
        avg_val_loss = val_loss / val_batches_counted if val_batches_counted > 0 else float('nan')

        # Print epoch results
        print(f"Epoch {epoch+1} - Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")

    return model

In [13]:
class EnsembleModel:
    def __init__(self, models, weights=None):
        """
        Initialize ensemble with multiple models
        
        Args:
            models: List of trained models
            weights: Optional weights for each model
        """
        self.models = models
        self.weights = weights if weights is not None else [1/len(models)] * len(models)
    
    def predict(self, image):
        """
        Make prediction using ensemble of models
        
        Args:
            image: Input image tensor
        
        Returns:
            Dictionary with ensemble predictions
        """
        # Move image to device
        image = image.to(device)
        
        # Put in list format as expected by models
        images = [image]
        
        all_predictions = []
        
        # Get predictions from each model
        for model in self.models:
            model.eval()
            with torch.no_grad():
                predictions = model(images)
                all_predictions.append(predictions[0])  # Get first (only) image predictions
        
        # Ensemble results
        # For detection, we'll use non-maximum suppression across model predictions
        return self._ensemble_detections(all_predictions)
    
    def _ensemble_detections(self, predictions):
        """
        Combine predictions from multiple models using weighted box fusion
        """
        all_boxes = []
        all_scores = []
        all_labels = []
        
        # Extract boxes, scores and labels from all models
        for i, pred in enumerate(predictions):
            weight = self.weights[i]
            boxes = pred['boxes'].cpu().numpy()
            scores = pred['scores'].cpu().numpy() * weight  # Apply model weight
            labels = pred['labels'].cpu().numpy()
            
            if len(boxes) > 0:  # Only add if there are detections
                all_boxes.append(boxes)
                all_scores.append(scores)
                all_labels.append(labels)
        
        # Combine all detections
        if all_boxes:  # Check if any boxes were detected
            combined_boxes = np.vstack(all_boxes)
            combined_scores = np.concatenate(all_scores)
            combined_labels = np.concatenate(all_labels)
            
            # Non-maximum suppression
            if len(combined_boxes) > 0:
                selected_indices = self._nms(combined_boxes, combined_scores, iou_threshold=0.5)
                
                result = {
                    'boxes': torch.tensor(combined_boxes[selected_indices]),
                    'scores': torch.tensor(combined_scores[selected_indices]),
                    'labels': torch.tensor(combined_labels[selected_indices])
                }
            else:
                result = {
                    'boxes': torch.tensor([]),
                    'scores': torch.tensor([]),
                    'labels': torch.tensor([])
                }
        else:
            # Return empty tensors if no detections
            result = {
                'boxes': torch.tensor([]),
                'scores': torch.tensor([]),
                'labels': torch.tensor([])
            }
        
        return result
    
    def _nms(self, boxes, scores, iou_threshold):
        """
        Non-maximum suppression
        """
        if len(boxes) == 0:
            return []
        
        # Sort by score
        order = scores.argsort()[::-1]
        keep = []
        
        while order.size > 0:
            i = order[0]
            keep.append(i)
            
            # Compute IoU of the selected box with the rest
            ious = self._compute_iou(boxes[i, :], boxes[order[1:], :])
            
            # Keep boxes with IoU less than threshold
            mask = ious < iou_threshold
            order = order[1:][mask]
        
        return keep
    
    def _compute_iou(self, box, boxes):
        """
        Compute IoU between box and boxes
        """
        # Box: [x1, y1, x2, y2]
        x1 = np.maximum(box[0], boxes[:, 0])
        y1 = np.maximum(box[1], boxes[:, 1])
        x2 = np.minimum(box[2], boxes[:, 2])
        y2 = np.minimum(box[3], boxes[:, 3])
        
        # Intersection area
        inter_area = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)
        
        # Union area
        box_area = (box[2] - box[0]) * (box[3] - box[1])
        boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        union_area = box_area + boxes_area - inter_area
        
        # IoU
        iou = inter_area / union_area
        
        return iou


In [14]:
def evaluate_model(model, data_loader, class_to_idx):
    """
    Evaluate model on validation set
    """
    model.eval()
    
    all_predictions = []
    all_targets = []
    
    with torch.no_grad():
        for images, targets in tqdm(data_loader):
            images = list(img.to(device) for img in images)
            
            # Get predictions
            outputs = model(images)
            
            # Store predictions and targets
            for i, output in enumerate(outputs):
                all_predictions.append(output)
                all_targets.append(targets[i])
    
    # Compute mAP
    map_score = calculate_map(all_predictions, all_targets, len(class_to_idx))
    
    print(f"mAP: {map_score:.4f}")
    return map_score

In [15]:
def calculate_map(predictions, targets, num_classes):
    """
    Calculate mean Average Precision
    (Simplified implementation for demonstration)
    """
    # For each class
    aps = []
    
    for cls in range(1, num_classes):  # Skip background class (0)
        y_true = []
        y_scores = []
        
        # For each image
        for i, (pred, target) in enumerate(zip(predictions, targets)):
            pred_boxes = pred['boxes'].cpu().numpy()
            pred_scores = pred['scores'].cpu().numpy()
            pred_labels = pred['labels'].cpu().numpy()
            
            target_boxes = target['boxes'].cpu().numpy()
            target_labels = target['labels'].cpu().numpy()
            
            # Find predictions for this class
            mask = pred_labels == cls
            cls_pred_boxes = pred_boxes[mask]
            cls_pred_scores = pred_scores[mask]
            
            # Find targets for this class
            mask = target_labels == cls
            cls_target_boxes = target_boxes[mask]
            
            # If no targets for this class in this image
            if len(cls_target_boxes) == 0:
                # All predictions are false positives
                y_true.extend([0] * len(cls_pred_boxes))
                y_scores.extend(cls_pred_scores.tolist())
                continue
            
            # If no predictions for this class in this image
            if len(cls_pred_boxes) == 0:
                continue
            
            # Match predictions to targets based on IoU
            matched = np.zeros(len(cls_target_boxes), dtype=bool)
            
            # Sort predictions by score
            order = np.argsort(-cls_pred_scores)
            
            for idx in order:
                pred_box = cls_pred_boxes[idx]
                pred_score = cls_pred_scores[idx]
                
                # Compute IoU with all targets
                ious = []
                for target_box in cls_target_boxes:
                    iou = compute_iou(pred_box, target_box)
                    ious.append(iou)
                
                # Find the best matching target
                best_idx = np.argmax(ious)
                best_iou = ious[best_idx]
                
                # If IoU > threshold and target not already matched
                if best_iou > 0.5 and not matched[best_idx]:
                    y_true.append(1)  # True positive
                    matched[best_idx] = True
                else:
                    y_true.append(0)  # False positive
                
                y_scores.append(pred_score)
        
        # Calculate AP for this class if we have predictions
        if len(y_true) > 0:
            ap = average_precision_score(y_true, y_scores, average='macro')
            aps.append(ap)
    
    # Calculate mAP
    if len(aps) > 0:
        return sum(aps) / len(aps)
    else:
        return 0.0


In [16]:
def compute_iou(box1, box2):
    """
    Compute IoU between two boxes
    """
    # Box: [x1, y1, x2, y2]
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])
    
    # Intersection area
    inter_area = max(0, x2 - x1) * max(0, y2 - y1)
    
    # Union area
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union_area = box1_area + box2_area - inter_area
    
    # IoU
    iou = inter_area / union_area if union_area > 0 else 0
    
    return iou

In [17]:
def visualize_predictions(model, dataset, idx=0, threshold=0.5):
    """
    Visualize model predictions
    """
    # Get image and target
    img, target = dataset[idx]
    
    # Make prediction
    model.eval()
    with torch.no_grad():
        prediction = model([img.to(device)])[0]
    
    # Convert image for visualization
    img = img.permute(1, 2, 0).cpu().numpy()
    img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255
    img = img.astype(np.uint8)
    
    # Create figure
    fig, ax = plt.subplots(1, 2, figsize=(20, 10))
    
    # Show ground truth
    ax[0].imshow(img)
    ax[0].set_title('Ground Truth')
    
    boxes = target['boxes'].cpu().numpy()
    labels = target['labels'].cpu().numpy()
    
    for box, label in zip(boxes, labels):
        x1, y1, x2, y2 = box
        rect = plt.Rectangle((x1, y1), x2 - x1, y2 - y1, 
                             fill=False, edgecolor='green', linewidth=2)
        ax[0].add_patch(rect)
        ax[0].text(x1, y1, f'Class {label}', 
                   bbox=dict(facecolor='green', alpha=0.5))
    
    # Show predictions
    ax[1].imshow(img)
    ax[1].set_title('Predictions')
    
    boxes = prediction['boxes'].cpu().numpy()
    scores = prediction['scores'].cpu().numpy()
    labels = prediction['labels'].cpu().numpy()
    
    # Filter by threshold
    if len(scores) > 0:  # Check if there are any predictions
        mask = scores >= threshold
        boxes = boxes[mask]
        scores = scores[mask]
        labels = labels[mask]
    
    for box, score, label in zip(boxes, scores, labels):
        x1, y1, x2, y2 = box
        rect = plt.Rectangle((x1, y1), x2 - x1, y2 - y1, 
                             fill=False, edgecolor='red', linewidth=2)
        ax[1].add_patch(rect)
        ax[1].text(x1, y1, f'Class {label}: {score:.2f}', 
                   bbox=dict(facecolor='red', alpha=0.5))
    
    plt.tight_layout()
    plt.savefig('prediction_visualization.png')
    plt.close()



In [18]:
def main():
    # Set KITTI dataset path - use the parent directory containing data_object_* folders
    kitti_root = "/kaggle/input/kitti-dataset"  # Update this path to your actual path
    
    # Create datasets and loaders
    train_loader, val_loader, test_loader, class_to_idx = create_datasets(kitti_root)
    num_classes = len(class_to_idx)
    
    print(f"Number of classes: {num_classes}")
    
    # Create models with transfer learning
    faster_rcnn_model = get_faster_rcnn_model(num_classes)
    retinanet_model = get_retinanet_model(num_classes)
    ssd_model = get_ssd_model(num_classes)
    
    # Train models
    print("Training Faster R-CNN model...")
    faster_rcnn_model = train_model(faster_rcnn_model, train_loader, val_loader, num_epochs=5)
    
    print("Training RetinaNet model...")
    retinanet_model = train_model(retinanet_model, train_loader, val_loader, num_epochs=5)
    
    print("Training SSD model...")
    ssd_model = train_model(ssd_model, train_loader, val_loader, num_epochs=5)
    
    # Save trained models
    torch.save(faster_rcnn_model.state_dict(), 'faster_rcnn_kitti.pth')
    torch.save(retinanet_model.state_dict(), 'retinanet_kitti.pth')
    torch.save(ssd_model.state_dict(), 'ssd_kitti.pth')
    
    # Evaluate individual models
    print("Evaluating Faster R-CNN model...")
    faster_rcnn_map = evaluate_model(faster_rcnn_model, val_loader, class_to_idx)
    
    print("Evaluating RetinaNet model...")
    retinanet_map = evaluate_model(retinanet_model, val_loader, class_to_idx)
    
    print("Evaluating SSD model...")
    ssd_map = evaluate_model(ssd_model, val_loader, class_to_idx)
    
    # Create ensemble model with weights based on performance
    # Normalize the mAP scores to use as weights
    total_map = faster_rcnn_map + retinanet_map + ssd_map
    weights = [faster_rcnn_map/total_map, retinanet_map/total_map, ssd_map/total_map]
    
    ensemble = EnsembleModel(
        models=[faster_rcnn_model, retinanet_model, ssd_model],
        weights=weights
    )
    
    # Evaluate ensemble model
    print("Evaluating Ensemble model...")
    # Create a wrapper model that follows PyTorch model interface
    class EnsembleWrapper(nn.Module):
        def __init__(self, ensemble):
            super().__init__()
            self.ensemble = ensemble
        
        def forward(self, images, targets=None):
            # During evaluation, we only need predictions
            if self.training or targets is not None:
                # During training, just pass through for now
                # In a real implementation, you'd handle this properly
                return faster_rcnn_model(images, targets)
            
            # For inference
            return [self.ensemble.predict(img) for img in images]
    
    ensemble_wrapper = EnsembleWrapper(ensemble)
    ensemble_map = evaluate_model(ensemble_wrapper, val_loader, class_to_idx)
    
    print("\nFinal Results:")
    print(f"Faster R-CNN mAP: {faster_rcnn_map:.4f}")
    print(f"RetinaNet mAP: {retinanet_map:.4f}")
    print(f"SSD mAP: {ssd_map:.4f}")
    print(f"Ensemble mAP: {ensemble_map:.4f}")
    
    # Visualize predictions
    val_dataset = val_loader.dataset
    visualize_predictions(faster_rcnn_model, val_dataset, idx=0)
    visualize_predictions(ensemble_wrapper, val_dataset, idx=0)


In [19]:
if __name__ == "__main__":
    main()

[INFO] Loading images from: /kaggle/input/kitti-dataset/data_object_image_2/training/image_2
[INFO] Found 7481 image(s) for split 'training'
Train dataset size: 5984
Val dataset size: 1497
[INFO] Loading images from: /kaggle/input/kitti-dataset/data_object_image_2/testing/image_2
[INFO] Found 7518 image(s) for split 'testing'
Test dataset size: 7518
Number of classes: 10


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:00<00:00, 232MB/s]
Downloading: "https://download.pytorch.org/models/retinanet_resnet50_fpn_coco-eeacb38b.pth" to /root/.cache/torch/hub/checkpoints/retinanet_resnet50_fpn_coco-eeacb38b.pth
100%|██████████| 130M/130M [00:00<00:00, 222MB/s]
Downloading: "https://download.pytorch.org/models/ssd300_vgg16_coco-b556d3b4.pth" to /root/.cache/torch/hub/checkpoints/ssd300_vgg16_coco-b556d3b4.pth
100%|██████████| 136M/136M [00:00<00:00, 189MB/s]


Training Faster R-CNN model...
Epoch 1/5


Loss: 0.6188: 100%|██████████| 374/374 [06:32<00:00,  1.05s/it]
100%|██████████| 94/94 [01:05<00:00,  1.44it/s]


Epoch 1 - Train Loss: 0.8878, Val Loss: 0.8136
Epoch 2/5


Loss: 0.5665: 100%|██████████| 374/374 [06:31<00:00,  1.05s/it]
100%|██████████| 94/94 [01:04<00:00,  1.46it/s]


Epoch 2 - Train Loss: 0.7997, Val Loss: 0.7964
Epoch 3/5


Loss: 0.7295: 100%|██████████| 374/374 [06:28<00:00,  1.04s/it]
100%|██████████| 94/94 [01:04<00:00,  1.46it/s]


Epoch 3 - Train Loss: 0.7585, Val Loss: 0.7769
Epoch 4/5


Loss: 0.7286: 100%|██████████| 374/374 [06:27<00:00,  1.04s/it]
100%|██████████| 94/94 [01:04<00:00,  1.46it/s]


Epoch 4 - Train Loss: 0.7247, Val Loss: 0.7564
Epoch 5/5


Loss: 0.7317: 100%|██████████| 374/374 [06:27<00:00,  1.04s/it]
100%|██████████| 94/94 [01:04<00:00,  1.46it/s]


Epoch 5 - Train Loss: 0.7201, Val Loss: 0.7490
Training RetinaNet model...
Epoch 1/5


Loss: 1.2465: 100%|██████████| 374/374 [05:14<00:00,  1.19it/s]
100%|██████████| 94/94 [00:50<00:00,  1.86it/s]


Epoch 1 - Train Loss: 1.4407, Val Loss: 1.1994
Epoch 2/5


Loss: 0.8780: 100%|██████████| 374/374 [05:13<00:00,  1.19it/s]
100%|██████████| 94/94 [00:50<00:00,  1.86it/s]


Epoch 2 - Train Loss: 1.0099, Val Loss: 0.9214
Epoch 3/5


Loss: 0.9561: 100%|██████████| 374/374 [05:13<00:00,  1.19it/s]
100%|██████████| 94/94 [00:50<00:00,  1.85it/s]


Epoch 3 - Train Loss: 0.8941, Val Loss: 0.8859
Epoch 4/5


Loss: 0.8083: 100%|██████████| 374/374 [05:13<00:00,  1.19it/s]
100%|██████████| 94/94 [00:50<00:00,  1.86it/s]


Epoch 4 - Train Loss: 0.8284, Val Loss: 0.8222
Epoch 5/5


Loss: 0.8052: 100%|██████████| 374/374 [05:13<00:00,  1.19it/s]
100%|██████████| 94/94 [00:50<00:00,  1.86it/s]


Epoch 5 - Train Loss: 0.8187, Val Loss: 0.8173
Training SSD model...
Epoch 1/5


Loss: 4.4243: 100%|██████████| 374/374 [02:14<00:00,  2.79it/s]
100%|██████████| 94/94 [00:21<00:00,  4.27it/s]


Epoch 1 - Train Loss: 4.0027, Val Loss: 3.4357
Epoch 2/5


Loss: 2.5284: 100%|██████████| 374/374 [02:14<00:00,  2.77it/s]
100%|██████████| 94/94 [00:21<00:00,  4.31it/s]


Epoch 2 - Train Loss: 3.1466, Val Loss: 3.1508
Epoch 3/5


Loss: 2.7539: 100%|██████████| 374/374 [02:14<00:00,  2.78it/s]
100%|██████████| 94/94 [00:21<00:00,  4.31it/s]


Epoch 3 - Train Loss: 2.8072, Val Loss: 2.9271
Epoch 4/5


Loss: 2.4118: 100%|██████████| 374/374 [02:14<00:00,  2.78it/s]
100%|██████████| 94/94 [00:21<00:00,  4.31it/s]


Epoch 4 - Train Loss: 2.4674, Val Loss: 2.8277
Epoch 5/5


Loss: 2.5794: 100%|██████████| 374/374 [02:14<00:00,  2.78it/s]
100%|██████████| 94/94 [00:21<00:00,  4.35it/s]


Epoch 5 - Train Loss: 2.4040, Val Loss: 2.8137
Evaluating Faster R-CNN model...


100%|██████████| 94/94 [01:07<00:00,  1.40it/s]


mAP: 0.3362
Evaluating RetinaNet model...


100%|██████████| 94/94 [00:54<00:00,  1.72it/s]


mAP: 0.0849
Evaluating SSD model...


100%|██████████| 94/94 [00:42<00:00,  2.22it/s]


mAP: 0.5466
Evaluating Ensemble model...


100%|██████████| 94/94 [03:19<00:00,  2.12s/it]


mAP: 0.6369

Final Results:
Faster R-CNN mAP: 0.3362
RetinaNet mAP: 0.0849
SSD mAP: 0.5466
Ensemble mAP: 0.6369
