In [None]:
from detectron2.modeling import ANCHOR_GENERATOR_REGISTRY

# List the names registered in the anchor-generator registry.
# `_obj_map` is read directly and treated as a mapping keyed by registered name.
print(list(ANCHOR_GENERATOR_REGISTRY._obj_map))

In [None]:
from detectron2.modeling import BACKBONE_REGISTRY

# List the names registered in the backbone registry.
# `_obj_map` is read directly and treated as a mapping keyed by registered name.
print(list(BACKBONE_REGISTRY._obj_map))

# Get the dataset and inspect a data sample


In [None]:
from detectron2.data import DatasetCatalog

# Fetch the records registered under this dataset name.
dataset_dicts = DatasetCatalog.get("RADIal_COCO-style")

# Dump the first record in full so its structure can be checked by eye,
# in particular whether its annotations carry an "aperture" field.
print("First sample:")
print(dataset_dicts[0])

# Report, annotation by annotation, whether the "aperture" key is present.
for annotation in dataset_dicts[0]["annotations"]:
    if "aperture" not in annotation:
        print("Aperture not found in this annotation")
        continue
    print("Aperture found in annotation:", annotation["aperture"])

# Build train loader with custom mapper and inspect output

In [None]:
from detectron2.data import build_detection_train_loader

# Build the training loader with the custom mapper and pull a single batch.
# `cfg` and `radar_mapper` are not defined in this cell; they must already exist.
train_loader = build_detection_train_loader(cfg, mapper=radar_mapper)
data_sample = next(iter(train_loader))

# Only the first element of the batch is inspected.
first_item = data_sample[0]
print("Data sample keys:", list(first_item.keys()))
print("Image shape:", first_item["image"].shape)
if "instances" in first_item:
    print("Instances fields:", first_item["instances"].get_fields().keys())

# Test backbone

In [None]:
# `torch` is imported here because this cell uses it directly; without the
# import the cell fails with "NameError: name 'torch' is not defined".
import torch
from detectron2.modeling import ShapeSpec

# NOTE: `cfg`, `device` and `CustomResNetBackbone` are not defined in this cell.
# They are set up in the configuration cell further down in this notebook,
# which must have been executed before this one.

# Random stand-in input: batch of 1, 32 channels, height 512, width 256.
dummy_input = torch.randn(1, 32, 512, 256).to(device)
# Shape description handed to the backbone constructor; mirrors dummy_input.
input_shape = ShapeSpec(channels=32, height=512, width=256)

backbone = CustomResNetBackbone(cfg, input_shape=input_shape)
backbone = backbone.to(device)

# Forward pass through the backbone
features = backbone(dummy_input)

# Print the output feature map shapes, one line per returned key
for key, feature_map in features.items():
    print(f"{key}: {feature_map.shape}")

NameError: name 'torch' is not defined

: 

# Print architecture after config 

In [None]:
# Imported here because no earlier cell imports `build_model`.
from detectron2.modeling import build_model

# Build the full model from `cfg` and move it to `device`. Both names come from
# the configuration cell, which must have been run before this one.
model = build_model(cfg)
model = model.to(device)

# Put model in eval mode. Kept as the last expression of the cell so that the
# value it returns is rendered as the cell output.
model.eval()


# Inspect the outputs

In [None]:
import torch
# Imported here because no earlier cell imports `build_model`.
from detectron2.modeling import build_model

# Dummy input with batch size 1, 32 channels, height=512, width=256,
# moved to `device` (defined in the configuration cell).
dummy_input = torch.randn(1, 32, 512, 256)
dummy_input = dummy_input.to(device)

# Build the full model from `cfg` and switch it to eval mode before inference.
model = build_model(cfg)
model = model.to(device)
model.eval()

# The model is called with a list holding one dict per image. dummy_input has
# batch size 1, so this list has exactly one entry.
batched_inputs = [{"image": image} for image in dummy_input]

with torch.no_grad():
    outputs = model(batched_inputs)

# Report the container type and, for sequences, the type of each element.
print("Type of outputs:", type(outputs))
if isinstance(outputs, (tuple, list)):
    print("Length of outputs:", len(outputs))
    for i, item in enumerate(outputs):
        print(f"Output[{i}]: type {type(item)}")

# Generate prediction files

In [None]:
import numpy as np

output_filename = "detections.txt"
with open(output_filename, "w") as f:
    # `outputs` comes from the inference cell; each entry is read via its "instances" key.
    for output in outputs:
        instances = output["instances"]
        boxes = instances.pred_boxes.tensor.cpu().numpy()    # rows are indexed below as [x0, y0, x1, y1]
        scores = instances.scores.cpu().numpy()
        classes = instances.pred_classes.cpu().numpy()
        # The custom aperture field is optional; fall back to None when absent.
        aperture = instances.aperture.cpu().numpy() if hasattr(instances, "aperture") else None

        # One text line per detection: class, score, box, aperture.
        for i in range(boxes.shape[0]):
            if aperture is None:
                ap_val = "N/A"
            elif aperture[i].ndim > 0:
                # Per-detection aperture has extra dimensions: keep the first element.
                ap_val = aperture[i][0]
            else:
                ap_val = aperture[i]
            f.write(f"{classes[i]}, {scores[i]:.4f}, {boxes[i].tolist()}, {ap_val}\n")

# Sanity check for loss computation

In [None]:
import torch
from detectron2.structures import Boxes, Instances
from detectron2.modeling import build_model
from detectron2.config import get_cfg
from detectron2.utils.events import EventStorage

# Prerequisites: `cfg` is already configured for the custom architecture and
# `device` is defined (e.g., device = torch.device("cuda:1")).

# One random 32-channel image of size 512x256, created directly on `device`.
dummy_input = torch.randn(1, 32, 512, 256, device=device)

# Dummy ground truth for a single object:
#   - one box [x0, y0, x1, y1] in pixel coordinates,
#   - class index 0 (the only foreground class),
#   - a placeholder aperture value of 0.5.
dummy_gt_boxes = Boxes(torch.tensor([[50.0, 100.0, 200.0, 120.0]], device=device))
dummy_gt_classes = torch.tensor([0], device=device)
dummy_gt_aperture = torch.tensor([[0.5]], device=device)

# Bundle the ground truth into an Instances object for a (height, width) = (512, 256) image.
dummy_instances = Instances(
    (512, 256),
    gt_boxes=dummy_gt_boxes,
    gt_classes=dummy_gt_classes,
    gt_aperture=dummy_gt_aperture,
)

# The model takes a list with one dict per image.
batched_inputs = [{"image": dummy_input[0], "instances": dummy_instances}]

# Build the model on `device` and switch to training mode.
model = build_model(cfg).to(device)
model.train()

# The forward pass runs inside an EventStorage context.
with EventStorage():
    losses = model(batched_inputs)
    total_loss = sum(losses.values())
    print("Total loss:", total_loss.item())
    total_loss.backward()

In [None]:
import torch
from detectron2.structures import Boxes, Instances
from detectron2.modeling import build_model
from detectron2.config import get_cfg
from detectron2.utils.events import EventStorage
import numpy as np
import cv2
from detectron2.engine import DefaultPredictor
from detectron2 import model_zoo
from custom_anchorGen2 import CustomAnchorGenerator
from custom_backbone import CustomResNetBackbone
from CustomFastRCNNOutputLayers import CustomFastRCNNOutputLayers
from CustomStandardROIHeads import CustomStandardROIHeads
from detectron2.checkpoint import DetectionCheckpointer

# Target device. cfg.MODEL.DEVICE below is derived from it so the two cannot drift apart.
device = torch.device("cuda:1")

# Start from Detectron2's Faster R-CNN R50-FPN (3x) config and override it below.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = ""  # no checkpoint path configured
cfg.MODEL.DEVICE = str(device)  # str(torch.device("cuda:1")) == "cuda:1"
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
# These dataset names must be registered in the DatasetCatalog before a data loader is built.
cfg.DATASETS.TRAIN = ("RADIal_COCO-style_train",)
cfg.DATASETS.TEST = ("RADIal_COCO-style_val",)

# Manually set the required fields for a ResNet-FPN backbone:
cfg.MODEL.RESNETS.DEPTH = 50  # or 101, depending on your design
cfg.MODEL.RESNETS.STEM_OUT_CHANNELS = 192
cfg.MODEL.RESNETS.OUT_FEATURES = ["res2", "res3", "res4"]
cfg.MODEL.RESNETS.NORM = ""
cfg.MODEL.FPN.IN_FEATURES = ["res2", "res3", "res4"]

# Anchor generator settings: three levels, one size per level, one shared aspect ratio.
# NOTE(review): STD_BEHAVIOR is presumably a key consumed by CustomAnchorGenerator — confirm.
cfg.MODEL.ANCHOR_GENERATOR.STRIDES = [1, 2, 4]
cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[71], [71], [71]]
cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.078125]]
cfg.MODEL.ANCHOR_GENERATOR.OFFSET = 0.5
cfg.MODEL.ANCHOR_GENERATOR.STD_BEHAVIOR = True

# Use the custom ROI heads that instantiate the custom box predictor
cfg.MODEL.ROI_HEADS.NAME = "CustomStandardROIHeads"

# Box-head regression and loss settings. BBOX_REG_WEIGHTS has four entries; the
# aperture term is weighted separately through APERTURE_LOSS_WEIGHT.
# NOTE(review): APERTURE_LOSS_WEIGHT and LOSS_CLS_WEIGHT are presumably read by the
# custom ROI heads / predictor — confirm.
cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (1.0, 5.0, 1.0, 5.0)
cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_TYPE = "smooth_l1"
cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_WEIGHT = 1.0
cfg.MODEL.ROI_BOX_HEAD.APERTURE_LOSS_WEIGHT = 300.0
cfg.MODEL.ROI_BOX_HEAD.LOSS_CLS_WEIGHT = 1.4
cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG = True

# Feature levels consumed by the ROI heads and the RPN, plus FPN options.
cfg.MODEL.ROI_HEADS.IN_FEATURES = ["p2", "p3", "p4"]
cfg.MODEL.RPN.IN_FEATURES = ["p2", "p3", "p4"]
cfg.MODEL.FPN.OUT_CHANNELS = 256
cfg.MODEL.FPN.NORM = ""
cfg.MODEL.FPN.FUSE_TYPE = "sum"
cfg.MODEL.BACKBONE.FREEZE_AT = 0
# Set the backbone
cfg.MODEL.BACKBONE.NAME = 'CustomResNetBackbone'
# Normalization statistics stored on the config: 32 mean values and 32 std
# values, matching the 32-channel inputs used elsewhere in this notebook.
cfg.MODEL.PIXEL_MEAN = [-2.6244e-03, -2.1335e-01,  1.8789e-02, -1.4427e+00, -3.7618e-01,
                1.3594e+00, -2.2987e-01,  1.2244e-01,  1.7359e+00, -6.5345e-01,
                3.7976e-01,  5.5521e+00,  7.7462e-01, -1.5589e+00, -7.2473e-01,
                1.5182e+00, -3.7189e-01, -8.8332e-02, -1.6194e-01,  1.0984e+00,
                9.9929e-01, -1.0495e+00,  1.9972e+00,  9.2869e-01,  1.8991e+00,
               -2.3772e-01,  2.0000e+00,  7.7737e-01,  1.3239e+00,  1.1817e+00,
               -6.9696e-01,  4.4288e-01] 
cfg.MODEL.PIXEL_STD = [20775.3809, 23085.5000, 23017.6387, 14548.6357, 32133.5547, 28838.8047,
                27195.8945, 33103.7148, 32181.5273, 35022.1797, 31259.1895, 36684.6133,
                33552.9258, 25958.7539, 29532.6230, 32646.8984, 20728.3320, 23160.8828,
                23069.0449, 14915.9053, 32149.6172, 28958.5840, 27210.8652, 33005.6602,
                31905.9336, 35124.9180, 31258.4316, 31086.0273, 33628.5352, 25950.2363,
                29445.2598, 32885.7422]
# Select the custom anchor generator by its registered name.
cfg.MODEL.ANCHOR_GENERATOR.NAME = 'CustomAnchorGenerator'

import torch
from detectron2.data import build_detection_train_loader
from detectron2.utils.events import EventStorage
from detectron2.modeling import build_model
from detectron2.config import get_cfg


# Build the train dataloader using the custom mapper.
# NOTE: every name in cfg.DATASETS.TRAIN must already be registered in the
# DatasetCatalog; otherwise this call fails with
# KeyError: "Dataset '...' is not registered!".
# `radar_mapper` is not defined in this cell and must already exist.
train_loader = build_detection_train_loader(cfg, mapper=radar_mapper)

# Retrieve one batch from the train loader and summarize its first element.
data_batch = next(iter(train_loader))
print("A batch from the train loader:")
print("Keys in batch sample:", data_batch[0].keys())
print("Image shape:", data_batch[0]["image"].shape)
if "instances" in data_batch[0]:
    print("Instances fields:", data_batch[0]["instances"].get_fields().keys())

# Build the model on the same `device` that the config was set up with
# (instead of repeating the device string), then switch to training mode.
model = build_model(cfg)
model = model.to(device)
model.train()

# Wrap the forward pass within an EventStorage context.
with EventStorage():
    losses = model(data_batch)
    total_loss = sum(losses.values())
    print("Losses:", {k: v.item() for k, v in losses.items()})
    print("Total loss:", total_loss.item())
    total_loss.backward()
    print("Backward pass successful!")
# --- Inspect the gradients that reached the aperture output ---
# The box predictor is read from roi_heads.box_predictor.
predictor = model.roi_heads.box_predictor

# Gradient of the weight of the layer that produces the regression outputs.
# Dim 0 is treated below as the regression-output axis.
weight_grad = predictor.bbox_pred.weight.grad

# Regression outputs per head, as reported by the predictor
# (assumed layout: 4 box deltas followed by the aperture at index 4 — TODO confirm).
total_reg_dim = predictor.total_reg_dim
# Number of regression heads (e.g., 1 if class-agnostic)
num_bbox_reg_classes = weight_grad.shape[0] // total_reg_dim

# Regroup the rows as [head, output slot, in_features] so the aperture slot
# can be picked out of every head.
reshaped_weight_grad = weight_grad.view(num_bbox_reg_classes, total_reg_dim, -1)
aperture_weight_grad = reshaped_weight_grad.select(1, 4)

# L2 norm of the aperture weight gradient, one value per regression head.
aperture_grad_norms = aperture_weight_grad.norm(dim=1)
print("Aperture weight gradient norms per regression head:", aperture_grad_norms)

# Same slicing for the bias gradient; the absolute value is printed per head.
bias_grad = predictor.bbox_pred.bias.grad.view(num_bbox_reg_classes, total_reg_dim)
aperture_bias_grad = bias_grad.select(1, 4)
print("Aperture bias gradient norms per regression head:", aperture_bias_grad.abs())

The following dataset names are not registered in the DatasetCatalog: {'RADIal_COCO-style_train'}. Available datasets are KeysView(DatasetCatalog(registered datasets: coco_2014_train, coco_2014_val, coco_2014_minival, coco_2014_valminusminival, coco_2017_train, coco_2017_val, coco_2017_test, coco_2017_test-dev, coco_2017_val_100, keypoints_coco_2014_train, keypoints_coco_2014_val, keypoints_coco_2014_minival, keypoints_coco_2014_valminusminival, keypoints_coco_2017_train, keypoints_coco_2017_val, keypoints_coco_2017_val_100, coco_2017_train_panoptic_separated, coco_2017_train_panoptic_stuffonly, coco_2017_train_panoptic, coco_2017_val_panoptic_separated, coco_2017_val_panoptic_stuffonly, coco_2017_val_panoptic, coco_2017_val_100_panoptic_separated, coco_2017_val_100_panoptic_stuffonly, coco_2017_val_100_panoptic, lvis_v1_train, lvis_v1_val, lvis_v1_test_dev, lvis_v1_test_challenge, lvis_v0.5_train, lvis_v0.5_val, lvis_v0.5_val_rand_100, lvis_v0.5_test, lvis_v0.5_train_cocofied, lvis_v0

KeyError: "Dataset 'RADIal_COCO-style_train' is not registered! Available datasets are: coco_2014_train, coco_2014_val, coco_2014_minival, coco_2014_valminusminival, coco_2017_train, coco_2017_val, coco_2017_test, coco_2017_test-dev, coco_2017_val_100, keypoints_coco_2014_train, keypoints_coco_2014_val, keypoints_coco_2014_minival, keypoints_coco_2014_valminusminival, keypoints_coco_2017_train, keypoints_coco_2017_val, keypoints_coco_2017_val_100, coco_2017_train_panoptic_separated, coco_2017_train_panoptic_stuffonly, coco_2017_train_panoptic, coco_2017_val_panoptic_separated, coco_2017_val_panoptic_stuffonly, coco_2017_val_panoptic, coco_2017_val_100_panoptic_separated, coco_2017_val_100_panoptic_stuffonly, coco_2017_val_100_panoptic, lvis_v1_train, lvis_v1_val, lvis_v1_test_dev, lvis_v1_test_challenge, lvis_v0.5_train, lvis_v0.5_val, lvis_v0.5_val_rand_100, lvis_v0.5_test, lvis_v0.5_train_cocofied, lvis_v0.5_val_cocofied, cityscapes_fine_instance_seg_train, cityscapes_fine_sem_seg_train, cityscapes_fine_instance_seg_val, cityscapes_fine_sem_seg_val, cityscapes_fine_instance_seg_test, cityscapes_fine_sem_seg_test, cityscapes_fine_panoptic_train, cityscapes_fine_panoptic_val, voc_2007_trainval, voc_2007_train, voc_2007_val, voc_2007_test, voc_2012_trainval, voc_2012_train, voc_2012_val, ade20k_sem_seg_train, ade20k_sem_seg_val"

# Anchor generator inspector 

In [None]:
# Anchor generator of the built model (requires `model` from an earlier cell).
anchor_generator = model.proposal_generator.anchor_generator

# Values as written in the config
print("\nConfiguration Details:")
print(f"Sizes configuration: {cfg.MODEL.ANCHOR_GENERATOR.SIZES}")
print(f"Aspect ratios configuration: {cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS}")
print(f"Strides configuration: {cfg.MODEL.ANCHOR_GENERATOR.STRIDES}")

# Values held by the instantiated generator
print("\nAnchor Generator State:")
print(f"Strides: {anchor_generator.strides}")
print(f"Number of features: {anchor_generator.num_features}")
print(f"Number of cell anchors: {anchor_generator.num_cell_anchors}")

# Per-level dump of the cell anchors
print("\nDetailed Cell Anchors:")
for i, cell_anchor in enumerate(anchor_generator.cell_anchors):
    print(f"\nLevel {i}:")
    print(f"Shape: {cell_anchor.shape}")
    if len(cell_anchor) == 0:
        # Nothing to show for an empty level.
        continue
    print("All anchor boxes at this level:")
    print(cell_anchor)

    if torch.all(cell_anchor == 0):
        # All-zero anchors: skip the size report (the ratio below would divide by zero).
        continue
    # Columns are used as [x0, y0, x1, y1].
    widths = cell_anchor[:, 2] - cell_anchor[:, 0]
    heights = cell_anchor[:, 3] - cell_anchor[:, 1]
    print(f"Anchor widths: {widths}")
    print(f"Anchor heights: {heights}")
    print(f"Aspect ratios: {heights/widths}")

In [None]:
# Run the backbone on the current `dummy_input` to get the real feature maps,
# then generate the anchors for them. `backbone`, `dummy_input` and
# `anchor_generator` come from earlier cells.
features = backbone(dummy_input)
anchors = anchor_generator(list(features.values()))

print("Effective Coverage Analysis:")
for level_idx, (level_name, feature_map) in enumerate(features.items()):
    # Spatial size of this level's feature map (last two dims) and its stride.
    fmap_h, fmap_w = feature_map.shape[-2:]
    stride = anchor_generator.strides[level_idx]

    # Size of the first anchor of this level in absolute coordinates;
    # entries are used as [x0, y0, x1, y1].
    first_anchor = anchors[level_idx].tensor[0]
    abs_width = first_anchor[2] - first_anchor[0]
    abs_height = first_anchor[3] - first_anchor[1]

    # The same size expressed in feature-map cells.
    eff_width = abs_width / stride
    eff_height = abs_height / stride

    print(f"\nLevel {level_name}:")
    print(f"Feature map size: {fmap_h}×{fmap_w}")
    print(f"Stride: {stride}")
    print(f"Absolute anchor size: {abs_width:.2f}×{abs_height:.2f}")
    print(f"Effective coverage (feature map units): {eff_width:.2f}×{eff_height:.2f}")

In [None]:
import matplotlib.pyplot as plt

def plot_coverage(feature_map_size, effective_coverage, level_name):
    """Draw a feature map outline with a centred anchor-coverage rectangle.

    Args:
        feature_map_size: indexable as (height, width) of the feature map.
        effective_coverage: indexable as (width, height) of the anchor coverage,
            in the same units as ``feature_map_size``.
        level_name: label inserted into the figure title.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    map_h, map_w = feature_map_size[0], feature_map_size[1]
    cov_w, cov_h = effective_coverage[0], effective_coverage[1]

    fig, ax = plt.subplots(figsize=(6, 9))

    # Blue outline: the feature map, anchored at the origin.
    rect_map = plt.Rectangle(
        (0, 0), map_w, map_h,
        fill=False, color='blue', label='Feature Map')
    # Red outline: the anchor coverage, centred inside the feature map.
    anchor_origin = ((map_w - cov_w)/2, (map_h - cov_h)/2)
    rect_anchor = plt.Rectangle(
        anchor_origin, cov_w, cov_h,
        fill=False, color='red', label='Anchor Coverage')

    for patch in (rect_map, rect_anchor):
        ax.add_patch(patch)

    # Leave a 10-unit margin around the feature map on every side.
    ax.set_xlim(-10, map_w + 10)
    ax.set_ylim(-10, map_h + 10)
    ax.set_title(f'Level {level_name} Coverage')
    ax.legend()
    plt.show()

# Draw one coverage figure per feature level.
for level_name, feature_map in features.items():
    fmap_h, fmap_w = feature_map.shape[-2:]
    # Level index from the digit in the key: p2->0, p3->1, p4->2
    level_stride = anchor_generator.strides[int(level_name[1]) - 2]

    # Hard-coded absolute anchor size (254.02 wide, 19.85 high) scaled to
    # feature-map units. NOTE(review): these values agree with
    # sqrt(71**2 / 0.078125) ≈ 254.02 and 254.02 * 0.078125 ≈ 19.85 for the
    # SIZES / ASPECT_RATIOS set in the config cell — confirm that is their origin.
    coverage_wh = (254.02 / level_stride, 19.85 / level_stride)

    plot_coverage((fmap_h, fmap_w), coverage_wh, level_name)

In [None]:
# Get template anchors (cell anchors)
print("Template Anchors (Cell Anchors):")
for i, cell_anchor in enumerate(anchor_generator.cell_anchors):
    print(f"\nLevel {i}:")
    print(f"Template shape: {cell_anchor.shape}")
    print(f"Template anchor: {cell_anchor[0]}")

# Generate the anchors from an explicit, ordered list of levels. The same list
# drives the analysis loop below, so anchors[i] always belongs to level_names[i]
# regardless of how `features` orders its keys or whether it has extra keys.
level_names = ["p2", "p3", "p4"]
features_list = [features[k] for k in level_names]
anchors = anchor_generator(features_list)

print("\nGenerated Anchors Position Analysis:")
total_anchors = 0

for i, level_name in enumerate(level_names):
    feature_map = features[level_name]
    fmap_h, fmap_w = feature_map.shape[-2:]
    # Horizontal midpoint of the level in absolute coordinates.
    expected_center = (fmap_w * anchor_generator.strides[i]) / 2

    # Get all anchors for this level
    level_anchors = anchors[i].tensor
    num_anchors = len(level_anchors)
    total_anchors += num_anchors

    # x-coordinate of every anchor centre (columns 0 and 2 are x0 and x1)
    anchor_centers_x = (level_anchors[:, 0] + level_anchors[:, 2]) / 2

    # One anchor per cell in standard mode, otherwise one per feature-map row.
    expected_anchors = fmap_h * fmap_w if anchor_generator.std_behavior else fmap_h

    # Build the reference value with the anchors' own dtype and device rather
    # than a hard-coded device string, so the comparison works wherever the
    # anchors live.
    expected_center_t = anchor_centers_x.new_tensor(expected_center)

    print(f"\nLevel {level_name}:")
    print(f"Feature map size: {fmap_h}×{fmap_w}")
    print(f"Number of anchors: {num_anchors}")
    print(f"Expected number of anchors: {expected_anchors}")
    print(f"Behavior matches config?: {num_anchors == expected_anchors}")
    print(f"Expected center position: {expected_center}")
    print(f"Actual anchor centers (first 5): {anchor_centers_x[:5]}")
    print(f"All centers same?: {torch.allclose(anchor_centers_x, anchor_centers_x[0])}")
    print(f"Center matches expected?: {torch.allclose(anchor_centers_x[0], expected_center_t)}")

print(f"\nTotal number of anchors across all levels: {total_anchors}")
print(f"Using {'standard' if anchor_generator.std_behavior else 'center-only'} behavior")

# Predictor head output inspector 

In [None]:
# Dummy forward pass example

import torch
from detectron2.layers import ShapeSpec
from detectron2.modeling.box_regression import Box2BoxTransform
from CustomFastRCNNOutputLayers import CustomFastRCNNOutputLayers

# Shape description for the predictor input: 256 channels, 7x7 spatial size.
input_shape = ShapeSpec(channels=256, height=7, width=7)

# Dummy box2box transform with 4 weights (the aperture is not part of it).
# These weights are local to this test and differ from BBOX_REG_WEIGHTS in the config cell.
dummy_box2box_transform = Box2BoxTransform(weights=(1.0, 10.0, 1.0, 5.0))

# Constructor arguments for the custom predictor.
num_classes = 1   # same as NUM_CLASSES in the config cell
predictor_kwargs = dict(
    box2box_transform=dummy_box2box_transform,
    num_classes=num_classes,
    cls_agnostic_bbox_reg=True,   # same as CLS_AGNOSTIC_BBOX_REG in the config cell
    smooth_l1_beta=0.0,
    box_reg_loss_type="smooth_l1",
    test_score_thresh=0.0,
    test_nms_thresh=0.5,
    test_topk_per_image=100,
    # Dummy value; the config cell sets APERTURE_LOSS_WEIGHT to 300.0 instead.
    aperture_loss_weight=2.0,
)
predictor = CustomFastRCNNOutputLayers(input_shape, **predictor_kwargs)

# Set to evaluation mode
predictor.eval()

# Dummy input matching `input_shape`: a batch of 16 regions.
dummy_input = torch.randn(16, 256, 7, 7)

# Forward pass through the predictor without tracking gradients.
with torch.no_grad():
    scores, proposal_deltas = predictor(dummy_input)

print("Scores shape:", scores.shape)
print("Proposal deltas shape:", proposal_deltas.shape)