In [252]:
import pandas as pd
import ast
import torch
from collections import Counter
from tqdm import tqdm

# Location of the CSV file that holds the model's detections.
predictions_path = 'result.csv'

# Read the detections, then parse every 'prediction_list' cell (stored as the
# string repr of a list) back into an actual Python list.
predictions_df = pd.read_csv(predictions_path)
predictions_df["prediction_list"] = predictions_df["prediction_list"].map(ast.literal_eval)

predictions_df.head()

Unnamed: 0,id,image_id,prediction_list
0,1,002824.jpg,"[[person, 0.5703840851783752, 8, 13, 488, 427]]"
1,2,000473.jpg,"[[aeroplane, 0.8369326591491699, 425, 123, 451..."
2,3,000358.jpg,"[[person, 0.7091589570045471, 12, 21, 115, 302..."
3,4,006052.jpg,"[[cat, 0.36228689551353455, 132, 33, 485, 363]]"
4,5,004758.jpg,"[[dog, 0.6347848176956177, 43, 42, 133, 307]]"


In [253]:
# The 20 Pascal VOC object categories. A class's position in this tuple is the
# integer class index used when parsing the ground truth and when building the
# name -> index lookup.
VOC_CLASSES = (
    "aeroplane", "bicycle", "bird", "boat", "bottle",
    "bus", "car", "cat", "chair", "cow",
    "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor",
)

In [254]:
# Load the ground truth data
ground_truth_path = 'data/voc2007test_gt.txt'

# Every non-empty line is parsed as:
#   <image_id> followed by zero or more groups of <x1> <y1> <x2> <y2> <class_index>
GT_FIELDS_PER_BOX = 5

ground_truth_data = []

with open(ground_truth_path, 'r') as file:
    for line_number, line in enumerate(file, start=1):
        split_line = line.split()

        # Blank lines (e.g. a trailing empty line) carry no annotation; skip
        # them instead of failing on split_line[0].
        if not split_line:
            continue

        image_id, box_fields = split_line[0], split_line[1:]

        # A truncated box group would otherwise surface as an opaque IndexError.
        if len(box_fields) % GT_FIELDS_PER_BOX != 0:
            raise ValueError(
                f"{ground_truth_path}:{line_number}: expected groups of "
                f"{GT_FIELDS_PER_BOX} values per box after the image id, "
                f"got {len(box_fields)} values"
            )

        boxes = []
        for i in range(0, len(box_fields), GT_FIELDS_PER_BOX):
            x1, y1, x2, y2, class_index = map(int, box_fields[i:i + GT_FIELDS_PER_BOX])
            box = {
                "x1": x1,
                "y1": y1,
                "x2": x2,
                "y2": y2,
                "class_name": VOC_CLASSES[class_index],
            }
            boxes.append(box)
        ground_truth_data.append({"image_id": image_id, "boxes": boxes})

# Convert the list of dictionaries to a DataFrame (one row per image)
ground_truth_df = pd.DataFrame(ground_truth_data)
ground_truth_df.head()

Unnamed: 0,image_id,boxes
0,002824.jpg,"[{'x1': 1, 'y1': 13, 'x2': 500, 'y2': 431, 'cl..."
1,000473.jpg,"[{'x1': 415, 'y1': 120, 'x2': 460, 'y2': 153, ..."
2,000358.jpg,"[{'x1': 89, 'y1': 100, 'x2': 387, 'y2': 284, '..."
3,006052.jpg,"[{'x1': 129, 'y1': 51, 'x2': 497, 'y2': 374, '..."
4,004758.jpg,"[{'x1': 44, 'y1': 49, 'x2': 129, 'y2': 308, 'c..."


In [255]:
# Inspect the parsed boxes of the row labelled 0 as a sanity check.
ground_truth_df.loc[0, "boxes"]

[{'x1': 1, 'y1': 13, 'x2': 500, 'y2': 431, 'class_name': 'person'}]

In [256]:
# Lookup table: class name -> integer class index (its position in VOC_CLASSES).
class_name_to_index = dict(zip(VOC_CLASSES, range(len(VOC_CLASSES))))

def convert_predictions_to_list(df, class_name_to_index):
    """
    Flatten a predictions DataFrame into one record per predicted box.

    Parameters:
        df: DataFrame with an "image_id" column and a "prediction_list" column;
            each prediction is a 6-item sequence
            (class_name, prob_score, x1, y1, x2, y2).
        class_name_to_index (dict): maps a class name to its integer index.

    Returns:
        list: records of the form
        [image_id, class_index, prob_score, x1, y1, x2, y2], in row order.
    """
    records = []
    for _, row in df.iterrows():
        image_id = row["image_id"]
        records.extend(
            [image_id, class_name_to_index[class_name], prob_score, x1, y1, x2, y2]
            for class_name, prob_score, x1, y1, x2, y2 in row["prediction_list"]
        )
    return records

def convert_ground_truth_to_list(df, class_name_to_index):
    """
    Flatten a ground-truth DataFrame into one record per annotated box.

    Parameters:
        df: DataFrame with an "image_id" column and a "boxes" column; each box
            is a dict with keys "x1", "y1", "x2", "y2" and "class_name".
        class_name_to_index (dict): maps a class name to its integer index.

    Returns:
        list: records of the form [image_id, class_index, 1.0, x1, y1, x2, y2],
        in row order. The constant 1.0 fills the score slot so ground-truth
        records have the same layout as prediction records.
    """
    records = []
    for _, row in df.iterrows():
        image_id = row["image_id"]
        records.extend(
            [
                image_id,
                class_name_to_index[box["class_name"]],
                1.0,
                box["x1"],
                box["y1"],
                box["x2"],
                box["y2"],
            ]
            for box in row["boxes"]
        )
    return records

# Flatten both DataFrames into per-box records of the form
# [image_id, class_index, score, x1, y1, x2, y2].
pred_boxes = convert_predictions_to_list(
    df=predictions_df, class_name_to_index=class_name_to_index
)
true_boxes = convert_ground_truth_to_list(
    df=ground_truth_df, class_name_to_index=class_name_to_index
)

# Peek at the first few records of each list to verify the conversion.
(pred_boxes[:5], true_boxes[:5])

([['002824.jpg', 14, 0.5703840851783752, 8, 13, 488, 427],
  ['000473.jpg', 0, 0.8369326591491699, 425, 123, 451, 148],
  ['000358.jpg', 14, 0.7091589570045471, 12, 21, 115, 302],
  ['000358.jpg', 6, 0.6020732522010803, 62, 87, 392, 297],
  ['006052.jpg', 7, 0.36228689551353455, 132, 33, 485, 363]],
 [['002824.jpg', 14, 1.0, 1, 13, 500, 431],
  ['000473.jpg', 0, 1.0, 415, 120, 460, 153],
  ['000358.jpg', 6, 1.0, 89, 100, 387, 284],
  ['000358.jpg', 14, 1.0, 23, 33, 110, 287],
  ['006052.jpg', 7, 1.0, 129, 51, 497, 374]])

In [257]:
# Give every distinct ground-truth image id its own integer index.
unique_image_ids = ground_truth_df["image_id"].unique()
image_id_to_index = dict(zip(unique_image_ids, range(len(unique_image_ids))))

def update_list_with_index(input_list, image_id_to_index):
    """
    Return a copy of input_list whose records start with a numeric image index.

    Each record's first element (the image id) is looked up in
    image_id_to_index; the remaining elements are carried over unchanged.
    The input records themselves are not modified.
    """
    return [
        [image_id_to_index[record[0]]] + record[1:]
        for record in input_list
    ]

# Swap the string image ids in both record lists for their numeric indices.
pred_boxes_updated = update_list_with_index(
    input_list=pred_boxes, image_id_to_index=image_id_to_index
)
true_boxes_updated = update_list_with_index(
    input_list=true_boxes, image_id_to_index=image_id_to_index
)

# Peek at the first few records of each updated list for verification.
(pred_boxes_updated[:5], true_boxes_updated[:5])

([[0, 14, 0.5703840851783752, 8, 13, 488, 427],
  [1, 0, 0.8369326591491699, 425, 123, 451, 148],
  [2, 14, 0.7091589570045471, 12, 21, 115, 302],
  [2, 6, 0.6020732522010803, 62, 87, 392, 297],
  [3, 7, 0.36228689551353455, 132, 33, 485, 363]],
 [[0, 14, 1.0, 1, 13, 500, 431],
  [1, 0, 1.0, 415, 120, 460, 153],
  [2, 6, 1.0, 89, 100, 387, 284],
  [2, 14, 1.0, 23, 33, 110, 287],
  [3, 7, 1.0, 129, 51, 497, 374]])

In [258]:
# Copyright (c) 2023 Aladdin Persson
# The following code is derived from the YOLOv3 implementation by Aladdin Persson available at
# https://github.com/aladdinpersson/Machine-Learning-Collection/blob/master/ML/Pytorch/object_detection/YOLOv3/
# MIT license

def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint"):
    """
    Calculate the Intersection over Union (IoU) of predicted and target boxes.

    Parameters:
        boxes_preds (tensor): Predicted boxes; the last dimension holds the
            4 box values, e.g. shape (BATCH_SIZE, 4) or (4,).
        boxes_labels (tensor): Target boxes, laid out like boxes_preds.
        box_format (str): "midpoint" for (x_center, y_center, width, height)
            or "corners" for (x1, y1, x2, y2).

    Returns:
        tensor: IoU values; the last dimension has size 1 (e.g. shape
        (BATCH_SIZE, 1) for (BATCH_SIZE, 4) inputs).

    Raises:
        ValueError: If box_format is neither "midpoint" nor "corners".
    """
    box1_x1, box1_y1, box1_x2, box1_y2 = _box_corners(boxes_preds, box_format)
    box2_x1, box2_y1, box2_x2, box2_y2 = _box_corners(boxes_labels, box_format)

    # Corners of the intersection rectangle.
    x1 = torch.max(box1_x1, box2_x1)
    y1 = torch.max(box1_y1, box2_y1)
    x2 = torch.min(box1_x2, box2_x2)
    y2 = torch.min(box1_y2, box2_y2)

    # clamp(0) turns a negative extent (boxes do not overlap) into zero area.
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
    box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
    box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))

    # The small constant keeps the division defined when the union is zero.
    return intersection / (box1_area + box2_area - intersection + 1e-6)


def _box_corners(boxes, box_format):
    """Return the (x1, y1, x2, y2) slices of boxes, each keeping a size-1 last dim."""
    if box_format == "midpoint":
        center_x, center_y = boxes[..., 0:1], boxes[..., 1:2]
        half_w, half_h = boxes[..., 2:3] / 2, boxes[..., 3:4] / 2
        return (
            center_x - half_w,
            center_y - half_h,
            center_x + half_w,
            center_y + half_h,
        )
    if box_format == "corners":
        return boxes[..., 0:1], boxes[..., 1:2], boxes[..., 2:3], boxes[..., 3:4]
    raise ValueError(
        f"box_format must be 'midpoint' or 'corners', got {box_format!r}"
    )


In [259]:
# Copyright (c) 2023 Aladdin Persson
# The following code is derived from the YOLOv3 implementation by Aladdin Persson available at
# https://github.com/aladdinpersson/Machine-Learning-Collection/blob/master/ML/Pytorch/object_detection/YOLOv3/
# MIT license

def non_max_suppression(bboxes, iou_threshold, threshold, box_format="corners", max_cal=3000):
    """
    Apply Non-Max Suppression (NMS) to a list of boxes.

    Parameters:
        bboxes (list): boxes, each given as
            [class_pred, prob_score, x1, y1, x2, y2]
        iou_threshold (float): a box is dropped when it has the same class as
            an already-kept box and their IoU is not below this value
        threshold (float): boxes whose prob_score is not above this value are
            discarded before NMS (independent of IoU)
        box_format (str): "midpoint" or "corners", forwarded to the IoU function
        max_cal: only this many highest-scoring boxes take part in NMS

    Returns:
        list: the boxes that survive NMS, highest score first
    """

    assert type(bboxes) is list

    # Keep confident boxes only, best score first, capped at max_cal entries.
    candidates = sorted(
        (candidate for candidate in bboxes if candidate[1] > threshold),
        key=lambda candidate: candidate[1],
        reverse=True,
    )[:max_cal]

    kept = []

    while candidates:
        best, remaining = candidates[0], candidates[1:]

        survivors = []
        for other in remaining:
            # Boxes of a different class never compete with the current best.
            if other[0] != best[0]:
                survivors.append(other)
                continue

            overlap = intersection_over_union(
                torch.tensor(best[2:]),
                torch.tensor(other[2:]),
                box_format=box_format,
            )
            # Same class: keep only if it overlaps the best box weakly enough.
            if overlap < iou_threshold:
                survivors.append(other)

        candidates = survivors
        kept.append(best)

    return kept

In [260]:
# Copyright (c) 2023 Aladdin Persson
# The following code is derived from the YOLOv3 implementation by Aladdin Persson available at
# https://github.com/aladdinpersson/Machine-Learning-Collection/blob/master/ML/Pytorch/object_detection/YOLOv3/
# MIT license

def mean_average_precision(
    pred_boxes, true_boxes, iou_threshold=0.5, box_format="midpoint", num_classes=20
):
    """
    Calculates the mean Average Precision (mAP) for object detection.

    Parameters:
        pred_boxes (list): List of lists containing predicted bounding boxes,
                           each specified as
                           [train_idx, class_prediction, prob_score, x1, y1, x2, y2].
        true_boxes (list): List of lists containing ground truth bounding boxes
                           in the SAME 7-element layout as pred_boxes. The
                           score slot (index 2) is ignored, but it must be
                           present because the coordinates are read from
                           index 3 onwards.
        iou_threshold (float): A detection counts as correct only when its best
                               IoU with a ground truth is above this value.
        box_format (str): Format of the bounding boxes - "midpoint" or "corners".
        num_classes (int): Total number of classes; class ids 0..num_classes-1
                           are evaluated.

    Returns:
        tensor: Mean AP over the classes that have at least one ground truth
                box (a 0.0 tensor when no class has any).
        dict: Maps class index -> AP tensor for those same classes.
    """

    average_precisions = []  # AP of each evaluated class.
    epsilon = 1e-6  # Small value to ensure numerical stability.
    class_map = {}  # class index -> AP.

    for c in tqdm(range(num_classes)):
        ground_truths = [true_box for true_box in true_boxes if true_box[1] == c]
        total_true_bboxes = len(ground_truths)

        # Classes without ground truth do not take part in the mean.
        if total_true_bboxes == 0:
            continue

        # Detections of this class, most confident first (stable for ties).
        detections = sorted(
            (detection for detection in pred_boxes if detection[1] == c),
            key=lambda detection: detection[2],
            reverse=True,
        )

        # Group the ground truths by image once, so each detection only looks
        # at the boxes of its own image instead of rescanning the whole class.
        gts_by_image = {}
        for gt in ground_truths:
            gts_by_image.setdefault(gt[0], []).append(gt)

        # One "already matched" flag per ground truth box, per image.
        matched = {
            image_idx: [False] * len(image_gts)
            for image_idx, image_gts in gts_by_image.items()
        }

        TP = torch.zeros(len(detections))  # True Positives.
        FP = torch.zeros(len(detections))  # False Positives.

        for detection_idx, detection in enumerate(detections):
            image_gts = gts_by_image.get(detection[0], [])
            best_iou = 0
            best_gt_idx = None  # Reset per detection so no stale index is reused.

            for idx, gt in enumerate(image_gts):
                iou = intersection_over_union(
                    torch.tensor(detection[3:]),
                    torch.tensor(gt[3:]),
                    box_format=box_format,
                )

                if iou > best_iou:
                    best_iou = iou
                    best_gt_idx = idx

            # A detection is a TP only if it overlaps a ground truth well
            # enough AND that ground truth was not claimed by a more
            # confident detection; everything else is an FP.
            is_true_positive = (
                best_gt_idx is not None
                and best_iou > iou_threshold
                and not matched[detection[0]][best_gt_idx]
            )
            if is_true_positive:
                TP[detection_idx] = 1
                matched[detection[0]][best_gt_idx] = True
            else:
                FP[detection_idx] = 1

        # Cumulative sums give precision and recall at every confidence cut-off.
        TP_cumsum = torch.cumsum(TP, dim=0)
        FP_cumsum = torch.cumsum(FP, dim=0)
        recalls = TP_cumsum / (total_true_bboxes + epsilon)
        precisions = TP_cumsum / (TP_cumsum + FP_cumsum + epsilon)

        # Prepend the (recall=0, precision=1) starting point of the curve.
        precisions = torch.cat((torch.tensor([1.0]), precisions))
        recalls = torch.cat((torch.tensor([0.0]), recalls))

        # AP is the area under the precision-recall curve (trapezoidal rule).
        average_precisions.append(torch.trapz(precisions, recalls))
        class_map[c] = average_precisions[-1]

    # Without any evaluated class there is nothing to average; report 0
    # instead of dividing by zero.
    if not average_precisions:
        return torch.tensor(0.0), class_map

    mean_map = sum(average_precisions) / len(average_precisions)

    return mean_map, class_map

In [261]:
# Score the detections against the ground truth; boxes are (x1, y1, x2, y2).
mapval, map_dict = mean_average_precision(
    pred_boxes=pred_boxes_updated,
    true_boxes=true_boxes_updated,
    iou_threshold=0.5,
    box_format="corners",
    num_classes=20,
)

# Report the AP of every evaluated class, followed by their mean.
for class_index, class_ap in map_dict.items():
    print(f"{VOC_CLASSES[int(class_index)]} AP (EMA): {class_ap.item()}")

print(f"MAP (EMA): {mapval.item()}")

100%|██████████| 20/20 [00:06<00:00,  2.95it/s]

aeroplane AP (EMA): 0.40616244077682495
bicycle AP (EMA): 0.6702146530151367
bird AP (EMA): 0.48762625455856323
boat AP (EMA): 0.28872260451316833
bottle AP (EMA): 0.23380839824676514
bus AP (EMA): 0.6299288868904114
car AP (EMA): 0.6543501019477844
cat AP (EMA): 0.7566591501235962
chair AP (EMA): 0.3437689244747162
cow AP (EMA): 0.5243673920631409
diningtable AP (EMA): 0.4062141478061676
dog AP (EMA): 0.6922149658203125
horse AP (EMA): 0.7172512412071228
motorbike AP (EMA): 0.6244627833366394
person AP (EMA): 0.5648261308670044
pottedplant AP (EMA): 0.2502616047859192
sheep AP (EMA): 0.47784918546676636
sofa AP (EMA): 0.48125213384628296
train AP (EMA): 0.6834219098091125
tvmonitor AP (EMA): 0.4964831471443176
MAP (EMA): 0.5194922685623169



