In [1]:
"""Compute PASCAL_VOC MAP.

Reference:
  https://github.com/chainer/chainercv/blob/master/chainercv/evaluations/eval_detection_voc.py
"""

from __future__ import division

import six
import itertools
import numpy as np

from collections import defaultdict


def voc_eval(
    pred_bboxes,
    pred_labels,
    pred_scores,
    gt_bboxes,
    gt_labels,
    gt_difficults=None,
    iou_thresh=0.5,
    use_07_metric=True,
):
    """Wrap VOC evaluation for PyTorch.

    Converts per-image tensors to numpy arrays, reorders box coordinates
    from (xmin, ymin, xmax, ymax) to (ymin, xmin, ymax, xmax) and forwards
    everything to :func:`eval_detection_voc`.

    Args:
        pred_bboxes (iterable of torch.Tensor): Predicted boxes per image,
            each indexed as ``[:, 0..3]`` in (xmin, ymin, xmax, ymax) order.
        pred_labels (iterable of torch.Tensor): Predicted labels per image.
        pred_scores (iterable of torch.Tensor): Predicted scores per image.
        gt_bboxes (iterable of torch.Tensor): Ground-truth boxes per image,
            same coordinate order as :obj:`pred_bboxes`.
        gt_labels (iterable of torch.Tensor): Ground-truth labels per image.
        gt_difficults: Forwarded unchanged to :func:`eval_detection_voc`.
        iou_thresh (float): Forwarded unchanged.
        use_07_metric (bool): Forwarded unchanged. Note that the default
            here is :obj:`True`, while :func:`eval_detection_voc` itself
            defaults to :obj:`False`.

    Returns:
        dict: The result of :func:`eval_detection_voc`
        (keys ``"ap"`` and ``"map"``).
    """

    def _to_numpy(tensor):
        # .numpy() only works on CPU tensors that do not require grad.
        return tensor.detach().cpu().numpy()

    def _boxes_to_numpy(boxes):
        # Work on a private copy so the caller's tensor is never permuted,
        # regardless of whether xy2yx operates in place.
        return _to_numpy(xy2yx(boxes.detach().clone()))

    pred_bboxes = [_boxes_to_numpy(b) for b in pred_bboxes]
    pred_labels = [_to_numpy(label) for label in pred_labels]
    pred_scores = [_to_numpy(score) for score in pred_scores]
    gt_bboxes = [_boxes_to_numpy(b) for b in gt_bboxes]
    gt_labels = [_to_numpy(label) for label in gt_labels]
    return eval_detection_voc(
        pred_bboxes,
        pred_labels,
        pred_scores,
        gt_bboxes,
        gt_labels,
        gt_difficults,
        iou_thresh,
        use_07_metric,
    )


def xy2yx(boxes):
    """Convert box (xmin,ymin,xmax,ymax) to (ymin,xmin,ymax,xmax).

    Args:
        boxes: A 2-D tensor indexed as ``boxes[:, 0..3]``, one box per row.

    Returns:
        A new tensor with columns 0/1 and 2/3 swapped. The input is left
        untouched, so converting the same boxes twice (e.g. evaluating the
        same predictions again) cannot silently swap the coordinates back.
    """
    # Indexing with a list of column indices produces a copy, not a view.
    return boxes[:, [1, 0, 3, 2]]


def bbox_iou(bbox_a, bbox_b):
    """Compute pairwise Intersection over Union (IoU) of two box sets.

    Each row of a box array holds a minimum corner in columns 0-1 and a
    maximum corner in columns 2-3.

    Args:
        bbox_a (array): Boxes of shape :math:`(N, 4)`.
        bbox_b (array): Boxes of shape :math:`(K, 4)`.

    Returns:
        array:
        An :math:`(N, K)` array whose element :math:`(n, k)` is the IoU of
        the :math:`n` th box of :obj:`bbox_a` and the :math:`k` th box of
        :obj:`bbox_b`. Pairs that do not overlap get an intersection of 0.
    """
    # Broadcast bbox_a against bbox_b: (N, 1, 2) vs (K, 2) -> (N, K, 2).
    inter_min = np.maximum(bbox_a[:, None, :2], bbox_b[:, :2])
    inter_max = np.minimum(bbox_a[:, None, 2:], bbox_b[:, 2:])

    # A pair overlaps only if the intersection has positive extent on
    # both axes; otherwise its area is forced to zero.
    overlaps = (inter_min < inter_max).all(axis=2)
    inter_area = np.prod(inter_max - inter_min, axis=2) * overlaps

    size_a = bbox_a[:, 2:] - bbox_a[:, :2]
    size_b = bbox_b[:, 2:] - bbox_b[:, :2]
    area_a = np.prod(size_a, axis=1)
    area_b = np.prod(size_b, axis=1)

    union_area = area_a[:, None] + area_b - inter_area
    return inter_area / union_area


def eval_detection_voc(
    pred_bboxes,
    pred_labels,
    pred_scores,
    gt_bboxes,
    gt_labels,
    gt_difficults=None,
    iou_thresh=0.5,
    use_07_metric=False,
):
    """Compute PASCAL VOC style per-class AP and their mean.

    Precision/recall curves are obtained from
    :func:`calc_detection_voc_prec_rec` and turned into average precisions
    by :func:`calc_detection_voc_ap`.

    Args:
        pred_bboxes (iterable of numpy.ndarray): One :math:`(R, 4)` array
            of predicted boxes per image, laid out as
            :math:`y_{min}, x_{min}, y_{max}, x_{max}`.
        pred_labels (iterable of numpy.ndarray): Predicted class labels,
            one array per image.
        pred_scores (iterable of numpy.ndarray): Confidence scores of the
            predicted boxes, one array per image.
        gt_bboxes (iterable of numpy.ndarray): Ground-truth boxes, one
            :math:`(R, 4)` array per image. The number of boxes does not
            have to match the number of predictions.
        gt_labels (iterable of numpy.ndarray): Ground-truth class labels,
            one array per image.
        gt_difficults (iterable of numpy.ndarray): Optional boolean arrays
            flagging difficult ground-truth boxes. With :obj:`None`
            (default) every box counts as not difficult.
        iou_thresh (float): IoU threshold forwarded to
            :func:`calc_detection_voc_prec_rec`.
        use_07_metric (bool): Use the PASCAL VOC 2007 11-point metric.
            The default value is :obj:`False`.

    Returns:
        dict:

        * **ap** (*numpy.ndarray*): Average precision per class index;
          :obj:`numpy.nan` where precision or recall is unavailable for
          that class.
        * **map** (*float*): Mean of **ap**, ignoring NaN entries.

    """
    precisions, recalls = calc_detection_voc_prec_rec(
        pred_bboxes,
        pred_labels,
        pred_scores,
        gt_bboxes,
        gt_labels,
        gt_difficults=gt_difficults,
        iou_thresh=iou_thresh,
    )
    per_class_ap = calc_detection_voc_ap(
        precisions, recalls, use_07_metric=use_07_metric
    )

    # NaN entries (classes without a usable curve) are left out of the mean.
    mean_ap = np.nanmean(per_class_ap)
    return {"ap": per_class_ap, "map": mean_ap}


def calc_detection_voc_prec_rec(
    pred_bboxes,
    pred_labels,
    pred_scores,
    gt_bboxes,
    gt_labels,
    gt_difficults=None,
    iou_thresh=0.5,
):
    """Calculate precision and recall based on evaluation code of PASCAL VOC.

    Predictions and ground truth are consumed image by image. Within every
    image and class, predictions are visited in descending score order and
    greedily assigned to the ground-truth box with the highest IoU.

    Args:
        pred_bboxes (iterable of numpy.ndarray): One :math:`(R, 4)` array
            of predicted boxes per image, laid out as
            :math:`y_{min}, x_{min}, y_{max}, x_{max}`.
        pred_labels (iterable of numpy.ndarray): Predicted class labels,
            one array per image.
        pred_scores (iterable of numpy.ndarray): Confidence scores of the
            predicted boxes, one array per image.
        gt_bboxes (iterable of numpy.ndarray): Ground-truth boxes, one
            :math:`(R, 4)` array per image.
        gt_labels (iterable of numpy.ndarray): Ground-truth class labels,
            one array per image.
        gt_difficults (iterable of numpy.ndarray): Optional boolean arrays
            flagging difficult ground-truth boxes. With :obj:`None`
            (default), or for an individual :obj:`None` element, all boxes
            of the image count as not difficult.
        iou_thresh (float): A prediction is matched to a ground-truth box
            only if their IoU is at least this value.

    Returns:
        tuple of two lists:
        :obj:`prec` and :obj:`rec`, both indexed by class label.

        * :obj:`prec[l]` is the precision curve of class :math:`l`, or
          :obj:`None` if the class never appears in the inputs.
        * :obj:`rec[l]` is the recall curve of class :math:`l`, or
          :obj:`None` if there is no non-difficult ground truth for it.

        Both lists are empty when the inputs contain no images.

    Raises:
        ValueError: If the input iterables do not all have the same length.

    """
    streams = [pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels]
    has_difficults = gt_difficults is not None
    if has_difficults:
        streams.append(gt_difficults)

    n_pos = defaultdict(int)
    score = defaultdict(list)
    match = defaultdict(list)

    # zip_longest with a private sentinel detects every length mismatch.
    # A plain zip() silently drops the extra items it already pulled from
    # earlier iterables when a later one runs out first.
    exhausted = object()
    for sample in itertools.zip_longest(*streams, fillvalue=exhausted):
        if any(item is exhausted for item in sample):
            raise ValueError("Length of input iterables need to be same.")

        pred_bbox, pred_label, pred_score, gt_bbox, gt_label = sample[:5]
        gt_difficult = sample[5] if has_difficults else None
        if gt_difficult is None:
            gt_difficult = np.zeros(gt_bbox.shape[0], dtype=bool)

        for cls in np.unique(np.concatenate((pred_label, gt_label)).astype(int)):
            pred_mask = pred_label == cls
            pred_bbox_l = pred_bbox[pred_mask]
            pred_score_l = pred_score[pred_mask]
            # sort by score
            order = pred_score_l.argsort()[::-1]
            pred_bbox_l = pred_bbox_l[order]
            pred_score_l = pred_score_l[order]

            gt_mask = gt_label == cls
            gt_bbox_l = gt_bbox[gt_mask]
            gt_difficult_l = gt_difficult[gt_mask]

            # Only non-difficult boxes count as positives for recall.
            n_pos[cls] += np.logical_not(gt_difficult_l).sum()
            score[cls].extend(pred_score_l)

            if len(pred_bbox_l) == 0:
                continue
            if len(gt_bbox_l) == 0:
                # Nothing to match against: every prediction is a false positive.
                match[cls].extend((0,) * pred_bbox_l.shape[0])
                continue

            # VOC evaluation follows integer typed bounding boxes.
            pred_bbox_l = pred_bbox_l.copy()
            pred_bbox_l[:, 2:] += 1
            gt_bbox_l = gt_bbox_l.copy()
            gt_bbox_l[:, 2:] += 1

            iou = bbox_iou(pred_bbox_l, gt_bbox_l)
            gt_index = iou.argmax(axis=1)
            # set -1 if there is no matching ground truth
            gt_index[iou.max(axis=1) < iou_thresh] = -1
            del iou

            # match codes: 1 = true positive, 0 = false positive,
            # -1 = matched a difficult box (ignored by both counters below).
            taken = np.zeros(gt_bbox_l.shape[0], dtype=bool)
            for gt_idx in gt_index:
                if gt_idx < 0:
                    match[cls].append(0)
                    continue
                if gt_difficult_l[gt_idx]:
                    match[cls].append(-1)
                elif taken[gt_idx]:
                    # A higher-scored prediction already claimed this box.
                    match[cls].append(0)
                else:
                    match[cls].append(1)
                taken[gt_idx] = True

    if not n_pos:
        # No image was processed, so there is no class to report.
        return [], []

    n_fg_class = max(n_pos.keys()) + 1
    prec = [None] * n_fg_class
    rec = [None] * n_fg_class

    for cls in n_pos.keys():
        score_l = np.array(score[cls])
        match_l = np.array(match[cls], dtype=np.int8)

        order = score_l.argsort()[::-1]
        match_l = match_l[order]

        tp = np.cumsum(match_l == 1)
        fp = np.cumsum(match_l == 0)

        # If an element of fp + tp is 0, the corresponding element of
        # prec[cls] is nan; that 0/0 is expected, so keep it quiet.
        with np.errstate(divide="ignore", invalid="ignore"):
            prec[cls] = tp / (fp + tp)
        # If n_pos[cls] is 0, rec[cls] is None.
        if n_pos[cls] > 0:
            rec[cls] = tp / n_pos[cls]

    return prec, rec


def calc_detection_voc_ap(prec, rec, use_07_metric=False):
    """Calculate average precisions based on evaluation code of PASCAL VOC.

    Args:
        prec (list of numpy.array): :obj:`prec[l]` is the precision curve
            of class :math:`l`, or :obj:`None`.
        rec (list of numpy.array): :obj:`rec[l]` is the recall curve of
            class :math:`l`, or :obj:`None`.
        use_07_metric (bool): Use the PASCAL VOC 2007 11-point metric
            instead of the area under the precision/recall curve.
            The default value is :obj:`False`.

    Returns:
        ~numpy.ndarray:
        One average precision per class index. The value is
        :obj:`numpy.nan` wherever :obj:`prec[l]` or :obj:`rec[l]` is
        :obj:`None`.

    """
    n_fg_class = len(prec)
    ap = np.empty(n_fg_class)
    for cls in range(n_fg_class):
        if prec[cls] is None or rec[cls] is None:
            ap[cls] = np.nan
            continue

        # NaN precision (nothing counted yet) is treated as 0.
        precision = np.nan_to_num(prec[cls])
        recall = rec[cls]

        if use_07_metric:
            # 11 point metric: average the best precision reachable at
            # recall >= t over the grid built by np.arange(0.0, 1.1, 0.1).
            ap[cls] = 0
            for t in np.arange(0.0, 1.1, 0.1):
                reached = recall >= t
                p = np.max(precision[reached]) if reached.any() else 0
                ap[cls] += p / 11
        else:
            # correct AP calculation
            # first append sentinel values at the end
            mpre = np.concatenate(([0], precision, [0]))
            mrec = np.concatenate(([0], recall, [1]))

            # Make precision monotonically non-increasing (right-to-left max).
            mpre = np.maximum.accumulate(mpre[::-1])[::-1]

            # to calculate area under PR curve, look for points
            # where X axis (recall) changes value
            i = np.where(mrec[1:] != mrec[:-1])[0]

            # and sum (delta recall) * prec
            ap[cls] = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])

    return ap

In [2]:
import os

# Bare expression: not displayed, because only a cell's last expression is shown.
os.getcwd()
# Move two directory levels up, relative to the current working directory.
# NOTE(review): this is not idempotent -- re-running the cell climbs two more
# levels, and every relative path used later depends on the resulting location.
os.chdir("../../")
# Last expression of the cell: shows the working directory after the change.
os.getcwd()

'/data01/dl23vitcas/dl_project'

In [3]:
import sys
import argparse
import os


# Overwrite the process arguments so that argparse sees the same flags a
# command-line invocation would provide.
sys.argv = [
    "view",
    "--config",
    "config/single_task_object_detection.yaml",
    "--model_path",
    "models/object_detection/model_2024-06-23_11-20.pth",
]

# Create the parser
parser = argparse.ArgumentParser()
parser.add_argument("--config", type=str, required=True, help="Path to the config file")
parser.add_argument(
    "--model_path", type=str, required=True, help="Path to the model file"
)

# Parse the arguments (read from the sys.argv set above)
args = parser.parse_args()

# Print the argument values
print(args.config)
print(args.model_path)

config/single_task_object_detection.yaml
models/object_detection/model_2024-06-23_11-20.pth


In [9]:
import torch
from config_experiments import config, parse_args
from utils import set_seed, set_device
from dataloader import VOC08Attr
import torchvision.transforms as transforms
from model import ObjectDetectionModel
from metrics import compute_mAP, view_mAP_for_class
import wandb

# Build everything needed for evaluation: W&B run, test transform, dataset
# and the model with its trained weights.
if __name__ == "__main__":

    # mode="disabled" is passed, so presumably nothing is sent to the W&B
    # servers -- TODO confirm against the wandb version in use.
    wandb.init(
        group="object_detection",
        project="DL",
        config=config,
        save_code=True,
        mode="disabled",
    )
    # NOTE(review): parse_args comes from config_experiments; it presumably
    # reads the sys.argv override set in an earlier cell -- verify.
    model_path = parse_args().model_path
    # Test-time preprocessing: resize, convert to tensor, normalize with the
    # mean/std taken from the config.
    transform_test = transforms.Compose(
        [
            transforms.Resize(size=config["transform"]["resize_values"]),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=config["transform"]["mean"], std=config["transform"]["std"]
            ),
        ]
    )
    set_seed(config["global"]["seed"])
    device = set_device(config["global"]["gpu_id"])
    data_test = VOC08Attr(train=False, transform=transform_test)
    model = ObjectDetectionModel().to(device)

    # NOTE(review): torch.load unpickles the file, so only load checkpoints
    # from a trusted source.
    model.load_state_dict(torch.load(model_path, map_location=device))

In [None]:
# Evaluate the model on the test split and print the per-class report.
# NOTE(review): both names are imported from `metrics` above but are also
# redefined in later cells of this notebook, so which implementation runs
# here depends on the cell execution order.
mAP = compute_mAP(data_test, model, device)
view_mAP_for_class(mAP, data_test)

In [None]:
def view_mAP_for_class(mAP, data_test):
    """Print per-class AP and the overall mAP@0.50, mirroring them to W&B.

    Args:
        mAP: Mapping with a ``"map_per_class"`` tensor (one value per
            class) and a ``"map_50"`` scalar tensor.
        data_test: Dataset exposing ``id2category``, a mapping from class
            id to category name.
    """
    print(mAP)
    print("\nmAP@0.50 (per class):")

    # Class ids run from 1 to num_classes inclusive.
    class_ids = torch.arange(1, config["global"]["num_classes"] + 1).tolist()
    per_class_values = mAP["map_per_class"].numpy()

    for class_id, ap_value in zip(class_ids, per_class_values):
        category = data_test.id2category.get(class_id)
        ap_category = ap_value.item()

        print(f"\tAP {category} : {ap_category:.2f}")
        wandb.config.update({f"AP {category} ": ap_category})

    overall_map50 = mAP["map_50"].item()
    print(f"\nmAP@0.50 : {overall_map50:.2f}")

    wandb.config.update({"mAP@0.50": overall_map50})

In [None]:
# Print the report with the view_mAP_for_class defined in the previous cell.
# NOTE(review): that function reads mAP["map_per_class"] and mAP["map_50"];
# the dict returned by voc_eval only has "ap" and "map" -- confirm which
# kind of result `mAP` holds before running this cell.
view_mAP_for_class(mAP, data_test)

In [10]:
import torch
from config_experiments import config
from bbox_transform import resize_bounding_boxes
import torchmetrics
from bbox_transform import apply_nms
from tqdm import tqdm
import wandb
import logging
import yaml
import os


def compute_mAP(data_set, model, device):  # train/val
    """Run the model over a dataset and score the detections with voc_eval.

    Args:
        data_set: Iterable dataset yielding
            ``(image, image_size, gt_class, gt_bbox, gt_attributes, ss_rois)``
            and providing ``get_indices_batch``.
        model: Detection model providing ``prediction_img``.
        device: Device the per-sample tensors are moved to.

    Returns:
        The value returned by ``voc_eval`` for the collected predictions
        and ground truth.
    """
    model.eval()

    # Per-class statistics file, resolved relative to the current working
    # directory.
    stats_path = (
        os.getcwd()
        + "/src/single_task_object_detection/"
        + "target_mean_std_by_class.yaml"
    )
    with open(stats_path, "r") as stats_file:
        mean_std_by_class = yaml.safe_load(stats_file)

    pred_bboxes, pred_labels, pred_scores = [], [], []
    gt_bboxes, gt_labels = [], []

    with torch.no_grad():
        for sample in tqdm(data_set, desc="Compute mAP"):
            image, image_size, gt_class, gt_bbox, _gt_attributes, ss_rois = sample

            image = image.unsqueeze(0).to(device)
            gt_class = gt_class.to(device)
            gt_bbox = gt_bbox.to(device)
            ss_rois = ss_rois.to(device)

            orig_w, orig_h = image_size
            # assumes the batched image is (1, C, H, W) -- TODO confirm
            new_w, new_h = (image.shape[3], image.shape[2])

            # Map ground-truth boxes from the network input size to the
            # original image size.
            gt_bbox = resize_bounding_boxes(
                gt_bbox, orig_size=(new_w, new_h), new_size=(orig_w, orig_h)
            )

            indices_batch = data_set.get_indices_batch(
                image.shape[0], ss_rois.shape[0]
            )
            indices_batch = indices_batch.unsqueeze(-1).to(device)

            prediction = model.prediction_img(
                image, ss_rois, indices_batch, mean_std_by_class
            )
            cls_max_score_net, max_score_net, bboxs_net = prediction

            # Same size mapping for the predicted boxes.
            bboxs_net = resize_bounding_boxes(
                bboxs_net, orig_size=(new_w, new_h), new_size=(orig_w, orig_h)
            )

            pred_bbox, pred_class, pred_score = apply_nms(
                cls_max_score_net, max_score_net, bboxs_net
            )

            pred_bboxes.append(pred_bbox)
            pred_labels.append(pred_class)
            pred_scores.append(pred_score)
            gt_bboxes.append(gt_bbox)
            gt_labels.append(gt_class)

    return voc_eval(pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels)

In [11]:
# compute_mAP returns the dict produced by voc_eval (keys "ap" and "map").
# Unpacking it into five names would raise ValueError, so keep it as a
# single value and display it as the cell output.
voc_metrics = compute_mAP(data_test, model, device)
voc_metrics

Compute mAP:   0%|          | 0/2227 [00:00<?, ?it/s]

Compute mAP: 100%|██████████| 2227/2227 [09:16<00:00,  4.00it/s]


{'ap': array([       nan, 0.36184114, 0.39342054, 0.15304123, 0.28581046,
        0.43998192, 0.33640001, 0.46924918, 0.46955405, 0.13036021,
        0.13498852, 0.29672787, 0.1849298 , 0.44415412, 0.45414605,
        0.19410411, 0.2370254 , 0.11723687, 0.42764903, 0.21635322,
        0.2183466 ]),
 'map': 0.29826601751400483}