In [3]:
import os
import sys
import json
import numpy as np

def iou(a, b):
    """Intersection-over-union of two boxes.

    # Arguments
        a, b: 4-item sequences ``(left, top, right, bottom)``.  Edges are
              treated as inclusive, so every extent is ``max - min + 1``.
    # Returns
        Intersection area divided by union area.
    """
    a_left, a_top, a_right, a_bottom = a
    b_left, b_top, b_right, b_bottom = b

    # Overlap rectangle; a negative extent means the boxes do not touch.
    overlap_w = max(0, (min(a_right, b_right) - max(a_left, b_left)) + 1)
    overlap_h = max(0, (min(a_bottom, b_bottom) - max(a_top, b_top)) + 1)
    intersection = overlap_w * overlap_h

    area_a = (a_right - a_left + 1) * (a_bottom - a_top + 1)
    area_b = (b_right - b_left + 1) * (b_bottom - b_top + 1)
    return intersection / (area_a + area_b - intersection)

def nms(bboxes, threshold, confidence_index=-1):
    """Greedy non-maximum suppression.

    Boxes are visited from highest to lowest confidence; each surviving box
    suppresses every later box whose IoU with it is strictly greater than
    ``threshold``.

    # Arguments
        bboxes: sequence of boxes; the first four items of each box are
                ``left, top, right, bottom``.  The input is NOT modified.
        threshold: IoU value above which a lower-confidence box is dropped.
        confidence_index: position of the confidence score inside a box.
    # Returns
        New list with the kept boxes, ordered by descending confidence.
    """
    # Sort a copy so the caller's sequence keeps its order (and so tuples or
    # other non-list sequences are accepted).  sorted() is stable, like list.sort.
    ordered = sorted(bboxes, key=lambda box: box[confidence_index], reverse=True)
    suppressed = [False] * len(ordered)
    keep = []
    for i, candidate in enumerate(ordered):
        if suppressed[i]:
            continue
        keep.append(candidate)

        for j in range(i + 1, len(ordered)):
            # Skip boxes that are already gone: re-testing them cannot change the result.
            if not suppressed[j] and iou(candidate[:4], ordered[j][:4]) > threshold:
                suppressed[j] = True
    return keep

def nms_as_class(bboxes, threshold, class_index=-1, confidence_index=-2):
    """Run NMS separately for every class label.

    # Arguments
        bboxes: iterable of boxes; ``box[class_index]`` is the class label and
                ``box[confidence_index]`` the confidence score.
        threshold: IoU threshold forwarded to ``nms``.
    # Returns
        Flat list holding the boxes kept for each class, classes appearing in
        the order in which they were first seen in ``bboxes``.
    """
    per_class = {}
    for candidate in bboxes:
        per_class.setdefault(candidate[class_index], []).append(candidate)

    survivors = []
    for members in per_class.values():
        survivors.extend(nms(members, threshold, confidence_index))
    return survivors

def xml_value(line):
    """Return the text between the first ``>`` on the line and the next ``</``.

    Plain string searching is used; the line is not validated as XML.
    """
    start = line.find(">") + 1
    stop = line.find("</", start)
    return line[start:stop]

def xml_token(line):
    """Return the text between the first ``<`` on the line and the next ``>``.

    A closing tag keeps its leading slash (``</object>`` gives ``/object``).
    """
    begin = line.find("<") + 1
    end = line.find(">", begin)
    return line[begin:end]

def load_voc_xml(file):
    """Read the object boxes out of a VOC-style XML annotation file.

    The file is scanned line by line with ``xml_token`` / ``xml_value``
    (assumes one tag per line -- TODO confirm for every annotation source).
    Inside an ``<object>`` element, and outside any nested ``<part>``, the most
    recent ``<name>`` is remembered, every ``<bndbox>`` starts a new row with
    that name, and each ``xmin`` / ``ymin`` / ``xmax`` / ``ymax`` value is
    appended to the current row in the order it appears in the file.

    # Arguments
        file: path of the XML file.
    # Returns
        List of rows ``[name, coordinate, coordinate, ...]`` with float coordinates.
    """
    with open(file, "r") as handle:
        rows = handle.readlines()

    coordinate_tags = ["xmin", "ymin", "xmax", "ymax"]
    current_name = None
    current_box = None
    results = []
    inside_object = False
    inside_part = False

    for row in rows:
        tag = xml_token(row)

        if tag == "object":
            inside_object = True
            continue
        if tag == "/object":
            inside_object = False
            continue
        if not inside_object:
            continue

        # Track <part> nesting so that part-level names/boxes are ignored.
        if tag == "part":
            inside_part = True
        elif tag == "/part":
            inside_part = False

        if inside_part:
            continue

        if tag == "name":
            current_name = xml_value(row)
        elif tag == "bndbox":
            # The row is registered immediately and filled in as coordinates arrive.
            current_box = [current_name]
            results.append(current_box)
        elif tag in coordinate_tags:
            current_box.append(float(xml_value(row)))
    return results

def load_ann(root, call):
    """Load one annotation per file found directly inside ``root``.

    # Arguments
        root: directory holding the annotation files.
        call: function ``call(path)`` that loads a single file and returns its
              annotation.
    # Returns
        dict mapping each file name without its extension to ``call(path)``.

    Only regular files are loaded; sub-directories (for example Jupyter's
    ``.ipynb_checkpoints``) are skipped.  Entries are visited in sorted order
    so the resulting dict has a reproducible key order.
    """
    anns = {}
    for file in sorted(os.listdir(root)):
        path = os.path.join(root, file)
        if not os.path.isfile(path):
            continue
        # splitext leaves extension-less names intact, whereas slicing at
        # rfind(".") == -1 would cut off their last character.
        name, _extension = os.path.splitext(file)
        anns[name] = call(path)
    return anns

def load_json_ann(root):
    """Load every file under ``root`` as JSON; see ``load_ann`` for the keys."""
    def read_json(path):
        with open(path, "r") as stream:
            return json.load(stream)
    return load_ann(root, read_json)

def load_xml_ann(root, label_map):
    """Load every XML file under ``root`` as ground-truth rows.

    Each row produced by ``load_voc_xml`` (``[name, coordinates...]``) becomes
    ``[coordinates..., 0, class_index]`` where ``class_index`` is the position
    of ``name`` in ``label_map`` (``list.index`` raises ValueError for a name
    that is not listed).
    """
    def parse(path):
        converted = []
        for item in load_voc_xml(path):
            label, coordinates = item[0], item[1:]
            converted.append(coordinates + [0, label_map.index(label)])
        return converted
    return load_ann(root, parse)

In [4]:
# Locations of the predicted boxes (JSON) and of the ground-truth boxes (VOC XML).
detection_annotation_root = "predict_json"
groundtruth_annotation_root = "groundtruths_xml"

# Class names; a box's class index is its position in this list.
label_map = [
    "aeroplane", "bicycle", "bird", "boat", "bottle",
    "bus", "car", "cat", "chair", "cow",
    "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor",
]

detection_annotation = load_json_ann(detection_annotation_root)
groundtruth_annotation = load_xml_ann(groundtruth_annotation_root, label_map)

# Replace each image's raw predictions with the per-class NMS survivors (IoU > 0.5 suppressed).
for image_id, raw_boxes in detection_annotation.items():
    detection_annotation[image_id] = nms_as_class(raw_boxes, 0.5)

In [29]:
class MAPTool:
    """Average-precision evaluator for per-image bounding-box detections.

    For every class the tool computes AP@0.5, AP@0.75 and AP@[0.5:0.95]; the
    ``map`` property holds the means of those three numbers over the classes.
    Everything is computed once, when the object is constructed.
    """

    # IoU thresholds averaged for AP@[0.5:0.95].  linspace is used instead of a
    # float-step arange so the number of thresholds is exact.
    IOU_THRESHOLDS = np.linspace(0.5, 0.95, 10)

    def __init__(self, groundtruth_annotations, detection_annotations, class_names, max_dets=100):
        """ Store the detections, the ground truth and the label map, then evaluate.
        # Arguments
            detection_annotations(dict): {image_id: [[left, top, right, bottom, confidence, classes_index], [left, top, right, bottom, confidence, classes_index]]} 
            groundtruth_annotations(dict): {image_id: [[left, top, right, bottom, 0, classes_index], [left, top, right, bottom, 0, classes_index]]}
            class_names(list): ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
            max_dets(int or None): at most this many detections (the most confident
                ones) are evaluated per image and class; None removes the cap.
        """
        self.detection_annotations = detection_annotations
        self.groundtruth_annotations = groundtruth_annotations
        self.class_names = class_names
        self.max_dets = max_dets
        # One row per class: [ap@0.5, ap@0.75, ap@0.5:0.95]
        self.average_precision_array = np.zeros((len(class_names), 3))
        self.map_array = np.zeros((3, ))
        self.compute()

    def class_ap(self, class_name_or_index):
        ''' Look up the AP values of one class, given its name or its index.
        # return:
            np.array([ap@0.5, ap@0.75, ap@0.5:0.95])
        '''
        class_index = class_name_or_index
        if isinstance(class_name_or_index, str):
            class_index = self.class_names.index(class_name_or_index)
        return self.average_precision_array[class_index]

    @property
    def map(self):
        ''' Mean of the per-class AP values.
        # return:
            np.array([map@0.5, map@0.75, map@0.5:0.95])
        '''
        return self.map_array

    def iou(self, a, b):
        """ Intersection-over-union with inclusive (+1) box extents.
        # Arguments
            a, b: indexable objects whose items 0..3 are left, top, right, bottom.
                  Items may be scalars or numpy arrays; arrays are combined with
                  numpy broadcasting.
        # Returns
            Intersection area divided by union area.
        """
        aleft, atop, aright, abottom = [a[i] for i in range(4)]
        awidth = aright - aleft + 1
        aheight = abottom - atop + 1

        bleft, btop, bright, bbottom = [b[i] for i in range(4)]
        bwidth = bright - bleft + 1
        bheight = bbottom - btop + 1

        cleft = np.maximum(aleft, bleft)
        ctop = np.maximum(atop, btop)
        cright = np.minimum(aright, bright)
        cbottom = np.minimum(abottom, bbottom)
        # Negative extents mean "no overlap" and are clamped to zero.
        cross_area = np.clip(cright - cleft + 1, 0, None) * np.clip(cbottom - ctop + 1, 0, None)
        union_area = awidth * aheight + bwidth * bheight - cross_area
        return cross_area / union_area

    # methods: 'continuous', 'interp101', 'interp11'
    def integrate_area_under_curve(self, precision, recall, method="interp101"):
        """ Compute the average precision, given the recall and precision curves.
        Source: https://github.com/rbgirshick/py-faster-rcnn.
        # Arguments
            recall:    The recall curve (list).
            precision: The precision curve (list).
            method:    'interp101' (101 recall samples), 'interp11' (11 recall
                       samples); any other value selects 'continuous'.
        # Returns
            The average precision as computed in py-faster-rcnn.
            0.0 when the curves are empty (no detections).
        """
        recall = np.asarray(recall, dtype=float)
        precision = np.asarray(precision, dtype=float)
        if recall.size == 0:
            # No detections: nothing was recalled, and recall[-1] below would fail.
            return 0.0

        # Append sentinel values to beginning and end
        # NOTE(review): when recall reaches exactly 1.0 the two last mrec entries
        # coincide; confirm how np.interp resolves the sample at x == 1.0 there.
        mrec = np.concatenate(([0.], recall, [min(recall[-1] + 1E-3, 1.)]))
        mpre = np.concatenate(([0.], precision, [0.]))

        # Compute the precision envelope
        mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))

        # Integrate area under curve
        if method == 'interp101':
            x = np.linspace(0, 1, 101)  # 101-point interp (COCO)
            # Alternative: np.trapz(np.interp(x, mrec, mpre), x) integrates with the
            # trapezoidal rule, https://blog.csdn.net/weixin_44338705/article/details/89203791
            ap = np.mean(np.interp(x, mrec, mpre))  # plain mean of the sampled precisions, as done in the papers
        elif method == 'interp11':
            x = np.linspace(0, 1, 11)  # 11-point interp (VOC2007)
            ap = np.mean(np.interp(x, mrec, mpre))  # plain mean of the sampled precisions, as done in the papers
        else:  # 'continuous'
            i = np.where(mrec[1:] != mrec[:-1])[0]  # points where x axis (recall) changes (VOC2012)
            ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])  # area under curve
        return ap

    def compute_average_precision(self, matched_table, sum_groundtruth, threshold):
        """ AP of one class at one IoU threshold.
        # Arguments
            matched_table: rows [confidence, matched_iou, matched_groundtruth_index, image_id],
                           already sorted by descending confidence.
            sum_groundtruth: number of ground-truth boxes of the class.
            threshold: minimum IoU for a detection to count as a match.
        # Returns
            The average precision; np.nan when the class has no ground truth
            (recall is undefined), 0.0 when it has no detections.
        """
        if sum_groundtruth <= 0:
            return np.nan

        num_dets = len(matched_table)
        if num_dets == 0:
            return 0.0

        true_positive = np.zeros((num_dets, ))

        # Per image: ground-truth indices already claimed by a more confident detection.
        groundtruth_seen_map = {}
        for index, (confidence, matched_iou, matched_groundtruth_index, image_id) in enumerate(matched_table):
            if matched_iou >= threshold:
                image_base_seen_map = groundtruth_seen_map.setdefault(image_id, set())
                if matched_groundtruth_index not in image_base_seen_map:
                    true_positive[index] = 1
                    image_base_seen_map.add(matched_groundtruth_index)

        num_predicts = np.arange(1, num_dets + 1)
        accumulate_true_positive = np.cumsum(true_positive)
        precision = accumulate_true_positive / num_predicts
        recall = accumulate_true_positive / sum_groundtruth
        return self.integrate_area_under_curve(precision, recall)

    def _match_class(self, class_index):
        """ Pair every detection of one class with its best-overlapping ground truth.
        # Returns
            (matched_table, sum_groundtruth): the rows consumed by
            compute_average_precision, sorted by descending confidence, and the
            number of ground-truth boxes of the class.
        """
        matched_table = []
        sum_groundtruth = 0

        for image_id, image_groundtruth in self.groundtruth_annotations.items():
            # An image without a prediction entry simply contributes no detections.
            image_detection = self.detection_annotations.get(image_id, [])
            select_detection = np.array([item for item in image_detection if item[5] == class_index])
            select_groundtruth = np.array([item for item in image_groundtruth if item[5] == class_index])

            sum_groundtruth += len(select_groundtruth)
            if len(select_detection) == 0:
                continue

            # Keep the max_dets most confident detections.  The sort is stable, so
            # equally confident detections keep their input order.
            order = np.argsort(-select_detection[:, 4], kind="stable")[:self.max_dets]
            select_detection = select_detection[order]
            if len(select_detection) == 0:
                continue

            if len(select_groundtruth) == 0:
                # Nothing to match against: recorded with IoU 0 so they count as false positives.
                for confidence in select_detection[:, 4]:
                    matched_table.append([confidence, 0, 0, image_id])
                continue

            sgt = select_groundtruth[:, :4].T.reshape(4, -1, 1)
            sdt = select_detection[:, :4].T.reshape(4, 1, -1)

            # num_groundtruth x num_detection
            groundtruth_detection_iou = self.iou(sgt, sdt)
            best_groundtruth = groundtruth_detection_iou.argmax(axis=0)
            best_iou = groundtruth_detection_iou[best_groundtruth, np.arange(len(best_groundtruth))]
            for confidence, matched_iou, matched_groundtruth_index in zip(select_detection[:, 4], best_iou, best_groundtruth):
                matched_table.append([confidence, matched_iou, matched_groundtruth_index, image_id])

        matched_table.sort(key=lambda row: row[0], reverse=True)
        return matched_table, sum_groundtruth

    def compute(self):
        ''' Compute the per-class AP table and the mAP.
        Classes without any ground truth get NaN APs and are left out of the mean.
        # return:
            np.array([map@0.5, map@0.75, map@0.5:0.95])
        '''
        average_precision_array = []
        for classes in range(len(self.class_names)):
            matched_table, sum_groundtruth = self._match_class(classes)
            ap_05 = self.compute_average_precision(matched_table, sum_groundtruth, 0.5)
            ap_075 = self.compute_average_precision(matched_table, sum_groundtruth, 0.75)
            ap_05_095 = np.mean([self.compute_average_precision(matched_table, sum_groundtruth, t) for t in self.IOU_THRESHOLDS])
            average_precision_array.append([ap_05, ap_075, ap_05_095])

        table = np.array(average_precision_array, dtype=float).reshape(-1, 3)
        self.average_precision_array = table

        # Mean over the classes whose AP is defined; NaN when there is none.
        defined = ~np.isnan(table)
        counts = defined.sum(axis=0)
        totals = np.where(defined, table, 0.0).sum(axis=0)
        self.map_array = np.where(counts > 0, totals / np.maximum(counts, 1), np.nan)
        return self.map_array

In [33]:
# Evaluate the NMS-filtered detections against the ground truth.
mAP = MAPTool(groundtruth_annotation, detection_annotation, label_map)
aps = mAP.map
names = ["0.5", "0.75", "0.5:0.95"]

# Summary lines: one per IoU setting.
for name, ap in zip(names, aps):
    print(f"Average Precision  (AP) @[ IoU={name:8s} | area=   all | maxDets=100 ] = {ap:.3f}")

# Per-class breakdown.
for index in range(len(label_map)):
    name = label_map[index]
    class_ap05, class_ap075, class_ap05095 = mAP.class_ap(index)
    print(f"Class {index:02d}[{name:11s}] mAP@.5 = {class_ap05:.3f},  mAP@.75 = {class_ap075:.3f},  mAP@.5:.95 = {class_ap05095:.3f}")

Average Precision  (AP) @[ IoU=0.5      | area=   all | maxDets=100 ] = 0.509
Average Precision  (AP) @[ IoU=0.75     | area=   all | maxDets=100 ] = 0.262
Average Precision  (AP) @[ IoU=0.5:0.95 | area=   all | maxDets=100 ] = 0.271
Class 00[aeroplane  ] mAP@.5 = 0.545,  mAP@.75 = 0.130,  mAP@.5:.95 = 0.234
Class 01[bicycle    ] mAP@.5 = 0.623,  mAP@.75 = 0.371,  mAP@.5:.95 = 0.362
Class 02[bird       ] mAP@.5 = 0.366,  mAP@.75 = 0.118,  mAP@.5:.95 = 0.173
Class 03[boat       ] mAP@.5 = 0.325,  mAP@.75 = 0.087,  mAP@.5:.95 = 0.125
Class 04[bottle     ] mAP@.5 = 0.301,  mAP@.75 = 0.087,  mAP@.5:.95 = 0.132
Class 05[bus        ] mAP@.5 = 0.596,  mAP@.75 = 0.492,  mAP@.5:.95 = 0.394
Class 06[car        ] mAP@.5 = 0.679,  mAP@.75 = 0.462,  mAP@.5:.95 = 0.420
Class 07[cat        ] mAP@.5 = 0.583,  mAP@.75 = 0.388,  mAP@.5:.95 = 0.345
Class 08[chair      ] mAP@.5 = 0.286,  mAP@.75 = 0.112,  mAP@.5:.95 = 0.135
Class 09[cow        ] mAP@.5 = 0.388,  mAP@.75 = 0.164,  mAP@.5:.95 = 0.193
Class 