In [None]:
## 2 ways to improve the performance
# yolov2 : changes in model configurations
# yolo9000 : changes in model training method. joint training of classification and object detection.


In [96]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from global_utils import IoU

## Anchor Boxes

### k-mean clustering

In [None]:
## https://towardsdatascience.com/create-your-own-k-means-clustering-algorithm-in-python-d7d4c9077670

def KmeanClustering(bboxes, max_iters, k = 5) :
    '''
    read out whole bounding boxes from train datasets, and perform k mean clusterings
    bboxes = (x_center, y_center, w, h, c). Here ratios are respect to the image size, NOT grid cell.
    max_iters : number of maximum iterations
    k : number of clusters
    '''

    w = bboxes[:, 2]
    h = bboxes[:, 3]

    # initialize the centroid
    min_w, max_w = min(w), max(w)
    min_h, max_h = min(h), max(h)
    # uniformly distribute the k cluster centroids
    centroids = [((max_w - min_w) * (i+1), (max_h - min_h) * (i+1)) for i in range(k)]

    prev_centroids = None
    iteration = 0


    while np.not_equal(centroids, prev_centroid).any() or iteration < max_iters :
        
        sorted_points = [[] for _ in range(k)]

        for (_, _, w, h, _) in bboxes :
            
            # Suppose x_center and y_center are same for centroid and data point here, we just want the ratio of w, h.
            x, y = 0.5, 0.5



            dist = np.array([IoU(np.array([x,y,w,h]), np.array([x,y,centroid_w, centroid_h]) ) \
                                for (centroid_w, centroid_h) in centroids])

            centroid_idx = np.argmax(dist)
            sorted_points[centroid_idx].append([w, h])

        prev_centroids = centroids
        sorted_points = np.array(sorted_points)
        centroids = [(np.mean(cluster[:,0]), np.mean(cluster[:,1]) ) for cluster in sorted_points]

        # if any new centroids has NaN, then substitute back to previous centroids
        centroids[np.isnan(centroids)] = prev_centroids[np.isnan(centroids)]

        iteration += 1

    return centroids



In [6]:
samp = np.array([[np.NaN, 0, 0, 0]])
prev = np.array([[1, 2, 3, 4]])
samp[np.isnan(samp)] = prev[np.isnan(samp)]


samp

array([[1., 0., 0., 0.]])

In [330]:
''' 
reference : https://pyimagesearch.com/2022/05/02/mean-average-precision-map-using-the-coco-evaluator/
usage : validation, evaluation. 
predictions and gts will be handed by reading gtfile directly, since dataloader limits the number of gt bounding box
'''


from global_utils import IoU

## MAP

class MeanAveragePrecisionMetrics :
    def __init__(self, gts, preds, num_classes, iou_threshold_range, confidence_threshold) :
        '''
        gts, preds = [[[class, x, y, w, h, c],...], ...] # imgs x bboxes

        num_classes : # of classes, make sure the class numbers starts from 0 and increased by 1.
        iou_threshold_range : (min_threshold, interval, max_threshold). e.g) IoU(0.6, 0.1, 0.9) = [0.6, 0.7, 0.8, 0.9]
        confidence_threshold : predicted bounding boxes are filtered by confidence threshold
        '''
        self.gts = gts
        self.preds = preds
        assert len(gts) == len(preds), '# of images should be the same for predictions and ground truths.'
        self.num_classes = num_classes 
        # convert iou_threshold_range into list
        min_threshold, interval, max_threshold  = iou_threshold_range
        self.iou_threshold_range = np.linspace(min_threshold, max_threshold, num=int((max_threshold - min_threshold)//interval + 1))
        
        self.confidence_threshold = confidence_threshold
        self.iou_table_per_img = {imgidx : None for imgidx in range(len(gts))}

        # self.total_iou_table = {clslabel : {iou_threshold : None \
        #                                         for iou_threshold in self.iou_threshold_range} \
        #                                             for clslabel in range(num_classes)}

        self.TOTAL_TP = [{iou_threshold : 0 for iou_threshold in self.iou_threshold_range} for _ in range(num_classes)]
        self.TOTAL_FP = [{iou_threshold : 0 for iou_threshold in self.iou_threshold_range} for _ in range(num_classes)]
        self.TOTAL_FN = [{iou_threshold : 0 for iou_threshold in self.iou_threshold_range} for _ in range(num_classes)]

        self.total_statistics = []


    def calculate_PR(self, imgidx, pred, gt, iou_threshold) :
        '''
        calculate precision and recall per classes
        Recall = TP / (TP+FN)
        Precision = TP / (TP + FP)
        1. match preds and gts using IoU
        2. matched preds will be TP, and remaining unmatched preds will be FP, and unmatched gt are FN.
        '''

        # bboxes = [[class,x,y,w,h,c],...]. c = confidence_score. filter predicted bboxes by confidence_threshold.
        
        pred = np.array([p for p in pred if p[-1] > self.confidence_threshold])
        
        
        # 행 : pred, 열 : gt
        if self.iou_table_per_img[imgidx] is not None :
            iou_table = self.iou_table_per_img[imgidx]
        else : 
            iou_table = torch.zeros((len(pred), len(gt)))
            for j, pred_bbox in enumerate(pred) :
                for i, gt_bbox in enumerate(gt) :
                    iou_table[j][i] = IoU(pred_bbox[..., 1:], gt_bbox[..., 1:])
            self.iou_table_per_img[imgidx] = iou_table
        # if there are more than one prediction box matched with on gt box, then we choose the predicition with the highest IoU as TP, and treat other matches as FP.     
        filtered_iou_table = torch.zeros_like(iou_table)
        filtered_iou_table[torch.argmax(iou_table, axis = 0), torch.arange(iou_table.shape[1])] = torch.max(iou_table, axis = 0)[0] # this will leave only one highest IoU per gts
        
        result = filtered_iou_table > iou_threshold

        TP = result.any(axis = 1).sum()
        FP = len(pred) - TP
        FN = len(gt) - result.any(axis = 0).sum()

        return TP.item(), FP.item(), FN.item()

    


    def calculate(self) :
        ## TODO : add typing of variables
        '''
        preds : list of numpy array. []
        gts 
        iou_threshold_range : (minimum_threshold, maximum_threshold, interval)
        '''
        for imgidx, (pred_by_img, gt_by_img) in enumerate(zip(self.preds, self.gts)) :
            for cls_label in range(self.num_classes) :
                cls_preds = pred_by_img[pred_by_img[..., 0] == cls_label]
                cls_gts = gt_by_img[gt_by_img[..., 0] == cls_label]

                # assert (cls_preds.shape == cls_gts.shape) and (cls_preds.ndim == cls_gts.ndim == 2 and cls_preds.shape[-1] == cls_gts.shape[-1] == 5), \
                #             'pred and gt shape = (# of bboxes over images, len([x,y,w,h,c]) )'

                for iou_threshold in self.iou_threshold_range :
                    TP, FP, FN = self.calculate_PR(imgidx, cls_preds, cls_gts, iou_threshold)
                    self.TOTAL_TP[cls_label][iou_threshold] += TP
                    self.TOTAL_FP[cls_label][iou_threshold] += FP
                    self.TOTAL_FN[cls_label][iou_threshold] += FN

        # calculate Precision and Recall

        for cls_label in range(self.num_classes) :
            for iou_threshold in self.iou_threshold_range :
                precision = self.TOTAL_TP[cls_label][iou_threshold] / (self.TOTAL_TP[cls_label][iou_threshold] + self.TOTAL_FP[cls_label][iou_threshold])
                recall = self.TOTAL_TP[cls_label][iou_threshold] / (self.TOTAL_TP[cls_label][iou_threshold] + self.TOTAL_FN[cls_label][iou_threshold])

                self.total_statistics.append([cls_label, iou_threshold, precision, recall])

        self.total_statistics = np.array(self.total_statistics)

        # mean average precision = sum(avg_cls_precision) / num_classes. avg_cls_precision = sum of cls_precisions in different recalls / 
        for cls_label in range(self.num_classes) :

        # interpolated precision

    

In [320]:
## read sample and test 
# 00001.txt gt
gt1= ['person 0.22 0.22 0.19 0.28 1',
'person 0.7475 0.77 0.20500000000000002 0.31 1']
# 00001.txt pred
pred1 = ['person .88 0.10250000000000001 0.455 0.155 0.24',
'person .70 0.6950000000000001 0.7225 0.2 0.335',
'person .80 0.7425 0.2125 0.245 0.335',]
# 00002.txt gt
gt2= ['person 0.7225 0.1925 0.215 0.275 1',
'person 0.3375 0.7725 0.295 0.225 1',]
# 00002.txt pred
pred2 = ['person .71 0.48 0.7000000000000001 0.32 0.29',
'person .54 0.28 0.8175 0.3 0.23500000000000001',
'person .74 0.2025 0.1775 0.215 0.17500000000000002',]
# 00003.txt gt
gt3= ['person 0.1675 0.19 0.17500000000000002 0.24 1',
'person 0.7375 0.26 0.245 0.22 1',
'person 0.6125 0.8125 0.23500000000000001 0.23500000000000001 1',]
# 00003.txt pred
pred3 = ['person .18 0.7375 0.17250000000000001 0.385 0.195',
'person .67 0.545 0.4275 0.23 0.225',
'person .38 0.89 0.4425 0.18 0.265',
'person .91 0.6425 0.7725 0.23500000000000001 0.23500000000000001',
'person .44 0.19 0.85 0.2 0.22',]

# 00004.txt gt
gt4= ['person 0.365 0.34 0.2 0.26 1',
'person 0.8475 0.3 0.155 0.17 1',]
# 00004.txt pred
pred4 = ['person .35 0.485 0.20500000000000002 0.14 0.13',
'person .78 0.245 0.5075000000000001 0.21 0.335',
'person .45 0.4975 0.5425 0.125 0.195',
'person .14 0.2 0.84 0.3 0.13',]

# 00005.txt gt
gt5= ['person 0.405 0.28250000000000003 0.22 0.255 1',
'person 0.325 0.77 0.17 0.26 1',]
# 00005.txt pred
pred5 = ['person .62 0.32 0.305 0.14 0.23',
'person .44 0.6075 0.125 0.265 0.14',
'person .95 0.325 0.7275 0.36 0.145',
'person .23 0.325 0.8875000000000001 0.36 0.145',]

# 00006.txt gt
gt6= ['person 0.31 0.635 0.26 0.38 1',
'person 0.42 0.4575 0.22 0.335 1',]
# 00006.txt pred
pred6 = ['person .45 0.4 0.335 0.37 0.19',
'person .84 0.1575 0.8625 0.145 0.17500000000000002',
'person .43 0.5375 0.655 0.125 0.21',]

# 00007.txt gt
gt7= ['person 0.2775 0.3125 0.275 0.315 1',
'person 0.41500000000000004 0.48 0.25 0.29 1',]
# 00007.txt pred
pred7 = ['person .48 0.3325 0.32 0.505 0.44',
'person .95 0.2575 0.7025 0.185 0.245',]





In [321]:
class_dict = {'person' : 0}

def convert_bbox(infos) : 
    bboxes = []
    for info in infos :
        classlabel, x, y, w, h, c = info.split(' ')
        classlabel = class_dict[classlabel]
        x,y,w,h,c =  map(float, [x,y,w,h, c])
        bboxes.append([classlabel, x, y, w, h, c])
    return bboxes



In [322]:
gts = [torch.tensor(convert_bbox(g)) for g in [gt1,gt2,gt3,gt4,gt5,gt6,gt7]]
preds = [torch.tensor(convert_bbox(p)) for p in [pred1,pred2,pred3,pred4,pred5,pred6,pred7]]

In [323]:
gts

[tensor([[0.0000, 0.2200, 0.2200, 0.1900, 0.2800, 1.0000],
         [0.0000, 0.7475, 0.7700, 0.2050, 0.3100, 1.0000]]),
 tensor([[0.0000, 0.7225, 0.1925, 0.2150, 0.2750, 1.0000],
         [0.0000, 0.3375, 0.7725, 0.2950, 0.2250, 1.0000]]),
 tensor([[0.0000, 0.1675, 0.1900, 0.1750, 0.2400, 1.0000],
         [0.0000, 0.7375, 0.2600, 0.2450, 0.2200, 1.0000],
         [0.0000, 0.6125, 0.8125, 0.2350, 0.2350, 1.0000]]),
 tensor([[0.0000, 0.3650, 0.3400, 0.2000, 0.2600, 1.0000],
         [0.0000, 0.8475, 0.3000, 0.1550, 0.1700, 1.0000]]),
 tensor([[0.0000, 0.4050, 0.2825, 0.2200, 0.2550, 1.0000],
         [0.0000, 0.3250, 0.7700, 0.1700, 0.2600, 1.0000]]),
 tensor([[0.0000, 0.3100, 0.6350, 0.2600, 0.3800, 1.0000],
         [0.0000, 0.4200, 0.4575, 0.2200, 0.3350, 1.0000]]),
 tensor([[0.0000, 0.2775, 0.3125, 0.2750, 0.3150, 1.0000],
         [0.0000, 0.4150, 0.4800, 0.2500, 0.2900, 1.0000]])]

In [331]:
MAP_metrics = MeanAveragePrecisionMetrics(gts, preds, num_classes = 1, iou_threshold_range = (0.2, 0.1, 0.6), confidence_threshold = 0.1)

In [332]:
MAP_metrics.calculate()

  pred = np.array([p for p in pred if p[-1] > self.confidence_threshold])
  pred = np.array([p for p in pred if p[-1] > self.confidence_threshold])


In [333]:
MAP_metrics.iou_threshold_range

array([0.2       , 0.33333333, 0.46666667, 0.6       ])

In [334]:
total_statistics = MAP_metrics.total_statistics



In [349]:
# statistics : class, iou_threshold, precision, recall
# by class

torch.trapz(torch.tensor(total_statistics[..., 2:3]) , torch.tensor(total_statistics[..., 2:4]), dim = -1)


tensor([0., 0., 0., 0.], dtype=torch.float64)

In [351]:
total_statistics

array([[0.        , 0.2       , 0.20833333, 0.33333333],
       [0.        , 0.33333333, 0.125     , 0.2       ],
       [0.        , 0.46666667, 0.125     , 0.2       ],
       [0.        , 0.6       , 0.04166667, 0.06666667]])

In [95]:
{0.1 : 3}

{0.1: 3}

In [94]:
samp[np.argmax(samp, axis = 0), range(samp.shape[1])]

array([0.6, 0.7, 0.8])

In [93]:
samp_zeros[np.argmax(samp, axis = 0), range(samp.shape[1])] = np.max(samp, axis = 0)

samp_zeros

array([[0. , 0. , 0.8],
       [0.6, 0.7, 0. ]])

In [86]:
np.argmax(samp, axis = 0)

array([1, 1, 0], dtype=int64)

In [70]:
samp[..., [0,1,2]]

array([[0.5, 0.6, 0.8],
       [0.6, 0.7, 0.3]])

In [72]:
import torch

samp = torch.tensor([[0.5, 0.6, 0.8],
                     [0.6, 0.7, 0.3]])

samp[[0,1,1], ...]

tensor([[0.5000, 0.6000, 0.8000],
        [0.6000, 0.7000, 0.3000],
        [0.6000, 0.7000, 0.3000]])