In [1]:
import json
import numpy as np
import torch
from scipy.cluster.vq import kmeans
from tqdm import tqdm
import random

# 1.加载数据，格式是：label_index, cx, cy, box_width, box_height, image_width, image_height

In [2]:
# label_index, cx, cy, box_width, box_height, image_width, image_height

In [3]:
with open("info.json", "r") as f:
    labels = np.array(json.load(f))

### 指定图像大小为640，这是我们的训练分辨率

In [4]:
image_size = 640

In [5]:
# 取出标注的图像宽高
labeled_image_width_height = labels[:, 5:7]

# 这里计算的是对图像进行等比缩放后的标准宽高，即  image_size * raw_width / max(raw_width, raw_height),   image_size * raw_height / max(raw_width, raw_height)
# 这里也是在告诉大家，他的训练程序是对图像有进行缩放，是等比缩放到大图的
std_image_width_height = image_size * labeled_image_width_height / labeled_image_width_height.max(1, keepdims=True)
std_image_box_width_height = std_image_width_height * labels[:, 3:5]

In [6]:
# filter small pixel object
filterd_index = (std_image_box_width_height >= 2).any(1)
keep_box_wh = std_image_box_width_height[filterd_index]

In [7]:
num_anchor = 9

In [8]:
box_std = keep_box_wh.std(0)
norm_box_wh = keep_box_wh / box_std
k, dist = kmeans(norm_box_wh, num_anchor, iter=30)
new_anchor_box = k * box_std

In [9]:
new_anchor_box[new_anchor_box.prod(1).argsort()]  # 用面积，从小到大排序

array([[ 42.52800723,  47.46115563],
       [ 88.5391382 , 104.61091678],
       [108.9610624 , 194.9371826 ],
       [246.68617722, 163.99330774],
       [169.62334094, 327.29754167],
       [488.48975246, 211.57796438],
       [337.63754851, 349.29748   ],
       [337.12914551, 529.32068074],
       [566.53491179, 393.41287146]])

In [10]:
std_image_box_width_height = torch.tensor(std_image_box_width_height, dtype=torch.float32)
new_anchor_box = torch.tensor(new_anchor_box, dtype=torch.float32)

# 2.考虑1个box被安排到哪个anchor下（总共9个anchor，3个level * 每个level3个形态）
### 条件是，max(anchor宽度 / 目标宽度, 目标宽度 / anchor宽度，anchor高度 / 目标高度, 目标高度 / anchor高度) < anchor_t，这里 anchor_t = 4

In [11]:
# 如果anchor_size = 10，obj_size = 2
anchor_size = 10
obj_size = 2
max(anchor_size / obj_size, obj_size / anchor_size)

5.0

In [12]:
# 如果anchor_size = 3，obj_size = 2
anchor_size = 3
obj_size = 2
max(anchor_size / obj_size, obj_size / anchor_size)

1.5

* 上面的10和2差距太大，不能安排。而下面的3和2接近可以安排

## 得到的结果是 
- 10和2得到的结果是5.0，5.0 < anchor_t不满足
- 3和2得到的结果是1.5，1.5 < anchor_t满足

# 过度到宽高考虑就是如下 

In [13]:
anchor_width = 100
anchor_height = 100

In [14]:
obj_width = 388
obj_height = 80

In [15]:
aw_ow = anchor_width / obj_width
ow_aw = obj_width / anchor_width
ah_oh = anchor_height / obj_height
oh_ah = obj_height / anchor_height
max(aw_ow, ow_aw, ah_oh, oh_ah)

3.88

# 3.转换表达式为min(anchor宽度 / 目标宽度, 目标宽度 / anchor宽度，anchor高度 / 目标高度, 目标高度 / anchor高度) > 1/anchor_t，这里 anchor_t = 4

In [16]:
anchor_t = 1 / 4
anchor_t

0.25

In [17]:
# 如果anchor_size = 10，obj_size = 2
anchor_size = 10
obj_size = 2
min(anchor_size / obj_size, obj_size / anchor_size)

0.2

In [18]:
# 如果anchor_size = 3，obj_size = 2
anchor_size = 3
obj_size = 2
min(anchor_size / obj_size, obj_size / anchor_size)

0.6666666666666666

## 得到的结果是一样的 
- 10和2得到的结果是0.2，0.2 > 1/anchor_t不满足
- 3和2得到的结果是0.666，0.666 > 1/anchor_t满足

# 4.计算宽宽比、高高比

In [19]:
# std_image_box_width_height is N x 2   (width, height)
# new_anchor_box             is 9 x 2   (width, height)

# std_image_box_width_height[:, None]  则是把维度变为：  N x 1 x 2
# new_anchor_box[None]                 则是把维度变为：  1 x 9 x 2

# 利用广播机制，得到的ratio是N x 9 x 2，即N个box和9个anchor的一一比值，这里是box宽度 / anchor宽度，box高度 / anchor高度
ratio = std_image_box_width_height[:, None] / new_anchor_box[None]

# ratio和其倒数取最大值，结果是 N x 9 x 2，这里反应的是，max(box宽度 / anchor宽度, anchor宽度 / box宽度)，max(box高度 / anchor高度, anchor高度 / box高度)
ratio_invermax = torch.max(ratio, 1. / ratio)

# ratio_invermax.max(2)，是在 2 这个维度得到最大值，即 宽度比值 和 高度比值 之间取最大
# 由于max函数返回的是tuple(values, indices)，我们取[0]得到values，是N x 9
aooa_ratio = ratio_invermax.max(2)[0]
aooa_ratio.shape

torch.Size([15662, 9])

# 5.计算bpr，即最佳的可能召回率(Best Possible Recall)，也就是说，每个框至少能够匹配到1个anchor，那么能够满足这个条件的框占总数的比例是多少呢？，这就是BPR

In [20]:
# 获取1维度的min，是对9个anchor的维度，取这个box对于每个anchor比例差距最小的那个，如果比例差距最小的还是超过了阈值4倍，就表示这个框无法与任何anchor进行匹配
# 得到的结果是 N，
min_ratio = aooa_ratio.min(1)[0]

# 判断每一个框，他与anchor的最小比例是否满足阈值要求，如果不满足就表示有框匹配不上
anchor_t = 4
box_matched_flag = min_ratio < anchor_t

# 对于匹配的结果，直接取mean，相当于 sum(box_matched_flag) / len(box_matched_flag)，就是取比例
# 得到的就是bpr，也就是最好的可能召回率
best_possible_recall = box_matched_flag.float().mean()
best_possible_recall

tensor(0.9968)

### 把代码实现的时候倒过来，结果一样的

In [21]:
ratio = std_image_box_width_height[:, None] / new_anchor_box[None]
ratio_invermax = torch.min(ratio, 1. / ratio)
aooa_ratio = ratio_invermax.min(2)[0]  # ratio metric
min_ratio = aooa_ratio.max(1)[0]  # x, best_x
box_matched_flag = min_ratio > (1 / anchor_t)
best_possible_recall = box_matched_flag.float().mean()
best_possible_recall

tensor(0.9968)

# 6.代码拼接起来

In [22]:
ratio = std_image_box_width_height[:, None] / new_anchor_box[None]
ratio_invermax = torch.min(ratio, 1. / ratio)
aooa_ratio = ratio_invermax.min(2)[0]
min_ratio = aooa_ratio.max(1)[0]

anchor_t = 1 / 4
box_matched_flag = min_ratio > anchor_t

# 最佳的可能召回率
best_possible_recall = box_matched_flag.float().mean()

# 平均每个box能够匹配到几个anchor
average_num_meached_anchor = (aooa_ratio > anchor_t).float().mean() * num_anchor

# 适应度，该适应度可以用于进行遗传算法迭代，选择最合适的结果
fitness = (min_ratio * (min_ratio > anchor_t).float()).mean()

best_possible_recall, average_num_meached_anchor, fitness

(tensor(0.9968), tensor(5.3932), tensor(0.7146))

# 7.选择最优anchor，遗传算法迭代

In [23]:
def fitness(anchor, std_image_box_width_height):
    ratio = std_image_box_width_height[:, None] / anchor[None]
    ratio_invermax = torch.min(ratio, 1. / ratio)
    aooa_ratio = ratio_invermax.min(2)[0]
    min_ratio = aooa_ratio.max(1)[0]

    anchor_t = 1 / 4
    box_matched_flag = min_ratio > anchor_t
    
    # 适应度，该适应度可以用于进行遗传算法迭代，选择最合适的结果
    return (min_ratio * (min_ratio > anchor_t).float()).mean()

def best_possible_recall(anchor, std_image_box_width_height):
    ratio = std_image_box_width_height[:, None] / anchor[None]
    ratio_invermax = torch.min(ratio, 1. / ratio)
    aooa_ratio = ratio_invermax.min(2)[0]
    min_ratio = aooa_ratio.max(1)[0]

    anchor_t = 1 / 4
    box_matched_flag = min_ratio > anchor_t
    return box_matched_flag.float().mean()

num_gen = 1000
anchor_shape = new_anchor_box.shape
current_fitness = fitness(new_anchor_box, std_image_box_width_height)
current_bpr = best_possible_recall(new_anchor_box, std_image_box_width_height)
print(f"current_fitness = {current_fitness:.4f}, current_bpr = {current_bpr:.4f}", flush=True)

pbar = tqdm(range(num_gen), desc="Evolving anchors with Genetic Algorithm")
for _ in pbar:
    
    v = np.ones(anchor_shape)
    while (v == 1).all():  # mutate until a change occurs (prevent duplicates)，变异，直到发生变换，避免重复
        v = ((np.random.random(anchor_shape) < 0.9) * np.random.random() * np.random.randn(*anchor_shape) * 0.1 + 1).clip(0.3, 3.0)
    
    # anchor不能小于2
    mutate_anchor = (new_anchor_box * v).clamp(min=2.0)
    mutate_fitness = fitness(mutate_anchor, std_image_box_width_height)
    pbar.desc = f'Evolving anchors with Genetic Algorithm: fitness = {mutate_fitness:.4f} / {current_fitness:.4f}'
    
    if mutate_fitness > current_fitness:
        new_anchor_box = mutate_anchor
        current_fitness = mutate_fitness
        
best_fitness_bpr = best_possible_recall(new_anchor_box, std_image_box_width_height)
print(f"best_fitness: {current_fitness:.4f}, best_fitness_bpr = {best_fitness_bpr:.4f}", flush=True)

# 最后得到的anhcor
new_anchor_box[new_anchor_box.prod(1).argsort()]

current_fitness = 0.7146, current_bpr = 0.9968


Evolving anchors with Genetic Algorithm: fitness = 0.7383 / 0.7402: 100%|██████████| 1000/1000 [00:01<00:00, 655.20it/s]

best_fitness: 0.7402, best_fitness_bpr = 0.9994





tensor([[ 26.6491,  41.3163],
        [ 66.5363,  49.8376],
        [ 51.0324, 115.8925],
        [115.2030, 101.9664],
        [105.6621, 202.2551],
        [272.9572, 173.8757],
        [174.5792, 280.9813],
        [294.5808, 375.5057],
        [502.1443, 370.8934]], dtype=torch.float64)

# 8.YoloV5的anchor选择规则
- 测量anchor的BPR，如果小于0.99，则会调用kmean_anchors重新计算，对应代码如下：
- 如果计算得到的新的bpr大于旧的anchor，则替换anchor，使用新的
- 如果计算得到的新的bpr小于等于旧的anchor，则继续使用旧的提供的anchor不做更新
```Python
bpr = metric(m.anchor_grid.clone().cpu().view(-1, 2))
print('Best Possible Recall (BPR) = %.4f' % bpr, end='')
if bpr < 0.99:  # threshold to recompute
    print('. Attempting to generate improved anchors, please wait...' % bpr)
    na = m.anchor_grid.numel() // 2  # number of anchors
    new_anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False)
    new_bpr = metric(new_anchors.reshape(-1, 2))
    if new_bpr > bpr:  # replace anchors
        new_anchors = torch.tensor(new_anchors, device=m.anchors.device).type_as(m.anchors)
        m.anchor_grid[:] = new_anchors.clone().view_as(m.anchor_grid)  # for inference
        m.anchors[:] = new_anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1)  # loss
        check_anchor_order(m)
        print('New anchors saved to model. Update model *.yaml to use these anchors in the future.')
    else:
        print('Original anchors better than new anchors. Proceeding with original anchors.')
```

# 9.YoloV5的默认anchor定义为：
```Yaml
[10,13, 16,30, 33,23]  # P3/8
[30,61, 62,45, 59,119]  # P4/16
[116,90, 156,198, 373,326]  # P5/32
```