# import

In [None]:
import pandas as pd

In [None]:
import matplotlib.pyplot as plt

In [None]:
# export
from matplotlib import patches

In [None]:
from matplotlib.colors import cnames

In [None]:
import re

In [None]:
import numpy as np

In [None]:
import math

In [None]:
from IPython.core import debugger as idb

# config

In [None]:
# Path of the CSV file holding the bounding-box annotations; it is loaded
# further down with pd.read_csv.
bbox_csv_fn = './data/src_img/allboxes.csv'

# bbox height and width statistics

## functions

In [None]:
# Matches every run of decimal digits; signs and decimal points are not captured.
pat_num = re.compile(r'\d+')
def total_bboxs_in_df(df,bbox_col:str):
    """Collect every bounding box stored as text in one column of ``df``.

    Each entry of ``df[bbox_col]`` is scanned for runs of decimal digits and
    the resulting integers are grouped four at a time, one group per box.

    Args:
        df: Object that can be indexed with ``bbox_col`` and yields strings
            when the result is iterated (e.g. a pandas DataFrame).
        bbox_col: Name of the column that holds the box strings.

    Returns:
        A flat list of 4-element lists of Python ints, in the order the
        boxes appear in the column.

    Raises:
        ValueError: If the number of integers found in an entry is not a
            multiple of four (raised by the reshape).
    """

    def str2list(string):
        # Builtin int instead of np.long: the np.long alias was removed in
        # NumPy 1.24 and is a platform-dependent C long in NumPy 2.x.
        numbers = [int(token) for token in pat_num.findall(string)]
        # reshape(-1, 4) both groups the numbers and validates the count.
        return np.array(numbers).reshape(-1, 4).tolist()

    total_bboxes = []
    for string in df[bbox_col]:
        total_bboxes.extend(str2list(string))

    return total_bboxes

In [None]:
def bboxs2hws(bboxs:np.ndarray):
    """Turn corner-style boxes into per-box extents.

    Args:
        bboxs: 2-D array with at least four columns per row.
            NOTE(review): presumably each row is (y0, x0, y1, x1); confirm
            against the CSV format.

    Returns:
        A float64 array of shape ``(len(bboxs), 2)``. Column 0 is
        ``bboxs[:, 2] - bboxs[:, 0]`` and column 1 is
        ``bboxs[:, 3] - bboxs[:, 1]``; the notebook treats them as
        (height, width).
    """
    first_extent = bboxs[:, 2] - bboxs[:, 0]
    second_extent = bboxs[:, 3] - bboxs[:, 1]
    return np.column_stack((first_extent, second_extent)).astype(np.float64)

In [None]:
# export
def draw_rect(ax, x,y,w,h, color='white',lw=2):
    """Add an unfilled rectangle outline to ``ax`` and return the patch.

    Args:
        ax: Object exposing ``add_patch`` (a matplotlib Axes in this notebook).
        x, y: Anchor point passed to ``patches.Rectangle``.
        w, h: Width and height of the rectangle.
        color: Edge color of the outline.
        lw: Line width of the outline.

    Returns:
        Whatever ``ax.add_patch`` returns for the new rectangle.
    """
    outline = patches.Rectangle((x, y), w, h, fill=False, edgecolor=color, lw=lw)
    return ax.add_patch(outline)

## process

### 加载数据

In [None]:
# Read the CSV; its first column is used as the DataFrame index
df = pd.read_csv(bbox_csv_fn,index_col=0)
df.head()

In [None]:
# Gather every bbox from the 'box' column and convert the result to an np.ndarray
bboxes = np.array(total_bboxs_in_df(df,'box'))
bboxes.shape

### 观察高和宽的分布

In [None]:
# Extract the height and width of every bbox
hws = bboxs2hws(bboxes)
hws

In [None]:
# Smallest and largest values of h (column 0) and w (column 1)
h_min, w_min = np.min(hws, axis=0)
h_max, w_max = np.max(hws, axis=0)
(h_min,h_max),(w_min,w_max)

In [None]:
# Split the range [h_min, h_max] geometrically (equal ratios between neighbours)
hN = 4 # number of geometric segments
# 2*hN+1 points: the plots below use the odd indices as cell centers and the
# even indices as cell borders
hs = np.logspace(
    math.log10(h_min),
    math.log10(h_max),
    num=2*hN+1,
)
hs

In [None]:
# Split the range [w_min, w_max] geometrically (equal ratios between neighbours)
wN = 4 # number of geometric segments
# 2*wN+1 points: the plots below use the odd indices as cell centers and the
# even indices as cell borders
ws = np.logspace(
    math.log10(w_min),
    math.log10(w_max),
    num=2*wN+1,
)
ws

In [None]:
# Along h and along w: the multiplicative factor between a cell's center and its border
print(f'h:+-{(h_max/h_min)**(1/(2*hN))}')
print(f'w:+-{(w_max/w_min)**(1/(2*wN))}')

In [None]:
# Plot: the (h, w) points, their bounding rectangle, the grid cells and the cell centers

plt.figure(figsize=(6,6))

# Scatter every (h, w) pair
plt.scatter(hws[:,0],hws[:,1],linewidths=1,);

# Outline the smallest rectangle enclosing all (h, w) points,
# one edge per plot call: left, right, bottom, top
for edge_h,edge_w in (
    ([h_min,h_min],[w_min,w_max]),
    ([h_max,h_max],[w_min,w_max]),
    ([h_min,h_max],[w_min,w_min]),
    ([h_min,h_max],[w_max,w_max]),
):
    plt.plot(edge_h,edge_w,'r')

# Split that rectangle into hN*wN cells; the interior borders are the
# even-indexed entries of hs / ws
for h in hs[2:-1:2]:
    plt.plot([h,h],[w_min,w_max],'r')
for w in ws[2:-1:2]:
    plt.plot([h_min,h_max],[w,w],'r')

# Mark the center of every cell (odd-indexed entries of hs / ws)
for h in hs[1:-1:2]:
    for w in ws[1:-1:2]:
        plt.scatter([h],[w],c='r')

# Both axes are logarithmic, matching the geometric spacing of hs / ws
plt.xscale('log')
plt.yscale('log')

### 用户指定方格

In [None]:
# Specify which grid cells you select.
# Each entry is [h index, w index] into the cell centers hs[1:-1:2] / ws[1:-1:2].
grids = [[0,0],[0,1],[1,0],[1,1],[1,2],[2,1],[2,2],[2,3],[3,2],[3,3]]

In [None]:
# Plot: the centers of the cells selected by the user

plt.figure(figsize=(6,6))

# Scatter every (h, w) pair
plt.scatter(hws[:,0],hws[:,1],linewidths=1,);

# Outline the smallest rectangle enclosing all (h, w) points,
# one edge per plot call: left, right, bottom, top
for edge_h,edge_w in (
    ([h_min,h_min],[w_min,w_max]),
    ([h_max,h_max],[w_min,w_max]),
    ([h_min,h_max],[w_min,w_min]),
    ([h_min,h_max],[w_max,w_max]),
):
    plt.plot(edge_h,edge_w,'r')

# Split that rectangle into hN*wN cells; the interior borders are the
# even-indexed entries of hs / ws
for h in hs[2:-1:2]:
    plt.plot([h,h],[w_min,w_max],'r')
for w in ws[2:-1:2]:
    plt.plot([h_min,h_max],[w,w],'r')

# Mark only the centers of the selected cells
for c in grids:
    h, w = hs[1:-1:2][c[0]], ws[1:-1:2][c[1]]
    plt.scatter([h],[w],c='r')

# Both axes are logarithmic, matching the geometric spacing of hs / ws
plt.xscale('log')
plt.yscale('log')

In [None]:
# Print the (h, w) of every user-selected cell and collect them in slt_hws
h_centers = hs[1:-1:2] # cell centers along h; sliced once instead of per iteration
w_centers = ws[1:-1:2] # cell centers along w
slt_hws = [] # SeLecTed Heights and WidthS
for i,g in enumerate(grids):
    h = h_centers[g[0]]
    w = w_centers[g[1]]
    print(f'{i}: {h,w}')
    slt_hws.append([h,w])
slt_hws = np.array(slt_hws)

In [None]:
# Draw one rectangle for each user-selected (h, w)
_,ax = plt.subplots(1,1,figsize=(10,10))

cns = list(cnames) # Color NameS
for i,(h,w) in enumerate(slt_hws):
    # Jitter each rectangle's position a little so the outlines do not sit
    # exactly on top of each other and stay distinguishable
    rcx,rcy = (np.random.random(2)-0.5)/5 + 1
    # The offset 25 into cns only picks a run of easily distinguishable
    # colors; feel free to change it
    draw_rect(
        ax, -w/2*rcx,-h/2*rcy,w,h, color=cns[25+i],lw=3
    ).set_label(f'{i}:{int(w),int(h)}')

# Size the axes from the largest selected height and width
hmax,wmax = slt_hws.max(0)
plt.xlim(-wmax/2*1.5,wmax/2*1.5)
plt.ylim(-hmax/2*1.3,hmax/2*1.3)
ax.legend();

# export

In [None]:
# Run notebook2script.py on bbox_hw_statistics.ipynb with '../exp/' as the output directory.
# NOTE(review): presumably this exports the cells marked `# export`; confirm against the script.
!python ../../notebook2script.py --fname 'bbox_hw_statistics.ipynb' --outputDir '../exp/'