In [23]:
import sys
import os
import numpy as np
import math
import cv2 as cv
from sklearn.cluster import KMeans
import time

# Read the input image from disk.
filename = 'messi5.jpg'
img = cv.imread(filename)
if img is None:
    # cv.imread reports an unreadable or missing file by returning None
    # instead of raising, so fail here with a clear message rather than
    # with an AttributeError on `img.shape` below.
    raise FileNotFoundError(f'Could not read image file: {filename!r}')

# Label mask of shape (H, W) and an output buffer of shape (H, W, 3).
mask = np.zeros(img.shape[:2], dtype=np.uint8)
output = np.zeros(img.shape, np.uint8)

# Bounding rectangle as (x, y, width, height). (x, y) is the corner with the
# smallest column/row index, i.e. the top-left corner in image coordinates.
rect = (69, 59, 389, 277)

In [24]:

# Label values stored in the mask.
BG = 0      # background
FG = 1      # foreground
PR_BG = 2   # undetermined (probable) background
PR_FG = 3   # undetermined (probable) foreground
gamma = 30  # coefficient in front of the smoothness term


def bg_mask(mask):
    """Return the np.where index tuple of entries labelled BG or PR_BG."""
    is_background = (mask == BG) | (mask == PR_BG)
    return np.where(is_background)


def fg_mask(mask):
    """Return the np.where index tuple of entries labelled FG or PR_FG."""
    is_foreground = (mask == FG) | (mask == PR_FG)
    return np.where(is_foreground)


In [25]:
# 输入三元像素集，映射成哈希值
def rgb2hash(X, clrTmp, w):
    """Pack the first three columns of X into one integer code per row.

    Each column ``c`` is multiplied by ``clrTmp[c]``, truncated to int32 and
    weighted by ``w[c]``; the three weighted values are then added.

    Args:
        X: 2-D array with at least three columns, one row per pixel.
        clrTmp: per-channel multipliers applied before truncation.
        w: per-channel integer weights used to combine the channel codes.

    Returns:
        1-D integer array holding one code per row of X.
    """
    channel_codes = [
        (X[:, ch] * clrTmp[ch]).astype(np.int32) * w[ch]
        for ch in range(3)
    ]
    return channel_codes[0] + channel_codes[1] + channel_codes[2]


# 输入哈希值，映射回三元像素集
def hash2rgb(num_values, w):
    """Split integer codes back into three per-channel levels.

    Args:
        num_values: NumPy array of integer codes.
        w: per-channel weights; only ``w[0]`` and ``w[1]`` are read.

    Returns:
        int32 array with one row per code and three columns:
        ``code / w[0]``, ``(code % w[0]) / w[1]`` and ``code % w[1]``,
        each truncated to an integer.
    """
    first = (num_values / w[0]).astype(np.int32)
    remainder = num_values % w[0]
    second = (remainder / w[1]).astype(np.int32)
    # The last digit is taken modulo w[1]; w[2] is never consulted.
    third = (num_values % w[1]).astype(np.int32)
    return np.transpose(np.array([first, second, third]))

In [26]:
# Timing accumulators; this cell only initialises them.
t_build, t_model, t_cut = 0, 0, 0

# Convert the image to a float64 array of shape (H, W, 3).
img = np.asarray(img, dtype=np.float64)
rows, cols, _ = img.shape
if rect is not None:
    # Every pixel inside the rectangle starts as PR_FG (undetermined foreground).
    rect_rows = slice(rect[1], rect[1] + rect[3])
    rect_cols = slice(rect[0], rect[0] + rect[2])
    mask[rect_rows, rect_cols] = PR_FG

bgd_indexes = bg_mask(mask)
fgd_indexes = fg_mask(mask)
print('可能的背景数量为: %d, 可能的前景数量为: %d' % (bgd_indexes[0].size, fgd_indexes[0].size))

# Quantisation settings.
ratio = 0.95
colorRanks = (12, 12, 12)

# Background pixels divided by 255, one row per pixel.
X = img[bgd_indexes] / 255
N = len(X)

# Per-channel scale factors. A tiny amount is subtracted so that a normalised
# value of 1.0 still truncates to colorRanks - 1 rather than colorRanks.
clrTmp = [levels - 0.0001 for levels in colorRanks]
# Weights that combine the three channel levels into a single hash value.
w = [colorRanks[1] * colorRanks[2], colorRanks[2], 1]
# One hash value per background pixel.
hash_value = rgb2hash(X, clrTmp, w)
print('hash_value:',hash_value.shape,hash_value)

# freq[i] counts how many pixels hashed to i; its length is max(hash) + 1.
freq = np.bincount(hash_value)

# Order the hash values by ascending frequency. sort_pos holds the hash value
# at each rank, sort_freq the matching counts (unused hashes, count 0, come first).
sort_pos = np.argsort(freq)
sort_freq = freq[sort_pos]
print('sort_freq:',sort_freq.shape,sort_freq)
print('sort_pos:',sort_pos.shape,sort_pos)

可能的背景数量为: 79663, 可能的前景数量为: 107753
hash_value: (79663,) [170 170 314 ... 352 352 352]
sort_freq: (1584,) [    0     0     0 ...  6324  6775 10071]
sort_pos: (1584,) [ 791 1450 1451 ...  314  364  157]


In [27]:
# Stack into two rows: row 0 holds the ascending frequencies, row 1 the hash
# value (index into freq) each frequency belongs to.
sort_freq_pos = np.vstack((sort_freq, sort_pos))
print('sort_freq_pos:',sort_freq_pos.shape)
# Drop the columns whose frequency is zero. Indexing with the tuple returned by
# np.nonzero inserts a length-1 axis, so the result has shape (2, 1, K).
sort_freq_pos = sort_freq_pos[:, np.nonzero(sort_freq)]
# Remove that singleton axis -> (2, K).
num = sort_freq_pos[:, 0, :]
print('num:',num.shape,'第一行是出现频率，第二行是该映射值原来在freq的坐标')
print(num[:,:20])
# Number of hash values that actually occur.
maxNum = num.shape[1]
len_num = maxNum
print(f'映射后有{len_num}个有效值')



sort_freq_pos: (2, 1584)
num: (2, 186) 第一行是出现频率，第二行是该映射值原来在freq的坐标
[[   1    1    1    1    1    1    1    1    1    1    1    1    1    1
     2    2    2    2    2    2]
 [ 185 1097  223  836   51  342  316   63  993 1125  679  677 1294 1280
  1425 1583  444  533  600  630]]
映射后有186个有效值


In [28]:
# Pixel budget that may be remapped: the (1 - ratio) share of all sampled pixels.
maxDropNum = int(np.round(len(X) * (1 - ratio)))
print(f'最多删除{maxDropNum}个像素')
# Running total of the ascending frequencies in num[0].
accumulate = np.add.accumulate(num[0])
# First column at which the running total reaches the budget.
cut_pos = np.argwhere(accumulate >= maxDropNum)[0][0]
print(f'在num中的第{cut_pos}位划分，前面的为5%，后面的为95%')

# Number of hash values kept. Derived from len_num (not from the previous
# maxNum) so that re-running this cell does not subtract cut_pos repeatedly.
maxNum = len_num - cut_pos
# Hash values ordered from most to least frequent.
num_values = num[1][::-1]
print(len(num_values))
print(num_values)

# Keep at most 256 colours and at least 10 (or all of them if fewer exist).
maxNum = min(maxNum, 256)
if maxNum <= 10:
    # `num` has two rows, so len(num) is always 2; the count of available
    # colours is len_num.
    maxNum = min(10, len_num)

print(f'最终保留{maxNum}个有效值')

最多删除3983个像素
在num中的第116位划分，前面的为5%，后面的为95%
186
[ 157  364  314    0  471  156  472  144  315  220  301  628  313  784
  327  458  785  627  615  772  470  941  351  485  929  484  521  158
  508  376 1098  302  170  629  942  207  378 1086  363  459  366  379
  210 1243  219  328  326 1099  169 1399  353  640  209  377  773  496
  339  352  928 1582  340  954  391  797  365  497  473 1544 1438 1255
  771 1570 1387  666  145  483 1085 1426  303  616  221  522  196 1242
  665  510  641  509 1111  338   12  642  341  810  520  601  678  786
  182  457  232  183  194  354   13  197 1400 1230 1110  798  835  222
  652  955 1281 1112  653  367  498  980  823  171    1 1073  208  822
  992  195 1267 1124  614  809  745  953 1543  495  300  654  967  184
  979  966 1293 1136 1137  916  445  602  930  181  329  691 1254 1411
 1123  534  456  198  796  159  848  535 1268  523 1412 1149  630  600
  533  444 1583 1425 1280 1294  677  679 1125  993   63  316  342   51
  836  223 1097  185]
最终保留70个有效值

In [29]:
# Decode every hash in num_values (ordered from most to least frequent) back
# into its three quantised channel levels; color3i has one row per hash value.
color3i = hash2rgb(num_values, w)
print(f'按频率从大到小地排序RGB像素，总共有{len(color3i)}种像素，后{cut_pos}种像素要用临近像素替代，留下{maxNum}个有效值',color3i.shape)
print(color3i[:5])

按频率从大到小地排序RGB像素，总共有186种像素，后116种像素要用临近像素替代，留下70个有效值 (186, 3)
[[1 1 1]
 [2 6 4]
 [2 2 2]
 [0 0 0]
 [3 3 3]]


In [30]:
# ========================= colour distances =========================
zero2maxNum = color3i[:maxNum]     # the maxNum most frequent colours (kept)
maxNum2len_Num = color3i[maxNum:]  # the remaining colours (to be replaced)
# temp_matrix[i, j] is the squared distance between replaced colour i and
# kept colour j, computed for all pairs at once via broadcasting.
temp_matrix = np.square(
    maxNum2len_Num[:, np.newaxis, :] - zero2maxNum[np.newaxis, :, :]
).sum(axis=2).astype(np.int32)

# For each replaced colour, the position of its closest kept colour ...
arg_min = np.argmin(temp_matrix, axis=1)
# ... and the hash value of that kept colour.
replaceable_colors = num_values[arg_min]

# Kept hash value -> palette slot (its rank by frequency).
pallet = {kept_hash: slot for slot, kept_hash in enumerate(num_values[:maxNum])}
print(len(pallet))
print(pallet)
# Replaced hash value -> palette slot of its nearest kept colour.
pallet.update(
    (dropped_hash, pallet[nearest_hash])
    for dropped_hash, nearest_hash in zip(num_values[maxNum:], replaceable_colors)
)
print(len(pallet))
print(pallet)

70
{157: 0, 364: 1, 314: 2, 0: 3, 471: 4, 156: 5, 472: 6, 144: 7, 315: 8, 220: 9, 301: 10, 628: 11, 313: 12, 784: 13, 327: 14, 458: 15, 785: 16, 627: 17, 615: 18, 772: 19, 470: 20, 941: 21, 351: 22, 485: 23, 929: 24, 484: 25, 521: 26, 158: 27, 508: 28, 376: 29, 1098: 30, 302: 31, 170: 32, 629: 33, 942: 34, 207: 35, 378: 36, 1086: 37, 363: 38, 459: 39, 366: 40, 379: 41, 210: 42, 1243: 43, 219: 44, 328: 45, 326: 46, 1099: 47, 169: 48, 1399: 49, 353: 50, 640: 51, 209: 52, 377: 53, 773: 54, 496: 55, 339: 56, 352: 57, 928: 58, 1582: 59, 340: 60, 954: 61, 391: 62, 797: 63, 365: 64, 497: 65, 473: 66, 1544: 67, 1438: 68, 1255: 69}
186
{157: 0, 364: 1, 314: 2, 0: 3, 471: 4, 156: 5, 472: 6, 144: 7, 315: 8, 220: 9, 301: 10, 628: 11, 313: 12, 784: 13, 327: 14, 458: 15, 785: 16, 627: 17, 615: 18, 772: 19, 470: 20, 941: 21, 351: 22, 485: 23, 929: 24, 484: 25, 521: 26, 158: 27, 508: 28, 376: 29, 1098: 30, 302: 31, 170: 32, 629: 33, 942: 34, 207: 35, 378: 36, 1086: 37, 363: 38, 459: 39, 366: 40, 379: 

In [14]:
tmp = hash_value.copy()
print(tmp.shape)
# Look up the palette slot of every pixel's hash value.
idx1i_0 = np.array([pallet[code] for code in tmp], dtype=np.int32)

print(len(idx1i_0),idx1i_0)

# Palette slot of each pixel.
idx1i = idx1i_0

# Accumulators: per-slot colour sums (1, maxNum, 3) and pixel counts (1, maxNum).
color3f = np.zeros((1, maxNum, 3), dtype=np.float32)
colorNum = np.zeros((1, maxNum), dtype=np.int32)

(79663,)
79663 [32 32  2 ... 57 57 57]


In [15]:
# Start from zeroed accumulators so that re-running this cell does not add the
# sums on top of results left over from a previous run.
color3f = np.zeros((1, maxNum, 3), np.float32)
colorNum = np.zeros((1, maxNum), np.int32)
# Sum the colours and count the pixels that fall into each palette slot.
# np.add.at accumulates correctly when the same slot index occurs many times.
np.add.at(color3f[0], idx1i, X)
np.add.at(colorNum[0], idx1i, 1)
# Column vector of counts, shape (maxNum, 1), so it broadcasts over channels.
colorNum_reshape = colorNum.T
# Turn the per-slot sums into per-slot mean colours.
color3f[0] /= colorNum_reshape
print(colorNum)

[[10220  6775  6324  4841  4116  2970  2945  2780  2392  2380  1797  1664
   1611  1422  1402  1425  1528  1221  1124  1041   913  1006   881   926
    731   631  1139   605   593   550   663   525   549   472   497   496
    421   417   349   347   532   298   281   361   232   251   301   418
    218   269   203   202   242   168   157   154   154   153   152   259
    245   314   154   203   140   168   130   128   279   208]]


In [16]:
# Convert the per-slot pixel counts into fractions of the N sampled pixels.
# The division reads the untouched integer counts through colorNum_reshape
# (a transposed view of the count array), so re-running this cell does not
# divide an already-normalised colorNum a second time.
colorNum = colorNum_reshape.T / N
print(color3f)
print(colorNum)
# return color3f.shape[1], idx1i, color3f, colorNum

[[[0.13236947 0.12336183 0.11596915]
  [0.19881329 0.54532206 0.36935374]
  [0.20828475 0.20245281 0.2048366 ]
  [0.0652034  0.05642384 0.04372073]
  [0.28622833 0.2851117  0.29515782]
  [0.10855896 0.09450513 0.07464356]
  [0.288479   0.308955   0.36749998]
  [0.09221069 0.07691822 0.0621829 ]
  [0.21704797 0.21699256 0.2699992 ]
  [0.14962277 0.548605   0.36480087]
  [0.17989008 0.15455277 0.13715093]
  [0.38829187 0.37511355 0.36513025]
  [0.20470731 0.18064183 0.15281548]
  [0.45844743 0.44105428 0.39694652]
  [0.22830881 0.26607084 0.29479468]
  [0.27312008 0.23154768 0.2046055 ]
  [0.45991105 0.45695964 0.44517884]
  [0.3848783  0.3547138  0.3186521 ]
  [0.3550895  0.31780738 0.29319245]
  [0.44123393 0.3971434  0.36956066]
  [0.29080892 0.26476717 0.23251696]
  [0.5441987  0.52750266 0.4759585 ]
  [0.18667583 0.4750734  0.31157836]
  [0.30514842 0.3662672  0.44203043]
  [0.52641475 0.47863308 0.44466394]
  [0.30146927 0.35376602 0.38917127]
  [0.30421212 0.61280686 0.46922445]
 