## Poisson Image Editing
more information, please visit: https://github.com/PPPW/poisson-image-editing/blob/master/poisson_image_editing.ipynb

In [21]:
import numpy as np
import cv2
import scipy.sparse
from scipy.sparse.linalg import spsolve
from os import path
import os
import json
from tqdm import tqdm
import random
import matplotlib.pyplot as plt
import copy

### 1. 目标图片库targets的准备：
在准备过程中，做出以下限制：
- 只对给定的宽高的图像img_wh进行（图片太大容易内存溢出，图片太小一般都存在密集目标，不适宜增添）。
- 单图标注数量不超过4（超过可能会影响放置粘贴目标）。

In [17]:
img_wh = (720,405)
ann_upper_limit = 4

with open('seacoco/train_new.json','r') as f:
    ann = json.load(f)

target_imns = []
img_count = 0
target_dict = {'images':[], 'annotations':[], 'categories':ann['categories']}
for ann_img in tqdm(ann['images']):
    if (ann_img['width'], ann_img['height']) == img_wh:
        ann_count = 0
        target_ann = []
        for ann_ann in ann['annotations']:
            if ann_ann['image_id'] == ann_img['id']:
                ann_count += 1
                target_ann.append(ann_ann)
        if ann_count <= ann_upper_limit:
            img_count += 1
            target_imns.append(ann_img['file_name'])
            img = cv2.imread('seacoco/train/' + ann_img['file_name'])
            cv2.imwrite('seacoco/pb_data/targets/' + ann_img['file_name'], img)
            target_dict['images'].append(ann_img)
            target_dict['annotations'].append(target_ann)

print("The number of target images: ", len(target_imns))

100%|██████████| 4619/4619 [00:19<00:00, 238.33it/s]  

955





In [21]:
with open('seacoco/pb_data/target_dict.json', 'w') as f1:
    json.dump(target_dict, f1)

In [22]:
target_dict['categories']

[{'supercategory': 'holothurian', 'id': 1, 'name': 'holothurian'},
 {'supercategory': 'echinus', 'id': 2, 'name': 'echinus'},
 {'supercategory': 'scallop', 'id': 3, 'name': 'scallop'},
 {'supercategory': 'starfish', 'id': 4, 'name': 'starfish'}]

### 2. 来源图片库sources和掩图masks的准备：
在准备过程中，做出以下限制：  
- 只对目标库给定的宽高的图像img_wh进行（这样图片背景差异不会太大，避免融合突兀）。
- 只取给定的目标类别（这里取海参，因为海参检测效果最差）。
- 若一张图有多个海参，只取一个海参制作mask图。

In [80]:
img_wh = (720,405)
category_id = 1    
w, h = img_wh

with open('seacoco/train_new.json','r') as f:
    ann = json.load(f)

def get_mask(bbox, w, h):
    """draw and save mask images"""
    release_scale = 5
    x1 = int(bbox[0] - release_scale)
    y1 = int(bbox[1] - release_scale)
    x2 = int(bbox[0] + bbox[2] + release_scale)
    y2 = int(bbox[1] + bbox[3] + release_scale)
    mask = np.zeros([h, w], dtype=np.uint8)
    mask[y1:y2, x1:x2] = 255
    mask_loc = (x1, y1, bbox[2], bbox[3])
    return mask, mask_loc
    
mask_loc_dict = {}    
cat_count = 0
for ann_img in tqdm(ann['images']):
    if (ann_img['width'], ann_img['height']) == img_wh:
        for ann_ann in ann['annotations']:
            if ann_ann['category_id']== category_id and ann_ann['image_id'] == ann_img['id']:
                cat_count += 1
                mask, mask_loc = get_mask(ann_ann['bbox'], w, h)
                cv2.imwrite('seacoco/pb_data/masks/' + ann_img['file_name'], mask)
                cv2.imwrite('seacoco/pb_data/sources/' + ann_img['file_name'], cv2.imread('seacoco/train/' + ann_img['file_name']))
                mask_loc_dict[ann_img['file_name']] = mask_loc
                break

print("The number of source images with holothurian: ", cat_count)
with open('seacoco/pb_data/mask_loc_dict.json', 'w') as f2:
    json.dump(mask_loc_dict, f2)

100%|██████████| 4619/4619 [00:17<00:00, 261.48it/s] 

The number of source images with holothurian:  1309





### 3. 泊松融合：

#### 生成泊松系数矩阵：

In [2]:
def laplacian_matrix(n, m):
    """Generate the Poisson matrix. 

    Refer to: 
    https://en.wikipedia.org/wiki/Discrete_Poisson_equation

    Note: it's the transpose of the wiki's matrix 
    """
    mat_D = scipy.sparse.lil_matrix((m, m))
    mat_D.setdiag(-1, -1)
    mat_D.setdiag(4)
    mat_D.setdiag(-1, 1)
        
    mat_A = scipy.sparse.block_diag([mat_D] * n).tolil()
    
    mat_A.setdiag(-1, 1*m)
    mat_A.setdiag(-1, -1*m)
    
    return mat_A

#### 定义泊松融合方程：

In [20]:
def poisson_edit(source, target, mask, offset):
    """The poisson blending function. 

    Refer to: 
    Perez et. al., "Poisson Image Editing", 2003.
    """

    # Assume: 
    # target is not smaller than source.
    # shape of mask is same as shape of target.
    y_max, x_max = target.shape[:-1]
    y_min, x_min = 0, 0

    x_range = x_max - x_min
    y_range = y_max - y_min
        
    M = np.float32([[1,0,offset[0]],[0,1,offset[1]]])
    source = cv2.warpAffine(source,M,(x_range,y_range))
  
    mask = mask[y_min:y_max, x_min:x_max]    
    mask[mask != 0] = 1
    
    mat_A = laplacian_matrix(y_range, x_range)

    # for \Delta g
    laplacian = mat_A.tocsc()

    # set the region outside the mask to identity    
    for y in range(1, y_range - 1):
        for x in range(1, x_range - 1):
            if mask[y, x] == 0:
                k = x + y * x_range
                mat_A[k, k] = 1
                mat_A[k, k + 1] = 0
                mat_A[k, k - 1] = 0
                mat_A[k, k + x_range] = 0
                mat_A[k, k - x_range] = 0

    mat_A = mat_A.tocsc()

    mask_flat = mask.flatten()    
    for channel in range(source.shape[2]):
        source_flat = source[y_min:y_max, x_min:x_max, channel].flatten()
        target_flat = target[y_min:y_max, x_min:x_max, channel].flatten()        
        
        # inside the mask:
        # \Delta f = div v = \Delta g       
        alpha = 1
        mat_b = laplacian.dot(source_flat)*alpha

        # outside the mask:
        # f = t
        mat_b[mask_flat==0] = target_flat[mask_flat==0]
        
        x = spsolve(mat_A, mat_b)
        #print(x.shape)
        x = x.reshape((y_range, x_range))
        #print(x.shape)
        x[x > 255] = 255
        x[x < 0] = 0
        x = x.astype('uint8')

        target[y_min:y_max, x_min:x_max, channel] = x

    return target

#### 对于给定图片随机生成offset（用于mask的移动）：
- x和y轴的移动上下限分别宽松15和10个像素点（避免粘贴目标过于贴近图像边缘）。  

In [102]:
def get_random_offset(w, h, mask_loc_dict, img_name):
    '''get a random offset for the given image'''
    mask_x1, mask_y1 = mask_loc_dict[img_name][0], mask_loc_dict[img_name][1]
    mask_w, mask_h = mask_loc_dict[img_name][2], mask_loc_dict[img_name][3]
    offset1 = random.sample(range(-mask_x1 + 15, h - mask_x1 - 10), 1)
    offset2 = random.sample(range(-mask_y1 + 15, h - mask_y1 - 10), 1)
    mask_x1 = mask_x1 + offset1[0]
    mask_y1 = mask_y1 + offset2[0]
    return (offset1[0], offset2[0]), mask_x1, mask_y1, mask_w, mask_h

#### 正式进行泊松融合：
- 只对给定的宽高的图像img_wh进行（与第1和第2步的source, target和mask的准备保持一致。）
- 只对给定的类别进行（这里就是海参）。
- 每张source图片上的抠图目标将在随机抽取的aug_per_source个target图片上进行融合（这里一个抠图目标会贴在两张图上）。

In [103]:
sources_path = 'seacoco/pb_data/sources'
targets_path = 'seacoco/pb_data/targets'
masks_path = 'seacoco/pb_data/masks'

img_wh = (720,405) # the target bbox size
w, h = img_wh
category_id = 1 # the target augmented class
aug_per_source = 2 # how many target images will be used by per source image

source_imgs = os.listdir(sources_path)
target_imgs = os.listdir(targets_path)

with open('seacoco/pb_data/mask_loc_dict.json', 'r') as f3:
    mask_loc_dict = json.load(f3)

annotation = {'images':[], 'annotations':[]}
aug_count = 0
for source_img in tqdm(source_imgs[1:]):
    # load source image
    source = cv2.imread(path.join(sources_path, source_img))
    
    # random get aug_per_source target images as background
    temp = random.sample(target_imgs, aug_per_source)
    for target_img in temp:
        # load target image
        target = cv2.imread(path.join(targets_path, target_img))
        
        # load mask
        mask = cv2.imread(path.join(masks_path, source_img), cv2.IMREAD_GRAYSCALE)
        
        # get random offset and mask coordinate inforamtion after shiftting
        offset,mask_x1, mask_y1, mask_w, mask_h = get_random_offset(w, h, mask_loc_dict, source_img)
        M = np.float32([[1,0,offset[0]],[0,1,offset[1]]])
        mask = cv2.warpAffine(mask, M, (mask.shape[1], mask.shape[0]))
        
        # process poisson blending
        result = poisson_edit(source, target, mask, offset)
        
        # save augmentation images and annotations
        aug_count += 1
        fn = target_img.split('.')[0] + '_' + str(aug_count) + '.jpg'
        cv2.imwrite('seacoco/pb_data/aug_train/' + fn, result)
        result = cv2.rectangle(result, (mask_x1, mask_y1), (mask_x1 + mask_w, mask_y1 + mask_h), (255,0,0), 2)
        cv2.imwrite('seacoco/pb_data/aug_train_with_bbox/' + fn, result)
        annotation['images'].append({'file_name':fn,
                                     'width':h,
                                     'height':w,
                                     'id':aug_count})
        annotation['annotations'].append({'segmentation': [],
                                         'area': int(mask_w * mask_h),
                                         'iscrowd': 0,
                                         'image_id': aug_count,
                                         'bbox': [mask_x1, mask_y1, mask_w, mask_h],
                                         'category_id': category_id,
                                         'id': aug_count,
                                         'ignore': 0})      

with open('seacoco/pb_data/aug_train.json','w') as f4:
    json.dump(annotation, f4)

print('After augmentation:')
print('The number new image: %d' % len(annotations['images']))
print('The number new annotations(i.e., holothurian): %d' % len(annotations['annotations']))




  0%|          | 0/1308 [00:00<?, ?it/s][A[A[A


  0%|          | 1/1308 [00:12<4:27:05, 12.26s/it][A[A[A


  0%|          | 2/1308 [00:24<4:27:47, 12.30s/it][A[A[A


  0%|          | 3/1308 [00:37<4:28:34, 12.35s/it][A[A[A


  0%|          | 4/1308 [00:49<4:26:21, 12.26s/it][A[A[A


  0%|          | 5/1308 [01:01<4:26:48, 12.29s/it][A[A[A


  0%|          | 6/1308 [01:13<4:26:06, 12.26s/it][A[A[A


  1%|          | 7/1308 [01:24<4:19:01, 11.95s/it][A[A[A


  1%|          | 8/1308 [01:35<4:11:41, 11.62s/it][A[A[A


  1%|          | 9/1308 [01:46<4:08:05, 11.46s/it][A[A[A


  1%|          | 10/1308 [01:57<4:04:30, 11.30s/it][A[A[A


  1%|          | 11/1308 [02:08<4:02:22, 11.21s/it][A[A[A


  1%|          | 12/1308 [02:20<4:06:39, 11.42s/it][A[A[A


  1%|          | 13/1308 [02:33<4:12:34, 11.70s/it][A[A[A


  1%|          | 14/1308 [02:46<4:22:15, 12.16s/it][A[A[A


  1%|          | 15/1308 [02:57<4:16:50, 11.92s/it][A[A[A


  1%|    

 20%|█▉        | 257/1308 [48:11<3:14:41, 11.11s/it][A[A[A


 20%|█▉        | 258/1308 [48:22<3:13:44, 11.07s/it][A[A[A


 20%|█▉        | 259/1308 [48:34<3:14:02, 11.10s/it][A[A[A


 20%|█▉        | 260/1308 [48:45<3:14:12, 11.12s/it][A[A[A


 20%|█▉        | 261/1308 [48:56<3:15:53, 11.23s/it][A[A[A


 20%|██        | 262/1308 [49:08<3:17:00, 11.30s/it][A[A[A


 20%|██        | 263/1308 [49:19<3:17:21, 11.33s/it][A[A[A


 20%|██        | 264/1308 [49:31<3:18:17, 11.40s/it][A[A[A


 20%|██        | 265/1308 [49:42<3:18:12, 11.40s/it][A[A[A


 20%|██        | 266/1308 [49:53<3:16:48, 11.33s/it][A[A[A


 20%|██        | 267/1308 [50:05<3:17:28, 11.38s/it][A[A[A


 20%|██        | 268/1308 [50:16<3:18:07, 11.43s/it][A[A[A


 21%|██        | 269/1308 [50:28<3:18:04, 11.44s/it][A[A[A


 21%|██        | 270/1308 [50:39<3:17:07, 11.39s/it][A[A[A


 21%|██        | 271/1308 [50:51<3:17:57, 11.45s/it][A[A[A


 21%|██        | 272/1308 [51:02<3:17:26

 39%|███▉      | 507/1308 [1:35:47<2:29:04, 11.17s/it][A[A[A


 39%|███▉      | 508/1308 [1:35:59<2:29:09, 11.19s/it][A[A[A


 39%|███▉      | 509/1308 [1:36:10<2:30:07, 11.27s/it][A[A[A


 39%|███▉      | 510/1308 [1:36:21<2:29:31, 11.24s/it][A[A[A


 39%|███▉      | 511/1308 [1:36:32<2:29:00, 11.22s/it][A[A[A


 39%|███▉      | 512/1308 [1:36:44<2:29:44, 11.29s/it][A[A[A


 39%|███▉      | 513/1308 [1:36:55<2:29:09, 11.26s/it][A[A[A


 39%|███▉      | 514/1308 [1:37:06<2:28:28, 11.22s/it][A[A[A


 39%|███▉      | 515/1308 [1:37:17<2:27:55, 11.19s/it][A[A[A


 39%|███▉      | 516/1308 [1:37:28<2:27:10, 11.15s/it][A[A[A


 40%|███▉      | 517/1308 [1:37:40<2:28:02, 11.23s/it][A[A[A


 40%|███▉      | 518/1308 [1:37:51<2:27:24, 11.20s/it][A[A[A


 40%|███▉      | 519/1308 [1:38:02<2:27:22, 11.21s/it][A[A[A


 40%|███▉      | 520/1308 [1:38:13<2:26:53, 11.18s/it][A[A[A


 40%|███▉      | 521/1308 [1:38:24<2:26:21, 11.16s/it][A[A[A


 40%|███▉ 

 58%|█████▊    | 755/1308 [2:22:21<1:39:49, 10.83s/it][A[A[A


 58%|█████▊    | 756/1308 [2:22:32<1:39:31, 10.82s/it][A[A[A


 58%|█████▊    | 757/1308 [2:22:43<1:39:38, 10.85s/it][A[A[A


 58%|█████▊    | 758/1308 [2:22:54<1:39:15, 10.83s/it][A[A[A


 58%|█████▊    | 759/1308 [2:23:05<1:39:23, 10.86s/it][A[A[A


 58%|█████▊    | 760/1308 [2:23:15<1:39:11, 10.86s/it][A[A[A


 58%|█████▊    | 761/1308 [2:23:26<1:38:43, 10.83s/it][A[A[A


 58%|█████▊    | 762/1308 [2:23:37<1:38:31, 10.83s/it][A[A[A


 58%|█████▊    | 763/1308 [2:23:48<1:38:45, 10.87s/it][A[A[A


 58%|█████▊    | 764/1308 [2:23:59<1:38:06, 10.82s/it][A[A[A


 58%|█████▊    | 765/1308 [2:24:10<1:38:09, 10.85s/it][A[A[A


 59%|█████▊    | 766/1308 [2:24:21<1:38:32, 10.91s/it][A[A[A


 59%|█████▊    | 767/1308 [2:24:33<1:41:14, 11.23s/it][A[A[A


 59%|█████▊    | 768/1308 [2:24:44<1:42:24, 11.38s/it][A[A[A


 59%|█████▉    | 769/1308 [2:24:57<1:44:40, 11.65s/it][A[A[A


 59%|█████

 77%|███████▋  | 1003/1308 [3:20:22<58:54, 11.59s/it]  [A[A[A


 77%|███████▋  | 1004/1308 [3:20:33<57:22, 11.32s/it][A[A[A


 77%|███████▋  | 1005/1308 [3:20:43<56:20, 11.16s/it][A[A[A


 77%|███████▋  | 1006/1308 [3:20:54<55:20, 10.99s/it][A[A[A


 77%|███████▋  | 1007/1308 [3:21:05<54:43, 10.91s/it][A[A[A


 77%|███████▋  | 1008/1308 [3:21:15<54:14, 10.85s/it][A[A[A


 77%|███████▋  | 1009/1308 [3:21:26<53:49, 10.80s/it][A[A[A


 77%|███████▋  | 1010/1308 [3:21:37<53:23, 10.75s/it][A[A[A


 77%|███████▋  | 1011/1308 [3:21:47<53:14, 10.75s/it][A[A[A


 77%|███████▋  | 1012/1308 [3:21:58<52:54, 10.73s/it][A[A[A


 77%|███████▋  | 1013/1308 [3:22:09<52:30, 10.68s/it][A[A[A


 78%|███████▊  | 1014/1308 [3:22:19<52:15, 10.66s/it][A[A[A


 78%|███████▊  | 1015/1308 [3:22:30<52:06, 10.67s/it][A[A[A


 78%|███████▊  | 1016/1308 [3:22:41<52:10, 10.72s/it][A[A[A


 78%|███████▊  | 1017/1308 [3:22:51<51:49, 10.69s/it][A[A[A


 78%|███████▊  | 1018/1

 96%|█████████▌| 1255/1308 [4:05:41<09:25, 10.66s/it][A[A[A


 96%|█████████▌| 1256/1308 [4:05:51<09:14, 10.65s/it][A[A[A


 96%|█████████▌| 1257/1308 [4:06:02<09:03, 10.66s/it][A[A[A


 96%|█████████▌| 1258/1308 [4:06:13<08:52, 10.65s/it][A[A[A


 96%|█████████▋| 1259/1308 [4:06:23<08:41, 10.65s/it][A[A[A


 96%|█████████▋| 1260/1308 [4:06:34<08:33, 10.69s/it][A[A[A


 96%|█████████▋| 1261/1308 [4:06:45<08:23, 10.71s/it][A[A[A


 96%|█████████▋| 1262/1308 [4:06:55<08:12, 10.71s/it][A[A[A


 97%|█████████▋| 1263/1308 [4:07:06<08:02, 10.72s/it][A[A[A


 97%|█████████▋| 1264/1308 [4:07:17<07:53, 10.77s/it][A[A[A


 97%|█████████▋| 1265/1308 [4:07:28<07:42, 10.75s/it][A[A[A


 97%|█████████▋| 1266/1308 [4:07:38<07:30, 10.72s/it][A[A[A


 97%|█████████▋| 1267/1308 [4:07:49<07:17, 10.66s/it][A[A[A


 97%|█████████▋| 1268/1308 [4:08:00<07:05, 10.63s/it][A[A[A


 97%|█████████▋| 1269/1308 [4:08:10<06:55, 10.64s/it][A[A[A


 97%|█████████▋| 1270/130

After augmentation:


NameError: name 'annotations' is not defined

### 4. 融合数据清洗：
在执行下列代码前，先至aug_train_with_bbox中查看生成可视化的结果，如果有融合突兀或不合理的，请人工剔除掉，原生成图片数是2616张，但经过人工筛选后，剩下1441张融合较好的图片，同样地，由于每张target图只加了一个海参，所以annotations只有1441个标注。之后便执行下列代码，该代码会根据清洗后的aug_train_with_bbox，自动删除aug_train中不合格的图片，同时形成清洗后的标注字典reserve_aug_train。

In [73]:
with open('seacoco/pb_data/aug_train.json', 'r') as f:
    aug_train = json.load(f)
    
reserve_list = os.listdir('seacoco/pb_data/aug_train_with_bbox/')

reserve_aug_train = {'images':[], 'annotations':[]}
for i, at_image in enumerate(aug_train['images']):
    if at_image['file_name'] in reserve_list:
        reserve_aug_train['images'].append(at_image)
        reserve_aug_train['annotations'].append(aug_train['annotations'][i])
#     else:
#         os.remove('seacoco/pb_data/aug_train/' + at_image['file_name'])

因为之前使用的target图片原本就包含少量的标注，因为这里需要将'未增强数据标注train_new.json‘和’增强数据标注reserve_aug_train‘进行合并，得到原数据和增强数据的混合标注字典。

In [74]:
# merge reserve_aug_train and train_new.json
with open('seacoco/train_new.json','r') as f11:
    ann = json.load(f11)
    
max_imgid = max([int(i['id']) for i in ann['images']])
max_annid = max([int(i['id']) for i in ann['annotations']])

images = copy.deepcopy(ann['images'])
annotations = copy.deepcopy(ann['annotations'])
categories = copy.deepcopy(ann['categories'])

new2old_id = {}
for rat_image in reserve_aug_train['images']:
    rat_fn = rat_image['file_name'].split('_')[0] + '.jpg'
    for ann_image in ann['images']:
        if ann_image['file_name'] == rat_fn:
            new2old_id[rat_image['id']+max_imgid] = ann_image['id']


# update reserve_aug_train['images']
for i, rat_image in enumerate(reserve_aug_train['images']):
    reserve_aug_train['images'][i]['id'] = rat_image['id'] + max_imgid

# update reserve_aug_train'annotations']
for i, rat_ann in enumerate(reserve_aug_train['annotations']):
    reserve_aug_train['annotations'][i]['image_id'] = rat_ann['image_id'] + max_imgid
for i, rat_img in enumerate(reserve_aug_train['images']):
    newid = rat_img['id']
    oldid = new2old_id[newid]
    for ann_ann in ann['annotations']:
        if ann_ann['image_id'] == oldid:
            temp_ann = copy.deepcopy(ann_ann)
            temp_ann['id'] = temp_ann['id'] + max_annid
            temp_ann['image_id'] = newid
            reserve_aug_train['annotations'].append(temp_ann)            

In [75]:
# merge annotations and save them
with open('seacoco/train_new.json','r') as f11:
    ann = json.load(f11)

for rat_image in reserve_aug_train['images']:
    ann['images'].append(rat_image)
for rat_ann in reserve_aug_train['annotations']:
    ann['annotations'].append(rat_ann)

with open('seacoco/aug_train_new.json', 'w') as f5:
    json.dump(ann, f5)