In [1]:
import os
import cv2
import numpy as np 
import glob
import random
from random import randrange
from random import randint
import re
import matplotlib.pyplot as plt
from imgaug import augmenters as iaa


def natural_sort(l):
    """Return the strings of ``l`` sorted in natural (human) order.

    Each string is split into digit and non-digit runs; digit runs are
    compared as integers and the other runs case-insensitively, so that
    ``"img2"`` sorts before ``"img10"``.
    """
    def _natural_key(entry):
        # re.split with a capturing group keeps the digit runs in the result.
        pieces = []
        for chunk in re.split('([0-9]+)', entry):
            pieces.append(int(chunk) if chunk.isdigit() else chunk.lower())
        return pieces

    return sorted(l, key=_natural_key)

def generate_position(rgb,mask,scene):
    """Draw a random centre position that keeps the object inside the scene.

    NOTE: a four-argument ``generate_position`` is defined later in this same
    cell and replaces this definition once the cell has run.

    Args:
        rgb: object image; only ``rgb.shape[0]`` (height) and
            ``rgb.shape[1]`` (width) are read.
        mask: unused, kept for signature compatibility.
        scene: background image; only ``scene.shape[0]`` and
            ``scene.shape[1]`` are read.

    Returns:
        ``(x, y)`` tuple of ints, with ``x`` in
        ``[width // 2, scene_width - width // 2]`` and ``y`` in
        ``[height // 2, scene_height - height // 2]`` (both bounds inclusive,
        as with ``random.randint``).

    Raises:
        ValueError: propagated from ``randint`` when the object is too large
            for the scene (empty range).
    """
    # Use the arguments, not notebook globals, so the function does not
    # depend on hidden kernel state.
    half_width = rgb.shape[1] // 2
    half_height = rgb.shape[0] // 2
    # x is bounded by the half width, y by the half height.
    min_x = half_width
    max_x = scene.shape[1] - half_width
    min_y = half_height
    max_y = scene.shape[0] - half_height
    pos_x_generated = randint(min_x, max_x)
    pos_y_generated = randint(min_y, max_y)
    return (pos_x_generated, pos_y_generated)

def prepare_object_data(all_object_names):
    """Index the image files available for every object class.

    For each name, the ``extracted``, ``mask`` and ``result`` sub-folders
    found under ``../data/Objects/<name>/*/`` are globbed for paths ending
    in ``png`` and each list is put in natural order.

    Returns:
        dict mapping each object name to a dict with the keys ``'rgb'``,
        ``'mask'`` and ``'result'``, each holding a sorted list of paths.
    """
    data = {}
    for name in all_object_names:
        rgb_files = natural_sort(
            glob.glob("../data/Objects/" + str(name) + "/*/extracted/*png"))
        mask_files = natural_sort(
            glob.glob("../data/Objects/" + str(name) + "/*/mask/*png"))
        result_files = natural_sort(
            glob.glob("../data/Objects/" + str(name) + "/*/result/*png"))
        data[name] = {
            'rgb': rgb_files,
            'mask': mask_files,
            'result': result_files,
        }
    return data

def get_random_object_name(all_object_names):
    """Pick one object name uniformly at random from ``all_object_names``."""
    return random.choice(all_object_names)

def get_random_background(all_background_images):
    """Load one randomly chosen background image.

    A path is drawn with ``random.choice`` and handed to ``cv2.imread``;
    whatever ``cv2.imread`` returns is passed back unchanged.
    """
    chosen_path = random.choice(all_background_images)
    return cv2.imread(chosen_path)

def get_random_image(dict_data,object_name):
    """Load a random (rgb, mask, result) image triple for ``object_name``.

    One index is drawn over the length of the ``'rgb'`` list and the same
    index is used for the ``'mask'`` and ``'result'`` lists, so the three
    lists are expected to be aligned.

    Returns:
        tuple ``(object, mask, result)`` of the three ``cv2.imread`` results.
    """
    entry = dict_data[object_name]
    index = randrange(len(entry['rgb']))
    # Resolve all three paths before reading anything from disk.
    selected_paths = [entry[key][index] for key in ('rgb', 'mask', 'result')]
    loaded = [cv2.imread(path) for path in selected_paths]
    return (loaded[0], loaded[1], loaded[2])


def init_position_matrix(scene_shape):
    """Create an all-zero 2-D matrix matching the first two scene dimensions.

    Args:
        scene_shape: sequence whose first two entries give the matrix
            dimensions; further entries (e.g. channels) are ignored.
    """
    n_rows = scene_shape[0]
    n_cols = scene_shape[1]
    return np.zeros((n_rows, n_cols))


def generate_position(rgb,mask,scene,occupation_matrix):
    """Pick a random free centre position for an object inside the scene.

    A cell of ``occupation_matrix`` is free when it equals 0. Only cells at
    least half an object height from the top/bottom borders and half an
    object width from the left/right borders are considered, so an object
    centred on the result is not cropped by the border.

    Args:
        rgb: object image; only ``rgb.shape[0]`` (height) and
            ``rgb.shape[1]`` (width) are read.
        mask: unused, kept for signature compatibility.
        scene: background image; only ``scene.shape[0]`` and
            ``scene.shape[1]`` are read.
        occupation_matrix: 2-D array indexed ``[row, column]`` covering the
            scene, as produced by ``init_position_matrix``.

    Returns:
        ``(x, y)`` tuple of Python ints: column then row of the chosen
        centre. Callers in this notebook use index 0 as the horizontal and
        index 1 as the vertical coordinate.

    Raises:
        ValueError: if no free position exists (everything is occupied or
            the object does not fit into the scene).
    """
    half_height = rgb.shape[0] // 2
    half_width = rgb.shape[1] // 2
    scene_height = scene.shape[0]
    scene_width = scene.shape[1]

    # Rows are limited by the object height and columns by its width. The
    # previous slicing swapped the two axes, which restricted positions to a
    # min(height, width) square on non-square scenes.
    candidates = np.asarray(occupation_matrix)[
        half_height:scene_height - half_height,
        half_width:scene_width - half_width,
    ]
    free_rows, free_cols = np.nonzero(candidates == 0)
    if free_rows.size == 0:
        raise ValueError("no free position left for an object of shape "
                         + str(tuple(rgb.shape[:2])))

    # Draw one index instead of materialising a list of every coordinate.
    pick = random.randrange(free_rows.size)
    pos_x_generated = int(free_cols[pick]) + half_width
    pos_y_generated = int(free_rows[pick]) + half_height
    return (pos_x_generated, pos_y_generated)

In [2]:
def alpha_cloning(background,foreground,alpha,position):
    """Alpha-blend an object onto a background, centred on ``position``.

    The largest contour of the binarised ``alpha`` mask (threshold 127)
    defines a bounding rectangle. ``foreground`` and ``alpha`` are cropped to
    that rectangle, padded with zeros to the background size so that the
    rectangle centre lands on ``position``, and blended as
    ``alpha * foreground + (1 - alpha) * background``.

    Args:
        background: image the object is pasted onto.
        foreground: object image, same size as ``alpha``.
        alpha: 3-channel mask with values in 0..255 (it is converted with
            ``COLOR_BGR2GRAY`` and divided by 255).
        position: ``(x, y)`` centre of the pasted rectangle in background
            coordinates.

    Returns:
        The blended image as a float array (values are not rescaled).

    Raises:
        ValueError: if the mask contains no contour (``max`` of an empty
            sequence).
    """
    mask_gray = cv2.cvtColor(alpha, cv2.COLOR_BGR2GRAY)
    _, mask_binary = cv2.threshold(mask_gray, 127, 255, cv2.THRESH_BINARY)
    # OpenCV 3 returns (image, contours, hierarchy) and OpenCV 4 returns
    # (contours, hierarchy); [-2] is the contour list in both.
    contours = cv2.findContours(mask_binary, cv2.RETR_LIST,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]
    largest_contour = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(largest_contour)

    # Keep only the tight region around the object.
    foreground = foreground[y:y+h, x:x+w]
    alpha = alpha[y:y+h, x:x+w]

    # Padding that places the cropped region with its centre on `position`.
    top = int(position[1]) - foreground.shape[0]//2
    bottom = background.shape[0] - top - foreground.shape[0]
    left = int(position[0]) - foreground.shape[1]//2
    right = background.shape[1] - left - foreground.shape[1]

    foreground = cv2.copyMakeBorder(foreground,
                                    top=top,
                                    bottom=bottom,
                                    left=left,
                                    right=right,
                                    borderType=cv2.BORDER_CONSTANT,
                                    value=[0])

    alpha = cv2.copyMakeBorder(alpha,
                               top=top,
                               bottom=bottom,
                               left=left,
                               right=right,
                               borderType=cv2.BORDER_CONSTANT,
                               value=[0])

    # Blend in floating point; the mask is normalised to [0, 1].
    foreground = foreground.astype(float)
    background = background.astype(float)
    alpha = alpha.astype(float)/255

    # Weight the object by alpha and the background by (1 - alpha), then sum.
    foreground = cv2.multiply(alpha, foreground)
    background = cv2.multiply(1.0 - alpha, background)
    outImage = cv2.add(foreground, background)

    return outImage

In [3]:
def get_transformator():
    """Build a deterministic augmenter applying one random rotation.

    The rotation angle is sampled from -180 to 180 degrees. The sequence is
    made deterministic so the same augmenter can be applied to an image and
    its mask.
    """
    rotation = iaa.Affine(rotate=(-180, 180))  # rotate by -180 to 180 deg
    return iaa.Sequential([rotation]).to_deterministic()

def sometimes(aug):
    """Wrap ``aug`` in ``iaa.Sometimes`` with a probability of 0.5."""
    return iaa.Sometimes(0.5, aug)

def scene_transformator():
    """Build the random augmentation pipeline applied to a finished scene.

    The pipeline consists of a brightness multiplication plus a ``SomeOf``
    group that selects between 0 and 5 of the listed augmenters; both the
    outer sequence and the group run in random order.
    """
    # Always-present stage: scale pixel values by a factor in 0.2..1.5.
    global_brightness = iaa.Multiply((0.2, 1.5), per_channel=0.5)

    # Exactly one kind of blur.
    blur = iaa.OneOf([
        iaa.GaussianBlur((0, 3.0)),   # sigma between 0 and 3.0
        iaa.AverageBlur(k=(2, 7)),    # local means, kernel size 2 to 7
        iaa.MedianBlur(k=(3, 11)),    # local medians, kernel size 3 to 11
    ])
    sharpen = iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5))
    emboss = iaa.Emboss(alpha=(0, 1.0), strength=(0, 2.0))
    # Edge detection (all edges or directed edges) blended with the original
    # image through a simplex-noise mask.
    edge_overlay = iaa.SimplexNoiseAlpha(iaa.OneOf([
        iaa.EdgeDetect(alpha=(0.5, 1.0)),
        iaa.DirectedEdgeDetect(alpha=(0.5, 1.0), direction=(0.0, 1.0)),
    ]))
    gaussian_noise = iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5)
    # Drop single pixels (1% to 10%) or coarse rectangular patches.
    dropout = iaa.OneOf([
        iaa.Dropout((0.01, 0.1), per_channel=0.5),
        iaa.CoarseDropout((0.03, 0.15), size_percent=(0.02, 0.05), per_channel=0.2),
    ])
    additive_brightness = iaa.Add((-30, 30), per_channel=0.5)  # add -30 to 30 to pixel values
    hue_saturation = iaa.AddToHueAndSaturation((-20, 20))
    # Either rescale the brightness of the whole image or of sub-areas
    # selected by a frequency-noise mask.
    brightness_variation = iaa.OneOf([
        iaa.Multiply((0.2, 1.5), per_channel=0.5),
        iaa.FrequencyNoiseAlpha(
            exponent=(-4, 0),
            first=iaa.Multiply((0.5, 1.5), per_channel=True),
            second=iaa.ContrastNormalization((0.5, 2.0))
        )
    ])
    contrast = iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5)
    grayscale = iaa.Grayscale(alpha=(0.0, 1.0))
    # Geometric distortions, each applied with probability 0.5 when selected.
    elastic = sometimes(iaa.ElasticTransformation(alpha=(0.5, 3.5), sigma=0.25))
    piecewise = sometimes(iaa.PiecewiseAffine(scale=(0.01, 0.05)))
    perspective = sometimes(iaa.PerspectiveTransform(scale=(0.01, 0.1)))

    optional_effects = iaa.SomeOf(
        (0, 5),
        [
            blur,
            sharpen,
            emboss,
            edge_overlay,
            gaussian_noise,
            dropout,
            additive_brightness,
            hue_saturation,
            brightness_variation,
            contrast,
            grayscale,
            elastic,
            piecewise,
            perspective,
        ],
        random_order=True
    )

    seq = iaa.Sequential([global_brightness, optional_effects], random_order=True)
    return seq

In [4]:
# Candidate background images for the generated scenes.
all_background_path = glob.glob("../data/Background_images/COCO/*")

# One entry per object class found on disk.
all_object_names_path = glob.glob("../data/Objects/*")
# The class id written to the annotation file is the index of the name in
# this list. glob returns paths in arbitrary order, so the names are sorted
# to keep the ids stable between runs and machines.
all_object_names = sorted(os.path.basename(path) for path in all_object_names_path)


OBJECT_DATA = prepare_object_data(all_object_names)

In [5]:
###
# Multi-object image generator
#
# For every scene a random background is loaded and between NB_OBJECT_MIN and
# NB_OBJECT_MAX randomly scaled and rotated objects are alpha-blended onto it.
# The augmented scene is saved under DATA_FILE_NAME + "images/" and one line
# "<image path> x1,y1,x2,y2,class_id ..." is appended to data.txt.


DATA_FILE_NAME = "../generated/"
NB_OBJECT_MAX = 4
NB_OBJECT_MIN = 1
NB_SCENE = 50


def _largest_contour(mask_bgr, threshold):
    """Return the largest-area contour of a BGR mask binarised at ``threshold``."""
    gray = cv2.cvtColor(mask_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)
    # OpenCV 3 returns (image, contours, hierarchy) and OpenCV 4 returns
    # (contours, hierarchy); [-2] is the contour list in both.
    contours = cv2.findContours(binary, cv2.RETR_LIST,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]
    return max(contours, key=cv2.contourArea)


# cv2.imwrite does not create missing folders, it just returns False.
os.makedirs(DATA_FILE_NAME + "images", exist_ok=True)

with open(DATA_FILE_NAME + "data.txt", "w") as annotation_file:
    for j in range(NB_SCENE):
        try:
            random_scene = get_random_background(all_background_path)
            nb_object = randrange(NB_OBJECT_MIN, NB_OBJECT_MAX + 1)
            occupation_matix = init_position_matrix(random_scene.shape)
            scene_height, scene_width = random_scene.shape[:2]

            image_name = "" + DATA_FILE_NAME + "images/training_" + str(j) + ".png"
            line = image_name
            for i in range(nb_object):

                # Choose a random object with its mask and result image
                object_name = get_random_object_name(all_object_names)
                (random_object, random_mask, random_result) = get_random_image(OBJECT_DATA, object_name)

                # Change size randomly
                ratio = random.uniform(0.3, 1.3)
                random_object = cv2.resize(random_object, None, fx=ratio, fy=ratio)
                random_mask = cv2.resize(random_mask, None, fx=ratio, fy=ratio)
                random_result = cv2.resize(random_result, None, fx=ratio, fy=ratio)

                # Apply the same deterministic transformation to all three images
                aug = get_transformator()
                object_rgb = aug.augment_images([random_object])[0]
                object_mask = aug.augment_images([random_mask])[0]
                object_result = aug.augment_images([random_result])[0]

                # Dilated mask: used only to give the bounding box a margin
                kernel = np.ones((5, 5), np.uint8)
                dilation = cv2.dilate(object_mask, kernel, iterations=2)

                # Generate the random position and paste the object
                position = generate_position(object_rgb, object_mask, random_scene, occupation_matix)
                output = alpha_cloning(random_scene, object_result, object_mask, position)

                # alpha_cloning crops the object to the bounding rectangle of
                # the mask (threshold 127) and centres that rectangle on
                # `position`. Reproduce that translation so the box lines up
                # with the pasted pixels; the mask centroid only matches the
                # rectangle centre for symmetric shapes.
                paste_x, paste_y, paste_w, paste_h = cv2.boundingRect(
                    _largest_contour(object_mask, 127))
                shift_x = int(position[0]) - paste_w // 2 - paste_x
                shift_y = int(position[1]) - paste_h // 2 - paste_y

                # Bounding box of the dilated mask, moved into scene
                # coordinates and clipped to the scene.
                x, y, w, h = cv2.boundingRect(_largest_contour(dilation, 1))
                x_min = max(0, x + shift_x)
                y_min = max(0, y + shift_y)
                x_max = min(scene_width, x + w + shift_x)
                y_max = min(scene_height, y + h + shift_y)

                # The next object is pasted on top of the current composite
                random_scene = output
                info_bb = " " + str(x_min) + "," + str(y_min) + "," + str(x_max) + "," + str(y_max) + "," + str(all_object_names.index(object_name))
                line += "" + info_bb

            scene_aug = scene_transformator()
            # alpha_cloning returns a float image; imgaug augmenters primarily
            # support uint8 input, so convert before augmenting.
            output = np.clip(np.rint(output), 0, 255).astype(np.uint8)
            output = scene_aug.augment_images([output])[0]

            # Only record the annotation once the image is really on disk.
            if not cv2.imwrite(image_name, output):
                raise IOError("could not write " + image_name)
            annotation_file.write(line)
            annotation_file.write("\n")
        except Exception as exc:
            # Best effort: skip scenes that fail, but report why instead of
            # hiding the error (and let KeyboardInterrupt through).
            print("Skipping scene " + str(j) + ": " + repr(exc))
            continue