In [1]:
# We are using OpenCV 3.1.0
# To install OpenCV, run the following command in tensorflow virtual environment
# $ conda install -c https://conda.binstar.org/menpo opencv3
import cv2
import numpy as np
from matplotlib import pyplot as plt
import os
from random import randint
# make sure we are using OpenCV version high than 3.0
print cv2.__version__
import time
import tensorflow as tf

3.1.0


In [2]:
'''
X-Y Coordinate System on OpenCV
x,y(Low)
+ + + + + + x(high)
+
+
+
+
+
y(high)
'''
class Rectangle:
    def __init__(self, x, y, w, h):
        # x, y are the coordinates of the upper left point
        self.x = x
        self.y = y
        self.w = w
        self.h = h
    @staticmethod
    # More detail, please refer to http://www.geeksforgeeks.org/find-two-rectangles-overlap/
    # We are checking if two rectangles overlap or not
    def check_2rects_overlap(rect1, rect2):
        tl_x1, tl_y1 = rect1.x, rect1.y
        br_x1, br_y1 = rect1.x + rect1.w, rect1.y + rect1.h
        tl_x2, tl_y2 = rect2.x, rect2.y
        br_x2, br_y2 = rect2.x + rect2.w, rect2.y + rect2.h
        # If one rectangle is on the left side of the other
        if tl_x1 > br_x2 or tl_x2 > br_x1:
            return False
        # If one rectangle in above other
        if tl_y1 > br_y2 or tl_y2 > br_y1:
            return False
        return True

In [3]:
class ImageOperations:
    @staticmethod
    
    def extract_objs_from_imgs_dir(src_imgs_dir, dst_objs_dir, scale_factor):
        src_imgs_lst = []
        '''
        for img_name in os.listdir(imgs_dir):
            if img_name.endswith(".jpg"):
                images.append(img_name)
        '''
        import fnmatch
        # Find out all the images in current directory by recursively visiting all the folders
        for root, dirnames, filenames in os.walk(src_imgs_dir):
            for filename in fnmatch.filter(filenames, '*.jpg'):
                src_imgs_lst.append(os.path.join(root, filename))
        # Extract objest out of the images and save them to the objs folder
        for i in range(len(src_imgs_lst)):
            ImageOperations.do_extraction_on_img(src_imgs_lst[i], os.path.join(objs_dir, str(i)+'.jpg'), scale_factor)

    @staticmethod
    # Extract object out of the image and save them to the destination
    def do_extraction_on_img(img_name, obj_name, scale_factor):
        img = cv2.imread(img_name)
        # Convert image into grayscale
        # Since the image has white background, it would be easier to extract the objects out in grayscale mode
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Do simple thresholding
        # See example at http://docs.opencv.org/3.0-beta/doc/py_tutorials/py_imgproc/py_thresholding/py_thresholding.html
        ret, thresh = cv2.threshold(img_gray,250,255, cv2.THRESH_BINARY_INV)
        # Find contours
        _, contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        areas = [cv2.contourArea(c) for c in contours]
        # The contour with largest area is the object of interest
        max_index = np.argmax(areas)
        cnt=contours[max_index]
        x,y,w,h = cv2.boundingRect(cnt)
        # Use the following code to draw rectangle to validate our extraction is doing correctly
        #cv2.rectangle(img,(x,y),(x+w,y+h),(0,255,0),2)
        roi = img[y:y+h, x:x+w]
        # Scale down the image by some factor
        roi_resized = ImageOperations.resize_img(roi, scale_factor)
        cv2.imwrite(obj_name, roi_resized)    
    
    @staticmethod
    # resize the image by some factor
    def resize_img(ori_img, scale_factor):
        #new size (w,h)
        newx,newy = ori_img.shape[1]/scale_factor, ori_img.shape[0]/scale_factor
        new_img = cv2.resize(ori_img, (newx,newy))
        return new_img
    
    @staticmethod
    # blend the overlaying objects with the background
    # refer to https://pythonprogramming.net/image-arithmetics-logic-python-opencv-tutorial/
    def blend_overlays_with_bgd(overlay_lst, pos_lst, white_bgd, noisy_bgd):
        for i in range(len(overlay_lst)):
            overlay = overlay_lst[i]
            (x,y,w,h) = pos_lst[i]
            roi = white_bgd[y:y+h, x:x+w]
            overlay2gray = cv2.cvtColor(overlay, cv2.COLOR_BGR2GRAY)
            ret, mask = cv2.threshold(overlay2gray, 245, 255, cv2.THRESH_BINARY_INV)
            mask_inv = cv2.bitwise_not(mask)

            bgd_bg = cv2.bitwise_and(roi,roi,mask=mask_inv)
            overlay_fg = cv2.bitwise_and(overlay,overlay,mask=mask)

            dst = cv2.add(bgd_bg, overlay_fg)
            white_bgd[y:y+h, x:x+w] = dst
        return ImageOperations.seamless_clone_overlay_with_bgd(white_bgd, noisy_bgd)

    
    @staticmethod
    # See tutorial at http://www.learnopencv.com/seamless-cloning-using-opencv-python-cpp/
    def seamless_clone_overlay_with_bgd(overlay, bgd):
        mask = 255 * np.ones(overlay.shape, overlay.dtype)
        width, height, _ = bgd.shape
        center = (height/2, width/2)
        mixed_clone = cv2.seamlessClone(overlay, bgd, mask, center, cv2.MIXED_CLONE)
        return mixed_clone
    

In [4]:
def get_objs_from_dir(objs_dir):
    obj_names_lst = []
    for img_name in os.listdir(objs_dir):
        if img_name.endswith(".jpg"):
                obj_names_lst.append(os.path.join(objs_dir, img_name))
    print "objects total:\t" + str(len(obj_names_lst))
    return obj_names_lst


    
def create_composed_images_with_idl(noisy_bgd, idl_file_name, outputs_dir,
                                        obj_names_lst, objs_total_in_img, 
                                        imgs_output_total, use_occlusion=False):
    idl_file = open(idl_file_name, 'w')
    bgd_w = noisy_bgd.shape[1]
    bgd_h = noisy_bgd.shape[0]
    white_bgd = np.full((bgd_h, bgd_w, 3), 255, np.uint8)
    print noisy_bgd.shape
    # It would be better if we create a noisy bgd with random noise
    # In order to prevent our model to memorize the bgd
    # It would be much easier for objects detection with bgd memorized
    # noisy_bgd = np.random.randint(0, 255, (bgd_h, bgd_w, 3))
    print noisy_bgd.shape
    print noisy_bgd
    for i in range(imgs_output_total):
        white_bgd_copy = white_bgd.copy()
        noisy_bgd_copy = noisy_bgd.copy()
        mixed_filename = os.path.join(outputs_dir, 'output_'+str(i)+'mixed.jpg')
        pos_lst = []
        overlay_lst = []
        rect_lst = []
        line1 = []
        line1.append('"' + mixed_filename + '": ')
        one_decimal = "{0:0.1f}"
        for j in range(objs_total_in_img):
            obj_index = randint(0, len(obj_names_lst)-1)
            obj = cv2.imread(obj_names_lst[obj_index])
        
            obj_w = obj.shape[1]
            obj_h = obj.shape[0]
        
            obj_x = randint(0, bgd_w - obj_w - 1)        
            obj_y = randint(0, bgd_h - obj_h - 1)

            rect = Rectangle(obj_x, obj_y, obj_w, obj_h)
            
            rect_lst.append(rect)

            overlay_lst.append(obj)
            pos_lst.append((obj_x, obj_y, obj_w, obj_h))
            #cv2.rectangle(bgd_copy,(x,y),(x+w,y+h),(0,255,0),2)
        
        if not use_occlusion:
            ind2remove = []
            for i in range(len(rect_lst)):
                for j in range(i+1, len(rect_lst)):
                    if Rectangle.check_2rects_overlap(rect_lst[i], rect_lst[j]):
                        ind2remove.append(i)
                    
            pos_lst = [x for i,x in enumerate(pos_lst) if i not in ind2remove]
            overlay_lst = [x for i,x in enumerate(overlay_lst) if i not in ind2remove]
        
        for (x,y,w,h) in pos_lst:
            line1.append('(' + 
                one_decimal.format(x) + ', ' + 
                one_decimal.format(y) + ', ' + 
                one_decimal.format(x+w)  + ', ' + 
                one_decimal.format(y+h) + ')')
            line1.append(',')
        line1[-1] = ';' + "\n"

        composed_img = ImageOperations.blend_overlays_with_bgd(overlay_lst, pos_lst, white_bgd_copy, noisy_bgd_copy)

        cv2.imwrite(mixed_filename, composed_img)

        text_line1 = ''.join(line1)
        idl_file.write(text_line1)
    idl_file.close()

In [5]:
# You need to specify the image folder, objects folder and output folder name
# You need to run the following process two times to get training data and testing data separately using different source of images data
imgs_dir = '/home/zhenyu/Desktop/multiple_objects_recognition/prepare_data_set/imgs/imgs_test'
objs_dir = '/home/zhenyu/Desktop/multiple_objects_recognition/prepare_data_set/objs/objs_test'
outputs_dir = '/home/zhenyu/Desktop/multiple_objects_recognition/prepare_data_set/outputs_test'
idl_file_name = 'test.idl'
noisy_bgd_dir = '/home/zhenyu/Desktop/multiple_objects_recognition/prepare_data_set/noisy_bgd.jpg'
noisy_bgd = cv2.imread(noisy_bgd_dir)
noisy_bgd = cv2.resize(noisy_bgd, (640, 480))

In [6]:
ImageOperations.extract_objs_from_imgs_dir(imgs_dir, objs_dir, 5)

In [7]:
obj_names_lst = get_objs_from_dir(objs_dir)
print len(obj_names_lst)

objects total:	750
750


In [8]:
# You can change the parameter here to satisfy your own need
# objs_total_in_img is the number of objects you want to have in one single image
# imgs_output_total is the total output images for training or testing
create_composed_images_with_idl(noisy_bgd, idl_file_name, outputs_dir, 
                                obj_names_lst, 10, 500, True)

(480, 640, 3)
(480, 640, 3)
[[[118 130 142]
  [116 128 140]
  [116 128 140]
  ..., 
  [126 137 145]
  [126 137 145]
  [125 136 144]]

 [[119 131 143]
  [117 129 141]
  [116 128 140]
  ..., 
  [127 138 146]
  [127 138 146]
  [126 137 145]]

 [[119 131 143]
  [117 129 141]
  [117 129 141]
  ..., 
  [129 140 148]
  [129 140 148]
  [128 139 147]]

 ..., 
 [[ 73  95 137]
  [ 72  94 135]
  [ 78 100 141]
  ..., 
  [ 88 133 184]
  [ 88 133 184]
  [ 95 142 193]]

 [[ 73  92 135]
  [ 70  92 133]
  [ 68  90 131]
  ..., 
  [ 96 140 191]
  [ 91 136 187]
  [ 91 134 185]]

 [[ 74  92 135]
  [ 75  97 138]
  [ 72  94 135]
  ..., 
  [ 97 137 189]
  [ 93 135 187]
  [ 95 138 189]]]
