In [576]:
# utils
from glob import glob
import matplotlib.pyplot as plt
import numpy as np
import random
import unidecode
import re
from datetime import datetime

# image
from PIL import Image, ImageFilter

# openCV
import cv2

# scikit learn
from skimage import io, util
from skimage.color import rgb2gray
from scipy import misc

# imgaug
import imgaug as ia
from imgaug import augmenters as iaa

# noise
import noise
from perlin import PerlinNoiseFactory

# ocr
import pytesseract

In [19]:
# Load the single test image used to try the pipeline out interactively.
img = cv2.imread('test.jpg')
# cv2.imread reports failure by returning None instead of raising, so fail
# loudly here rather than with an obscure AttributeError further down.
if img is None:
    raise FileNotFoundError("cv2.imread could not read 'test.jpg'")

In [589]:
def random_overlay(shape, color=None, n_shapes=10, max_extent=200, blur_kernel=(150, 150)):
    """Build a blurred white canvas with random dark shapes, meant to be blended over an image.

    Parameters
    ----------
    shape : sequence of int
        Shape of the target image, ``(height, width)`` or
        ``(height, width, channels)``. The overlay has exactly this shape.
    color : tuple of int, optional
        Colour used for every shape. When ``None`` (the default) a random dark
        grey ``(v, v, v)`` with ``v`` in ``[0, 54]`` is picked once per overlay.
    n_shapes : int, optional
        Number of shapes drawn on the canvas.
    max_extent : int, optional
        Exclusive upper bound, in pixels, of the random x/y offset between the
        first and second anchor point of a shape.
    blur_kernel : tuple of int, optional
        Kernel size handed to ``cv2.blur`` to soften the shapes.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``shape``.

    Raises
    ------
    ValueError
        If the requested height or width is not positive.
    """
    height, width = shape[0], shape[1]
    if height <= 0 or width <= 0:
        raise ValueError(
            'overlay height and width must be positive, got {}x{}'.format(height, width)
        )

    # Only fall back to a random grey when the caller did not choose a colour.
    if color is None:
        gray_level = random.randrange(0, 55)
        color = (gray_level, gray_level, gray_level)

    # Blank white canvas with the same shape as the target image.
    overlay = np.full(tuple(shape), 255, dtype=np.uint8)

    # Shape kinds: 0 - filled rectangle, 1 - line, 2 - filled circle.
    for _ in range(n_shapes):
        # First anchor lies inside the image; the second may fall outside it.
        x1, y1 = random.randrange(width), random.randrange(height)
        x2 = x1 + random.randrange(max_extent)
        y2 = y1 + random.randrange(max_extent)

        shape_kind = random.randrange(3)
        if shape_kind == 0:
            cv2.rectangle(overlay, (x1, y1), (x2, y2), color, -1)
        elif shape_kind == 1:
            thickness = random.randrange(1, 80)
            cv2.line(overlay, (x1, y1), (x2, y2), color, thickness)
        else:
            # cv2.circle takes a scalar integer radius; derive it from the same
            # random extent that sizes the other shapes.
            radius = max(1, max(x2 - x1, y2 - y1) // 2)
            cv2.circle(overlay, (x1, y1), radius, color, -1)

    return cv2.blur(overlay, blur_kernel)

In [590]:
def crappy_pipeline(image):
    """Degrade a clean image so that it looks like a poor-quality scan or photo.

    The image goes through three stages: a random PIL edge/sharpen filter, a
    blend with a blurred overlay from ``random_overlay``, and an imgaug
    augmentation sequence (perspective warp, salt-and-pepper, contrast/JPEG
    artefacts, dropout, slight rotation, simplex-masked edge detection).

    Parameters
    ----------
    image : numpy.ndarray
        3-channel ``uint8`` image in OpenCV's BGR channel order, i.e. what
        ``cv2.imread`` returns by default.

    Returns
    -------
    numpy.ndarray
        The degraded image, again in BGR order so it can be passed straight to
        ``cv2.imwrite``.

    Raises
    ------
    ValueError
        If ``image`` is ``None`` (which is what ``cv2.imread`` returns when a
        file cannot be read).
    """
    if image is None:
        raise ValueError(
            'crappy_pipeline received None; cv2.imread returns None for unreadable files'
        )

    # PIL interprets arrays as RGB while OpenCV delivers BGR. Convert once up
    # front and once at the end so the channel order of the result matches the
    # input (previously a single RGB2BGR swap left the output with red and
    # blue exchanged).
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Stage 1: a random PIL filter that roughens edges and borders.
    filter_name = random.choice(['EDGE_ENHANCE_MORE', 'CONTOUR', 'SHARPEN'])
    pil_image = Image.fromarray(rgb_image).filter(getattr(ImageFilter, filter_name))
    working_image = np.array(pil_image)

    # Stage 2: blend in blurred random shapes to imitate stains and shadows.
    overlay = random_overlay(working_image.shape)
    alpha = 0.6
    working_image = cv2.addWeighted(working_image, alpha, overlay, 1 - alpha, 0)

    # Stage 3: imgaug noise and geometric distortions.
    def sometimes(aug):
        """Wrap an augmenter so it is applied to roughly half of the images."""
        return iaa.Sometimes(0.5, aug)

    salt_and_pepper_p = random.randrange(20, 60) / 1000
    sequential_aug_pipeline = iaa.Sequential([
        iaa.PerspectiveTransform(scale=0.025),
        sometimes(
            iaa.SaltAndPepper(p=salt_and_pepper_p)
        ),
        iaa.SomeOf((0, 2), [
            iaa.GammaContrast(5),
            iaa.JpegCompression(compression=20)
        ]),
        iaa.Dropout(p=0.001),
        iaa.Affine(rotate=0.05),
        sometimes(iaa.SimplexNoiseAlpha(
            iaa.EdgeDetect(0.05),
            upscale_method="linear"
        ))
    ])
    augmented_image = sequential_aug_pipeline.augment_image(working_image)

    # NOTE: an experimental Perlin-noise stage used to live here as
    # commented-out code; it was never executed and has been removed.

    return cv2.cvtColor(augmented_image, cv2.COLOR_RGB2BGR)

In [591]:
# Try the pipeline once on the test image held in `img` before running it on the full data set.
crappy_image = crappy_pipeline(img)

## Building crappy images

In [601]:
# Generate one degraded copy of every source image and write it to ./crappy_images/.
INPUT_PATTERN = './is_ocr_images/*.jpg'
OUTPUT_TEMPLATE = './crappy_images/crappy_{}'
PROGRESS_EVERY = 1000  # print a progress line after this many successful writes


def _log_time():
    """Return the current local time formatted for the log lines below."""
    return datetime.now().strftime('%Y-%m-%d %H:%M:%S')


image_paths = glob(INPUT_PATTERN)
count = 0         # images successfully generated so far
current_line = 0  # 1-based position of the image being processed
print('[{}] Info: Pipeline has started. Ammount of data: {}'.format(_log_time(), len(image_paths)))
for current_line, path in enumerate(image_paths, start=1):
    # Resolve the name before the try block so the error message below always
    # refers to the image that actually failed.
    image_name = re.sub(r'^\./is_ocr_images/', '', path)
    try:
        current_image = cv2.imread(path)
        if current_image is None:
            # cv2.imread does not raise on unreadable files, it returns None.
            raise IOError('cv2.imread could not read {}'.format(path))
        crappy_image = crappy_pipeline(current_image)
        output_path = OUTPUT_TEMPLATE.format(image_name)
        # cv2.imwrite signals failure (e.g. missing output directory) by
        # returning False, so check it instead of silently losing the image.
        if not cv2.imwrite(output_path, crappy_image):
            raise IOError('cv2.imwrite could not write {}'.format(output_path))
    except Exception as err:
        # Deliberately best-effort: one bad image must not abort the whole run.
        print('[{}] Error: Error in {} line (consuming image {})'.format(_log_time(), current_line, image_name))
        print(err)
        continue

    # Only images that were really written count towards the progress report.
    count += 1
    if count % PROGRESS_EVERY == 0:
        print('[{}] Info: Pipeline has processed and generated {} crappy images.'.format(_log_time(), count))
    

[2019-11-06 21:41:02] Info: Pipeline has started. Ammount of data: 80624
[2019-11-06 21:42:06] Info: Pipeline has processed and generated 1000 crappy images.
[2019-11-06 21:43:11] Info: Pipeline has processed and generated 2000 crappy images.
[2019-11-06 21:44:18] Info: Pipeline has processed and generated 3000 crappy images.
[2019-11-06 21:45:24] Info: Pipeline has processed and generated 4000 crappy images.
[2019-11-06 21:46:30] Info: Pipeline has processed and generated 5000 crappy images.
[2019-11-06 21:47:35] Info: Pipeline has processed and generated 6000 crappy images.
[2019-11-06 21:48:42] Info: Pipeline has processed and generated 7000 crappy images.
[2019-11-06 21:49:48] Info: Pipeline has processed and generated 8000 crappy images.
[2019-11-06 21:50:54] Info: Pipeline has processed and generated 9000 crappy images.
[2019-11-06 21:52:00] Info: Pipeline has processed and generated 10000 crappy images.
[2019-11-06 21:53:06] Info: Pipeline has processed and generated 11000 crapp

'2019-11-06 21:15:08'