# Setup

In [1]:
import random
import string
import os

from pylibdmtx.pylibdmtx import encode
from PIL import Image, ImageFilter

from torchvision.transforms import v2
from torchvision.transforms.v2.functional import adjust_brightness

import numpy as np

# Setting Parameters

To synthesize the dataset the following parameters and amounts will be used:

- 20 randomized strings in the following letter (L) / number (N) format:
    - NLNLNNNNNNN + NNNN
    - \+ NNNN denotes the last 4 digits of a serial number, which increment by 1. This suffix will therefore range from 0000 to 0100 to mimic the real-world data more closely.
- 20 configurations of shape transformations of the following types:
    - random horizontal flip
    - random vertical flip
    - random rotation
    - random affine
    - random perspective
- 5 different scalings between 0-1
- 20 configurations of color transformations of the following types:
    - color jitter
    - random photometric distort
    - random grayscale
    - gaussian blur
    - gaussian noise
    - random invert
    - random adjust sharpness

This will lead to $20*20*5*20*2 = 80,000$ images being generated (40,000 ground truth and 40,000 noisy data).

## Helpers

These helpers do each individual part of the pipeline

In [2]:
def gen_string():
    '''
    Generates a serial number to encode

    Serial numbers are 15 characters long:
    - Index 0, 2, 4, 5, 6, 7, 8, 9, 10 are random digits
    - Index 1 and 3 are uppercase letters
    - Index 11, 12, 13, 14 are a zero-padded 4 digit number drawn
      randomly from 0001 to 0098

    Example serial number: 4 L 4 N 0418028 0001

    Returns:
        The serial number as a 15 character string (no spaces).
    '''
    # first 11 indexes
    prefix = []
    for j in range(11):
        # 1 and 3 are uppercase
        if j in (1, 3):
            prefix.append(random.choice(string.ascii_uppercase))
        else:
            prefix.append(str(random.randrange(0, 10)))

    # last 4 indexes: always pad to exactly 4 digits so every serial number
    # has the same length (single digit values used to be padded to only 2)
    suffix = f'{random.randrange(1, 99):04d}'

    return ''.join(prefix) + suffix

def encode_image(to_encode):
    '''
    Creates a PIL image containing DMC encoding of given string

    The string is UTF-8 encoded, rendered by pylibdmtx, trimmed by 10 pixels
    on every side (to remove the white borders) and upscaled by a factor of 10.
    '''
    border = 10   # pixels trimmed from each edge
    upscale = 10  # enlargement factor applied after trimming

    dmc = encode(to_encode.encode('utf8'))
    raw = Image.frombytes('RGB', (dmc.width, dmc.height), dmc.pixels)

    trimmed = raw.crop((border, border, raw.width-border, raw.height-border))
    enlarged_size = (trimmed.width*upscale, trimmed.height*upscale)
    return trimmed.resize(enlarged_size, Image.BILINEAR)

def shape_transform(img):
    '''
    Applies random shape transformations to image

    Pipeline: white padding, random flips, random perspective, random
    rotation and a random affine (shift, scale, shear). All empty areas are
    filled with white (255).
    '''
    # Draw the per-call random parameters first (rotation, then the four
    # shear bounds) so the pipeline definition below stays readable.
    rotation_degrees = random.randrange(0, 360)
    shear_x_low = -random.randrange(10,20)
    shear_x_high = random.randrange(10,20)
    shear_y_low = -random.randrange(10,20)
    shear_y_high = random.randrange(10,20)

    steps = [
        v2.Pad(1500, fill=255, padding_mode='constant'),
        v2.RandomHorizontalFlip(),
        v2.RandomVerticalFlip(),
        v2.RandomPerspective(distortion_scale=0.5, p=0.5, interpolation=Image.BILINEAR, fill=255),
        v2.RandomRotation(rotation_degrees, fill=255, interpolation=Image.BILINEAR),
        v2.RandomAffine(
            degrees=0,
            translate=(0.2, 0.2),  # random "shift" on x and y axis
            scale=(0.5, 1.5),      # randomly scale image size between 0.5 and 1.5
            shear=(shear_x_low, shear_x_high, shear_y_low, shear_y_high),  # random "squish" on x and y axis
            fill=255,
            interpolation=Image.BILINEAR,
        ),
    ]

    pipeline = v2.Compose(steps)
    return pipeline(img)

def brightness_transform(img, increment):
    '''
    Shifts every pixel value of the image by `increment`

    The addition is done in int16 to avoid uint8 overflow (which would make
    white pixels black); the result is clipped to 0-255 and converted back
    to a uint8 PIL image.
    '''
    pixels = np.array(img, dtype=np.int16)
    shifted = np.clip(pixels + increment, 0, 255).astype(np.uint8)
    return Image.fromarray(shifted)

def blend_texture(img, texture_path):
    '''
    Blends img with a random square crop of the texture image at texture_path

    Steps:
    - opens the texture file (closed again before returning)
    - takes a random square crop with a side of 80% of the texture's shorter edge
    - converts the crop to img's mode and resizes it to img's size
    - blends img and the crop 50/50

    Parameters:
        img: PIL image to blend the texture into
        texture_path: path of the texture image file

    Returns:
        The blended PIL image (same size and mode as img).
    '''
    # the context manager closes the file handle; without it one handle per
    # call stays open until garbage collection
    with Image.open(texture_path) as source:
        # crop size based on texture size
        crop_size = int(0.8 * min(source.size))

        # random crop with pytorch RandomCrop
        patch = v2.RandomCrop(size=(crop_size, crop_size))(source)

        # Image.blend requires identical mode and size, so textures stored as
        # e.g. RGBA or grayscale are converted to the mode of img first
        patch = patch.convert(img.mode)
        patch = patch.resize(img.size, Image.BILINEAR)

    return Image.blend(img, patch, 0.5)

def color_transform(img):
    '''
    Applies random color transformations to image

    Runs color jitter, photometric distortion and gaussian blur in that order.
    Parameters chosen to be within a realistic range similar to real world images
    '''
    jitter = v2.ColorJitter(
        brightness=(0.8, 1.2),  # small range as extreme lighting is not so present in real world
        contrast=(0.8, 1.5),    # wider range as contrast occurs in more examples
        saturation=(0.8, 1.2),  # small range
        hue=(-0.1, 0.1),
    )
    distort = v2.RandomPhotometricDistort(
        brightness=(0.8, 1.2),
        contrast=(0.7, 1.3),
        saturation=(0.8, 1.2),
        hue=(-0.05, 0.05),
    )
    # chance to blur a lot or a little - mostly an ok amount
    blur = v2.GaussianBlur(kernel_size=25, sigma=(0.1, 100.0))

    # RandomSolarize is deliberately NOT USED as it may interfere with
    # binarizer learning and white on black is not common
    pipeline = v2.Compose([jitter, distort, blur])
    return pipeline(img)

## Main Function

"gen_imgs" takes in params and uses helpers to synthesize both the ground truth and noisy images

In [3]:
def gen_imgs(N_imgs, texture_path):
    '''
    Generates N_imgs pairs of ground truth and noisy DMC images

    For every pair a random serial number is encoded into a DMC and shape
    transformed; that image is saved as the ground truth. The same image is
    then brightened, blended into a random texture and color transformed, and
    saved as the noisy image. Both images are saved as 1000 x 1000 PNGs named
    after the encoded serial number.

    Parameters:
        N_imgs: number of image pairs to generate
        texture_path: directory containing the texture images

    Returns:
        None. Images are written to ../data/synth_data/ground_truth/ and
        ../data/synth_data/noisy/.
    '''
    ground_truth_dir = '../data/synth_data/ground_truth'
    noisy_dir = '../data/synth_data/noisy'
    out_size = (1000, 1000) # size for binarizer

    # make sure the output folders exist before doing any expensive work
    os.makedirs(ground_truth_dir, exist_ok=True)
    os.makedirs(noisy_dir, exist_ok=True)

    # the texture folder does not change while generating, so list it once
    texture_files = os.listdir(texture_path)

    for i in range(N_imgs):

        # generate string to encode into DMC
        to_encode = gen_string()
        dmc_img = encode_image(to_encode)

        # generate shape transformed DMC
        shape_img = shape_transform(dmc_img)

        # save ground truth image
        # resize returns a new image, so the result has to be kept; otherwise
        # the ground truth is saved at a different size than the noisy image
        ground_truth = shape_img.resize(out_size, Image.BILINEAR)
        ground_truth.save(f'{ground_truth_dir}/{to_encode}.png')

        # generate scaled DMC
        increment = random.randint(1, 250)
        scale_img = brightness_transform(shape_img, increment=increment)

        # blend into metal texture
        # os.path.join works with and without a trailing slash on texture_path
        texture = os.path.join(texture_path, random.choice(texture_files))
        blended_img = blend_texture(scale_img, texture)

        # generate colored DMC
        color_img = color_transform(blended_img)

        # resize to size for binarizer
        color_img = color_img.resize(out_size, Image.BILINEAR)

        # save noisy image
        color_img.save(f'{noisy_dir}/{to_encode}.png')

        # print progress
        if i % 100 == 0:
            print(f'{i}/{N_imgs}')

# on average, a pair of color_img and ground_truth images takes about 600KB
# so the total size of the dataset will be 10,000 * 600KB = 6,000,000KB = ~6GB

# generates N_imgs randomized image pairs using the textures in ../data/textures/
N_imgs = 10000
gen_imgs(N_imgs, texture_path = '../data/textures/')

0/10000
100/10000
200/10000
300/10000
400/10000
500/10000
600/10000
700/10000
800/10000
900/10000
1000/10000
1100/10000
1200/10000
1300/10000
1400/10000
1500/10000
1600/10000
1700/10000
1800/10000
1900/10000
2000/10000
2100/10000
2200/10000
2300/10000
2400/10000
2500/10000
2600/10000
2700/10000
2800/10000
2900/10000
3000/10000
3100/10000
3200/10000
3300/10000
3400/10000
3500/10000
3600/10000
3700/10000
3800/10000
3900/10000
4000/10000
4100/10000
4200/10000
4300/10000
4400/10000
4500/10000
4600/10000
4700/10000
4800/10000
4900/10000
5000/10000
5100/10000
5200/10000
5300/10000
5400/10000
5500/10000
5600/10000
5700/10000
5800/10000
5900/10000
6000/10000
6100/10000
6200/10000
6300/10000
6400/10000
6500/10000
6600/10000
6700/10000
6800/10000
6900/10000
7000/10000
7100/10000
7200/10000
7300/10000
7400/10000
7500/10000
7600/10000
7700/10000
7800/10000
7900/10000
8000/10000
8100/10000
8200/10000
8300/10000
8400/10000
8500/10000
8600/10000
8700/10000
8800/10000
8900/10000
9000/10000
9100/10000


In [4]:
# def gen_imgs(N_strings, N_shapes, N_scalings, texture_path, N_colors):
#     '''Generates parameters for N images to feed to the image generator'''
#     print(f'Generating {N_strings * N_shapes * (N_scalings + 1) * N_colors * 2} images...')
#     print('(N_strings * N_shapes * (N_scalings + 1) * N_colors * 2)')

#     # generate N_strings DMCs
#     for str1 in range(N_strings):
#         to_encode = gen_string()
#         dmc_img = encode_image(to_encode)
#         # dmc_img.show()
#         # break

#         # generate N_shapes shape transformed DMCs of each DMC
#         for str2 in range(N_shapes):
#             shape_img = shape_transform(dmc_img)

#             # save ground truth image
#             ground_truth = shape_img.copy()
#             ground_truth.resize((1000, 1000), Image.BILINEAR)
#             ground_truth.save(f'../data/synth_data/ground_truth/{str1}_{str2}.png')

#             # shape_img.show()
#             # break

#             # generate N_scalings scaled DMCs of each DMC
#             # incremenets with N_scalings = 5: [0, 50, 100, 150, 200, 250]
#             base = int(250 / N_scalings)
#             increments = [base * i for i in range(N_scalings)]
#             increments.append(250) # close to white (very pale DMC)
#             for increment in increments:
#                 str3 = increment
#                 scale_img = brightness_transform(shape_img, increment=increment)
#                 # scale_img.show()
#                 # break

#                 # blend into metal texture
#                 blended_img = blend_texture(scale_img, texture_path)
#                 # blended_img.show()
#                 # break

#                 # generate N_colors colored DMCs of each DMC
#                 for str4 in range(N_colors):
#                     color_img = color_transform(blended_img)

#                     # resize to size for binarizer
#                     color_img = color_img.resize((1000, 1000), Image.BILINEAR)

#                     # final sharpening to ground_truth image
#                     # ground_truth = ground_truth.filter(ImageFilter.SHARPEN)

#                     # color_img.show()
#                     # ground_truth.show()
#                     # break

#                     # save ground noisy image
#                     color_img.save(f'../data/synth_data/noisy/{str1}_{str2}_{str3}_{str4}.png')

#     return

# # NOTE: the figures below correspond to N_strings = N_shapes = N_colors = 12 and N_scalings = 5,
# # not to the argument values used in the call below
# # with those parameters, we generate 12 * 12 * 6 * 12 * 2 = 20,736 images
# # on average, a pair of color_img and ground_truth images takes about 600KB
# # so the total size of the dataset is 20,736/2 * 600KB = 10,368 * 600KB = 6,220,800KB = ~6.2GB
# # which is a reasonable size for a dataset
# gen_imgs(N_strings    = 500, # we want to generalize for many different DMCs
#          N_shapes     = 10,    # we want to generalize for different shapes (but dont need that many)
#          N_scalings   = 3,    # generates N_scalings + 1 for fades of DMCs
#          texture_path = '../data/textures/',
#          N_colors     = 1)   # We want to generalize for different colors