## Dependencies

In [1]:
import os
from PIL import Image
import PIL

from torchvision.transforms import v2
import random

import gdown
import zipfile

## Download and extract dataset

In [2]:
# Fetch the dataset archive from Google Drive unless it is already present.
# gdown's Python API is used instead of the `!gdown --id ...` shell call: the
# `--id` CLI flag is deprecated, and passing `output` explicitly guarantees the
# downloaded file has the exact name the existence check looks for.
if not os.path.exists("wb_recognition_dataset.zip"):
    gdown.download(
        id="1PdWkZe8Vt6xdsTj8KeqxhAd5Tmx4pyNh",
        output="wb_recognition_dataset.zip",
        quiet=False,
    )
else:
    print("File already exists, skipping download.")

Downloading...
From (original): https://drive.google.com/uc?id=1PdWkZe8Vt6xdsTj8KeqxhAd5Tmx4pyNh
From (redirected): https://drive.google.com/uc?id=1PdWkZe8Vt6xdsTj8KeqxhAd5Tmx4pyNh&confirm=t&uuid=8bb7c6ad-570b-438b-91ce-983248a23cd1
To: /content/wb_recognition_dataset.zip
100% 105M/105M [00:00<00:00, 141MB/s]


In [3]:
# Unpack the downloaded archive once. An existing "wb_recognition_dataset"
# path is taken to mean a previous run already extracted it.
already_extracted = os.path.exists("wb_recognition_dataset")
if not already_extracted:
    with zipfile.ZipFile("wb_recognition_dataset.zip", mode="r") as archive:
        archive.extractall(path="wb_recognition_dataset")

In [4]:
# Location of the extracted dataset. The archive is unpacked relative to the
# current working directory, so the path is derived from it instead of being
# hard-coded; on Colab (cwd == /content) this resolves to
# /content/wb_recognition_dataset/wb_recognition_dataset, exactly as before.
colab_dir = os.path.abspath(os.path.join('wb_recognition_dataset', 'wb_recognition_dataset'))
dataset_dir = colab_dir
# Directory that is walked sub-directory by sub-directory for augmentation.
trainset_dir = f'{dataset_dir}/train'

## Export augmented images with labels

In [5]:
def augment_handwriting_images(image_dir):
    """Save four randomly augmented copies of every original image in ``image_dir``.

    Each eligible image gets one copy from each of: random affine, random
    rotation, elastic transform and random perspective. Every copy is written
    into ``image_dir`` itself as
    ``<transform>_<random 4-digit number>_<original file name>``.

    Args:
        image_dir: Path of the directory whose images are augmented in place.

    Returns:
        None. The only effect is the new image files on disk.

    Notes:
        * Files whose names start with "affine", "rotate", "elastic" or
          "perspective" are treated as earlier outputs and skipped, so
          augmentations are never augmented again. Originals, however, are
          augmented again on every call.
        * The extension check is case-insensitive (".JPG" is accepted).
        * The random number in the file name is not checked for uniqueness;
          a repeated call can overwrite an earlier output with the same name.
    """
    image_exts = (".jpg", ".jpeg", ".png", ".bmp")
    aug_prefixes = ("affine", "rotate", "elastic", "perspective")

    # os.listdir returns a snapshot, so the files saved below are not revisited.
    originals = [
        name
        for name in os.listdir(image_dir)
        if name.lower().endswith(image_exts) and not name.startswith(aug_prefixes)
    ]
    if not originals:
        return

    # The transforms do not depend on the image, so build them once instead of
    # once per file. fill=255 paints the areas uncovered by a transform white.
    upscale = v2.Resize(size=(32, 32))
    augmenters = [
        # rotation in (-20, 20) degrees, translation up to 10% vertically, scale in (0.95, 1)
        ("affine", v2.RandomAffine(degrees=(-20, 20), translate=(0, 0.1), scale=(0.95, 1), fill=255)),
        # rotation in (-10, 10) degrees
        ("rotate", v2.RandomRotation(degrees=(-10, 10), fill=255)),
        # elastic displacement with alpha = 75
        ("elastic", v2.ElasticTransform(fill=255, alpha=75)),
        # perspective distortion of 0.5, always applied (p = 1)
        ("perspective", v2.RandomPerspective(distortion_scale=0.5, p=1, fill=255)),
    ]

    for filename in originals:
        image_path = os.path.join(image_dir, filename)

        # The context manager closes the underlying file handle once all
        # augmented copies of this image have been written.
        with Image.open(image_path) as source_img:
            img_width, img_height = source_img.size

            # Very small images (either side <= 20 px) are resized to exactly
            # 32x32 before augmentation; the aspect ratio is not preserved.
            img = source_img
            if img_width <= 20 or img_height <= 20:
                img = upscale(source_img)

            for prefix, augmenter in augmenters:
                aug_img = augmenter(img)
                # Name the copy after its transform, a random number and the
                # original file name, and save it next to the original.
                new_filename = f"{prefix}_{random.randint(1000, 9999)}_{filename}"
                aug_img.save(os.path.join(image_dir, new_filename))

In [None]:
# Augment every sub-directory of the training set in place.
# Entries that are not directories (stray files such as .DS_Store) are skipped,
# because augment_handwriting_images() lists its argument as a directory and
# would raise on a plain file. Sorting makes the processing order deterministic.
for subdir in sorted(os.listdir(trainset_dir)):
    subdir_path = os.path.join(trainset_dir, subdir)
    if os.path.isdir(subdir_path):
        augment_handwriting_images(subdir_path)