In [9]:
import cv2
import numpy as np
import albumentations as A
import random
import os

from tqdm import tqdm

# Safe mild augmentations: Tight constraints for scaling, rotation, and translation
# This is the pipeline used for the "mild" category. Each transform below is applied
# independently with its own probability `p`.
MILD_AUGMENTATIONS = A.Compose([
    A.Affine(
        scale=(0.98, 1.02),  # Very tight scaling range
        translate_percent=(-0.03, 0.03),  # Slight translation limits
        rotate=(-3, 3),  # Tight rotation limits
        shear=(-2, 2),  # Minimal shear
        p=0.7
    ),
    A.GaussianBlur(blur_limit=(3, 5), p=0.4),
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
    # NOTE(review): this is the only pipeline of the three that contains a crop. The crop
    # is configured with a fixed output size=(64, 256) (presumably height, width), so the
    # result can have different dimensions than the input. Confirm that this is intended
    # for the "mild" pipeline and that the inputs are tall enough for min_max_height.
    A.RandomSizedCrop(min_max_height=(100, 200), size=(64, 256), p=0.5),  # Controlled crop size
])

# Safe moderate augmentations with additional safety for larger images
# This is the pipeline used for the "moderate" category. Compared with the mild pipeline
# it uses wider scale/rotate/shear and brightness/contrast ranges, a narrower translation
# range, and no crop step.
MODERATE_AUGMENTATIONS = A.Compose([
    A.Affine(
        scale=(0.95, 1.05),  # Slight scaling range
        translate_percent=(-0.02, 0.02),  # Small translation
        rotate=(-5, 5),  # Slight rotation
        shear=(-3, 3),  # Minimal shear
        p=0.7
    ),
    A.GaussianBlur(blur_limit=(3, 5), p=0.4),
    A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.5),
])

# Strongest pipeline; used for every category other than "mild" and "moderate".
# It has the widest affine ranges of the three pipelines and swaps the Gaussian blur and
# brightness/contrast steps for motion blur and additive Gaussian noise.
EXTREME_AUGMENTATIONS = A.Compose([
    A.Affine(
        scale=(0.85, 1.15),  # Controlled scaling range
        translate_percent=(-0.05, 0.05),  # Small translations
        rotate=(-15, 15),  # Moderate rotation range
        shear=(-10, 10),  # Limited shear
        p=0.9
    ),
    A.MotionBlur(blur_limit=7, p=0.6),
    # NOTE(review): std_range is presumably expressed as a fraction of the maximum pixel
    # value, which would make 0.2-0.4 very strong noise -- confirm against the installed
    # albumentations version.
    A.GaussNoise(std_range=(0.2, 0.4), p=0.3),
])

# Weighted sampling of the augmentation category
def get_augmentation_type():
    """Draw one augmentation category name using fixed sampling weights.

    Returns:
        str: "mild", "moderate" or "extreme", drawn with weights 0.5, 0.35 and 0.15.
    """
    categories = ["mild", "moderate", "extreme"]
    weights = [0.5, 0.35, 0.15]  # 50% mild, 35% moderate, 15% extreme
    (chosen,) = random.choices(categories, weights=weights, k=1)
    return chosen

# Run the pipeline for a category, then pad the result
def apply_augmentations(image, category):
    """Run the pipeline matching `category` on `image` and pad the result.

    Args:
        image: Image array passed to the pipeline as `image=`; its first two shape
            entries are used as the target height and width for padding.
        category: "mild" or "moderate" select the matching pipeline; any other value
            selects the extreme pipeline.

    Returns:
        The augmented image after it has been passed through `pad_image` with the
        input's height and width.
    """
    pipeline = (
        MILD_AUGMENTATIONS if category == "mild"
        else MODERATE_AUGMENTATIONS if category == "moderate"
        else EXTREME_AUGMENTATIONS
    )

    # The pipeline runs first; the input's own shape supplies the padding target.
    return pad_image(pipeline(image=image)["image"], image.shape[0], image.shape[1])

# Locate the extent of the non-black content of an image
def get_content_bounding_box(image):
    """Return the bounding box of the pixels that are not (near-)black.

    The image is converted from BGR to grayscale and binarised with a threshold of 1;
    pixels that survive the threshold count as content.

    Args:
        image: BGR image array.

    Returns:
        tuple: (top, left, bottom, right) as inclusive row/column indices of the
        content, or (0, 0, 0, 0) when no pixel survives the threshold.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    mask = cv2.threshold(grayscale, 1, 255, cv2.THRESH_BINARY)[1]

    # Row and column indices of every content pixel.
    rows, cols = np.nonzero(mask > 0)
    if rows.size == 0:
        return (0, 0, 0, 0)

    return rows.min(), cols.min(), rows.max(), cols.max()

# Pad an image with black borders up to a target size
def pad_image(image, original_height, original_width):
    """Centre `image` on a black border so it reaches the requested dimensions.

    Only padding is performed: along an axis where the image is already at least as
    large as the target, no border is added and the image is not cropped.

    Args:
        image: Image array; the first two shape entries are read as height and width.
        original_height (int): Target height.
        original_width (int): Target width.

    Returns:
        The result of `cv2.copyMakeBorder` with a constant (0, 0, 0) border.
    """
    def _split(deficit):
        """Split a size deficit into (leading, trailing) pads; zero when not smaller."""
        missing = max(0, deficit)
        leading = missing // 2
        # The trailing side receives the extra pixel when the deficit is odd.
        return leading, missing - leading

    height, width = image.shape[:2]
    top, bottom = _split(original_height - height)
    left, right = _split(original_width - width)

    return cv2.copyMakeBorder(
        image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(0, 0, 0)
    )

# Apply an augmentation and pad the result back up to the original size when it shrank
def apply_safe_augmentation(image, transform, original_height, original_width):
    """Apply `transform` to `image` and pad the result if it came out smaller.

    The decision to pad is based on the augmented image's own dimensions. A bounding
    box measured inside the augmented image always lies within that image, so it
    cannot tell whether the output shrank or whether content was clipped; comparing
    shapes is what actually detects an output that needs padding.

    Args:
        image: Image array passed to the transform as `image=`.
        transform: Callable returning a mapping with the augmented image under "image".
        original_height (int): Height the result should at least have.
        original_width (int): Width the result should at least have.

    Returns:
        The augmented image, padded via `pad_image` when its height or width is below
        the requested size. Results that are larger than requested are returned as-is.
    """
    augmented_image = transform(image=image)["image"]

    aug_height, aug_width = augmented_image.shape[:2]
    if aug_height < original_height or aug_width < original_width:
        augmented_image = pad_image(augmented_image, original_height, original_width)

    return augmented_image

# Apply mild, moderate, and extreme augmentations
def augment_single_image(image_path, output_folder, num_variations=3):
    """
    Augment a single image and save outputs.

    Each variation draws its category from `get_augmentation_type()` and is written to
    `<output_folder>/<image stem>_<category>_<index>.png`.

    Args:
        image_path (str): Path to the image file.
        output_folder (str): Path to save augmented images (created if missing).
        num_variations (int): Number of augmented versions per image.

    Returns:
        None. Load and write failures are reported with a printed error message.
    """
    # exist_ok avoids failing when the folder appears between a check and the creation.
    os.makedirs(output_folder, exist_ok=True)

    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load {image_path}")
        return

    base_name = os.path.splitext(os.path.basename(image_path))[0]
    original_height, original_width = image.shape[:2]

    # Generate augmented versions and save them
    for i in range(num_variations):
        category = get_augmentation_type()

        # Select appropriate augmentation based on category
        if category == "mild":
            transform = MILD_AUGMENTATIONS
        elif category == "moderate":
            transform = MODERATE_AUGMENTATIONS
        else:
            transform = EXTREME_AUGMENTATIONS

        augmented_image = apply_safe_augmentation(image, transform, original_height, original_width)

        output_path = os.path.join(output_folder, f"{base_name}_{category}_{i}.png")
        # imwrite signals failure through its return value; surface it instead of
        # silently producing fewer files than requested.
        if not cv2.imwrite(output_path, augmented_image):
            print(f"Error: Failed to write {output_path}")

# Function to process an entire folder for augmentations
def augment_dataset(input_path, output_folder, num_variations=3):
    """
    Augment images in a folder or a single image file.

    For a folder, every entry whose name ends in .png, .jpg or .jpeg (in any letter
    case) is processed, in sorted name order. The folder is listed once before
    processing starts, so files written during this run are not picked up even when
    `output_folder` is the same directory as `input_path`.

    Args:
        input_path (str): Path to the folder or single image.
        output_folder (str): Path to save augmented images.
        num_variations (int): Number of augmented versions per image.
    """
    if os.path.isfile(input_path):
        # If input is a single image
        print(f"Processing single image: {input_path}")
        augment_single_image(input_path, output_folder, num_variations)
    elif os.path.isdir(input_path):
        # If input is a folder
        print(f"Processing folder: {input_path}")
        # Compare extensions case-insensitively so files such as "IMG.PNG" are not
        # skipped; sort because os.listdir returns entries in arbitrary order.
        images = sorted(
            f for f in os.listdir(input_path)
            if f.lower().endswith(('.png', '.jpg', '.jpeg'))
        )

        for img_name in tqdm(images, desc="Augmenting Images"):
            img_path = os.path.join(input_path, img_name)
            augment_single_image(img_path, output_folder, num_variations)
    else:
        print("Error: Input path is neither a valid file nor a directory.")

In [10]:
# Run Augmentation on a dataset folder OR a single image
# NOTE(review): input_path and output_folder below point at the same directory, so the
# augmented files are written next to the originals. Running this cell again would treat
# every image file in that directory, including earlier outputs, as an input.
input_path = "/home/af-ml-dev/JFreaks/OCR/EasyOCR/deep-text-recognition-benchmark/scraped_captchas_train_aug/data"  # Change to your file or folder
output_folder = "/home/af-ml-dev/JFreaks/OCR/EasyOCR/deep-text-recognition-benchmark/scraped_captchas_train_aug/data"  # Change to your output folder
augment_dataset(input_path, output_folder, num_variations=10) 

Processing folder: /home/af-ml-dev/JFreaks/OCR/EasyOCR/deep-text-recognition-benchmark/scraped_captchas_train_aug/data


Augmenting Images: 100%|██████████| 850/850 [00:08<00:00, 104.74it/s]


FIX FILENAMES AND gt.txt AFTER APPLYING AUGMENTATIONS (only works if the original image filenames follow the pattern image_<number>.png, e.g. image_1.png)

In [11]:
import os
import re

# Paths
image_folder = "/home/af-ml-dev/JFreaks/OCR/EasyOCR/deep-text-recognition-benchmark/scraped_captchas_train_aug/data"  # Folder containing images
gt_file = "/home/af-ml-dev/JFreaks/OCR/EasyOCR/deep-text-recognition-benchmark/scraped_captchas_train_aug/gt.txt"  # Ground truth file

# Read the gt.txt file and store labels in a dictionary.
# Each usable line has the form "<filename> <label>", split on the first space.
labels_dict = {}
gt_missing_final_newline = False
with open(gt_file, "r") as f:
    for line in f:
        # After the loop this reflects the last line: True when the file does not end
        # with a newline, in which case one must be written before appending.
        gt_missing_final_newline = not line.endswith("\n")
        parts = line.strip().split(" ", 1)
        if len(parts) == 2:
            filename, label = parts
            labels_dict[filename] = label

# Get the highest numbered original image.
# fullmatch keeps names such as "image_1.png.bak" out; default=0 avoids a ValueError
# when the folder contains no original images yet.
original_images = [fname for fname in os.listdir(image_folder) if re.fullmatch(r"image_\d+\.png", fname)]
highest_number = max(
    (int(re.fullmatch(r"image_(\d+)\.png", img).group(1)) for img in original_images),
    default=0,
)

# Process augmented images.
# Every rename is followed immediately by its gt.txt entry, so an interruption cannot
# leave renamed files whose label is no longer recoverable from the filename.
new_gt_entries = []
unlabelled_files = []
with open(gt_file, "a") as f:
    # Sorted so the numbering is reproducible; os.listdir order is arbitrary.
    for fname in sorted(os.listdir(image_folder)):
        match = re.fullmatch(r"(image_\d+)_(mild|moderate|extreme)_\d+\.png", fname)
        if not match:
            continue

        original_base = match.group(1) + ".png"  # Extract original image filename
        if original_base not in labels_dict:
            unlabelled_files.append(fname)
            continue

        highest_number += 1
        new_filename = f"image_{highest_number}.png"
        os.rename(os.path.join(image_folder, fname), os.path.join(image_folder, new_filename))

        entry = f"{new_filename} {labels_dict[original_base]}"
        if gt_missing_final_newline:
            # Terminate the existing last line so the first new entry is not glued to it.
            f.write("\n")
            gt_missing_final_newline = False
        f.write(entry + "\n")
        f.flush()
        new_gt_entries.append(entry)

print(f"Renamed {len(new_gt_entries)} augmented images and appended their labels to gt.txt.")
if unlabelled_files:
    print(f"Warning: {len(unlabelled_files)} augmented images were left untouched because their original has no label in gt.txt.")
print("Renaming and gt.txt update completed successfully.")


Renaming and gt.txt update completed successfully.
