In [2]:
import os
import json
import random
import numpy as np
from PIL import Image, ImageDraw, ImageFont, ImageFilter
from tqdm import tqdm

# ======================
# CONFIG
# ======================
OUTPUT_DIR = "dataset"   # root folder; each subset is written to its own sub-folder
IMAGE_SIZE = (200, 64)   # (width, height) in pixels, as passed to Image.new
FONTS_DIR = "./fonts"  # Ensure this folder exists and contains .ttf files
WORDS = [
    "Acnestis", "Ephemeral", "Byzantine", "Neural",
    "Captcha", "Vision", "Learning", "Model",
    "Python", "Torch", "Dataset", "Noise"
]

# Number of images generated per subset.
EASY_SAMPLES = 500
HARD_SAMPLES = 1000
BONUS_SAMPLES = 1000

# Seed both random sources used by the generators (the `random` module for
# word/font/size choices, NumPy's global RNG for noise and backgrounds) so a
# fresh Restart & Run All reproduces the same dataset.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

os.makedirs(OUTPUT_DIR, exist_ok=True)

# ======================
# UTILS
# ======================

def load_fonts():
    """Return a list of usable font file paths.

    Looks for ``.ttf`` files in ``FONTS_DIR`` first (creating the directory and
    printing a warning if it is missing). If none are found, falls back to a
    short list of well-known system font locations and keeps those that exist.

    Returns:
        list[str]: font paths, sorted when they come from ``FONTS_DIR``.

    Raises:
        AssertionError: if neither ``FONTS_DIR`` nor the fallbacks yield a font.
    """
    if not os.path.exists(FONTS_DIR):
        os.makedirs(FONTS_DIR)
        print(f"Warning: '{FONTS_DIR}' directory created. Please add .ttf files there.")

    # os.listdir order is arbitrary, so sort to keep the first font stable
    # between runs; compare case-insensitively so "Arial.TTF" is not skipped.
    fonts = sorted(
        os.path.join(FONTS_DIR, f)
        for f in os.listdir(FONTS_DIR)
        if f.lower().endswith(".ttf")
    )

    if not fonts:
        # Fallback to common system fonts if the folder is empty
        print("No fonts found in ./fonts. Attempting to find system fonts...")
        fallbacks = [
            "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", # Linux
            "C:\\Windows\\Fonts\\Arial.ttf",                   # Windows
            "/Library/Fonts/Arial.ttf"                         # macOS
        ]
        fonts = [f for f in fallbacks if os.path.exists(f)]

    assert len(fonts) > 0, "No fonts found! Please place .ttf files in the ./fonts folder."
    return fonts

# Initial font list from the load_fonts() defined above; FONTS is reassigned
# further down in this cell.
FONTS = load_fonts()

def random_word():
    """Pick one entry of WORDS at random and return it."""
    picked = random.choice(WORDS)
    return picked

def random_case(word):
    """Return ``word`` with every character independently upper- or lower-cased.

    One ``random.random()`` draw is made per character, in order; a draw below
    0.5 upper-cases the character, otherwise it is lower-cased.
    """
    pieces = []
    for ch in word:
        make_upper = random.random() < 0.5
        pieces.append(ch.upper() if make_upper else ch.lower())
    return "".join(pieces)

def add_noise(img):
    """Return a copy of ``img`` with zero-mean Gaussian noise (sigma 20) added.

    The sum is clipped to [0, 255] and converted to uint8 before being turned
    back into an image with ``Image.fromarray``.
    """
    pixels = np.array(img).astype(np.float32)
    jitter = np.random.normal(0, 20, pixels.shape)
    noisy = np.clip(pixels + jitter, 0, 255).astype(np.uint8)
    return Image.fromarray(noisy)

def textured_background(size):
    """Build a light, speckled RGB background image.

    ``size`` is reversed to form the array's leading dimensions and a channel
    axis of 3 is appended. Each value is drawn from ``randint(180, 255)``
    (upper bound exclusive).
    """
    shape = tuple(reversed(size)) + (3,)
    speckle = np.random.randint(180, 255, shape, dtype=np.uint8)
    return Image.fromarray(speckle)

# ======================
# FONT LOADING & RENDERING (UPDATED)
# ======================

def load_fonts():
    """Return paths of ``.ttf``/``.otf`` files in FONTS_DIR, or ``[None]``.

    The extension check is case-insensitive. When FONTS_DIR is missing or holds
    no matching file, a one-element list ``[None]`` is returned; render_text
    loads Pillow's built-in default font when given a falsy font path.
    """
    valid_extensions = ('.ttf', '.otf')

    fonts = []
    if os.path.exists(FONTS_DIR):
        for entry in os.listdir(FONTS_DIR):
            if entry.lower().endswith(valid_extensions):
                fonts.append(os.path.join(FONTS_DIR, entry))

    if fonts:
        return fonts

    print("No valid fonts found in ./fonts. Falling back to system default.")
    return [None]

import requests

def download_fix_font():
    """Download Roboto-Regular.ttf into FONTS_DIR unless it is already there.

    Returns:
        str: path of the font file inside FONTS_DIR.

    Raises:
        requests.RequestException: on timeout, connection failure, or a
            non-success HTTP status. Nothing is written to the target path in
            that case.
    """
    font_url = "https://github.com/google/fonts/raw/main/ofl/roboto/Roboto-Regular.ttf"
    target_path = os.path.join(FONTS_DIR, "Roboto-Regular.ttf")

    if os.path.exists(target_path):
        return target_path

    os.makedirs(FONTS_DIR, exist_ok=True)

    print("Downloading a fresh font (Roboto)...")
    response = requests.get(font_url, timeout=30)
    # Without this check an HTTP error page would be saved as the .ttf and then
    # treated as a cached font on every later call.
    response.raise_for_status()

    # Write to a temporary name and rename, so an interrupted write never
    # leaves a truncated file at target_path.
    partial_path = target_path + ".part"
    with open(partial_path, 'wb') as f:
        f.write(response.content)
    os.replace(partial_path, target_path)

    print("Download complete.")
    return target_path

# Run this before calling generate functions

def load_fonts():
    """Return existing macOS system font paths, or ``[None]`` if none are found.

    A fixed list of candidate paths is checked first. If none exists, up to
    five ``.ttf`` files from ``/Library/Fonts`` are used instead. When that is
    also empty, a warning is printed and ``[None]`` is returned.
    """
    import glob

    # Standard macOS font paths
    macos_font_paths = [
        "/Library/Fonts/Arial.ttf",
        "/Library/Fonts/Verdana.ttf",
        "/Library/Fonts/Tahoma.ttf",
        "/System/Library/Fonts/Helvetica.ttc",
        "/System/Library/Fonts/Cache/Avenir.ttc"
    ]

    fonts = [f for f in macos_font_paths if os.path.exists(f)]

    if not fonts:
        # glob returns matches in arbitrary order; sort before slicing so the
        # same five fonts are picked on every run.
        fonts = sorted(glob.glob("/Library/Fonts/*.ttf"))[:5]

    if not fonts:
        print("Warning: No system fonts found. Check System Settings > Fonts.")
        return [None]

    print(f"Loaded {len(fonts)} system fonts.")
    return fonts

# load_fonts() already returns a list of font paths (or [None]). Wrapping that
# list in another list made every FONTS entry a list, which
# ImageFont.truetype cannot open, so render_text always took its fallback path.
fresh_font = load_fonts()
FONTS = list(fresh_font)


def render_text(text, font_path, font_size, bg_color=(255, 255, 255), bg_image=None, noise=False, rotate=False):
    """Render ``text`` in black, centred on an IMAGE_SIZE canvas.

    Args:
        text: string to draw.
        font_path: font file to load; a falsy value selects Pillow's default font.
        font_size: size passed to ``ImageFont.truetype``.
        bg_color: fill colour for a new canvas, also used to fill the corners
            exposed by rotation.
        bg_image: optional image used (as a copy) instead of a solid canvas.
        noise: when true, apply ``add_noise`` followed by a 0.5-radius Gaussian blur.
        rotate: when true, rotate by a random angle in [-5, 5] degrees.

    Returns:
        The rendered image.
    """
    # Create background
    if bg_image is not None:
        img = bg_image.copy()
    else:
        img = Image.new("RGB", IMAGE_SIZE, bg_color)

    draw = ImageDraw.Draw(img)

    # Robust font loading: requested font -> macOS Arial -> built-in default.
    try:
        if font_path:
            # macOS .ttc files sometimes need an index, but 0 usually works
            font = ImageFont.truetype(font_path, font_size)
        else:
            font = ImageFont.load_default()
    except Exception:
        try:
            font = ImageFont.truetype("/Library/Fonts/Arial.ttf", font_size)
        except Exception:
            font = ImageFont.load_default()

    # textbbox((0, 0), ...) reports where the ink lands relative to the drawing
    # origin, and its left/top are generally not zero. Subtract them so the
    # ink box itself, not the origin, ends up centred.
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    w = right - left
    h = bottom - top

    x = (IMAGE_SIZE[0] - w) // 2 - left
    y = (IMAGE_SIZE[1] - h) // 2 - top

    draw.text((x, y), text, fill=(0, 0, 0), font=font)

    if rotate:
        img = img.rotate(random.uniform(-5, 5), expand=0, fillcolor=bg_color)

    if noise:
        img = add_noise(img)
        img = img.filter(ImageFilter.GaussianBlur(radius=0.5))

    return img


Loaded 1 system fonts.


In [23]:
# ======================
# DATASET GENERATION
# ======================

def generate_easy():
    """Write the "easy" subset: plain words in the first font at size 36.

    Images go to OUTPUT_DIR/easy/images/<i>.png and the filename -> word
    mapping is saved as OUTPUT_DIR/easy/labels.json.
    """
    subset_dir = os.path.join(OUTPUT_DIR, "easy")
    images_dir = os.path.join(subset_dir, "images")
    os.makedirs(images_dir, exist_ok=True)

    labels = {}
    fixed_font = FONTS[0]  # the easy set always uses the same font

    for idx in tqdm(range(EASY_SAMPLES), desc="Easy"):
        text = random_word()
        picture = render_text(text, font_path=fixed_font, font_size=36)

        fname = f"{idx}.png"
        picture.save(os.path.join(images_dir, fname))
        labels[fname] = text

    labels_path = os.path.join(subset_dir, "labels.json")
    with open(labels_path, "w") as fh:
        json.dump(labels, fh, indent=2)


def generate_hard():
    """Write the "hard" subset: mixed-case words on a textured, noisy, rotated canvas.

    Each sample uses a random font from FONTS, a random size in [28, 40] and a
    fresh textured background. Images go to OUTPUT_DIR/hard/images/<i>.png and
    the filename -> word mapping is saved as OUTPUT_DIR/hard/labels.json.
    """
    path = os.path.join(OUTPUT_DIR, "hard")
    images_dir = os.path.join(path, "images")
    os.makedirs(images_dir, exist_ok=True)
    labels = {}

    for i in tqdm(range(HARD_SAMPLES), desc="Hard"):
        word = random_case(random_word())
        font = random.choice(FONTS)
        bg = textured_background(IMAGE_SIZE)

        img = render_text(
            word,
            font_path=font,
            font_size=random.randint(28, 40),
            bg_image=bg,
            noise=True,
            rotate=True
        )

        fname = f"{i}.png"
        img.save(os.path.join(images_dir, fname))
        labels[fname] = word

    # Use a context manager so the label file is flushed and closed
    # deterministically instead of relying on garbage collection.
    with open(os.path.join(path, "labels.json"), "w") as f:
        json.dump(labels, f, indent=2)

def generate_bonus():
    """Write the "bonus" subset: colour-coded canvases with optionally reversed text.

    With probability 0.5 a sample is "red": the canvas is (255, 0, 0) and the
    word is drawn reversed. Otherwise the canvas is (0, 255, 0) and the word is
    drawn as-is. The stored label is always the un-reversed word. Images go to
    OUTPUT_DIR/bonus/images/<i>.png, labels to OUTPUT_DIR/bonus/labels.json.
    """
    subset_dir = os.path.join(OUTPUT_DIR, "bonus")
    images_dir = os.path.join(subset_dir, "images")
    os.makedirs(images_dir, exist_ok=True)

    red, green = (255, 0, 0), (0, 255, 0)
    labels = {}

    for idx in tqdm(range(BONUS_SAMPLES), desc="Bonus"):
        label = random_case(random_word())
        chosen_font = random.choice(FONTS)

        if random.random() < 0.5:
            background, shown = red, label[::-1]
        else:
            background, shown = green, label

        picture = render_text(
            shown,
            font_path=chosen_font,
            font_size=random.randint(28, 40),
            bg_color=background,
            noise=True,
            rotate=True,
        )

        fname = f"{idx}.png"
        picture.save(os.path.join(images_dir, fname))
        labels[fname] = label  # label is the original, un-reversed word

    labels_path = os.path.join(subset_dir, "labels.json")
    with open(labels_path, "w") as fh:
        json.dump(labels, fh, indent=2)



In [24]:

# Build the three subsets in turn; each call writes its own images folder and
# labels.json under OUTPUT_DIR.
generate_easy()
generate_hard()
generate_bonus()
print("✅ Dataset generation complete.")


Easy: 100%|██████████| 500/500 [00:01<00:00, 423.30it/s]
Hard: 100%|██████████| 1000/1000 [00:05<00:00, 190.75it/s]
Bonus: 100%|██████████| 1000/1000 [00:04<00:00, 222.04it/s]

✅ Dataset generation complete.





In [None]:
import os
import json
import random
import numpy as np
from PIL import Image, ImageDraw, ImageFont, ImageFilter
from tqdm import tqdm

# ======================
# CONFIGURATION
# ======================
OUTPUT_DIR = "dataset"   # root folder for all subsets
IMAGE_SIZE = (200, 64)   # (width, height) in pixels, as passed to Image.new
WORDS = [
    "Acnestis", "Ephemeral", "Byzantine", "Neural",
    "Captcha", "Vision", "Learning", "Model",
    "Python", "Torch", "Dataset", "Noise"
]

# Samples count
EASY_SAMPLES = 500
HARD_SAMPLES = 1000
BONUS_SAMPLES = 1000

# Seed both random sources used below (the `random` module for word, font,
# size and colour choices; NumPy's global RNG for the colour grain) so a fresh
# Restart & Run All reproduces the same dataset.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Create folder structure
for subset in ["easy", "hard", "bonus"]:
    os.makedirs(os.path.join(OUTPUT_DIR, subset, "images"), exist_ok=True)

# ======================
# UTILITIES
# ======================

def load_fonts():
    """Return the candidate font paths that exist, or ``[None]`` if none do.

    Only the macOS Arial location is currently probed; the Linux/Windows
    candidates are listed in comments but disabled.
    """
    candidates = [
        "/System/Library/Fonts/Supplemental/Arial.ttf",  # macOS
        # Disabled candidates:
        # "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",  # Linux
        # "C:\\Windows\\Fonts\\Arial.ttf",  # Windows
        # "/Library/Fonts/Arial.ttf"
    ]
    found = list(filter(os.path.exists, candidates))
    if not found:
        return [None]
    return found

# Font list used by generate_dataset(): FONTS[0] for the easy set, a random
# entry for the hard and bonus sets.
FONTS = load_fonts()

def add_color_grain(img, intensity=0.18):
    """Replace a random fraction of pixels with random colour values.

    Args:
        img: source image; it is copied, not modified.
        intensity: probability (0.0 to 1.0) that any given pixel is replaced.

    Returns:
        A new image built from the modified uint8 array.
    """
    pixels = np.array(img, dtype=np.uint8)
    # randint's upper bound is exclusive, so 256 is needed for 255 to occur.
    noise = np.random.randint(0, 256, pixels.shape, dtype=np.uint8)
    # One draw per pixel position (first two axes): all channels of a selected
    # pixel are replaced together.
    mask = np.random.rand(*pixels.shape[:2]) < intensity
    pixels[mask] = noise[mask]
    return Image.fromarray(pixels)

def render_text_sample(text, font_path, font_size, mode="easy", bg_color=(255, 255, 255)):
    """Render ``text`` centred on an IMAGE_SIZE canvas in the style of ``mode``.

    Args:
        text: string to draw.
        font_path: font file to load; a falsy value selects Pillow's default font.
        font_size: size passed to ``ImageFont.truetype``.
        mode: ``"easy"`` draws black text with no effects. Any other value
            picks a dark text colour at random and applies rotation, colour
            grain and a light blur. ``"hard"`` additionally forces a fixed
            light background and uses a higher grain intensity.
        bg_color: canvas colour; ignored when ``mode == "hard"``.

    Returns:
        The rendered image.
    """
    # 1. Background Setup
    if mode == "hard":
        bg_color = (242, 245, 250)  # Light bluish-grey background

    img = Image.new("RGB", IMAGE_SIZE, bg_color)
    draw = ImageDraw.Draw(img)

    # 2. Robust Font Loading: fall back to the built-in font if the file fails to load.
    try:
        font = ImageFont.truetype(font_path, font_size) if font_path else ImageFont.load_default()
    except Exception:
        font = ImageFont.load_default()

    # 3. Center Text
    # textbbox((0, 0), ...) reports where the ink lands relative to the drawing
    # origin, and its left/top are generally not zero. Subtract them so the
    # ink box itself, not the origin, ends up centred.
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    w, h = right - left, bottom - top
    x = (IMAGE_SIZE[0] - w) // 2 - left
    y = (IMAGE_SIZE[1] - h) // 2 - top

    # 4. Text Color (High contrast for clarity)
    if mode == "easy":
        text_color = (0, 0, 0)
    else:
        # Deep blue / purple / near-black variants
        text_colors = [(10, 15, 80), (60, 20, 110), (15, 15, 15)]
        text_color = random.choice(text_colors)

    draw.text((x, y), text, fill=text_color, font=font)

    # 5. Effects for Hard/Bonus Sets
    if mode != "easy":
        # Subtle Rotation (-3 to 3 degrees)
        img = img.rotate(random.uniform(-3, 3), resample=Image.BICUBIC, expand=0, fillcolor=bg_color)

        # Apply Colorful Static Noise
        img = add_color_grain(img, intensity=0.2 if mode == "hard" else 0.15)

        # Micro blur: low radius so the text stays legible
        img = img.filter(ImageFilter.GaussianBlur(radius=0.25))

    return img

# ======================
# DATASET GENERATION
# ======================

def _mixed_case(word):
    """Return ``word`` with each character independently upper- or lower-cased."""
    return "".join(c.upper() if random.random() < 0.5 else c.lower() for c in word)


def _write_subset(subset, n_samples, desc, make_sample):
    """Render ``n_samples`` images into OUTPUT_DIR/<subset>/images and save labels.json.

    ``make_sample`` is called once per image and must return ``(label, image)``.
    """
    subset_dir = os.path.join(OUTPUT_DIR, subset)
    images_dir = os.path.join(subset_dir, "images")
    # Create the folder here so the function does not depend on set-up done elsewhere.
    os.makedirs(images_dir, exist_ok=True)

    labels = {}
    for i in tqdm(range(n_samples), desc=desc):
        label, img = make_sample()
        fname = f"{i}.png"
        img.save(os.path.join(images_dir, fname))
        labels[fname] = label

    # Context manager: the label file is flushed and closed before returning.
    with open(os.path.join(subset_dir, "labels.json"), "w") as f:
        json.dump(labels, f, indent=2)


def generate_dataset():
    """Generate the easy, hard and bonus subsets under OUTPUT_DIR.

    * easy  - a random word from WORDS, first font, size 36, no effects.
    * hard  - mixed-case word, random font, size in [32, 40], "hard" rendering.
    * bonus - mixed-case word on a red or green canvas; on red the text is
      drawn reversed. The stored label is always the un-reversed word.
    """
    def easy_sample():
        word = random.choice(WORDS)
        return word, render_text_sample(word, FONTS[0], 36, mode="easy")

    def hard_sample():
        word = _mixed_case(random.choice(WORDS))
        font = random.choice(FONTS)
        return word, render_text_sample(word, font, random.randint(32, 40), mode="hard")

    def bonus_sample():
        word = _mixed_case(random.choice(WORDS))
        font = random.choice(FONTS)

        # Logic: Red = Reversed, Green = Normal
        is_red = random.random() < 0.5
        bg_color = (220, 80, 80) if is_red else (80, 220, 80)
        display_text = word[::-1] if is_red else word

        img = render_text_sample(display_text, font, random.randint(32, 40), mode="bonus", bg_color=bg_color)
        return word, img  # label is the original word

    _write_subset("easy", EASY_SAMPLES, "Generating Easy Set", easy_sample)
    _write_subset("hard", HARD_SAMPLES, "Generating Hard Set", hard_sample)
    _write_subset("bonus", BONUS_SAMPLES, "Generating Bonus Set", bonus_sample)

    print(f"\n✅ Dataset generation complete! Files saved to '{OUTPUT_DIR}'")

# Entry point: the recorded output below shows this guard passing when the
# cell is run, so executing the cell generates the full dataset.
if __name__ == "__main__":
    generate_dataset()

Generating Easy Set: 100%|██████████| 500/500 [00:00<00:00, 579.82it/s]
Generating Hard Set: 100%|██████████| 1000/1000 [00:03<00:00, 276.38it/s]
Generating Bonus Set: 100%|██████████| 1000/1000 [00:03<00:00, 276.60it/s]


✅ Dataset generation complete! Files saved to 'dataset'





In [7]:
# Display the font list currently held in FONTS.
FONTS

['/System/Library/Fonts/Supplemental/Arial.ttf']