In [103]:
import os
import numpy as np
from PIL import Image, ImageDraw, ImageFont

In [104]:
# Standard image size as (width, height).
IMG_SIZE = (32, 32)

# Maps every class label to the character that is rendered for it.
CHARS = {
    'dot': '.', 'divide': '/', 'asterisk': '*', 'plus': '+', 'minus': '-', 'equal': '=',
    'zero': '0', 'one': '1', 'two': '2', 'three': '3', 'four': '4',
    'five': '5', 'six': '6', 'seven': '7', 'eight': '8', 'nine': '9',
    'comma': ',', 'colon': ':', 'double_quotes': '"', 'single_quote': "'",
}

# Letters are their own label. Upper- and lowercase 'o' are deliberately left
# out: only the digit '0' is used.
CHARS.update({letter: letter for letter in 'ABCDEFGHIJKLMNPQRSTUVWXYZabcdefghijklmnpqrstuvwxyz'})

# Font files every character is rendered with. Each entry is handed to
# ImageFont.truetype as-is; an entry without any '.' gets '.otf' appended first.
FONTS = ['NewYorkSmall-Heavy.otf', 'NewYorkLarge-HeavyItalic.otf', 'SF-Mono-Heavy.otf', 'SF-Pro-Display-Regular.otf', 'SF-Pro-Display-UltralightItalic.otf', 'NewYorkMedium-BlackItalic.otf', 'SF-Pro-Text-Semibold.otf', 'SF-Pro-Display-Heavy.otf', 'SF-Pro-Rounded-Ultralight.otf', 'SF-Pro-Rounded-Black.otf', 'NewYorkExtraLarge-Regular.otf', 'NewYorkLarge-Black.otf', 'SF-Pro-Italic.ttf', 'NewYorkSmall-RegularItalic.otf', 'SF-Pro-Display-Thin.otf', 'SF-Pro-Text-Heavy.otf', 'SF-Compact-Text-LightItalic.otf', 'NewYorkSmall-HeavyItalic.otf', 'SF-Compact-Text-Ultralight.otf', 'NewYorkExtraLarge-Bold.otf', 'NewYorkExtraLarge-MediumItalic.otf', 'SF-Compact-Text-Semibold.otf', 'SF-Mono-HeavyItalic.otf', 'SF-Compact-Text-RegularItalic.otf', 'NewYorkSmall-Bold.otf', 'SF-Compact-Text-SemiboldItalic.otf', 'SF-Pro-Display-SemiboldItalic.otf', 'SF-Compact-Display-Black.otf', 'SF-Compact-Text-Black.otf', 'SF-Mono-MediumItalic.otf', 'NewYorkMedium-Medium.otf', 'NewYorkExtraLarge-Black.otf', 'SF-Compact-Text-Light.otf', 'SF-Pro-Display-Light.otf', 'SF-Pro-Text-RegularItalic.otf', 'SF-Pro.ttf', 'SF-Compact-Display-Light.otf', 'SF-Compact-Display-Medium.otf', 'SF-Mono-Medium.otf', 'NewYorkLarge-Regular.otf', 'NewYorkMedium-MediumItalic.otf', 'SF-Compact-Display-Ultralight.otf', 'SF-Compact-Text-Heavy.otf', 'SF-Pro-Display-Medium.otf', 'SF-Compact-Text-ThinItalic.otf', 'SF-Compact-Rounded-Semibold.otf', 'NewYorkLarge-Bold.otf', 'SF-Compact-Rounded-Ultralight.otf', 'SF-Pro-Display-RegularItalic.otf', 'SF-Compact-Rounded-Black.otf', 'SF-Compact-Rounded-Heavy.otf', 'NewYorkSmall-Regular.otf', 'SF-Compact-Text-Regular.otf', 'NewYorkSmall-MediumItalic.otf', 'SF-Compact-Display-Regular.otf', 'NewYorkSmall-BlackItalic.otf', 'NewYorkLarge-MediumItalic.otf', 'SF-Pro-Display-MediumItalic.otf', 'SF-Pro-Text-BlackItalic.otf', 'NewYorkMedium-Regular.otf', 'Arial Unicode.ttf', 'NewYorkMedium-SemiboldItalic.otf', 'SF-Compact-Rounded-Regular.otf', 'SF-Compact-Text-MediumItalic.otf', 
'NewYorkExtraLarge-HeavyItalic.otf', 'SF-Compact-Rounded-Bold.otf', 'SF-Compact-Display-Thin.otf', 'SF-Pro-Text-Black.otf', 'SF-Pro-Rounded-Heavy.otf', 'SF-Compact.ttf', 'NewYorkLarge-Semibold.otf', 'NewYorkMedium-RegularItalic.otf', 'NewYorkExtraLarge-BoldItalic.otf', 'SF-Pro-Display-Semibold.otf', 'SF-Pro-Display-BoldItalic.otf', 'SF-Mono-Semibold.otf', 'NewYorkLarge-Medium.otf', 'NewYorkMedium-Bold.otf', 'NewYorkLarge-RegularItalic.otf', 'NewYorkExtraLarge-Medium.otf', 'SF-Compact-Text-Thin.otf', 'SF-Pro-Rounded-Light.otf', 'NewYorkMedium-Semibold.otf', 'SF-Mono-LightItalic.otf', 'SF-Compact-Italic.ttf', 'SF-Mono-Bold.otf', 'NewYorkSmall-SemiboldItalic.otf', 'SF-Pro-Display-ThinItalic.otf', 'SF-Pro-Rounded-Thin.otf', 'SF-Pro-Text-Thin.otf', 'NewYorkMedium-Black.otf', 'NewYorkMedium-HeavyItalic.otf', 'SF-Mono-Light.otf', 'SF-Pro-Display-LightItalic.otf', 'SF-Compact-Text-BoldItalic.otf', 'SF-Pro-Rounded-Semibold.otf', 'SF-Pro-Text-SemiboldItalic.otf', 'SF-Compact-Text-Bold.otf', 'SF-Compact-Display-Bold.otf', 'SF-Pro-Text-Light.otf', 'NewYorkSmall-BoldItalic.otf', 'SF-Pro-Rounded-Bold.otf', 'SF-Compact-Rounded-Light.otf', 'SF-Pro-Text-UltralightItalic.otf', 'SF-Pro-Display-HeavyItalic.otf', 'SF-Compact-Rounded-Medium.otf', 'NewYorkExtraLarge-SemiboldItalic.otf', 'NewYorkExtraLarge-BlackItalic.otf', 'NewYorkLarge-Heavy.otf', 'NewYorkSmall-Black.otf', 'NewYorkLarge-BoldItalic.otf', 'SF-Mono-Regular.otf', 'SF-Compact-Display-Semibold.otf', 'SF-Pro-Rounded-Regular.otf', 'NewYorkMedium-BoldItalic.otf', 'SF-Pro-Text-HeavyItalic.otf', 'SF-Pro-Text-Regular.otf', 'SF-Pro-Text-MediumItalic.otf', 'SF-Mono-SemiboldItalic.otf', 'SF-Compact-Text-BlackItalic.otf', 'SF-Pro-Text-BoldItalic.otf', 'SF-Compact-Rounded-Thin.otf', 'NewYorkSmall-Medium.otf', 'SF-Pro-Display-Black.otf', 'SF-Pro-Text-ThinItalic.otf', 'NewYorkItalic.ttf', 'SF-Compact-Display-Heavy.otf', 'NewYorkExtraLarge-RegularItalic.otf', 'SF-Pro-Display-Ultralight.otf', 'NewYorkSmall-Semibold.otf', 
'SF-Pro-Text-LightItalic.otf', 'NewYorkMedium-Heavy.otf', 'NewYork.ttf', 'NewYorkExtraLarge-Semibold.otf', 'SF-Compact-Text-UltralightItalic.otf', 'SF-Compact-Text-HeavyItalic.otf', 'SF-Pro-Text-Ultralight.otf', 'NewYorkLarge-SemiboldItalic.otf', 'SF-Pro-Text-Bold.otf', 'SF-Compact-Text-Medium.otf', 'SF-Pro-Display-BlackItalic.otf', 'SF-Pro-Rounded-Medium.otf', 'SF-Pro-Text-Medium.otf', 'NewYorkExtraLarge-Heavy.otf', 'SF-Mono-BoldItalic.otf', 'NewYorkLarge-BlackItalic.otf', 'SF-Pro-Display-Bold.otf', 'SF-Mono-RegularItalic.otf']

# Manual corrections subtracted from the centered draw position in
# generate_character_image (y and x respectively). A positive value moves the
# drawn text toward the top / left edge of the canvas.
VERT_OFFSET = 3
HORIZ_OFFSET = 0

In [105]:
def get_label_list():
    """Return all labels (the keys of CHARS) as a list, in CHARS iteration order."""
    return list(CHARS.keys())
# Show every label, in the order the entries were added to CHARS.
print(get_label_list())

['dot', 'divide', 'asterisk', 'plus', 'minus', 'equal', 'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'comma', 'colon', 'double_quotes', 'single_quote', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [106]:
def print_cleaned_fonts(FONTS):
    """Print the fonts from ``FONTS`` that can be loaded, then how many there are.

    Each name without a '.' gets '.otf' appended. Duplicates (after that
    normalisation) are dropped while the first-seen order is kept, so the
    printed list is stable from run to run.

    Args:
        FONTS: iterable of font file names / paths (strings).

    Returns:
        None. The loadable names and their count are written to stdout.
    """
    normalized = (name if '.' in name else name + '.otf' for name in FONTS)

    loadable = []
    # dict.fromkeys de-duplicates but, unlike set(), preserves input order.
    for font_name in dict.fromkeys(normalized):
        try:
            # Tiny size: the font only needs to open, nothing is rendered.
            ImageFont.truetype(font_name, 5)
        except OSError:
            # Pillow reports a missing or unreadable font file as OSError.
            # Anything else is a real error and is allowed to propagate.
            continue
        loadable.append(font_name)

    print(loadable)
    print(len(loadable))
# Report which of the configured fonts can be loaded in this environment.
print_cleaned_fonts(FONTS)

['NewYorkSmall-Heavy.otf', 'SF-Mono-Heavy.otf', 'NewYorkLarge-HeavyItalic.otf', 'SF-Pro-Display-UltralightItalic.otf', 'SF-Pro-Display-Regular.otf', 'NewYorkMedium-BlackItalic.otf', 'SF-Pro-Text-Semibold.otf', 'SF-Pro-Display-Heavy.otf', 'SF-Pro-Rounded-Ultralight.otf', 'SF-Pro-Rounded-Black.otf', 'NewYorkExtraLarge-Regular.otf', 'NewYorkLarge-Black.otf', 'SF-Pro-Italic.ttf', 'NewYorkSmall-RegularItalic.otf', 'SF-Pro-Display-Thin.otf', 'SF-Pro-Text-Heavy.otf', 'SF-Compact-Text-LightItalic.otf', 'NewYorkSmall-HeavyItalic.otf', 'SF-Compact-Text-Ultralight.otf', 'NewYorkExtraLarge-Bold.otf', 'NewYorkExtraLarge-MediumItalic.otf', 'SF-Compact-Text-Semibold.otf', 'SF-Mono-HeavyItalic.otf', 'SF-Compact-Text-RegularItalic.otf', 'NewYorkSmall-Bold.otf', 'SF-Compact-Text-SemiboldItalic.otf', 'SF-Pro-Display-SemiboldItalic.otf', 'SF-Compact-Display-Black.otf', 'SF-Compact-Text-Black.otf', 'SF-Mono-MediumItalic.otf', 'NewYorkMedium-Medium.otf', 'NewYorkExtraLarge-Black.otf', 'SF-Compact-Text-Light

In [107]:
def generate_character_image(character, font_name, font_size=30):
    """Draw ``character`` in black on a white grayscale canvas of size IMG_SIZE.

    Args:
        character: the text to draw.
        font_name: font file name; '.otf' is appended when it contains no '.'.
        font_size: size passed to ImageFont.truetype (default 30).

    Returns:
        The "L"-mode PIL image with the character drawn on it.
    """
    font_path = font_name if '.' in font_name else font_name + '.otf'
    font = ImageFont.truetype(font_path, font_size)

    canvas = Image.new("L", IMG_SIZE, "white")
    pen = ImageDraw.Draw(canvas)

    # Only the last two bbox values (right, bottom, measured from the (0, 0)
    # anchor) are used as the text extent when centering.
    text_w, text_h = pen.textbbox((0, 0), text=character, font=font)[2:]
    x = (IMG_SIZE[0] - text_w) // 2 - HORIZ_OFFSET
    y = (IMG_SIZE[1] - text_h) // 2 - VERT_OFFSET

    pen.text((x, y), character, fill="black", font=font)
    return canvas

In [108]:
def downscale(image, size=32):
    """Crop ``image`` to its glyph and fit it, centered, into a white square.

    The glyph is taken to be every pixel darker than pure white (255) after
    conversion to grayscale. Its bounding box is cropped out, scaled with the
    aspect ratio preserved so that it fits a ``size`` x ``size`` square, and
    pasted in the middle of a new white canvas.

    Args:
        image: PIL image with a dark glyph on a white background.
        size: edge length of the square output image (default 32).

    Returns:
        A new "L"-mode PIL image of ``size`` x ``size``. If the input has no
        dark pixels at all, a blank white canvas is returned.
    """
    image = image.convert("L")
    pixels = np.array(image)

    canvas = Image.new("L", (size, size), "white")

    # The background is white (non-zero), so the glyph has to be located via
    # "darker than white" rather than via non-zero pixels.
    ink_rows, ink_cols = np.nonzero(pixels < 255)
    if ink_rows.size == 0:
        # Nothing was drawn; there is no bounding box to crop to.
        return canvas

    left, right = int(ink_cols.min()), int(ink_cols.max())
    top, bottom = int(ink_rows.min()), int(ink_rows.max())
    width = right - left + 1
    height = bottom - top + 1

    # Largest uniform scale at which the bounding box still fits the square.
    scale = min(size / width, size / height)
    # round() guards against float error (e.g. 31.999...); the clamps keep
    # very thin glyphs from collapsing to a zero-sized image.
    new_width = min(size, max(1, round(width * scale)))
    new_height = min(size, max(1, round(height * scale)))

    # PIL crop boxes exclude the right/bottom edge, hence the +1.
    glyph = image.crop((left, top, right + 1, bottom + 1))
    glyph = glyph.resize((new_width, new_height), Image.Resampling.LANCZOS)

    canvas.paste(glyph, ((size - new_width) // 2, (size - new_height) // 2))
    return canvas
    

In [109]:
def gen_and_save(character, name, folder, font):
    """Render ``character`` with ``font`` and save it as a PNG in ``folder``.

    The image is produced by generate_character_image followed by downscale
    and written to ``<folder>/<font stem>.png``, where the stem is the font's
    file name without directory and without its final extension.

    Args:
        character: the character to render.
        name: label of the character; accepted for call compatibility but not
            used here.
        folder: existing directory the PNG is written into.
        font: font file name or path.
    """
    image = downscale(generate_character_image(character, font))

    # basename + splitext keeps the stem intact for fonts given as paths or
    # with extra dots in their name (e.g. './fonts/SF-Mono.Bold.otf').
    font_stem = os.path.splitext(os.path.basename(font))[0]
    image.save(f'{folder}/{font_stem}.png')

In [110]:
def handle_char(char, name, folder):
    """Render ``char`` in every font of FONTS into its own label directory.

    The directory is ``<folder>/<name>``; a label for which ``str.isupper()``
    is true is prefixed with '_' first (presumably so that e.g. 'A' and 'a'
    stay distinct on case-insensitive file systems -- TODO confirm). The
    directory is created when it does not exist yet.

    Args:
        char: the character to render.
        name: label of the character.
        folder: parent directory of the per-label directories.
    """
    label = f'_{name}' if name.isupper() else name
    target_dir = f'{folder}/{label}'

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    for font in FONTS:
        gen_and_save(char, label, target_dir, font)

In [111]:
# Characters are rendered on a canvas that is RENDER_MARGIN larger than the
# configured IMG_SIZE in each dimension (presumably so glyphs drawn at the
# default font size are not clipped -- TODO confirm).
RENDER_MARGIN = 30

# Enlarge IMG_SIZE only once per configured value. Without this guard every
# re-run of the cell would add another RENDER_MARGIN on top of the already
# enlarged size. Caveat: a configured size equal to the previous enlarged
# size is not enlarged again.
if globals().get('_RENDER_IMG_SIZE') != IMG_SIZE:
    _RENDER_IMG_SIZE = (IMG_SIZE[0] + RENDER_MARGIN, IMG_SIZE[1] + RENDER_MARGIN)
    IMG_SIZE = _RENDER_IMG_SIZE

# Root directory of the generated dataset; one sub-directory per label.
folder = 'dataset'
os.makedirs(folder, exist_ok=True)
print(f'NUM_LABELS = {len(CHARS)}')

# Render every character in every configured font.
for name, char in CHARS.items():
    handle_char(char=char, name=name, folder=folder)

NUM_LABELS = 70
