In [1]:
# import lib
import os
from PIL import Image, ImageDraw, ImageFont, ImageTransform
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
from tqdm import tqdm
from glob import glob
import sys
sys.path.append('../../database')
from db_util import *
from db import *

In [2]:
# set simulator data
fontdir = '../../../data/font/*.ttf'
# glob() gives no ordering guarantee (it depends on the file system), so sort
# the matches: a given index then refers to the same font file on every run.
fontpath = sorted(glob(fontdir))
# textdata ends up as a list with one entry per CSV row; each entry is the
# array of that row's cell values (header=None: the first line is data too).
textdata = list(
    pd.read_csv('../../../data/textdata.csv', sep=',', header=None).values
)

In [98]:
# set simulator parameter
# canvas size handed to Image.new() in create_image
imageSize = (4 * 256, 128)
# bounds for the random font size drawn in fontGenerator via
# np.random.randint(low, high): low is inclusive, high is exclusive
fontsSize = (10, 30)

In [99]:
# some generators
def fontGenerator(fontpath, fontsSize):
    """Endlessly yield a randomly chosen font at a randomly chosen size.

    Parameters
    ----------
    fontpath : sequence of str
        Font file paths to pick from (one is drawn uniformly per item).
    fontsSize : sequence
        ``fontsSize[0]`` is the smallest size (inclusive) and
        ``fontsSize[1]`` the upper bound (exclusive), as in
        ``np.random.randint(low, high)``.

    Yields
    ------
    The object returned by ``ImageFont.truetype(path, size)``.

    Raises
    ------
    ValueError
        On the first ``next()`` when ``fontpath`` is empty, instead of the
        unrelated-looking "low >= high" error from ``np.random.randint``.
    """
    if len(fontpath) == 0:
        raise ValueError('fontGenerator: fontpath is empty, no font files to choose from')
    while True:
        fontselection = np.random.randint(0,len(fontpath),1)[0]
        fontsize = np.random.randint(fontsSize[0],fontsSize[1],1)[0]
        # hand Pillow a plain Python int rather than a NumPy scalar
        yield ImageFont.truetype(fontpath[fontselection], int(fontsize))
# shared font stream consumed by create_image via next(fontgen)
fontgen = fontGenerator(fontpath=fontpath, fontsSize=fontsSize)

def textGenerator(textdata):
    """Endlessly yield a randomly chosen entry of ``textdata`` as a string.

    Parameters
    ----------
    textdata : sequence
        Candidate texts; each pick is converted with ``str()``.

    Yields
    ------
    str
        ``str(textdata[k])`` for a uniformly drawn index ``k``.

    Raises
    ------
    ValueError
        On the first ``next()`` when ``textdata`` is empty, instead of the
        unrelated-looking "low >= high" error from ``np.random.randint``.
    """
    # len() rather than truthiness: textdata may be a NumPy array
    if len(textdata) == 0:
        raise ValueError('textGenerator: textdata is empty, no text to choose from')
    while True:
        textselection = np.random.randint(0,len(textdata),1)[0]
        yield str(textdata[textselection])
# shared text stream consumed by create_image via next(textgen)
# NOTE(review): only textdata[0] (the first CSV row) is sampled from -- this
# presumes textdata.csv keeps all phrases on one line; confirm against the file.
textgen = textGenerator(textdata=textdata[0])

def locGenerator(imageSize):
    """Endlessly yield a random ``[x, y]`` position.

    For each axis the coordinate is drawn with ``np.random.randint`` from
    ``[extent // 8, extent // 4)``, where ``extent`` is ``imageSize[0]`` for
    x and ``imageSize[1]`` for y. x is always drawn before y.
    """
    while True:
        position = []
        for axis in (0, 1):
            lower = imageSize[axis] // 8
            upper = imageSize[axis] // 4
            position.append(np.random.randint(lower, upper, 1)[0])
        yield position
# random-position stream; create_image below hard-codes its text location and
# does not draw from this generator
locgen = locGenerator(imageSize=imageSize)

def _text_extent(font, text):
    """Return the ``(right, bottom)`` extent of ``text`` rendered with ``font``."""
    if hasattr(font, 'getsize'):
        return font.getsize(text)
    # Pillow 10 removed getsize(); getbbox() returns (left, top, right, bottom)
    # and its last two values are the replacement for the old getsize() result.
    return font.getbbox(text)[2:4]


def _box_row(left, top, right, bottom, char):
    """Build one table row: corners clockwise from top-left, plus the char."""
    return {'x1': left, 'y1': top,
            'x2': right, 'y2': top,
            'x3': right, 'y3': bottom,
            'x4': left, 'y4': bottom, 'char': char}


def textcoordinates(font, text, loc):
    """Compute a bounding quadrilateral for every character of ``text``.

    Parameters
    ----------
    font : font object
        Must provide ``getmask(str).size`` and either ``getsize(str)`` or
        ``getbbox(str)`` (e.g. a Pillow ``ImageFont`` font).
    text : str
        The text that is drawn starting at ``loc``.
    loc : sequence
        ``loc[0]`` / ``loc[1]`` are added to every x / y coordinate.

    Returns
    -------
    pandas.DataFrame
        Columns ``x1, y1, x2, y2, x3, y3, x4, y4, char`` (in that order), with
        corners listed top-left, top-right, bottom-right, bottom-left.
        Rows are, in order:

        * one zero-width ``' '`` sentinel 2 px left of the first character,
        * one row per non-space character,
        * one zero-width ``' '`` sentinel 2 px right of the last character.

        Space characters inside ``text`` produce no row. An empty ``text``
        gives an empty frame that still has all columns.
    """
    columns = ['x1', 'y1', 'x2', 'y2', 'x3', 'y3', 'x4', 'y4', 'char']
    if len(text) == 0:
        return pd.DataFrame([], columns=columns)

    # The reference glyph 'A' does not depend on the loop, so measure it once.
    ref_bottom = _text_extent(font, 'A')[1]
    _, ref_height = font.getmask('A').size

    rows = []
    last_index = len(text) - 1
    for i, char in enumerate(text):
        # The right edge of the prefix ending at this character gives the
        # character's right edge.
        prefix_right, prefix_bottom = _text_extent(font, text[:i+1])
        glyph_width, _ = font.getmask(char).size

        r = prefix_right + loc[0]
        # bottom edge: the smaller of the 'A' bottom and the prefix bottom
        b = min(ref_bottom, prefix_bottom) + loc[1]
        right = int(np.round(r))
        bottom = int(np.round(b))
        top = int(np.round(b - ref_height))
        left = int(np.round(r - glyph_width))

        if i == 0:
            rows.append(_box_row(left-2, top, left-2, bottom, ' '))
        if char != ' ':
            rows.append(_box_row(left, top, right, bottom, char))
        if i == last_index:
            # All four corners sit at right+2 (the original wrote left+2 for
            # x4, which made this sentinel inconsistent with the leading one).
            rows.append(_box_row(right+2, top, right+2, bottom, ' '))

    # Build the frame once: DataFrame.append in a loop is quadratic and was
    # removed in pandas 2.0.
    return pd.DataFrame(rows, columns=columns)

def create_image():
    """Render one random text line and return it cropped to the text.

    Draws the next text from ``textgen`` with the next font from ``fontgen``
    in black on a white ``imageSize`` canvas at the fixed position
    ``[10, 64]``, then crops to the extent reported by ``textcoordinates``
    plus a random margin of 2-5 px on each side.

    Returns
    -------
    (PIL.Image.Image, str)
        The cropped image and the text that was drawn.
    """
    # create image
    canvas = Image.new('RGB', imageSize, color='White')
    draw = ImageDraw.Draw(canvas)

    # first text
    font = next(fontgen)
    text = next(textgen)
    loc = [10, 64]
    draw.text(loc, text, fill=(0,0,0), font=font)
    tb = textcoordinates(font, text, loc)

    # random margins; drawn top-left first, then bottom-right
    margin_tl = np.random.randint(2,6,2)
    margin_br = np.random.randint(2,6,2)

    # Clamp the crop box to the canvas so the crop never reaches outside the
    # image (e.g. when the text is wider than the canvas), and pass Pillow a
    # plain tuple of ints instead of a NumPy array.
    # NOTE(review): text wider than the canvas is still drawn truncated while
    # the returned label holds the full text -- consider filtering such samples.
    width, height = canvas.size
    left = max(0, int(round(min(tb['x1']) - margin_tl[0])))
    upper = max(0, int(round(min(tb['y1']) - margin_tl[1])))
    right = min(width, int(round(max(tb['x3']) + margin_br[0])))
    lower = min(height, int(round(max(tb['y3']) + margin_br[1])))

    return canvas.crop((left, upper, right, lower)), text

"\nfor x1,y1,_,_,x3,y3,_,_,_ in tb.values:\n    draw.rectangle((x1,y1,x3,y3),None,'#f00')\ndisplay(tb)\nimg\n"

In [189]:
# Location passed to create_data_db. The default is the original
# machine-specific directory; set the DOCR_DB_PATH environment variable to
# point somewhere else without editing the notebook.
path = os.environ.get('DOCR_DB_PATH', '/home/dk/docr2/DB/')
name = 'recog_test_1'
label = 'recog'
# number of synthetic images to generate and store
numImage = 100
# create_data_db / update_data are not defined in this notebook; presumably
# they come from the db / db_util star imports -- verify.
db_data = create_data_db(path=path, name=name, description='datadb_recog',data_class=label)
for idx in tqdm(range(numImage)):
    img, txt = create_image()
    # one record per image; `label` is used for both the label and ref fields
    update_data(db_data, index=idx, label=label, ref=label, text=txt, image=img)


  0%|          | 0/100 [00:00<?, ?it/s][A
  5%|▌         | 5/100 [00:00<00:02, 46.53it/s][A
 13%|█▎        | 13/100 [00:00<00:01, 52.62it/s][A
 22%|██▏       | 22/100 [00:00<00:01, 59.57it/s][A
 29%|██▉       | 29/100 [00:00<00:01, 60.50it/s][A
 35%|███▌      | 35/100 [00:00<00:01, 56.45it/s][A
 43%|████▎     | 43/100 [00:00<00:00, 58.13it/s][A
 49%|████▉     | 49/100 [00:00<00:00, 53.04it/s][A
 55%|█████▌    | 55/100 [00:00<00:00, 52.69it/s][A
 61%|██████    | 61/100 [00:01<00:00, 44.81it/s][A
 67%|██████▋   | 67/100 [00:01<00:00, 46.72it/s][A
 72%|███████▏  | 72/100 [00:01<00:00, 45.47it/s][A
 77%|███████▋  | 77/100 [00:01<00:00, 45.14it/s][A
 83%|████████▎ | 83/100 [00:01<00:00, 47.56it/s][A
 90%|█████████ | 90/100 [00:01<00:00, 51.69it/s][A
100%|██████████| 100/100 [00:01<00:00, 53.60it/s][A
