In [10]:
from bs4 import BeautifulSoup
import requests
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import time
import io
import cv2
import textwrap
import matplotlib.patches as patches
import math
import os
from pathlib import Path
from datetime import datetime
import json

# Fixed seed for NumPy's legacy global random generator, so that the random
# choices made through np.random.* below repeat from run to run.
seed = 42
np.random.seed(seed)

In [11]:
# Output locations: the summary JSON goes to the root folder, rendered images
# and per-image label files go to sub-folders of it.
jsn_dir_path = Path("output")
img_dir_path = jsn_dir_path / "images"
lbl_dir_path = jsn_dir_path / "labels"

# Run tag used in file names: today's date read as the integer MMDD, minus 1001,
# padded to a width of four (the value is negative for dates before October 1st).
_mmdd_today = int(datetime.now().strftime("%m%d"))
today = f"{_mmdd_today - 1001:04}"
imgfn_prefix = f"image_{today}_"
jsnfn_prefix = f"label_{today}"

# Sample indices i with startno <= i < endno are rendered and saved by main();
# the others are only simulated so that the random stream stays aligned.
startno = 0
endno = 1e8

# Line limit handed to read_wikidata().
wiki_maxnum = 100


In [12]:

def edit_dist_score(a, b, add=0.4, remove=0.4, replace=2):
    """Score how closely ``b`` matches ``a`` using a weighted edit distance.

    The distance is the cheapest way to turn ``a`` into ``b`` where inserting a
    character costs ``add``, deleting one costs ``remove`` and substituting one
    costs ``replace``. The table is filled iteratively, so long inputs do not
    hit Python's recursion limit.

    Args:
        a: Reference sequence; its length normalises the score.
        b: Sequence compared against ``a``.
        add: Cost of one insertion.
        remove: Cost of one deletion.
        replace: Cost of one substitution.

    Returns:
        ``max(len(a) - distance, 0) / max(len(a), 1)``: 1.0 for identical
        non-empty inputs, decreasing towards 0.0 as the distance grows.
    """
    n_cols = len(a) + 1
    n_rows = len(b) + 1
    dist = [[0] * n_cols for _ in range(n_rows)]

    # First column / first row: build a prefix of b from nothing, or delete a prefix of a.
    for row in range(1, n_rows):
        dist[row][0] = dist[row - 1][0] + add
    for col in range(1, n_cols):
        dist[0][col] = dist[0][col - 1] + remove

    for row in range(1, n_rows):
        above = dist[row - 1]
        current = dist[row]
        char_b = b[row - 1]
        for col in range(1, n_cols):
            diagonal = above[col - 1]
            if char_b != a[col - 1]:
                diagonal = diagonal + replace
            current[col] = min(above[col] + add, current[col - 1] + remove, diagonal)

    return max(len(a) - dist[n_rows - 1][n_cols - 1], 0) / max(len(a), 1)

# Placeholder picture, opened once when this cell runs; fetch_image() returns it
# instead of downloading anything when it is called with saveflag=False.
dummy_img = Image.open("mizuho.png")
def fetch_image(randomn=-1, blacklist=None, saveflag=True):
    """Download one illustration from irasutoya.com, selected by post index.

    Args:
        randomn: Post index to fetch. A value outside ``1..maxn`` (including the
            default ``-1``) is replaced by a random draw; a value inside that
            range is used as given.
        blacklist: Indices a random draw must not return. ``None`` excludes
            nothing.
        saveflag: When false, no request is made and the module-level
            ``dummy_img`` is returned. Any random draw still happens first, so
            the global NumPy random stream advances the same way in both modes.

    Returns:
        The image opened from the first ``<img>`` that matches the page title,
        ``dummy_img`` when ``saveflag`` is false, or ``None`` when no matching
        image could be located or downloaded.
    """
    maxn = 25315
    if blacklist is None:
        blacklist = []

    # Draw only when the caller did not supply a usable index. The draw used to
    # sit inside an unconditional `while True`, which never terminated for a
    # valid caller-supplied index.
    if randomn <= 0 or randomn > maxn:
        while True:
            randomn = int(np.random.random() * maxn)
            if randomn not in blacklist:
                break

    if not saveflag:
        return dummy_img

    # Step 1: ask the feed for the single post at that index.
    url1 = f"https://www.irasutoya.com/feeds/posts/summary?start-index={randomn}&max-results=1&alt=json-in-script&callback=showLucky"
    response1 = requests.get(url1)
    response1_text = response1.text.split(",")
    for r1txt in response1_text:
        if "href" in r1txt and "default" in r1txt:
            # Step 2: follow the link found in the feed entry.
            url2 = r1txt.split("\"")[3].replace("\\", "")
            response2 = requests.get(url2)
            response2_text = response2.text.split("\'")
            for r2txt in response2_text:
                if "irasu" in r2txt:
                    # Step 3: load the page and look for its illustration.
                    url3 = r2txt
                    response3 = requests.get(url3)
                    response3_text = response3.text
                    soup3 = BeautifulSoup(response3_text)
                    soup_title = soup3.find_all('title')[0].get_text().split("|")[0].strip()
                    soup_imgs = soup3.find_all('img')
                    # Matching rules, tried from strictest to loosest:
                    #   eq1: the page title is contained in the image's alt text
                    #   eq2: title and alt text are similar by edit distance
                    #   eq3: any image that follows the bookmark-button image
                    eq1 = lambda soup_title, soup_img_alt, f: soup_title in soup_img_alt
                    eq2 = lambda soup_title, soup_img_alt, f: edit_dist_score(soup_title, soup_img_alt) > 0.75
                    eq3 = lambda soup_title, soup_img_alt, f: f
                    eqs = [eq1, eq2, eq3]
                    for i in range(len(eqs)):
                        flag = False
                        for soup_img in soup_imgs:
                            try:
                                if eqs[i](soup_title, soup_img["alt"], flag):
                                    random_image_url = soup_img["src"]
                                    print(f"No.{randomn} eq{i}", "alt:", soup_img["alt"], ", image url: ", random_image_url)
                                    random_img = Image.open(io.BytesIO(requests.get(random_image_url).content))
                                    return random_img
                                if soup_img["alt"] in "このエントリーをはてなブックマークに追加":
                                    flag = True
                            except Exception:
                                # Best effort: an <img> without alt/src or a failed
                                # download just moves on to the next candidate.
                                # KeyboardInterrupt is no longer swallowed here.
                                continue
            # Only the first matching feed fragment is examined.
            break
    return None

def check_img(img):
    """Open a new pyplot figure with figsize (5, 5) and show ``img`` on it."""
    side = 5
    plt.figure(figsize=(side, side))
    plt.imshow(img)

In [13]:
def read_wikidata(n = -1):
    """Read text lines from ``wiki.txt`` in the current working directory.

    The file is opened with the platform's default text encoding. Trailing
    whitespace, including the newline, is stripped from every line.

    Args:
        n: Maximum number of lines to return. Zero or a negative value (the
            default) reads the whole file.

    Returns:
        A list with at most ``n`` stripped lines, in file order.
    """
    wikipath = "wiki.txt"
    wikidata = []
    with open(wikipath) as f:
        for s_line in f:
            wikidata.append(s_line.rstrip())
            # Stop once exactly n lines are collected; the former `len > n`
            # check ran after the append and therefore returned n + 1 lines.
            if 0 < n <= len(wikidata):
                break
    return wikidata

In [14]:
# Font family names from which draw_str() picks one at random for every string.
# calc_strarea() looks the chosen name up in `fontspace`, so each name listed
# here needs an entry there as well.
jpfonts = ['HGSoeiPresenceEB', 'MS Mincho',
            'HGMaruGothicMPRO', 'Microsoft JhengHei', 'Microsoft YaHei',
            'BIZ UDGothic', 'HGGyoshotai', 'HGMinchoE',
            'MS Gothic', 'UD Digi Kyokasho N-B', 'Yu Gothic',
            'HGGothicE', 'BIZ UDMincho', 'Yu Mincho',#15
            'SimSun', 'HGGothicM', 'HGMinchoB',
            'Meiryo', 'HGSoeiKakugothicUB', 'HGKyokashotai',
            'UD Digi Kyokasho N-R', 'HGSoeiKakupoptai', 'HGSeikaishotaiPRO']
# Extent of the drawing area in data units: used as the axes limits and,
# divided by 100, as the figure size passed to plt.subplots().
width, height = 640, 480



In [15]:
def hex_to_rgb(hex):
    """Convert a six-digit hex colour without a leading ``#`` to an ``(r, g, b)`` tuple."""
    channels = []
    for start in (0, 2, 4):
        pair = hex[start:start + 2]
        channels.append(int(pair, 16))
    return tuple(channels)

def luminance(rgb):
    """Return the relative luminance of an ``(r, g, b)`` colour with 0-255 channels.

    Each channel is scaled to 0-1 and linearised (divided by 12.92 at or below
    0.03928, gamma 2.4 curve above it), then the channels are combined with the
    weights 0.2126, 0.7152 and 0.0722.
    """
    def _linearise(channel):
        scaled = channel / 255
        if scaled <= 0.03928:
            return scaled / 12.92
        return ((scaled + 0.055) / 1.055) ** 2.4

    red, green, blue = rgb
    lin_r = _linearise(red)
    lin_g = _linearise(green)
    lin_b = _linearise(blue)
    return 0.2126 * lin_r + 0.7152 * lin_g + 0.0722 * lin_b

def contrast_ratio(color1, color2):
    """Return the contrast ratio of two RGB colours, always >= 1.

    The ratio is ``(L_brighter + 0.05) / (L_darker + 0.05)`` where ``L`` is the
    value returned by ``luminance``; the argument order does not matter.
    """
    first = luminance(color1)
    second = luminance(color2)
    if first > second:
        brighter, darker = first, second
    else:
        brighter, darker = second, first
    return (brighter + 0.05) / (darker + 0.05)

def is_readable(color1, color2):
    """Tell whether two RGB colours reach a contrast ratio of at least 4.5:1.

    4.5:1 is the minimum that WCAG 2.0 Level AA requires.
    """
    minimum_ratio = 4.5
    ratio = contrast_ratio(color1, color2)
    return ratio >= minimum_ratio

In [16]:
def generate_random_colorcode():
    """Return a random colour as ``#`` followed by six lowercase hex digits.

    Each digit comes from its own ``np.random.randint(0, 16)`` call, so the
    function advances the global NumPy random stream by six draws.
    """
    hex_digits = "0123456789abcdef"
    picked = [hex_digits[np.random.randint(0, 16)] for _ in range(6)]
    return "#" + "".join(picked)

def get_text_bw_color(bg_color):
    """Pick black or white text for a ``#rrggbb`` background colour.

    The background brightness is the weighted sum
    ``(299 * r + 587 * g + 114 * b) / 1000``; black is returned when it is
    above 128, white otherwise.
    """
    red, green, blue = (int(bg_color[pos:pos + 2], 16) for pos in (1, 3, 5))
    brightness = (red * 299 + green * 587 + blue * 114) / 1000
    # Black on bright backgrounds, white on dark ones.
    return "#000000" if brightness > 128 else "#FFFFFF"

def parse_rgb(colorcode):
    """Convert a ``#rrggbb`` colour code to an ``(r, g, b)`` tuple of ints."""
    channels = [int(colorcode[start:start + 2], 16) for start in (1, 3, 5)]
    return tuple(channels)

def generate_str_colorcode(facecolor):
    """Choose a text colour that is readable on the ``#rrggbb`` colour ``facecolor``.

    Up to 100 random colours are tried and the first one that passes
    ``is_readable`` against the background is returned. When none passes, the
    black-or-white choice of ``get_text_bw_color`` is used instead.
    """
    background_rgb = parse_rgb(facecolor)
    attempts_left = 100
    while attempts_left > 0:
        candidate = generate_random_colorcode()
        if is_readable(parse_rgb(candidate), background_rgb):
            return candidate
        attempts_left -= 1
    return get_text_bw_color(facecolor)

def expand_rect(rect, rect_margin):
    """Grow a possibly rotated rectangle outwards by ``rect_margin`` on every side.

    The corners are turned about their centroid so that the edge from the first
    to the second corner lies along the x axis, the axis-aligned bounding box is
    widened by the margin, and the box is turned back by the same angle.

    Args:
        rect: Iterable of ``(x, y)`` corners; the first two define the orientation.
        rect_margin: Distance added on each side.

    Returns:
        List of four ``(x, y)`` tuples, starting at the (min x, min y) corner of
        the axis-aligned box and running via (max x, min y) and (max x, max y).
    """
    corners = [(x, y) for x, y in rect]
    xs, ys = zip(*corners)
    center_x = sum(xs) / len(corners)
    center_y = sum(ys) / len(corners)

    def _turn(pts, cos, sin):
        # Rotate pts about the centroid using the given cosine / sine pair.
        turned = []
        for px, py in pts:
            tx = (px - center_x) * cos + (py - center_y) * sin + center_x
            ty = -(px - center_x) * sin + (py - center_y) * cos + center_y
            turned.append((tx, ty))
        return turned

    # Orientation of the first edge.
    (x0, y0), (x1, y1) = corners[0], corners[1]
    angle = math.atan2(y1 - y0, x1 - x0)

    aligned = _turn(corners, math.cos(angle), math.sin(angle))
    aligned_xs, aligned_ys = zip(*aligned)
    left = min(aligned_xs) - rect_margin
    right = max(aligned_xs) + rect_margin
    bottom = min(aligned_ys) - rect_margin
    top = max(aligned_ys) + rect_margin
    box = [(left, bottom), (right, bottom), (right, top), (left, top)]

    # Undo the alignment rotation.
    return _turn(box, math.cos(-angle), math.sin(-angle))

def rotate_points(points, rotation):
    """Rotate ``points`` about the first point, then shift them back into place.

    After the rotation by ``rotation`` degrees the whole set is translated so
    that its minimum x and minimum y equal those of the input points. Both
    minima are capped from above by the module-level ``width`` / ``height``,
    which serve as the starting values of the search.

    Returns:
        List of ``(x, y)`` tuples, one per input point, in input order.
    """
    theta = np.radians(rotation)
    cos, sin = np.cos(theta), np.sin(theta)
    rotation_matrix = np.array([[cos, -sin], [sin, cos]])

    originals = [np.array(p) for p in points]
    turned = []
    for original in originals:
        moved = np.dot(rotation_matrix, original - np.array(points[0]))
        moved += np.array(points[0])
        turned.append(moved)

    # Smallest coordinates before and after the rotation.
    minx_org = min([width] + [o[0] for o in originals])
    miny_org = min([height] + [o[1] for o in originals])
    minx = min([width] + [t[0] for t in turned])
    miny = min([height] + [t[1] for t in turned])

    dx = minx_org - minx
    dy = miny_org - miny
    return [(t[0] + dx, t[1] + dy) for t in turned]

def draw_polygon(ax, points, color = 'r'):
    """Add an unfilled polygon through ``points`` to ``ax``, outlined in ``color``."""
    ax.add_patch(patches.Polygon(points, fill=None, edgecolor=color))

def select_str():
    """Take the next few lines off the front of the global ``wikidata`` list.

    Between two and ten lines are removed and returned. From the second line on,
    once at least 20 characters have been collected, the selection stops either
    at random (more likely the more lines were taken) or when 400 characters
    are reached. ``list.pop`` raises ``IndexError`` if ``wikidata`` runs out.
    """
    global wikidata
    picked = []
    total_chars = 0
    index = 0
    while index < 10:
        line = wikidata.pop(0)
        picked.append(line)
        total_chars += len(line)
        may_stop = total_chars >= 20 and index >= 1
        # The random draw is only made once stopping is allowed at all.
        if may_stop and (np.random.random()*2.6*index/3 > 1 or total_chars >= 400):
            break
        index += 1
    return picked

def is_inside_figure(rect):
    """Return True when every corner of ``rect`` lies within ``[0, width] x [0, height]``."""
    xs, ys = zip(*rect)
    fits_horizontally = min(xs) >= 0 and max(xs) <= width
    fits_vertically = min(ys) >= 0 and max(ys) <= height
    return bool(fits_horizontally and fits_vertically)

def is_halfwidth(char):
    """Return True for a half-width character.

    Half-width here means printable ASCII (U+0020 to U+007E) or the code points
    U+FF61 to U+FF9F; every narrower range the check could name (digits,
    letters, punctuation) is already part of the ASCII range.
    """
    code_point = ord(char)
    in_ascii_printable = 0x0020 <= code_point <= 0x007E
    in_halfwidth_forms = 0xFF61 <= code_point <= 0xFF9F
    return in_ascii_printable or in_halfwidth_forms

def counthalf(s):
    """Count the characters of ``s`` for which ``is_halfwidth`` is true."""
    return sum(1 for char in s if is_halfwidth(char))

def set_fontsize(len_s):
    """Draw a random font size; shorter strings may get larger fonts.

    The size is ``np.random.randint(8, upper)`` with an exclusive upper bound of
    18 for up to 5 characters, 14 for up to 10 characters and 10 otherwise.
    """
    if len_s <= 5:
        upper = 18
    elif len_s <= 10:
        upper = 14
    else:
        upper = 10
    return np.random.randint(8, upper)

def calc_charmaxnum(s):
    """Choose a random wrap width for ``s`` and measure the wrapped block.

    A width between 5 and 15 characters is drawn, up to ten times, rejecting a
    width that is exactly one less than ``len(s)``. If all ten draws are
    rejected the last one is used anyway, so the result is always defined. The
    width is then clamped to ``1..len(s)``; the lower bound keeps the empty
    string from causing a division by zero.

    Returns:
        ``(charmaxnum_horizon, charmaxnum_vertical, charmaxnum_horizon_display)``:
        characters per line, number of lines, and the widest line measured with
        half-width characters (see ``counthalf``) counting as 0.5. The empty
        string yields ``(1, 0, 0)``.
    """
    length = len(s)
    for _ in range(10):
        charmaxnum_horizon = np.random.randint(5, 16)
        if length - charmaxnum_horizon != 1:
            break

    charmaxnum_horizon = max(1, min(charmaxnum_horizon, length))
    charmaxnum_vertical = (length - 1) // charmaxnum_horizon + 1

    charmaxnum_horizon_display = 0
    for j in range(charmaxnum_vertical):
        ss = s[charmaxnum_horizon * j : charmaxnum_horizon * (j + 1)]
        lm = len(ss) - 0.5 * counthalf(ss)
        charmaxnum_horizon_display = max(charmaxnum_horizon_display, lm)
    return charmaxnum_horizon, charmaxnum_vertical, charmaxnum_horizon_display

def calc_strarea(ax, s, rects, font, fontsize, saveflag):
    """Search for a free, fully visible placement for the text ``s``.

    Up to 100 random combinations of wrap width, anchor position and rotation
    (-90 to 89 degrees) are tried. A candidate is accepted when its rotated
    bounding box lies inside the figure and does not collide with any box in
    ``rects``.

    Args:
        ax: Not used by this function.
        s: Text to place.
        rects: Boxes that are already occupied.
        font: Font name, used to look up the line-height factor in ``fontspace``.
        fontsize: Font size used to estimate the box size.
        saveflag: When true, a failed search is reported with a printed message.

    Returns:
        ``(endstate, info)`` where ``endstate`` is 0 on success and -1 when no
        placement was found, and ``info`` is
        ``(x, y, rotation, charmaxnum_horizon, corner_points)`` of the last
        candidate tried, with the corners given as tuples of plain floats.
    """
    rect_margin = 0
    endstate = -1
    for _attempt in range(100):
        charmaxnum_horizon, charmaxnum_vertical, charmaxnum_horizon_display = calc_charmaxnum(s)

        x = np.random.randint(0, width)
        y = np.random.randint(0, height)
        rotation = np.random.randint(-90, 90)

        # Estimated extent of the unrotated text block, anchored at (x, y).
        ex = x + (charmaxnum_horizon_display * 2 * fontsize * 0.9)
        ey = y + (charmaxnum_vertical * 2 * fontsize * fontspace[font])
        corners = [(x, y), (x, ey), (ex, ey), (ex, y)]

        points_rotated_expanded = expand_rect(rotate_points(corners, rotation), rect_margin)
        if is_inside_figure(points_rotated_expanded) and is_non_overlapping(points_rotated_expanded, rects):
            endstate = 0
            break

    if endstate == -1 and saveflag:
        print("endless overlap")

    points_rotated_expanded = [(float(px), float(py)) for px, py in points_rotated_expanded]
    return endstate, (x, y, rotation, charmaxnum_horizon, points_rotated_expanded)

def make_answertext(img_filename, str_labels):
    """Build the tab-separated answer line for one image.

    The line starts with ``img_filename`` and continues with the label strings
    in the order produced by ``sort_label``, which also reorders ``str_labels``
    in place.
    """
    fields = [img_filename]
    fields.extend(sort_label(str_labels))
    return "\t".join(fields)

def calc_ans_point(points):
    """Return ``(smallest x, largest y)`` over all ``(x, y)`` entries of ``points``."""
    xs = [pt[0] for pt in points]
    ys = [pt[1] for pt in points]
    return (min(xs), max(ys))

def is_swap(la, lb):
    """Decide whether label ``lb`` has to come before label ``la`` in reading order.

    Both labels are reduced to their ``calc_ans_point`` reference point. ``lb``
    goes first when its point is more than 10 units higher, or when it lies
    further left while being less than 10 units lower.
    """
    a_x, a_y = calc_ans_point(la["points"])
    b_x, b_y = calc_ans_point(lb["points"])
    dx = b_x - a_x
    dy = b_y - a_y
    return bool(dy > 10 or (dx < 0 and dy > -10))

def sort_label(str_labels):
    """Reorder ``str_labels`` in place into reading order and return their strings.

    Every pair of positions is visited once, earlier position first, and the two
    entries are exchanged whenever ``is_swap`` asks for it.

    Returns:
        The ``"string"`` values of the labels in their new order.
    """
    count = len(str_labels)
    for first in range(count):
        for second in range(first + 1, count):
            if is_swap(str_labels[first], str_labels[second]):
                str_labels[first], str_labels[second] = str_labels[second], str_labels[first]
    return [label["string"] for label in str_labels]


In [17]:
def is_non_overlapping(new_rect, existing_rects):
    """Return True when ``new_rect`` collides with none of ``existing_rects``.

    A collision is either crossing edges (``is_rect_intersecting``) or one
    rectangle containing the other (``is_rect_insideoutside``).
    """
    for placed in existing_rects:
        collides = is_rect_intersecting(new_rect, placed) or is_rect_insideoutside(new_rect, placed)
        if collides:
            return False
    return True

def is_point_inside_rect(point, rect):
    """Test whether ``point`` lies inside the polygon given by the corners ``rect``.

    The edges are walked in order and ``inside`` is toggled for every edge that
    spans the point's y coordinate and lies at or to the right of the point, so
    an odd number of such edges means the point is inside.
    """
    x, y = point
    corner_count = len(rect)
    inside = False
    prev_x, prev_y = rect[0]
    for idx in range(corner_count + 1):
        cur_x, cur_y = rect[idx % corner_count]
        spans_y = min(prev_y, cur_y) < y <= max(prev_y, cur_y)
        if spans_y and x <= max(prev_x, cur_x):
            if prev_y != cur_y:
                # x coordinate at which the edge reaches the height of the point
                xints = (y - prev_y) * (cur_x - prev_x) / (cur_y - prev_y) + prev_x
            if prev_x == cur_x or x <= xints:
                inside = not inside
        prev_x, prev_y = cur_x, cur_y
    return inside

def is_rect_insideoutside(rect1, rect2):
    """Return True when one of the two rectangles contains the other.

    True if all corners of ``rect1`` are inside ``rect2``, or if no corner of
    ``rect1`` is inside ``rect2`` but the first corner of ``rect2`` is inside
    ``rect1``. False as soon as the corners of ``rect1`` disagree.
    """
    corner_flags = [is_point_inside_rect(corner, rect2) for corner in rect1]
    first_flag = corner_flags[0]
    for flag in corner_flags[1:]:
        if flag != first_flag:
            # Some corners in, some out: this is not a containment case.
            return False
    if first_flag is True:
        # rect1 lies inside rect2.
        return True
    # rect1 is entirely outside; check whether rect2 lies inside rect1.
    if is_point_inside_rect(rect2[0], rect1):
        return True
    return False

def is_cross_lines(line1, line2):
    """Return True when the two segments properly cross each other.

    Each segment is a pair of ``(x, y)`` end points. The segments cross when the
    end points of each lie strictly on opposite sides of the other; touching or
    collinear segments do not count.
    """
    (x1, y1), (x2, y2) = line1[0], line1[1]
    (x3, y3), (x4, y4) = line2[0], line2[1]

    def _side(ax_, ay_, bx_, by_, px_, py_):
        # Signed side of point p relative to the line through a and b.
        return (ax_ - bx_) * (py_ - ay_) + (ay_ - by_) * (ax_ - px_)

    side_of_1 = _side(x3, y3, x4, y4, x1, y1)
    side_of_2 = _side(x3, y3, x4, y4, x2, y2)
    side_of_3 = _side(x1, y1, x2, y2, x3, y3)
    side_of_4 = _side(x1, y1, x2, y2, x4, y4)
    return side_of_3 * side_of_4 < 0 and side_of_1 * side_of_2 < 0

def is_rect_intersecting(newrect, existrects):
    """Return True when any segment of ``newrect`` crosses any segment of ``existrects``.

    Both arguments are single four-corner rectangles. Segments are formed from
    every pair of corners, so the two diagonals are tested along with the edges.
    """
    corner_pairs = [(i, j) for i in range(4) for j in range(i + 1, 4)]
    for i, j in corner_pairs:
        new_segment = [newrect[i], newrect[j]]
        for ii, jj in corner_pairs:
            if is_cross_lines(new_segment, [existrects[ii], existrects[jj]]):
                return True
    return False



In [None]:
# Load the text lines and shuffle them; the generator is re-seeded first so the
# order does not depend on random draws made by earlier cells.
# select_str() removes lines from the front of this list as images are built.
wikidata = read_wikidata(wiki_maxnum)
np.random.seed(seed)
np.random.shuffle(wikidata)

# Per-font factor applied to the estimated height of a text block in
# calc_strarea(); keyed by the font names listed in `jpfonts`.
fontspace = {'HGSoeiPresenceEB':1, 'MS Mincho':1, 'HGMaruGothicMPRO':1,
            'Microsoft JhengHei':1.1, 'Microsoft YaHei':1.12, 'BIZ UDGothic':1.01,
            'HGGyoshotai':0.93, 'HGMinchoE':1, 'MS Gothic':0.97,
            'UD Digi Kyokasho N-B':0.95, 'Yu Gothic':0.97, 'HGGothicE':1,
            'BIZ UDMincho':1, 'Yu Mincho':1.02, 'SimSun':0.97, #15
            'HGGothicM':0.96, 'HGMinchoB':1.02, 'Meiryo':1.02,
            'HGSoeiKakugothicUB':0.99, 'HGKyokashotai':0.89, 'UD Digi Kyokasho N-R':0.93,
            'HGSoeiKakupoptai':1.02, 'HGSeikaishotaiPRO':0.98}

def trans_answer_s(s):
    """Return ``s`` with the symbols listed below replaced by ASCII characters.

    Characters that are not in the table are kept unchanged.
    NOTE(review): the table maps the long-vowel mark "ー" to "-", which also
    rewrites katakana words; confirm that this is intended.
    """
    dic = {
        "．":".", "，":",", "？":"?", "！":"!", "：":":",
        "；":";", "／":"/", "＜":"<", "＞":">", "＝":"=",
        "＋":"+", "ー":"-", "＆":"&", "％":"%", "＃":"#",
        "￥":"\\", "（":"(", "）":")", "［":"[", "］":"]",
    }
    return "".join(dic.get(char, char) for char in s)

def draw_str(ax, str, facecolor, saveflag):
    """Draw every string of ``str`` on ``ax`` at a random, non-overlapping place.

    For each string a readable colour, a random font and a font size are chosen
    and ``calc_strarea`` searches for a placement. Strings for which no
    placement is found are left out, and so are empty strings.

    Args:
        ax: Matplotlib axes to draw on.
        str: Sequence of strings to draw.
        facecolor: Background colour as ``#rrggbb``; text colours are chosen
            against it.
        saveflag: Passed on to ``calc_strarea``.

    Returns:
        ``(strlabel_list, strdetail_list)``: one label dict (id, answer string,
        corner points) and one detail dict (id, rotation, font size, font name,
        colour, characters per line) per drawn string. ``id`` is the position
        of the string in ``str``.
    """
    rects = []
    strlabel_list = []
    strdetail_list = []
    for i, s in enumerate(str):
        if not s:
            # An empty string has nothing to render and no sensible wrap width
            # or box; laying it out used to abort the whole image.
            continue
        strcolor = generate_str_colorcode(facecolor)
        font = np.random.choice(jpfonts)
        fontsize = set_fontsize(len(s))
        endstate, info_strarea = calc_strarea(ax, s, rects, font, fontsize, saveflag)
        x, y, rotation, charmaxnum_horizon, points = info_strarea
        if endstate == -1:
            continue
        rects.append(points)
        # The drawn text keeps the original characters; the label stores the
        # version normalised by trans_answer_s().
        answer_s = trans_answer_s(s)
        dic_label = {"id":i, "string": answer_s, "points": points}
        dic_detail = {"id":i, "rotation": rotation, "fontsize": fontsize, "fontname": font, "string_color": strcolor, "character_maxnum_horizon": charmaxnum_horizon}
        strlabel_list.append(dic_label)
        strdetail_list.append(dic_detail)
        ax.text(x, y, textwrap.fill(s, width=charmaxnum_horizon), rotation=rotation, fontsize=fontsize, fontname=font, color=strcolor,  horizontalalignment='left', verticalalignment='bottom')
    return strlabel_list, strdetail_list

def draw_backimg(ax, img):
    """Draw ``img`` centred on ``ax`` at a random size.

    The target width is drawn from 350 to 599; the height follows from the
    aspect ratio of ``img``. The resized picture is centred within the
    ``width`` x ``height`` drawing area.
    """
    target_w = np.random.randint(350, 600)
    target_h = int(target_w * (img.height / img.width))
    resized = img.resize((target_w, target_h))
    left = int((width - target_w) / 2.0)
    bottom = int((height - target_h) / 2.0)
    ax.imshow(resized, extent=(left, left + target_w, bottom, bottom + target_h))

def draw_sample_img(img, str, imgout_filename, saveflag):
    """Compose one sample image and collect its label data.

    A figure with a random background colour is created, ``img`` is drawn as the
    backdrop and the strings of ``str`` are placed on top. With ``saveflag``
    set, the figure is written to ``imgout_filename``, the annotation file is
    written and the figure is shown.

    The figure is saved before it is shown, and this function always closes the
    figure it created, also when drawing raises, so repeated calls do not pile
    up open figures. Other open figures are left alone.

    Args:
        img: Background picture (needs ``width``, ``height`` and ``resize``).
        str: Strings to draw.
        imgout_filename: ``Path`` of the image file to write.
        saveflag: Whether to save and show the result.

    Returns:
        Dict with the keys ``image_name``, ``string_labels``,
        ``string_details`` and ``answer``.
    """
    fig, ax = plt.subplots(figsize=(width / 100.0, height / 100.0))
    try:
        facecolor = generate_random_colorcode()
        fig.set_facecolor(facecolor)
        ax.set_facecolor(facecolor)
        labeldata = {"image_name": imgout_filename.name}
        draw_backimg(ax, img)
        strlabel_list, strdetail_list = draw_str(ax, str, facecolor, saveflag)
        # make_answertext() sorts strlabel_list in place, so the labels stored
        # below are already in answer order.
        answer = make_answertext(imgout_filename.name, strlabel_list)
        labeldata["string_labels"] = strlabel_list
        labeldata["string_details"] = strdetail_list
        labeldata["answer"] = answer

        # Frame drawn just outside the 640 * 480 drawing area.
        margin_img = 2
        draw_polygon(ax, [(0-margin_img, 0-margin_img), (width+margin_img, 0-margin_img), (width+margin_img, height+margin_img), (0-margin_img, height+margin_img)], 'b')

        ax.set_xlim(0, width)
        ax.set_ylim(0, height)
        ax.axis('off')

        if saveflag:
            fig.savefig(imgout_filename, dpi=300, edgecolor="none", bbox_inches='tight', pad_inches=0)
            make_annotationfile(imgout_filename, labeldata["string_labels"])
            plt.show()
    finally:
        plt.close(fig)
    return labeldata

def generate_sample_img(n, saveflag):
    """Generate sample number ``n`` and return its label data.

    Text is taken from ``select_str`` first, then a background picture is
    fetched (post indices 13347 and 20003 are excluded), and both are handed to
    ``draw_sample_img``. The image path is the image folder plus the file-name
    prefix and ``n`` padded to ten digits.
    """
    target_path = img_dir_path / f'{imgfn_prefix}{n:010}.png'
    texts = select_str()
    background = fetch_image(blacklist=[13347, 20003], saveflag=saveflag)
    return draw_sample_img(background, texts, target_path, saveflag)

def calc_yolo_cxywh(points):
    """Format the bounding box of ``points`` as a ``class cx cy w h`` label line.

    The y axis is flipped (``height - y``) so that it runs downwards, and the
    centre and size are divided by ``width`` / ``height``. The class id is
    always 0.
    """
    xs = [p[0] for p in points]
    ys = [p[1] for p in points]
    left, right = min(xs), max(xs)
    top = height - max(ys)
    bottom = height - min(ys)
    w = abs(right - left)
    h = abs(bottom - top)
    cx = left + w / 2
    cy = top + h / 2
    return f"0 {cx/width} {cy/height} {w/width} {h/height}"

def make_annotationfile(fn, strs):
    """Write one ``calc_yolo_cxywh`` line per label to the label folder.

    The file name is the name of ``fn`` with its last four characters replaced
    by ``.txt``.
    """
    label_path = lbl_dir_path / (fn.name[:-4] + ".txt")
    with open(label_path, "w") as f:
        f.writelines(calc_yolo_cxywh(label["points"]) + "\n" for label in strs)

def main():
    """Generate 150 samples and write the collected label data as one JSON file.

    The output folders are created first if they are missing. Every index is
    processed so the random stream advances the same way regardless of the
    range, but only indices in ``[startno, endno)`` are rendered and recorded.
    A sample that fails is reported together with its exception and skipped;
    an interrupt (``KeyboardInterrupt``) is no longer swallowed and stops the
    run.
    """
    for out_dir in (img_dir_path, lbl_dir_path, jsn_dir_path):
        out_dir.mkdir(parents=True, exist_ok=True)

    lblout_filename = jsn_dir_path / f'{jsnfn_prefix}.json'
    lblout_data = []
    for i in range(150):
        saveflag = (i >= startno and i < endno)
        try:
            d = generate_sample_img(i, saveflag)
        except Exception as exc:
            print(i, "error --------------------------------------------------------------------------------------------------------", repr(exc))
            continue
        if saveflag:
            lblout_data.append(d)
    with open(lblout_filename, 'w', encoding='utf-8') as f:
        json.dump(lblout_data, f, ensure_ascii=False, indent=4)

# Run the whole generation pipeline when this cell is executed.
main()
# os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'