In [None]:
from __future__ import annotations

import json
import os
import random
from typing import Iterable

from tqdm import tqdm

from PIL import Image

import config
from ipywidgets_helper import render_images
from kvg import Kvg
from utility import pathstr, char2code, LImageCompositionResult, compose_L_images

In [None]:
class Composer:
    """Radical-label combinations loaded from a radical clustering directory.

    The constructor reads ``label2radicalname2kvgids.json`` and
    ``decompositions.json`` from that directory and counts how often each set
    of labels occurs among the decompositions.
    """

    # Indexed by label; each entry maps a radical name to the kvg ids listed under it.
    label2radicalname2kvgids: list[dict[str, list[str]]]
    # Sorted tuple of distinct labels -> number of decompositions with exactly those labels.
    combinations: dict[tuple[int, ...], int]

    def __init__(self, radical_clustering_path: str, *, log=False):
        """Load the clustering files and count label combinations.

        Args:
            radical_clustering_path: Directory holding
                ``label2radicalname2kvgids.json`` and ``decompositions.json``.
            log: When true, print the number of combinations kept/removed and
                the longest combination length.

        Raises:
            KeyError: If a decomposition references a kvg id that does not
                appear in ``label2radicalname2kvgids.json``.
        """
        with open(pathstr(radical_clustering_path, "label2radicalname2kvgids.json")) as f:
            self.label2radicalname2kvgids = json.load(f)

        # Reverse index: kvg id -> label. If an id is listed more than once,
        # the last occurrence wins.
        kvgid2label: dict[str, int] = {
            kvgid: label
            for label, radicalname2kvgids in enumerate(self.label2radicalname2kvgids)
            for kvgids in radicalname2kvgids.values()
            for kvgid in kvgids
        }

        with open(pathstr(radical_clustering_path, "decompositions.json")) as f:
            decompositions = json.load(f)

        self.combinations = {}
        broken = 0
        for childkvgids in decompositions.values():
            combination = tuple(sorted({kvgid2label[c] for c in childkvgids}))
            # A decomposition in which two children share a label collapses to
            # fewer labels than children; such decompositions are dropped.
            if len(combination) != len(childkvgids):
                broken += 1
                continue

            self.combinations[combination] = self.combinations.get(combination, 0) + 1

        if log:
            print(f"label combinations: {len(self.combinations)} ({broken} removed)")
            # default=0 keeps logging from raising ValueError when every
            # decomposition was dropped (or the decompositions file is empty).
            longest = max(map(len, self.combinations), default=0)
            print(f"max of combination length: {longest}")

    @staticmethod
    def generate(
        kvgids: Iterable[str],
        image_size: int,
        padding: int,
        stroke_width: int,
    ) -> LImageCompositionResult:
        """Open the pre-rendered PNG of each kvg id and compose them.

        Each image is read from ``config.output_main_kvg_path(charcode)``,
        where ``charcode`` is the part of the kvg id before the first ``-``,
        using the file name
        ``"{image_size}x,pad={padding},sw={stroke_width} {kvgid}.png"``.

        Returns:
            Whatever ``compose_L_images`` returns for the opened images.
        """
        images: list[Image.Image] = []

        for kvgid in kvgids:
            charcode = kvgid.split("-")[0]
            directory_path = config.output_main_kvg_path(charcode)

            images.append(Image.open(pathstr(
                directory_path,
                f"{image_size}x,pad={padding},sw={stroke_width} {kvgid}.png",
            )))

        return compose_L_images(images)

    def count_candidates(self) -> list[int]:
        """Return one count per combination, in ``self.combinations`` order.

        Each count is the product, over the labels of the combination, of the
        number of radical names registered under that label (radical names,
        not individual kvg ids).
        """
        counts = []
        for combination in self.combinations:
            n_candidates = 1
            for label in combination:
                n_candidates *= len(self.label2radicalname2kvgids[label])
            counts.append(n_candidates)
        return counts


def test(image_size, padding, stroke_width, n_images, pb_threshold):
    """Render ``n_images`` randomly composed images for visual inspection.

    A label combination is drawn uniformly, then for every label a radical
    name group and a kvg id inside it are drawn uniformly. A composition is
    kept only when its ``n_blended`` does not exceed
    ``image_size ** 2 * pb_threshold``; otherwise another one is drawn.

    Returns the value of ``render_images`` for the accepted images, each
    captioned with its ``n_blended``.
    """
    assert 0 <= pb_threshold

    composer = Composer(config.output_radical_clustering_path("edu+jis_l1,2", 384, 16, 2, 2), log=True)
    print(f"candidates: {sum(composer.count_candidates())}")

    max_blended = (image_size ** 2) * pb_threshold

    accepted = []
    attempts = 0
    while len(accepted) < n_images:
        attempts += 1

        labels = random.choice(tuple(composer.combinations.keys()))
        picked_kvgids = []
        for label in labels:
            kvgid_groups = tuple(composer.label2radicalname2kvgids[label].values())
            # First pick a radical-name group, then a kvg id inside it.
            picked_kvgids.append(random.choice(random.choice(kvgid_groups)))

        composed = Composer.generate(picked_kvgids, image_size, padding, stroke_width)
        if composed.n_blended <= max_blended:
            accepted.append((composed.image, f"{composed.n_blended}"))

    print(f"tried: {attempts}")

    return render_images(accepted, columns=8)


# Visual check: render 64 random compositions. With pb_threshold=0 any
# composition whose n_blended is greater than 0 is rejected and re-drawn.
test(image_size=64, padding=16, stroke_width=2, n_images=64, pb_threshold=0)

In [None]:
def get_used_radicalnames(etlcdb_path, etlcdb_name) -> set[str]:
    """Collect the radical names occurring in the characters of a dataset.

    Reads ``{etlcdb_name}.json`` under ``etlcdb_path``; for every record, looks
    up the Kvg JSON of its ``"Character"`` and walks the whole Kvg tree.
    Characters without a Kvg JSON file are skipped.

    Returns:
        The names of all named nodes, with ``_{part}`` appended when the node
        has a ``part``.
    """
    found: set[str] = set()

    with open(pathstr(etlcdb_path, f"{etlcdb_name}.json")) as f:
        records = json.load(f)

    for record in records:
        character = record["Character"]  # e.g. "あ"
        assert isinstance(character, str)

        code = char2code(character)
        kvg_json_path = pathstr(config.output_main_kvg_path(code), f"{code}.json")
        if not os.path.isfile(kvg_json_path):
            continue

        with open(kvg_json_path) as f:
            root = Kvg.from_dict(json.load(f))

        # Depth-first walk over the tree using an explicit stack.
        pending = [root]
        while pending:
            node = pending.pop()
            pending.extend(node.children)

            if node.name is not None:
                found.add(node.name if node.part is None else f"{node.name}_{node.part}")

    return found

In [None]:
def save_random(
    composition_name: str,
    composer: Composer,

    n_compositions: int,
    radicalname_filter: set[str],

    image_size: int,
    padding: int,
    stroke_width: int,
    n_blended_threshold: int,
):
    """Randomly generate compositions and save them as JSON.

    If the output file for ``composition_name`` already exists, its
    compositions are loaded first and generation continues from there until
    ``n_compositions`` distinct compositions are available.

    Args:
        composition_name: Name passed to ``config.output_composition_path``.
        composer: Source of label combinations and radical kvg ids.
        n_compositions: Number of distinct compositions to collect.
        radicalname_filter: A candidate is rejected unless every one of its
            radical names is in this set.
        image_size, padding, stroke_width: Forwarded to ``composer.generate``
            and recorded in the output file.
        n_blended_threshold: A candidate is rejected when its ``n_blended``
            exceeds this value.

    The output is a JSON object holding the four settings above and
    ``"compositions"``, a list of sorted kvg id lists.
    """
    assert 0 <= n_blended_threshold

    print(f"candidates: {sum(composer.count_candidates())}")

    output_path = config.output_composition_path(composition_name)

    compositions: set[tuple[str, ...]] = set()
    if os.path.exists(output_path):
        with open(output_path) as f:
            saved = json.load(f)
        # The file written below is an object with a "compositions" list.
        # Iterating the object itself would only yield its key names. A bare
        # top-level list is tolerated as well.
        saved_compositions = saved["compositions"] if isinstance(saved, dict) else saved
        # JSON arrays load as lists, which are unhashable; store them as tuples.
        compositions.update(tuple(item) for item in saved_compositions)

    # Flatten each label's mapping into (radical name, kvg id) pairs so a
    # single uniform draw picks a kvg id together with its radical name.
    radicalname_and_kvgid_list_for_label: list[list[tuple[str, str]]] = [
        [
            (radicalname, kvgid)
            for radicalname, kvgids in radicalname2kvgids.items()
            for kvgid in kvgids
        ]
        for radicalname2kvgids in composer.label2radicalname2kvgids
    ]

    # Loop-invariant: combinations are drawn in proportion to their counts.
    population = tuple(composer.combinations.keys())
    weights = tuple(composer.combinations.values())

    # Start the bar at the number of resumed compositions so it can reach total.
    with tqdm(total=n_compositions, initial=len(compositions)) as pbar:
        tried = 0
        while len(compositions) < n_compositions:
            tried += 1
            pbar.set_postfix(tried=tried)

            combination = random.choices(population, weights=weights)[0]

            radicalnames = []
            kvgids = []
            for label in combination:
                radicalname, kvgid = random.choice(radicalname_and_kvgid_list_for_label[label])
                radicalnames.append(radicalname)
                kvgids.append(kvgid)

            if any(radicalname not in radicalname_filter for radicalname in radicalnames):
                continue

            result = composer.generate(kvgids, image_size, padding, stroke_width)
            if n_blended_threshold < result.n_blended:
                continue

            # Sorted so the same ids in a different order count as a duplicate.
            composition = tuple(sorted(kvgids))
            if composition not in compositions:
                compositions.add(composition)
                pbar.update(1)

    with open(output_path, "w") as f:
        json.dump({
            "image_size": image_size,
            "padding": padding,
            "stroke_width": stroke_width,
            "n_blended_threshold": n_blended_threshold,
            # Sorted so the file (and indices into it) is deterministic.
            "compositions": sorted(compositions),
        }, f)


# save_random(
#     composition_name="ETL8G(imsize=64,pad=4,sw=2,bt=0)",
#     composer=Composer(config.output_radical_clustering_path("edu+jis_l1,2(new_decomp)", 384, 16, 2, 2), log=True),

#     n_compositions=200000,
#     radicalname_filter=get_used_radicalnames("~/datadisk/dataset/etlcdb", "ETL8G"),

#     image_size=64,
#     padding=4,
#     stroke_width=2,
#     n_blended_threshold=0,
# )

In [None]:
def render_saved_images(
    composition_name: str,

    n_images: int,
    image_size: int,
    padding: int,
    stroke_width: int,
):
    """Render a random sample of the compositions saved under ``composition_name``.

    Reads the ``"compositions"`` list from the file at
    ``config.output_composition_path(composition_name)``, regenerates the
    sampled compositions with ``Composer.generate`` and passes them to
    ``render_images``, each captioned with its index in the saved list.

    At most ``n_images`` images are rendered; when fewer compositions are
    saved, all of them are rendered (in random order).
    """
    with open(config.output_composition_path(composition_name)) as f:
        info = json.load(f)
    compositions = info["compositions"]

    # random.sample raises ValueError when asked for more items than exist,
    # so never request more than are available (or fewer than zero).
    n_samples = max(0, min(n_images, len(compositions)))

    images_to_render = []
    # Sample indices rather than building a tuple of every (index, composition).
    for i in random.sample(range(len(compositions)), n_samples):
        result = Composer.generate(compositions[i], image_size, padding, stroke_width)
        images_to_render.append((result.image, f"{i=}"))
    return render_images(images_to_render, columns=8)


# Preview 64 randomly chosen compositions from the saved composition file.
# NOTE: render_saved_images opens this file for reading, and the
# save_random(...) call that writes it is commented out in an earlier cell,
# so the file must already be on disk for this cell to run.
render_saved_images(
    composition_name="ETL8G(imsize=64,pad=4,sw=2,bt=0)",

    n_images=64,
    image_size=64,
    padding=4,
    stroke_width=2,
)