In [None]:
from __future__ import annotations

import json
import os
from dataclasses import dataclass
from typing import Optional

from tqdm import tqdm

import numpy as np
import bs4

import character_utility as charutil
from ipywidgets_helper import render_images
from utility import pathstr

In [None]:
@dataclass
class Kvg:
    """One node of a KanjiVG stroke-group tree.

    A node mirrors one ``<g>`` element of a KanjiVG SVG file: it stores the
    element's ``kvg:*`` attributes, the ``d`` attribute of every ``<path>``
    directly inside the element, and the directly nested ``<g>`` elements as
    child nodes.
    """

    kvgid: str  # element id with the leading "kvg:" stripped (the root of "標" is "06a19")
    name: Optional[str]  # value of the "kvg:element" attribute, None when absent
    part: Optional[str]  # value of the "kvg:part" attribute, None when absent
    position: Optional[str]  # value of the "kvg:position" attribute, None when absent
    svg: list[str]  # "d" attributes of the <path> elements directly under this group
    children: list["Kvg"]  # directly nested <g> groups, in document order

    @staticmethod
    def parse(char, *, log=False):
        """Read the KanjiVG file of ``char`` and build its ``Kvg`` tree.

        Args:
            char: A single character. Its code point, written as at least five
                lowercase hex digits, selects ``kanjivg/kanji/<code>.svg``.
            log: When true, print the code, the selected root element and one
                indented line per parsed group.

        Returns:
            The ``Kvg`` node for the ``<g id="kvg:<code>">`` element.

        Raises:
            TypeError: If ``char`` is not a single character (raised by ``ord``).
            OSError: If the SVG file cannot be opened.
            IndexError: If the file has no ``<g>`` element with the expected id.
        """
        # Same digits as format(ord(char), "#07x") with the "0x" prefix removed.
        code = format(ord(char), "05x")
        if log: print(f"code: {code}")

        with open(pathstr("kanjivg", "kanji", f"{code}.svg"), encoding="utf-8") as f:
            document = bs4.BeautifulSoup(f, features="xml")

        root_element = document.select(f"g[id='kvg:{code}']")[0]

        if log: print(root_element)

        def build(element, level=0):
            # Only direct children are collected at each level; deeper groups
            # are reached through the recursion below.
            kvgid = element.get("id")[len("kvg:"):]
            name = element.get("kvg:element")
            part = element.get("kvg:part")
            position = element.get("kvg:position")
            svg = [path.attrs["d"] for path in element.find_all("path", recursive=False)]
            children = [build(c, level + 1) for c in element.find_all("g", recursive=False)]

            # Logged after the children were built, so a parent's line appears
            # below the lines of its descendants.
            if log: print("  " * level + (f"{name}" if part is None else f"{name} ({part})"))

            return Kvg(kvgid=kvgid, name=name, part=part, position=position, svg=svg, children=children)

        return build(root_element)

    @staticmethod
    def from_dict(dct):
        """Rebuild a ``Kvg`` tree from the mapping produced by ``to_dict``.

        ``to_dict`` converts nested children to plain dicts, so they are turned
        back into ``Kvg`` nodes recursively here. Children that already are
        ``Kvg`` instances are kept as they are. ``dct`` itself is not modified.

        Raises:
            TypeError: If ``dct`` lacks a field or has an unexpected key.
        """
        fields = dict(dct)
        if "children" in fields:
            fields["children"] = [
                child if isinstance(child, Kvg) else Kvg.from_dict(child)
                for child in fields["children"]
            ]
        return Kvg(**fields)

    def to_dict(self):
        """Return the tree as nested plain dicts and lists (JSON-serialisable)."""
        from dataclasses import asdict
        return asdict(self)

    @property
    def charcode(self):
        """The part of ``kvgid`` before the first ``-``."""
        return self.kvgid.split("-")[0]

    @property
    def directory_path(self):
        """Output directory: ``output/<charcode with last two digits as "00">/<charcode>``."""
        return pathstr("output", self.charcode[:-2] + "00", self.charcode)

    def get_image_path(self, image_size: int, padding: int, stroke_width: int):
        """Return the PNG path of this node for the given rendering settings."""
        return pathstr(
            self.directory_path,
            f"{image_size}x,pad={padding},sw={stroke_width} {self.kvgid}.png",
        )

    def draw(self, image_size, padding, stroke_width):
        """Render this node and all of its descendants as grayscale images.

        The node's own paths are written to a temporary SVG file, rasterised
        with the external ``convert`` command (presumably ImageMagick) and
        pasted onto a black ``image_size`` x ``image_size`` canvas at offset
        ``(padding, padding)``. The composite image of every direct child is
        then merged in with a bitwise OR.

        Args:
            image_size: Edge length of the square output images, in pixels.
            padding: Border kept free on each side, in pixels.
            stroke_width: Stroke width; it is scaled by ``109 / drawable size``
                so that it is roughly this many pixels after the resize.

        Returns:
            A list of ``(Kvg, PIL.Image.Image)`` tuples in mode ``"L"``: this
            node first, followed by the lists of its children in order.

        Raises:
            ValueError: If ``image_size - 2 * padding`` is not positive.
            subprocess.CalledProcessError: If ``convert`` exits with an error.
            Exception: If the rasterised PNG has an unsupported mode.
        """
        import subprocess
        import tempfile

        from PIL import Image

        canvas_size = image_size - padding * 2
        if canvas_size <= 0:
            raise ValueError(
                f"image_size ({image_size}) must be larger than twice the padding ({padding})"
            )

        image = Image.new("L", (image_size, image_size), 0)

        if self.svg:
            svg_paths = [f'<path stroke="white" stroke-width="{stroke_width * 109 / canvas_size}" fill="none" stroke-linecap="round" stroke-linejoin="round" d="{path}"/>' for path in self.svg]
            svg = (
                '<?xml version="1.0" encoding="UTF-8"?>'
                '<svg xmlns="http://www.w3.org/2000/svg" width="109" height="109" viewBox="0 0 109 109">'
                + "".join(svg_paths)
                + "</svg>"
            )

            # The temporary directory is removed even when conversion or
            # decoding fails, so no stray files are left behind.
            with tempfile.TemporaryDirectory() as tmp_dir:
                svg_filename = os.path.join(tmp_dir, "strokes.svg")
                png_filename = os.path.join(tmp_dir, "strokes.png")

                # The XML declaration promises UTF-8, so write exactly that.
                with open(svg_filename, "w", encoding="utf-8") as f:
                    print(svg, file=f)

                # check=True reports a failed conversion right here instead of
                # as a missing-file error when the PNG is opened.
                subprocess.run([
                    "convert",
                    "-background", "black",
                    "-resize", f"{canvas_size}x{canvas_size}",
                    svg_filename, png_filename,
                ], check=True)

                with Image.open(png_filename) as png:
                    if png.mode == "L":
                        strokes = png.copy()  # detach the pixels from the file
                    elif png.mode == "RGB" or png.mode == "RGBA":
                        strokes = png.convert("L")
                    elif png.mode == "I":
                        # Equivalent of the former ImageMath "image >> 8"
                        # followed by convert("L"), which clips to 0..255.
                        # Presumably the samples are 16-bit - TODO confirm.
                        shifted = np.asarray(png) >> 8
                        strokes = Image.fromarray(np.clip(shifted, 0, 255).astype(np.uint8))
                    else:
                        raise Exception(f"unknown mode: {png.mode} in {self.kvgid}")
            assert strokes.mode == "L"

            image.paste(strokes, (padding, padding))

        images = []
        for child in self.children:
            child_images = child.draw(image_size=image_size, padding=padding, stroke_width=stroke_width)
            images += child_images
            # child_images[0] is the child's own composite. NumPy's bitwise OR
            # on the 8-bit pixels replaces ImageMath.eval("image | child"),
            # which Pillow has deprecated.
            image = Image.fromarray(np.asarray(image) | np.asarray(child_images[0][1]))

        images.insert(0, (self, image))

        return images


def test():
    """Smoke test: parse "標" with logging, print its JSON form, render its images.

    Returns whatever ``render_images`` returns for the images drawn at
    64 px with 4 px padding and a stroke width of 2.
    """
    root = Kvg.parse("標", log=True)
    print()

    serialized = json.dumps(root.to_dict(), ensure_ascii=False, indent=2)
    print(serialized)
    print()

    rendered = root.draw(image_size=64, padding=4, stroke_width=2)

    # draw() yields (node, image) pairs; only the images are displayed.
    pictures = [pair[1] for pair in rendered]
    return render_images(pictures)


test()

In [None]:
def save_info(chars):
    """Parse every character in ``chars`` and store its tree as a JSON file.

    Each tree is written to ``<directory_path>/<kvgid>.json``; the directory is
    created when it does not exist yet. ``chars`` must be a ``list``.
    """
    assert isinstance(chars, list)

    progress = tqdm(chars)
    for character in progress:
        tree = Kvg.parse(character)
        # Show which character is being processed next to the progress bar.
        progress.set_postfix(kvgid=tree.kvgid, name=tree.name)

        os.makedirs(tree.directory_path, exist_ok=True)
        json_path = pathstr(tree.directory_path, f"{tree.kvgid}.json")
        with open(json_path, "w") as out:
            out.write(json.dumps(tree.to_dict()))


# save_info(charutil.kanjis.all())

In [None]:
def save_images(chars, image_size, padding, stroke_width):
    """Render every character in ``chars`` and save one PNG per tree node.

    Each image produced by ``Kvg.draw`` is stored at the node's
    ``get_image_path`` for the same rendering settings; the node's directory
    is created when it does not exist yet. ``chars`` must be a ``list``.
    """
    assert isinstance(chars, list)

    render_options = dict(image_size=image_size, padding=padding, stroke_width=stroke_width)

    progress = tqdm(chars)
    for character in progress:
        root = Kvg.parse(character)
        # Show which character is being processed next to the progress bar.
        progress.set_postfix(kvgid=root.kvgid, name=root.name)

        for node, picture in root.draw(**render_options):
            os.makedirs(node.directory_path, exist_ok=True)
            picture.save(node.get_image_path(**render_options))


# save_images(charutil.kanjis.all(), image_size=64, padding=4, stroke_width=2)

In [None]:
# Count the regular files named *.png below ./output (one `find` output line per file).
!find ./output -type f -name "*.png" | wc -l