In [1]:
from time import time
import re
from pathlib import Path

import pandas as pd 

import ollama

from utils import *

In [2]:
# Name of the model handed to ollama.generate (alternative noted by the author: "llama3.1").
model_name = 'qwen2.5'
# Captures the body of a ```json ... ``` fenced block, without the whitespace
# just inside the fences; it is applied with re.DOTALL so the body may span lines.
pattern = r"```json\s*(.*?)\s*```"

In [3]:
# Prompt template sent to the model for each character. `{zi}` is the only
# placeholder and is filled with str.format, so any literal brace added to this
# text later must be doubled ({{ or }}).
# NOTE(review): the response is parsed by looking for a ```json fenced block,
# but nothing below asks the model to fence its answer - confirm the chosen
# model does so by default, otherwise every response is reported as
# "No JSON extracted".
prompt_zi = """
You are an expert in Chinese language, 

can you generate a holistic view on this chinese character  {zi}
in terms of the following attributes:
含义
字形
读音 
字源
常用词组
成语
例句
短故事
诗词
图片
音频 
视频 
电影
参考资料
有趣网站

(1) give the answer in Chinese 
(2) format the answer in valid json and ensure quotes are properly escaped (specifically avoid double-quotes nested in double-quotes)
(3) whenever possible, give 5 or more examples for the following attributes:

常用词组
成语
例句
短故事
诗词
图片
音频 
视频 
电影
参考资料
有趣网站

"""

In [4]:
# Load the character table and convert it to a list with one dict per row;
# the generation loop reads the 'zi' and 'fib_num' keys of each row.
df = pd.read_csv("char360_fib_num.csv")
zi_fib_nums = df.to_dict(orient="records")

In [None]:
# For every character: query the model, pull the fenced JSON out of the reply,
# and save it as "<zero-padded fib_num>-<zi>-1.json" under ./llm_models/<model_name>/.
# Failures for one character are reported and skipped so the batch keeps going.

# Create the output directory once, before any generation starts. Opening a
# file for writing does not create missing parent directories, so without this
# the first write would raise only after a multi-minute model call had finished.
out_dir = Path(f"./llm_models/{model_name}")
out_dir.mkdir(parents=True, exist_ok=True)

for xx in zi_fib_nums:
    zi, fib_num = xx['zi'], xx['fib_num']
    print(f"Processing {zi} ...")
    ts_1 = time()
    try:
        resp = ollama.generate(model=model_name, prompt=prompt_zi.format(zi=zi))
    except Exception as e:
        # Best-effort batch: surface the cause instead of discarding it, then move on.
        print(f"[ERROR] {zi} : Failed to call ollama.generate(): {e!r}")
        continue

    ts_2 = time()
    del_t = ts_2 - ts_1
    del_t_str = f"{del_t:.3f}"
    json_raw = resp.get('response', "")
    if not json_raw:
        print(f"[ERROR] No LLM response: {zi}")
        continue

    print(f"... Completed {zi} in {del_t_str} sec")

    # Only the text inside a ```json fence is kept; DOTALL lets it span lines.
    match = re.search(pattern, json_raw, re.DOTALL)
    if not match:
        print(f"[ERROR] No JSON extracted: {zi}")
        continue

    json_string = match.group(1)
    if not json_string:
        # A fence was found but it was empty.
        print(f"[WARN] No JSON extracted: {zi}")
        continue

    # Build the destination only once there is something to write.
    fib_num_str = str(fib_num).zfill(4)
    fp = out_dir / f"{fib_num_str}-{zi}-1.json"
    # The extracted text is saved as-is; it is not parsed or validated here.
    fp.write_text(json_string, encoding="utf-8")

Processing 气 ...
... Completed 气 in 155.168 sec
Processing 炁 ...
... Completed 炁 in 112.290 sec
Processing 空 ...
... Completed 空 in 93.135 sec
Processing 心 ...
... Completed 心 in 101.023 sec
Processing 日 ...
... Completed 日 in 183.890 sec
Processing 月 ...
... Completed 月 in 94.961 sec
Processing 阳 ...
... Completed 阳 in 124.519 sec
Processing 阴 ...
... Completed 阴 in 147.412 sec
Processing 人 ...
... Completed 人 in 70.126 sec
Processing 儿 ...
