In [51]:
import requests
import json
from time import time, sleep 
from openai import OpenAI 
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Replace with your actual DeepSeek API endpoint and API key
DEEPSEEK_API_URL = "https://api.deepseek.com"
DEEPSEEK_API_KEY = os.getenv('DEEPSEEK_API_KEY')

In [2]:
deepseek_client = OpenAI(api_key=DEEPSEEK_API_KEY, base_url=DEEPSEEK_API_URL)

In [None]:
def generate_custom_prompt(character):
    """
    Generates a custom prompt for the DeepSeek API based on your template.

    Args:
        character (str): The Chinese character to query.

    Returns:
        str: A formatted prompt for the API.
    """
    prompt = f"""
    You are an expert in Chinese language. 
    Generate a holistic view on the Chinese character '{character}' 
    in terms of the following attributes:

    1. 含义 (meaning): Explain its meaning.
    2. 字形 (character structure): Describe its structure.
    3. 读音 (pronunciation): Provide its pronunciation.
    4. 字源 (etymology): Explain its origin.
    5. 含此字的字 (composite characters): List characters that contain '{character}'.
    6. 同音字 (homophone characters): List other characters that sound like '{character}'.
    7. 常用词组 (common phrases): Provide common phrases containing '{character}' (often 2-characters).
    8. 成语 (idioms): List idioms containing '{character}' (often 4-characters).
    9. 例句 (example sentences): Provide example short sentences or famous quotes using '{character}'.
    10. 短故事 (short stories): Share 1-2 short stories related to '{character}'.
    11. 诗词 (poetry): Include 1-2 famous poems containing and describing '{character}'.
    12. 图片 (images): Suggest simple images illustrating '{character}'.
    13. 音频 (audio): Suggest short audio clips illustrating '{character}'.
    14. 视频 (video): Suggest short video clips illustrating '{character}'.
    15. 电影 (movies): List famous movies related to '{character}' or movie titles containing '{character}'.
    16. 参考资料 (references): Provide additional popular reference materials.
    17. 有趣网站 (interesting websites): Suggest popular websites related to '{character}'.

    Format the output as a valid JSON object.
    """
    return prompt.strip()

In [28]:
def extract_json_string(response):
    # Find the start and end of the JSON string
    start = response.find("```json") + len("```json")
    end = response.find("```", start)

    # Extract the JSON string and strip any leading/trailing whitespace
    json_str = response[start:end].strip()

    return json_str

In [22]:
def call_deepseek_api(prompt, system_message = "You are a helpful assistant", client=None):
    if client is None:
        client = OpenAI(api_key=DEEPSEEK_API_KEY, base_url=DEEPSEEK_API_URL)
    ts_start = time()
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": prompt},
        ],
        stream=False
    )
    ts_stop = time()

    return response.choices[0].message.content, (ts_stop-ts_start)

In [53]:
def main(character = "子", test_flag=False, debug_flag=False):
    # Call the DeepSeek API
    if test_flag:
        prompt = "What is 10! in math"
    else: 
        prompt = generate_custom_prompt(character)
    if debug_flag: 
        print(f"Prompt:\n{prompt}\n")

    response, ts_delta = call_deepseek_api(prompt, client=deepseek_client)
    json_str = extract_json_string(response)
    json_obj = json.loads(json_str)
    json_out = json.dumps(json_obj, indent=2, ensure_ascii=False)
    file_json = f"deepseek/{character}-1.json"
    if debug_flag: 
        print(f"API call completed in {ts_delta} sec, \n Response (JSON):\n {json_out} \n written to {file_json}")
    else: 
        print(f"API call for {character} completed in {ts_delta} sec, \n output written to {file_json}")
    with open(file_json, "w", encoding="utf-8") as f:
        f.write(json_out)
            

# if __name__ == "__main__":
#     main()

In [7]:
main(character = "人")

Generated Prompt:
You are an expert in Chinese language. 
    Generate a holistic view on the Chinese character '人' 
    in terms of the following attributes:

    1. 含义 (meaning): Explain its meaning.
    2. 字形 (character structure): Describe its structure.
    3. 读音 (pronunciation): Provide its pronunciation.
    4. 字源 (etymology): Explain its origin.
    5. 含此字的字 (composite characters): List characters that contain '人'.
    6. 同音字 (homophone characters): List other characters that sound like '人'.
    7. 常用词组 (common phrases): Provide common phrases containing '人' (often 2-characters).
    8. 成语 (idioms): List idioms containing '人' (often 4-characters).
    9. 例句 (example sentences): Provide example short sentences or famous quotes using '人'.
    10. 短故事 (short stories): Share 1-2 short stories related to '人'.
    11. 诗词 (poetry): Include 1-2 famous poems containing and describing '人'.
    12. 图片 (images): Suggest simple images illustrating '人'.
    13. 音频 (audio): Suggest short audi

In [14]:
main(character = "子")

Generated Prompt:
You are an expert in Chinese language. 
    Generate a holistic view on the Chinese character '子' 
    in terms of the following attributes:

    1. 含义 (meaning): Explain its meaning.
    2. 字形 (character structure): Describe its structure.
    3. 读音 (pronunciation): Provide its pronunciation.
    4. 字源 (etymology): Explain its origin.
    5. 含此字的字 (composite characters): List characters that contain '子'.
    6. 同音字 (homophone characters): List other characters that sound like '子'.
    7. 常用词组 (common phrases): Provide common phrases containing '子' (often 2-characters).
    8. 成语 (idioms): List idioms containing '子' (often 4-characters).
    9. 例句 (example sentences): Provide example short sentences or famous quotes using '子'.
    10. 短故事 (short stories): Share 1-2 short stories related to '子'.
    11. 诗词 (poetry): Include 1-2 famous poems containing and describing '子'.
    12. 图片 (images): Suggest simple images illustrating '子'.
    13. 音频 (audio): Suggest short audi

In [16]:
main(test_flag=True)

Prompt:
What is 10! in math

API call completed in 6.957432270050049 sec, 
 Response (JSON):
 "To calculate \\(10!\\) (10 factorial) in mathematics, you multiply all positive integers from 1 to 10 together:\n\n\\[\n10! = 10 \\times 9 \\times 8 \\times 7 \\times 6 \\times 5 \\times 4 \\times 3 \\times 2 \\times 1\n\\]\n\nLet's compute it step by step:\n\n\\[\n\\begin{align*}\n10! &= 10 \\times 9 \\times 8 \\times 7 \\times 6 \\times 5 \\times 4 \\times 3 \\times 2 \\times 1 \\\\\n&= 90 \\times 8 \\times 7 \\times 6 \\times 5 \\times 4 \\times 3 \\times 2 \\times 1 \\\\\n&= 720 \\times 7 \\times 6 \\times 5 \\times 4 \\times 3 \\times 2 \\times 1 \\\\\n&= 5040 \\times 6 \\times 5 \\times 4 \\times 3 \\times 2 \\times 1 \\\\\n&= 30240 \\times 5 \\times 4 \\times 3 \\times 2 \\times 1 \\\\\n&= 151200 \\times 4 \\times 3 \\times 2 \\times 1 \\\\\n&= 604800 \\times 3 \\times 2 \\times 1 \\\\\n&= 1814400 \\times 2 \\times 1 \\\\\n&= 3628800 \\times 1 \\\\\n&= 3628800\n\\end{align*}\n\\]\n\nSo

In [44]:
main(character = "子")

API call for 子 completed in 26.212156534194946 sec, 
 output written to deepseek/子-1.json


In [46]:
import pandas as pd 

In [48]:
df = pd.read_csv("elemental_zi_no_radical.csv")

In [49]:
df.columns 

Index(['zi', 'strokes', 'fib_family', 'note'], dtype='object')

In [50]:
zis = df["zi"].to_list()

In [52]:
len(zis), zis[:10]

(312, ['一', '乙', '二', '人', '儿', '入', '八', '几', '刁', '刀'])

In [None]:
processed = []

In [None]:
for zi in zis[2:]:
    try:
        main(character = zi)
        processed.append(zi)
    except Exception as e: 
        print(f"Failed to process {zi}:\n str(e)")

API call for 二 completed in 15.694165706634521 sec, 
 output written to deepseek/二-1.json
API call for 人 completed in 26.498515129089355 sec, 
 output written to deepseek/人-1.json
API call for 儿 completed in 14.086272954940796 sec, 
 output written to deepseek/儿-1.json
API call for 入 completed in 15.613361597061157 sec, 
 output written to deepseek/入-1.json
API call for 八 completed in 16.348150730133057 sec, 
 output written to deepseek/八-1.json
API call for 几 completed in 15.041701555252075 sec, 
 output written to deepseek/几-1.json
API call for 刁 completed in 13.612976551055908 sec, 
 output written to deepseek/刁-1.json
API call for 刀 completed in 21.008795022964478 sec, 
 output written to deepseek/刀-1.json
API call for 乃 completed in 14.373852968215942 sec, 
 output written to deepseek/乃-1.json
API call for 力 completed in 22.555760383605957 sec, 
 output written to deepseek/力-1.json
API call for 丁 completed in 15.013811111450195 sec, 
 output written to deepseek/丁-1.json
API call f

In [55]:
processed

['一', '乙']

In [37]:
!cd 

C:\Users\p2p2l\projects\wgong\zistory\zinets\dev\notebook


In [45]:
24*60*60 / 30 

2880.0

In [39]:
x1 = "{\n  \"含义\": \"The character '子' (zǐ) has multiple meanings, including 'child', 'son', 'seed', 'a person', or a respectful title for a learned or virtuous person (e.g., Confucius is often referred to as '孔子'). It can also denote something small or a part of something larger.\",\n  \"字形\": \"The character '子' is a simple, two-stroke character. It consists of a horizontal stroke at the top and a vertical stroke that curves slightly to the right at the bottom, resembling a small child or a seed.\",\n  \"读音\": \"The pronunciation of '子' is 'zǐ' in Mandarin Chinese, with the third tone.\",\n  \"字源\": \"The character '子' originated from a pictograph of a small child or a seed. In ancient oracle bone script, it depicted a baby with arms outstretched. Over time, it evolved into its current simplified form.\",\n  \"含此字的字\": [\n    \"好 (hǎo - good)\",\n    \"字 (zì - character)\",\n    \"学 (xué - study)\",\n    \"孩 (hái - child)\",\n    \"孙 (sūn - grandson)\",\n    \"孔 (kǒng - hole, also part of Confucius's name)\"\n  ],\n  \"同音字\": [\n    \"紫 (zǐ - purple)\",\n    \"仔 (zǐ - young, small)\",\n    \"籽 (zǐ - seed)\",\n    \"姊 (zǐ - elder sister)\"\n  ],\n  \"常用词组\": [\n    \"孩子 (háizi - child)\",\n    \"儿子 (érzi - son)\",\n    \"女子 (nǚzǐ - woman)\",\n    \"男子 (nánzǐ - man)\",\n    \"种子 (zhǒngzi - seed)\"\n  ],\n  \"成语\": [\n    \"君子之交 (jūnzǐ zhī jiāo - friendship between gentlemen)\",\n    \"望子成龙 (wàng zǐ chéng lóng - to hope one's child becomes successful)\",\n    \"子虚乌有 (zǐxū wūyǒu - purely fictitious)\"\n  ],\n  \"例句\": [\n    \"孔子是中国古代著名的思想家。 (Kǒngzǐ shì Zhōngguó gǔdài zhùmíng de sīxiǎngjiā. - Confucius was a famous ancient Chinese philosopher.)\",\n    \"这个孩子很聪明。 (Zhège háizi hěn cōngmíng. - This child is very smart.)\"\n  ],\n  \"短故事\": [\n    \"In ancient China, '子' was often used as a respectful title for scholars. Confucius, known as '孔子', was one of the most revered figures in Chinese history. His teachings emphasized morality, justice, and sincerity, and his influence continues to this day.\",\n    \"In another story, '子' represents the beginning of life. A farmer once planted a seed ('子') in his field, nurturing it with care. Over time, it grew into a strong tree, symbolizing the potential within every small beginning.\"\n  ],\n  \"诗词\": [\n    \"《静夜思》 - 李白 (Jìng Yè Sī - Lǐ Bái):\\n床前明月光，疑是地上霜。\\n举头望明月，低头思故乡。\\n(The poem reflects on the moon and homesickness, with '子' not explicitly mentioned but often associated with themes of family and home.)\",\n    \"《游子吟》 - 孟郊 (Yóu Zǐ Yín - Mèng Jiāo):\\n慈母手中线，游子身上衣。\\n临行密密缝，意恐迟迟归。\\n(This poem describes a mother's love for her traveling son, using '子' to represent the child.)\"\n  ],\n  \"图片\": [\n    \"An image of a small child with outstretched arms, representing the original pictograph of '子'.\",\n    \"A seed sprouting from the ground, symbolizing the meaning of '子' as a seed or beginning.\"\n  ],\n  \"音频\": [\n    \"A short audio clip of the pronunciation 'zǐ' in Mandarin.\",\n    \"A recording of a Chinese teacher explaining the meaning and usage of '子'.\"\n  ],\n  \"视频\": [\n    \"A short animation showing the evolution of the character '子' from ancient script to modern form.\",\n    \"A video of a Chinese calligrapher writing the character '子' with a brush.\"\n  ],\n  \"电影\": [\n    \"《孔子》 (Kǒngzǐ - Confucius): A biographical film about the life of Confucius.\",\n    \"《鬼子来了》 (Guǐzi Láile - Devils on the Doorstep): A historical drama film.\"\n  ],\n  \"参考资料\": [\n    \"《说文解字》 (Shuōwén Jiězì - Explaining Simple and Analyzing Compound Characters): An ancient Chinese dictionary.\",\n    \"《现代汉语词典》 (Xiàndài Hànyǔ Cídiǎn - Modern Chinese Dictionary): A comprehensive modern Chinese dictionary.\"\n  ],\n  \"有趣网站\": [\n    \"https://www.zdic.net/ - An online Chinese dictionary with detailed explanations of characters.\",\n    \"https://www.chinese-tools.com/ - A resource for learning Chinese characters and culture.\"\n  ]\n}"

In [40]:
x1_json = json.loads(x1)

In [41]:
x1_json

{'含义': "The character '子' (zǐ) has multiple meanings, including 'child', 'son', 'seed', 'a person', or a respectful title for a learned or virtuous person (e.g., Confucius is often referred to as '孔子'). It can also denote something small or a part of something larger.",
 '字形': "The character '子' is a simple, two-stroke character. It consists of a horizontal stroke at the top and a vertical stroke that curves slightly to the right at the bottom, resembling a small child or a seed.",
 '读音': "The pronunciation of '子' is 'zǐ' in Mandarin Chinese, with the third tone.",
 '字源': "The character '子' originated from a pictograph of a small child or a seed. In ancient oracle bone script, it depicted a baby with arms outstretched. Over time, it evolved into its current simplified form.",
 '含此字的字': ['好 (hǎo - good)',
  '字 (zì - character)',
  '学 (xué - study)',
  '孩 (hái - child)',
  '孙 (sūn - grandson)',
  "孔 (kǒng - hole, also part of Confucius's name)"],
 '同音字': ['紫 (zǐ - purple)',
  '仔 (zǐ - yo

In [30]:
json_str = "```json\n{\n  \"含义\": \"The character '子' (zǐ) has multiple meanings, including 'child', 'son', 'seed', 'a person', or a respectful title for a learned or virtuous person, such as Confucius (孔子). It can also denote something small or a part of a larger whole.\",\n  \"字形\": \"'子' is a simple character consisting of three strokes. It resembles a child with outstretched arms. The top horizontal stroke represents the head, the middle stroke the body, and the bottom stroke the legs.\",\n  \"读音\": \"The pronunciation of '子' is 'zǐ' in Mandarin, with the third tone.\",\n  \"字源\": \"The character '子' originated from ancient pictographs depicting a child. Over time, its form simplified into the modern version. It is one of the oldest Chinese characters, appearing in oracle bone inscriptions.\",\n  \"含此字的字\": [\n    \"好 (hǎo - good)\",\n    \"学 (xué - study)\",\n    \"孩 (hái - child)\",\n    \"孔 (kǒng - hole, also part of Confucius's name)\",\n    \"孙 (sūn - grandson)\"\n  ],\n  \"同音字\": [\n    \"字 (zì - character)\",\n    \"自 (zì - self)\",\n    \"紫 (zǐ - purple)\",\n    \"籽 (zǐ - seed)\"\n  ],\n  \"常用词组\": [\n    \"孩子 (hái zi - child)\",\n    \"儿子 (ér zi - son)\",\n    \"女子 (nǚ zǐ - woman)\",\n    \"男子 (nán zǐ - man)\",\n    \"种子 (zhǒng zi - seed)\"\n  ],\n  \"成语\": [\n    \"君子之交 (jūn zǐ zhī jiāo - friendship between gentlemen)\",\n    \"望子成龙 (wàng zǐ chéng lóng - to hope one's child becomes successful)\",\n    \"子虚乌有 (zǐ xū wū yǒu - purely fictitious)\"\n  ],\n  \"例句\": [\n    \"孔子是中国古代伟大的思想家。 (Kǒng zǐ shì zhōng guó gǔ dài wěi dà de sī xiǎng jiā. - Confucius was a great ancient Chinese thinker.)\",\n    \"这个孩子很聪明。 (Zhè gè hái zi hěn cōng míng. - This child is very smart.)\"\n  ],\n  \"短故事\": [\n    \"In ancient China, '子' was often used as a respectful title for scholars. Confucius, known as 孔子 (Kǒng zǐ), was one such scholar whose teachings shaped Chinese culture for centuries.\",\n    \"The character '子' is also associated with the concept of 'seed' or 'origin'. In Daoist philosophy, the 'Dao' is often described as the 'mother of all things', and '子' represents the beginning of life.\"\n  ],\n  \"诗词\": [\n    \"《静夜思》 - 李白 (Jìng yè sī - Li Bai): '床前明月光，疑是地上霜。举头望明月，低头思故乡。' (The moonlight before my bed, I suspect it is frost on the ground. I raise my head to gaze at the bright moon, and lower it to think of my hometown.)\",\n    \"《子夜歌》 - 无名氏 (Zǐ yè gē - Anonymous): '子夜四时歌，春歌动地来。' (The midnight song of the four seasons, the spring song shakes the earth.)\"\n  ],\n  \"图片\": [\n    \"An illustration of a child with outstretched arms.\",\n    \"A calligraphy of the character '子' in traditional brush strokes.\"\n  ],\n  \"音频\": [\n    \"A pronunciation guide for '子' in Mandarin.\",\n    \"A recitation of a poem containing '子'.\"\n  ],\n  \"视频\": [\n    \"A short video explaining the history and meaning of '子'.\",\n    \"A calligraphy demonstration of writing '子'.\"\n  ],\n  \"电影\": [\n    \"《孔子》 (Confucius) - A biographical film about Confucius.\",\n    \"《鬼子来了》 (Devils on the Doorstep) - A film title containing '子'.\"\n  ],\n  \"参考资料\": [\n    \"《说文解字》 (Shuōwén Jiězì) - An ancient Chinese dictionary.\",\n    \"《汉字源流字典》 (Hànzì Yuánliú Zìdiǎn) - A dictionary on the origin of Chinese characters.\"\n  ],\n  \"有趣网站\": [\n    \"https://www.zdic.net/ - An online Chinese dictionary.\",\n    \"https://www.chineseetymology.org/ - A website exploring the etymology of Chinese characters.\"\n  ]\n}\n```" 

In [31]:
x = extract_json_string(json_str)

In [32]:
x 

'{\n  "含义": "The character \'子\' (zǐ) has multiple meanings, including \'child\', \'son\', \'seed\', \'a person\', or a respectful title for a learned or virtuous person, such as Confucius (孔子). It can also denote something small or a part of a larger whole.",\n  "字形": "\'子\' is a simple character consisting of three strokes. It resembles a child with outstretched arms. The top horizontal stroke represents the head, the middle stroke the body, and the bottom stroke the legs.",\n  "读音": "The pronunciation of \'子\' is \'zǐ\' in Mandarin, with the third tone.",\n  "字源": "The character \'子\' originated from ancient pictographs depicting a child. Over time, its form simplified into the modern version. It is one of the oldest Chinese characters, appearing in oracle bone inscriptions.",\n  "含此字的字": [\n    "好 (hǎo - good)",\n    "学 (xué - study)",\n    "孩 (hái - child)",\n    "孔 (kǒng - hole, also part of Confucius\'s name)",\n    "孙 (sūn - grandson)"\n  ],\n  "同音字": [\n    "字 (zì - character)

In [33]:
with open("tst-1.json", "w", encoding="utf-8") as f: 
    f.write(x)

In [25]:
!cd 

C:\Users\p2p2l\projects\wgong\zistory\zinets\dev\notebook
