In [1]:

import requests
import sys
import time
import html
import re
import json
from typing import Dict, Any, List, Tuple
# Location of the exported backup file that this notebook reads.
json_path = r"G:\Code\Python\Project\Reader\data\backup\info\2025-09-29.json"

# Read the whole file as UTF-8 text and parse it in one step.
with open(json_path, "r", encoding="utf-8") as backup_file:
    word_list = json.loads(backup_file.read())

# Bare expression so the notebook displays the parsed entries.
word_list

[{'wordUrl': '',
  'partOfSpeech': [{'type': 'phrase',
    'wordPrototype': 'burst up to six feet',
    'definitions': [{'enMeaning': '', 'chMeaning': '(身高)猛增到6英尺'}]}],
  'sentences': [{'key': 1758082735924,
    'bookKey': 1738143464138,
    'date': '2025-09-17',
    'chapter': 'Cover',
    'chapterIndex': 5,
    'text': ' bursting up to six feet',
    'range': '{"characterRange":{"start":1076,"end":1100},"backward":false}',
    'notes': 'He had gotten much bigger, bursting up to six feet with a bearlike frame, and had learned some judo.',
    'percentage': 0.0,
    'color': '#FBF1D1',
    'tag': nan,
    'highlightType': 'background',
    'bookName': 'Elon Musk',
    'bookAuthor': 'Walter Isaacson'}]},
 {'wordUrl': 'https://dictionary.cambridge.org/dictionary/english-chinese-simplified/pick-on',
  'partOfSpeech': [{'type': '',
    'wordPrototype': 'pick on someone',
    'pronunciationUK': {'phonetic': '/ pɪk /',
     'pronUrl': 'https://dictionary.cambridge.org/media/english-chinese-s

In [14]:
def replace_alnum_with_underscores(match_obj: re.Match) -> str:
    """
    Mask the text of a regex match for use as a ``re.sub`` callback.

    Every alphanumeric character (per ``str.isalnum``) of the matched text is
    replaced by an underscore; all other characters (spaces, punctuation, ...)
    are kept, so the masked text has the same length and layout as the match.
    """
    def _mask(character: str) -> str:
        # Letters and digits are hidden, everything else passes through.
        return '_' if character.isalnum() else character

    return ''.join(map(_mask, match_obj.group(0)))

def _as_text(value: Any) -> str:
    """Coerce a JSON field to ``str``; ``None``, other falsy values and NaN become ``""``."""
    # NaN is truthy, so it needs its own check (NaN is the only float != itself).
    if not value or (isinstance(value, float) and value != value):
        return ""
    return value if isinstance(value, str) else str(value)


def _pronunciation_rows(pos: Dict[str, Any]) -> List[str]:
    """Build one ``audio-row`` div per available UK/US pronunciation of ``pos``."""
    rows: List[str] = []
    for label, key in (("UK", "pronunciationUK"), ("US", "pronunciationUS")):
        pron = pos.get(key)
        if not isinstance(pron, dict):
            continue
        phonetic = _as_text(pron.get("phonetic"))
        pron_url = _as_text(pron.get("pronUrl"))
        if not (phonetic or pron_url):
            continue
        row = f"{label}: {html.escape(phonetic)}"
        if pron_url:
            row += f" <audio controls src=\"{html.escape(pron_url)}\"></audio>"
        rows.append(f"<div class='audio-row'>{row}</div>")
    return rows


def _escaped_meanings(entry: Any) -> Tuple[str, str]:
    """Return the HTML-escaped (English, Chinese) meanings of a definition entry."""
    if not isinstance(entry, dict):
        return "", ""
    en = (_as_text(entry.get("enMeaning")) or _as_text(entry.get("en"))).strip()
    ch = (_as_text(entry.get("chMeaning")) or _as_text(entry.get("ch"))).strip()
    return html.escape(en), html.escape(ch)


def _highlight(sentence: str, pattern: "re.Pattern") -> str:
    """HTML-escape ``sentence`` and wrap every ``pattern`` match in ``<strong>``."""
    # Matching happens on the raw text and escaping is applied piecewise, so a
    # target can never match inside an HTML entity such as ``&amp;``.
    pieces: List[str] = []
    cursor = 0
    for match in pattern.finditer(sentence):
        pieces.append(html.escape(sentence[cursor:match.start()]))
        pieces.append(f"<strong>{html.escape(match.group(0))}</strong>")
        cursor = match.end()
    pieces.append(html.escape(sentence[cursor:]))
    return "".join(pieces)


def build_html_from_word_info(word_info: Dict[str, Any]) -> Dict[str, str]:
    """
    Build the HTML content of each note field from one ``word_info`` entry.

    Args:
        word_info: Entry with optional ``partOfSpeech`` and ``sentences``
            lists and an optional ``word`` string. Missing, ``None`` or NaN
            values are treated as empty instead of raising.

    Returns:
        A dict with these keys, each a newline-joined HTML string:
        ``POS_Definitions`` (title, audio rows, definitions and phrases per
        part of speech), ``Pronunciation`` (audio rows only), ``Definition``
        (title and definitions only), ``Examples`` (sentences with the target
        wrapped in ``<strong>``) and ``Blanked_Examples`` (sentences with the
        target's letters and digits replaced by underscores).
    """
    pos_html_parts: List[str] = []
    pronunciation_parts: List[str] = []
    definition_parts: List[str] = []
    examples_parts: List[str] = []
    blanked_examples_parts: List[str] = []

    word_to_highlight = _as_text(word_info.get("word"))

    # One pass over the parts of speech feeds all three POS-related fields.
    for pos in word_info.get("partOfSpeech") or []:
        if not isinstance(pos, dict):
            continue
        pos_type = pos.get("type", "")
        title_div = f"<div class='pos-title'>{html.escape(str(pos_type)).capitalize()}</div>"

        audio_rows = _pronunciation_rows(pos)
        pronunciation_parts.extend(audio_rows)

        definition_items: List[str] = []
        for definition in pos.get("definitions") or []:
            en, ch = _escaped_meanings(definition)
            definition_items.append(
                f"<li><div class='definition-en'>{en}</div><div class='definition-ch'>{ch}</div></li>"
            )

        if definition_items:
            # The standalone Definition field omits the title when the type is empty.
            definition_parts.append(
                "".join([title_div if pos_type else "", "<ul>", *definition_items, "</ul>"])
            )

        # The combined field always carries the title div, even when empty.
        part_lines: List[str] = [title_div, *audio_rows]
        if definition_items:
            part_lines += ["<ul>", *definition_items, "</ul>"]

        phrases = pos.get("phrases") or []
        phrase_defs = pos.get("phraseDefinitions") or []
        if phrases:
            part_lines.append("<div><b>Phrases:</b><ul>")
            for index, phrase in enumerate(phrases):
                # Phrase definitions are matched to phrases by position.
                en, ch = _escaped_meanings(phrase_defs[index] if index < len(phrase_defs) else {})
                part_lines.append(
                    "<li>"
                    f"<span class='phrase'>{html.escape(_as_text(phrase))}</span> — <span class='definition-en'>{en}</span>"
                    f"<div class='definition-ch'>{ch}</div>"
                    "</li>"
                )
            part_lines.append("</ul></div>")

        pos_html_parts.append("<div>" + "\n".join(part_lines) + "</div>")

    # Example sentences: "notes" holds the full sentence, "text" the highlighted span.
    for s in word_info.get("sentences") or []:
        if not isinstance(s, dict):
            continue
        sentence_text = _as_text(s.get("notes")).strip()
        if not sentence_text:
            continue

        target_word = (_as_text(s.get("text")) or word_to_highlight).strip()
        highlighted = html.escape(sentence_text)
        blanked_sentence = sentence_text
        if target_word:
            literal = re.escape(target_word)
            bounded = r'\b' + literal + r'\b'

            # 1. 'Examples' field: target wrapped in <strong>.
            highlighted = _highlight(sentence_text, re.compile(bounded, re.IGNORECASE))

            # 2. 'Blanked_Examples' field: the pattern is built from the RAW
            # target because it runs on the raw sentence; escaping comes after.
            # Multi-word phrases match without boundaries, single words with
            # them; the check uses the stripped target so a stray leading
            # space does not turn a single word into a "phrase".
            blank_regex = literal if " " in target_word else bounded
            blanked_sentence = re.compile(blank_regex, re.IGNORECASE).sub(
                replace_alnum_with_underscores, sentence_text
            )
        escaped_blanked = html.escape(blanked_sentence)

        # Source attribution.
        book = _as_text(s.get("bookName"))
        meta = f" — 《{html.escape(book)}》" if book else ""

        examples_parts.append(f"<div class='example'><div class='example-text'>{highlighted}</div><div class='example-meta'>{meta}</div></div>")
        blanked_examples_parts.append(f"<div class='example'><div class='example-text'>{escaped_blanked}</div><div class='example-meta'>{meta}</div></div>")

    return {
        "POS_Definitions": "\n".join(pos_html_parts),
        "Pronunciation": "\n".join(pronunciation_parts),
        "Definition": "\n".join(definition_parts),
        "Examples": "\n".join(examples_parts),
        "Blanked_Examples": "\n".join(blanked_examples_parts)
    }

In [15]:
# Spot-check the builder on one entry (index 5) and display the returned field dict.
build_html_from_word_info(word_list[5])

{'POS_Definitions': '<div><div class=\'pos-title\'>Adjective</div>\n<div class=\'audio-row\'>UK: / ˌpær.əˈmɪl.ɪ.tri / <audio controls src="https://dictionary.cambridge.org/media/english-chinese-simplified/uk_pron/u/ukp/ukpar/ukparag014.mp3"></audio></div>\n<div class=\'audio-row\'>US: / ˌper.əˈmɪl.ə.ter.i / <audio controls src="https://dictionary.cambridge.org/media/english-chinese-simplified/us_pron/p/par/param/paramilitary.mp3"></audio></div>\n<ul>\n<li><div class=\'definition-en\'>A paramilitary group is organized like an army but is not official and often not legal .</div><div class=\'definition-ch\'>准军事的 ， 非法军事组织的</div></li>\n<li><div class=\'definition-en\'>connected with and helping the official armed forces</div><div class=\'definition-ch\'>与正规军有联系的 ， 辅助军事的</div></li>\n</ul></div>\n<div><div class=\'pos-title\'>Noun [ c ]</div>\n<div class=\'audio-row\'>UK: / ˌpær.əˈmɪl.ɪ.tri / <audio controls src="https://dictionary.cambridge.org/media/english-chinese-simplified/uk_pron/u/ukp/

In [4]:
# Endpoint that invoke() posts its AnkiConnect requests to.
ANKI_CONNECT_URL = "http://localhost:8765"
# Not referenced by the cells shown here; presumably the Anki note type name — TODO confirm.
MODEL_NAME = "WordType"
# Passed as the `timeout` argument of requests.post in invoke().
REQUEST_TIMEOUT = 10.0

def invoke(action: str, **params):
    """
    Send one action to AnkiConnect and return the decoded JSON response.

    Args:
        action: AnkiConnect action name (e.g. ``"findNotes"``).
        **params: Keyword arguments forwarded as the action's ``params``.

    Returns:
        The decoded response body as returned by ``r.json()``. Callers in
        this notebook read its ``"result"`` key.

    Exits:
        Calls ``sys.exit(1)`` after printing a message when the HTTP request
        fails (connection error, timeout, non-2xx status).
    """
    try:
        r = requests.post(
            ANKI_CONNECT_URL,
            json={"action": action, "version": 6, "params": params},
            timeout=REQUEST_TIMEOUT
        )
        r.raise_for_status()
        response = r.json()
    except requests.RequestException as e:
        print(f"[错误] 无法连接 AnkiConnect（{ANKI_CONNECT_URL}）：{e}")
        sys.exit(1)

    # An action can fail even though the HTTP request succeeded; AnkiConnect
    # then reports the failure in the "error" field. Surface it instead of
    # returning it silently; the response is still returned unchanged.
    if isinstance(response, dict) and response.get("error"):
        print(f"[警告] AnkiConnect 动作 {action} 返回错误：{response['error']}")
    return response

In [16]:
# Push the regenerated HTML fields into the existing notes of this deck.
deck_name = "CambridgeDeck"
for word_info in word_list:
    # Per-word bookkeeping of what happened to this entry.
    result = {"created": False, "created_note_result": None, "updated": [], "skipped": [], "errors": []}

    # Guard against entries without any part of speech instead of indexing blindly.
    parts_of_speech = word_info.get("partOfSpeech") or []
    word = word_prototype = parts_of_speech[0].get("wordPrototype", "") if parts_of_speech else ""
    if not word:
        # An empty word would produce a meaningless search query.
        result["skipped"].append("missing wordPrototype")
        continue
    print(word)

    query = f'deck:"{deck_name}" "Word:{word}"'
    try:
        find_res = invoke("findNotes", query=query)
    except Exception as e:
        result["errors"].append(f"findNotes 调用异常: {e}")
        print(result["errors"][-1])
        # Without a fresh response, `find_res` would be undefined or would
        # still hold the previous word's notes, so move on to the next word.
        continue

    note_ids = find_res.get("result", []) or []
    if not note_ids:
        result["skipped"].append(word)
        continue

    # Only the first matching note is updated.
    note_id = note_ids[0]
    res = build_html_from_word_info(word_info)
    blanked_sentences = res["Blanked_Examples"]
    pronunciation = res["Pronunciation"]
    definition = res["Definition"]

    # One request updates all three fields (previously four separate calls,
    # with Blanked_Examples sent twice).
    r = invoke(
        "updateNoteFields",
        note={
            "id": note_id,
            "fields": {
                "Blanked_Examples": blanked_sentences,
                "Definition": definition,
                "Pronunciation": pronunciation,
            },
        },
    )
    print(r)
    if isinstance(r, dict) and r.get("error"):
        result["errors"].append(r["error"])
    else:
        result["updated"].append(note_id)

    # Read the note back to verify that the update was really applied.
    info = invoke("notesInfo", notes=[note_id])
    notes_info = info.get("result") or []
    if notes_info and notes_info[0]:
        print(notes_info[0]["fields"]["Blanked_Examples"]["value"])


    
    


burst up to six feet
{'result': None, 'error': None}
<div class='example'><div class='example-text'>He had gotten much bigger, ________ __ __ ___ ____ with a bearlike frame, and had learned some judo.</div><div class='example-meta'> — 《Elon Musk》</div></div>
pick on someone
{'result': None, 'error': None}
<div class='example'><div class='example-text'>As a result, he was regularly ______ __ __ _______, who would come up and punch him in the face.</div><div class='example-meta'> — 《Elon Musk》</div></div>
ballistic
{'result': None, 'error': None}
<div class='example'><div class='example-text'>My father just lost it, ____ _________, as he often did.</div><div class='example-meta'> — 《Elon Musk》</div></div>
juvenile prison
{'result': None, 'error': None}
<div class='example'><div class='example-text'>Both Elon and Kimbal, who no longer speak to their father, say his claim that Elon provoked the attack is unhinged and that the perpetrator ended up being sent to ________ ______ for it.</div>