In [None]:
import os
import textwrap
from typing import cast
# Set HF_ENDPOINT to a Hugging Face mirror URL. This assignment is placed
# before `langextract` is imported below -- presumably so that any library
# reading HF_ENDPOINT at import time picks up the mirror (TODO confirm).
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"


import langextract as lx
from IPython.display import HTML
from langextract.data import AnnotatedDocument


In [None]:
# 1. Define the prompt and extraction rules.
# One instruction per entry, joined with newlines: the resulting prompt has
# no leading indentation and no trailing newline.
prompt = "\n".join(
    [
        "Extract characters, emotions, and relationships in order of appearance.",
        "Use exact text for extractions. Do not paraphrase or overlap entities.",
        "Provide meaningful attributes for each entity to add context.",
    ]
)

# 2. Provide a high-quality example to guide the model.
# The example passage, followed by one (class, exact text span, attributes)
# triple per extraction, listed in order of appearance in the passage.
example_text = "ROMEO. But soft! What light through yonder window breaks? It is the east, and Juliet is the sun."
extraction_specs = [
    ("character", "ROMEO", {"emotional_state": "wonder"}),
    ("emotion", "But soft!", {"feeling": "gentle awe"}),
    ("relationship", "Juliet is the sun", {"type": "metaphor"}),
]

examples = [
    lx.data.ExampleData(
        text=example_text,
        extractions=[
            lx.data.Extraction(
                extraction_class=spec_class,
                extraction_text=spec_text,
                attributes=spec_attributes,
            )
            for spec_class, spec_text, spec_attributes in extraction_specs
        ],
    )
]

In [None]:
# The input text to be processed: a single sentence that the extraction
# run below is applied to.
input_text = "Lady Juliet gazed longingly at the stars, her heart aching for Romeo"

# Model configuration for the vLLM provider.
# Other model ids left here as commented-out alternatives:
#   model_id="vllm:microsoft/Phi-3-mini-4k-instruct"
#   model_id="vllm:Qwen/Qwen3-0.6B"
#   model_id="http://localhost:8000/v1"
config = lx.factory.ModelConfig(
    model_id="vllm:Qwen/Qwen3-4B-Instruct-2507",
    provider="VLLMLanguageModel",
    provider_kwargs={
        "gpu_memory_utilization": 0.5,
        "max_model_len": 1024,
        "temperature": 0.1,
        "max_tokens": 1024,
        # Other vLLM parameters
        "tensor_parallel_size": 1,
        "enforce_eager": True,
        "disable_custom_all_reduce": True,
    },
)

# Build the model instance from the configuration above.
model = lx.factory.create_model(config)

# Run the extraction on `input_text` using the model, prompt and examples
# defined in the earlier cells.
# NOTE(review): the original note here mentioned adding a debug mode to
# inspect the model output, but no debug argument is passed -- confirm
# whether one is wanted.
result = lx.extract(
    text_or_documents=input_text,
    model=model,
    prompt_description=prompt,
    examples=examples,
    use_schema_constraints=True,
    # Request fenced output; intended to help the model produce better JSON.
    fence_output=True,
)

In [None]:
# `result` is already a single AnnotatedDocument, so no index access is
# needed; cast() only narrows the type for static checkers and returns the
# same object.
result = cast(AnnotatedDocument, result)

# Build the visualization and render it in the cell output.
html_content = cast(HTML, lx.visualize(result))
display(html_content)

