In [1]:
from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader, StorageContext, load_index_from_storage, QueryBundle
from llama_index.core.schema import MetadataMode
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from transformers import AutoTokenizer, AutoModelForCausalLM
from llama_index.llms.huggingface import (
    HuggingFaceInferenceAPI,
    HuggingFaceLLM,
)

  from .autonotebook import tqdm as notebook_tqdm



In [2]:
# Location of the local Llama-2 chat checkpoint used for the tokenizer and LLM.
# It can be overridden with the LLAMA_CHECKPOINT_PATH environment variable so
# the notebook is not tied to one machine's filesystem layout; the original
# path remains the default when the variable is not set.
import os

checkpoint_path = os.environ.get(
    "LLAMA_CHECKPOINT_PATH", "/mnt/resource/public_models/Llama-2-7b-chat-hf"
)

In [None]:
# Build the custom prompt templates.
from llama_index.core import PromptTemplate

# System prompt, embedded in the [INST]/<<SYS>> wrapper placed around each query.
# NOTE(review): the persona text says "you are a medical AI assistant", while
# the documents and questions in this notebook are about electronic
# components -- confirm the persona wording is intended.
SYSTEM_PROMPT = """你是一个医疗人工智能助手。"""
query_wrapper_prompt = PromptTemplate(
    "[INST]<<SYS>>\n" + SYSTEM_PROMPT + "<</SYS>>\n\n{query_str}[/INST] "
)

# Question-answering prompt: shows the retrieved context between two rule
# lines, then asks for an answer based on that context rather than prior
# knowledge. Placeholders: {context_str}, {query_str}.
qa_prompt_tmpl_str = (
    "上下文信息如下。\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "请根据上下文信息而不是先验知识来回答以下的查询。"
    "作为一个医疗人工智能助手，你的回答要尽可能严谨。\n"
    "Query: {query_str}\n"
    "Answer: "
)
qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)

# Refine prompt: presents the original query, the existing answer and extra
# context, then asks for an improved answer.
# Placeholders: {query_str}, {existing_answer}, {context_msg}.
# Each segment ends with a newline; without them the adjacent string literals
# are joined into one unbroken line, fusing the query, the existing answer,
# the rule lines and the context together.
refine_prompt_tmpl_str = (
    "原始查询如下：{query_str}\n"
    "我们提供了现有答案：{existing_answer}\n"
    "我们有机会通过下面的更多上下文来完善现有答案（仅在需要时）。\n"
    "------------\n"
    "{context_msg}\n"
    "------------\n"
    "考虑到新的上下文，优化原始答案以更好地回答查询。 如果上下文没有用，请返回原始答案。\n"
    "Refined Answer: "
)
refine_prompt_tmpl = PromptTemplate(refine_prompt_tmpl_str)

In [3]:
# Set the global tokenizer to the one shipped with the LLM checkpoint so that
# token counting matches the model.
# `torch_dtype` is not passed here: it is a model-loading option and has no
# meaning for a tokenizer.
# NOTE(review): trust_remote_code=True allows code bundled with the checkpoint
# to run on load -- confirm it is actually required for this local checkpoint.
Settings.tokenizer = AutoTokenizer.from_pretrained(
    checkpoint_path,
    trust_remote_code=True,
)

In [None]:
# Use the local checkpoint as the global LLM, loading both the model and its
# tokenizer from the same path and leaving all other options at their defaults.
Settings.llm = HuggingFaceLLM(
    tokenizer_name=checkpoint_path,
    model_name=checkpoint_path,
)

In [None]:
# Create the local LLM through LlamaIndex's HuggingFace wrapper and install it
# as the global LLM.
Settings.llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=2048,
    # Greedy decoding. `temperature` is deliberately not set: it only affects
    # sampling and is unused when do_sample is False, so specifying both was
    # contradictory.
    generate_kwargs={"do_sample": False},
    # Wraps every query in the system-prompt template defined earlier.
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name=checkpoint_path,
    model_name=checkpoint_path,
    device_map="auto",
    model_kwargs={"torch_dtype": "auto"},
)

In [4]:
# Local model API: install the project-defined MyCustomLLM (from the local
# custom_llm module) as the global LLM. This assignment replaces whatever
# Settings.llm was set to by an earlier cell.
from custom_llm import MyCustomLLM
Settings.llm = MyCustomLLM()

In [5]:
# set the embed model
# Embeddings are produced by a HuggingFace model loaded from a local directory.
# NOTE(review): the directory name suggests the English bge-small-en-v1.5
# model, while several queries in this notebook are written in Chinese --
# confirm this model is the intended choice.
Settings.embed_model = HuggingFaceEmbedding(
    model_name="/mnt/jinweilin/package/bge-small-en-v1.5"
)

In [6]:
from llama_index.core.callbacks import CallbackManager, LlamaDebugHandler

# Debug handler that records the events emitted while LlamaIndex runs and
# prints a trace when each top-level operation ends.
llama_debug = LlamaDebugHandler(print_trace_on_end=True)

# Register the handler globally through a callback manager.
Settings.callback_manager = callback_manager = CallbackManager([llama_debug])

In [7]:
from llama_index.core import Document
from llama_index.core.schema import MetadataMode
import json

# Read the documents from a JSON Lines file. Every non-blank line is parsed as
# a JSON object, from which the "product_name" and "content" fields are read.
documents = []
# The encoding is given explicitly so the result does not depend on the
# platform's default encoding (chunks printed later in this notebook contain
# non-ASCII characters).
with open("../electronic_content_duplication.jsonl", "r", encoding="utf-8") as f:
    # Iterate the file lazily instead of loading every line with readlines().
    for line in f:
        # Skip blank lines (e.g. a trailing newline at the end of the file),
        # which json.loads would reject.
        if not line.strip():
            continue
        json_item = json.loads(line)
        documents.append(
            Document(
                text=json_item['content'],
                # The product name is stored under both keys.
                metadata={
                    "file_name": json_item['product_name'],
                    "product_name": json_item['product_name'],
                },
            )
        )

# Split the documents and build the vector index.
index = VectorStoreIndex.from_documents(
    documents,
)

In [None]:
# Alternative data source: read every document found in the local "data"
# directory.
documents = SimpleDirectoryReader("data").load_data()

# Split the documents and build the vector index from them.
index = VectorStoreIndex.from_documents(documents)

In [8]:
# Save the index's storage context to the "electronic_name_emb" directory so
# it can be reloaded later.
index.storage_context.persist(
    persist_dir='electronic_name_emb',
)

In [7]:
# Reload the previously persisted index from the "electronic_name_emb"
# directory.
storage_context = StorageContext.from_defaults(
    persist_dir="electronic_name_emb",
)
index = load_index_from_storage(storage_context=storage_context)

**********
Trace: index_construction
**********


In [None]:
# Assemble a query engine from explicitly configured parts instead of the
# index defaults.
from llama_index.core import get_response_synthesizer
from llama_index.core.indices.vector_store import VectorIndexRetriever
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.response_synthesizers import ResponseMode

# Retriever over the vector index with similarity_top_k=5.
retriever = VectorIndexRetriever(index=index, similarity_top_k=5)

# Response synthesizer in REFINE mode.
response_synthesizer = get_response_synthesizer(response_mode=ResponseMode.REFINE)

# Query engine: retriever -> similarity postprocessor (cutoff 0.6) -> synthesizer.
query_engine = RetrieverQueryEngine(
    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.6)],
    response_synthesizer=response_synthesizer,
    retriever=retriever,
)

In [8]:
# Build a query engine straight from the index with default settings; this
# rebinds `query_engine`, replacing any engine created by an earlier cell.
query_engine = index.as_query_engine()

In [29]:
# Metadata-filtered retrieval, plus a query engine built on that retriever.
from llama_index.core import get_response_synthesizer
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.response_synthesizers import ResponseMode
from llama_index.core.vector_stores import (
    FilterOperator,
    MetadataFilter,
    MetadataFilters,
)

# Alternative kept for reference: match either of two product names (OR).
# filters = MetadataFilters(
#     filters=[
#         MetadataFilter(
#             key="product_name", operator=FilterOperator.EQ, value="SGM5200"
#         ),
#         MetadataFilter(
#             key="product_name", operator=FilterOperator.EQ, value="SGM446"
#         ),
#     ],
#     condition='or'
# )

# Active filter: "product_name" metadata must equal "SGM5200".
filters = MetadataFilters(
    filters=[
        MetadataFilter(value="SGM5200", operator=FilterOperator.EQ, key="product_name"),
    ],
)

# Response synthesizer in COMPACT_ACCUMULATE mode.
response_synthesizer = get_response_synthesizer(
    response_mode=ResponseMode.COMPACT_ACCUMULATE,
)

# The question asks how long the SGM5200 power-up time is.
questions = "SGM5200的上电时间需要多久？"

# Retrieve with similarity_top_k=10 under the filter and print every hit.
retriever = index.as_retriever(filters=filters, similarity_top_k=10)
nodes = retriever.retrieve(questions)
for retrieved_node in nodes:
    print(retrieved_node)

# query_engine = index.as_query_engine(similarity_top_k=5, filters = filters)
# Query engine combining the filtered retriever with the synthesizer above.
query_engine = RetrieverQueryEngine(
    response_synthesizer=response_synthesizer,
    retriever=retriever,
)

# response = query_engine.query(questions)
# print(response)


**********
Trace: query
    |_retrieve -> 0.052081 seconds
      |_embedding -> 0.021501 seconds
**********
Node ID: 91fe3424-f850-46f5-b114-c6fafc8e549d
Text: sg-micro.com  TYPICAL PERFORMANCE CHARACTERISTICS (continued)
Total Unadjusted Error (TUE Minimum) (Range 1)       Total Unadjusted
Error (TUE Minimum) (Range 2)            Typical FFT Plot    Amplitude
(dB)  0    -20    -40    -60    -80    -100    -120    -140    -160
0       100      200      300     400     500      Frequenc...
Score:  0.794

Node ID: 0f01f23a-134f-473d-bd56-0a1645adad8b
Text: Input Frequency       Input Leakage Current vs. Temperature
-1.0 -0.8 -0.6 -0.4 -0.2 0.0 0.2 0.4 0.6 0.8 1.0 0 1 2 3 4 5 6 7 8 9
10 11 12 13 14 15 Gain Error  (LSB)  Channel Number  VA = 5V, VBD =
5V, fS = 1MSPS  -1.0 -0.8 -0.6 -0.4 -0.2 0.0 0.2 0.4 0.6 0.8 1.0 0 1 2
3 4 5 6 7 8 9 10 11 12 13 14 15 Offset Error  (LSB)  Channel Number
V...
Score:  0.792

Node ID: 2d904628-cb01-4653-9ca5-48db9ac00b01
Text: SGM5200  12-Bit, 1MSPS, 16 Chan

In [None]:
# Swap the query engine's QA and refine prompt templates for the custom ones.
query_engine.update_prompts(
    {
        "response_synthesizer:text_qa_template": qa_prompt_tmpl,
        "response_synthesizer:refine_template": refine_prompt_tmpl,
    }
)

In [34]:
from llama_index.core.tools import QueryEngineTool, ToolMetadata

# Expose the current query engine as a single named tool. The name and
# description are the metadata attached to the tool.
individual_query_engine_tools = [
    QueryEngineTool(
        query_engine=query_engine,
        metadata=ToolMetadata(
            # Plain string literal: there are no placeholders, so no f-string.
            name="electronic_search",
            description=(
                "Use this tool when querying electronic components"
            ),
        ),
    )
]

In [35]:
# Break a question down into sub-questions that are answered with the tools
# defined above. Note that this rebinds `query_engine` to the new engine.
from llama_index.core.query_engine import SubQuestionQueryEngine

query_engine = SubQuestionQueryEngine.from_defaults(query_engine_tools=individual_query_engine_tools)

In [13]:
from llama_index.core.postprocessor import KeywordNodePostprocessor

# Post-process retrieved nodes with a keyword postprocessor whose required
# keyword is the part number.
node_postprocessors = [KeywordNodePostprocessor(required_keywords=["SGM42600"])]

# Default query engine from the index, with the postprocessor attached.
query_engine = index.as_query_engine(node_postprocessors=node_postprocessors)

# NOTE(review): the recorded output of this cell is "Empty Response" --
# presumably no retrieved node passed the keyword requirement; verify.
response = query_engine.query("What are the packaging dimensions of SGM42600?")
print(response)

Empty Response


In [9]:
# Run one question through the current `query_engine` and print the response.
# The commented-out strings are alternative questions kept for quick
# switching (the second asks for the SGM5200 package type and power-up time).
# The active question asks for a recommended 5 V to 3.3 V linear regulator
# (LDO) together with that part's parameters.
response = query_engine.query(
    # "What are the packaging dimensions of SGM42600?"
    # "SGM5200的封装型号和上电时间分别是多少？",
    "帮我推荐一个5v转3.3v的线性稳压器LDO，并给出该器件的参数"
)
print(response)

**********
Trace: query
    |_query -> 12.550714 seconds
      |_retrieve -> 0.792444 seconds
        |_embedding -> 0.620142 seconds
      |_synthesize -> 11.757212 seconds
        |_templating -> 1.6e-05 seconds
        |_llm -> 5.13209 seconds
        |_templating -> 2.1e-05 seconds
        |_llm -> 6.560989 seconds
**********
The original question was about selecting a reset chip from SG Micro for an 85°C temperature-controlled crystal oscillator circuit, focusing on size and cost. However, the provided context appears to describe a smart device with a display that indicates battery status using flashing patterns and mentions the SGM41008, which is a 5A high-power driver with charging and battery protection features, as well as火力 regulation and status indication.

Although the SGM41008 doesn't seem to be a direct fit for a reset chip, it's from SG Micro and may serve as a reference point for their capabilities in the power management domain. If you're looking for a reset solution, 

In [None]:
# Print the formatted prompt recorded for the first LLM call captured by the
# debug handler.
event_pairs = llama_debug.get_llm_inputs_outputs()
first_pair = event_pairs[0]
# Index 1 is the second event of the pair; its payload holds the prompt.
second_event = first_pair[1]
print(second_event.payload["formatted_prompt"])

In [11]:
# Retrieve the chunks the query engine selects for a question and print each
# one, with its LLM-visible metadata, between banner lines.
dash_rule = '-' * 10
star_rule = '*' * 10

contexts = query_engine.retrieve(
    QueryBundle("What are the packaging dimensions of SGM42600?")
)
print(f'{dash_rule}ref{dash_rule}')
for i, context in enumerate(contexts):
    print(f'{star_rule}chunk {i} start{star_rule}')
    content = context.node.get_content(metadata_mode=MetadataMode.LLM)
    print(content)
    print(f'{star_rule}chunk {i} end{star_rule}')
print(f'{dash_rule}ref{dash_rule}')

----------ref----------
**********chunk 0 start**********
file_name: SGM8425/SGM8426/SGM8428
product_name: SGM8425/SGM8426/SGM8428

The SGM8425/6/8 are suitable for low power  systems, such as portable and battery-powered  applications.  The SGM8425/6/8 have a bandwidth of 18MHz at -3dB.  They offer fast settling and slewing times. These  devices are well suited for TFT-LCDs.  The SGM8425 is available in Green SOT-23-5, SOIC-8  and MSOP-8 packages. The SGM8426 is available in  Green SOIC-8 and MSOP-8 packages. The SGM8428  is available in Green TSSOP-14 and SOIC-14 packages.  They are rated over the -40℃ to +85℃ temperature  range.    FEATURES  ● Rail-to-Rail Input and Output  ● -3dB Bandwidth: 18MHz  ● High Slew Rate: 16V/µs  ● Supply Current: 1.6mA/Amplifier  ● -40℃ to +85℃ Operating Temperature Range  ● Small Packaging:  SGM8425 Available in SOT-23-5, SOIC-8 and  MSOP-8 Packages  SGM8426 Available in SOIC-8 and MSOP-8  Packages  SGM8428 Available in SOIC-14 and TSSOP-14  Packages   