In [2]:

import sys

# Make the parent directory importable so that `src.*` packages resolve.
# Guarded so that re-running this cell does not append duplicate entries.
if ".." not in sys.path:
    sys.path.append("..")

In [8]:
from IPython.display import display, Markdown


def format_response(response):
    """Render a response as a styled Markdown block in the notebook output.

    The response is converted with ``str()``, re-flowed so that sentences,
    numbered list items and clauses start on their own lines, wrapped in a
    "查询结果" heading plus a footer, and shown via ``IPython.display``.

    Args:
        response: Any object; only its ``str()`` form is used.

    Returns:
        None. The formatted Markdown is displayed, not returned.
    """
    # Imported locally so the helper still works when its cell is run alone.
    from IPython.display import display, Markdown
    import re

    text = str(response)

    # 1. Put a blank line after each full-width sentence terminator.
    for terminator in ('。', '！', '？'):
        text = text.replace(terminator, terminator + '\n\n')

    # 2. Start numbered list markers ("1." / "1、") on a new line.
    #    The lookarounds keep ordinary numbers intact: a marker must not be
    #    glued to a preceding digit, dot or comma (so "1,880,000." and
    #    "2024." are left alone), and a "." marker must not be followed by a
    #    digit (so decimals such as "44448.26" are left alone). Markers are
    #    limited to one or two digits, which is a heuristic: list items
    #    numbered 100 and above are not split.
    text = re.sub(r'(?<![\d.,])(\d{1,2}(?:\.(?!\d)|、))', r'\n\1', text)

    # 3. Break the line after full-width colons and semicolons.
    text = text.replace('：', '：\n')
    text = text.replace('；', '；\n')

    # 4. Collapse any run of blank/whitespace-only lines into one blank line.
    text = re.sub(r'\n\s*\n', '\n\n', text)

    # Wrap the body in a heading and footer; the template is kept at column 0
    # so that Markdown does not interpret it as an indented code block.
    formatted_text = f"""
### 查询结果

{text.strip()}

---
*Generated by LlamaIndex RAG System*
"""

    # Display as Markdown so headings and emphasis are rendered.
    display(Markdown(formatted_text))

In [9]:
import os

from src.llms.siliconflow import SiliconflowLLM
from llama_index.core import Settings


# Read the SiliconFlow API key from the environment instead of embedding it
# in the notebook, where it would leak to anyone who can read the file.
api_key = os.environ.get("SILICONFLOW_API_KEY")
if not api_key:
    raise RuntimeError(
        "SILICONFLOW_API_KEY is not set; export it before starting the kernel."
    )


llm = SiliconflowLLM(
    api_key=api_key,
    api_base="https://api.siliconflow.cn/v1",
    model_name="Qwen/Qwen2.5-VL-72B-Instruct",
)

# Smoke test: send one prompt and render the completion as Markdown.
res = llm.complete("你好，介绍一下自己？")
format_response(res)

# Register the model on llama_index's global Settings object.
Settings.llm = llm





### 查询结果

你好！

我是来自阿里云的大规模语言模型，我叫通义千问。

我是阿里云自主研发的超大规模语言模型，能够回答问题、创作文字，还能表达观点、撰写代码。

如果您有任何问题或需要帮助，请随时告诉我，我会尽力提供支持。

---
*Generated by LlamaIndex RAG System*


In [10]:
# Configure the embedding model; here we use the project's custom
# SiliconFlow embedding class.
import os

from llama_index.core import Settings
from src.embeddings.siliconflow import SiliconflowEmbedding


# Read the SiliconFlow API key from the environment instead of embedding it
# in the notebook, where it would leak to anyone who can read the file.
api_key = os.environ.get("SILICONFLOW_API_KEY")
if not api_key:
    raise RuntimeError(
        "SILICONFLOW_API_KEY is not set; export it before starting the kernel."
    )

# NOTE(review): the model name contains "-en-", which suggests an
# English-oriented model, while some queries in this notebook are Chinese.
# Confirm this choice is intended.
model_name = "BAAI/bge-large-en-v1.5"
Settings.embed_model = SiliconflowEmbedding(
    model_name=model_name, api_key=api_key, embed_batch_size=100
)

# Chunking parameters stored on the global Settings object.
Settings.chunk_size = 512
Settings.chunk_overlap = 32

# Echo the effective configuration as a quick sanity check.
print(Settings.chunk_size, Settings.chunk_overlap)
print(Settings.embed_model.model_name)

512 32
BAAI/bge-large-en-v1.5


In [11]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

# Load the documents found under the spreadsheet data directory.
reader = SimpleDirectoryReader(input_dir="../data/xlsx/")
documents = reader.load_data()

# Build a vector store index from the loaded documents.
index = VectorStoreIndex.from_documents(documents=documents)

# Create a query engine on top of the index, using its default options.
query_engine = index.as_query_engine()

# Show a completion notice through the shared Markdown formatter.
format_response("数据构建完成！")


### 查询结果

数据构建完成！

---
*Generated by LlamaIndex RAG System*


In [12]:
# Chinese-language question about the total cost of the balcony materials;
# the answer is rendered as Markdown.
question = "阳台所用材料一共耗费多少钱?"
response = query_engine.query(question)
format_response(response)



### 查询结果

阳台所用材料的总耗费为
44448.26元中的部分，具体计算如下：

- 柜体：

4.176平方米，使用生态澳松板。

- 门板：

4.176平方米，使用进口PET。

- 拉手：
4个，单价680元，总价2720元。

- 铰链：
16个，单价60元，总价960元。

由于柜体和门板的单价未直接给出，我们只能计算给出单价的部分：

- 拉手总价：
2720元
- 铰链总价：
960元

因此，阳台所用材料中可计算的部分总耗费为2720元 + 960元 = 3680元。

柜体和门板的总耗费需要根据单价计算，但单价信息未提供，所以这部分无法计算。

如果需要完整计算阳台的总耗费，需要补充柜体和门板的单价信息。

---
*Generated by LlamaIndex RAG System*


In [14]:
# Ask for the 2023 current-assets figure and render the answer as Markdown.
question = "What's the 2023's current assets?"
response = query_engine.query(question)
format_response(response)


### 查询结果

The current assets for 2023 are 1,880,
000. This figure is derived from the sum of the current assets listed, which includes Cash, Accounts receivable, and Inventory.

---
*Generated by LlamaIndex RAG System*


In [15]:
# Ask for the 2024 current-assets figure and render the answer as Markdown.
question = "What's the 2024's current assets?"
response = query_engine.query(question)
format_response(response)


### 查询结果

The provided information does not specify the year 2024 or any financial data associated with that year. Therefore, it's not possible to determine the current assets for 2024 based on the given details. The data provided includes figures for current assets such as cash, accounts receivable, and inventory, but without a specific year mentioned, these cannot be attributed to 
2024.

---
*Generated by LlamaIndex RAG System*
