### 1. 连接大模型

In [35]:
from utils import get_ernie_models
from utils import get_qwen_models

In [36]:
# get_qwen_models() returns three objects. In the cells below, `chat` is the
# model used in the RAG chain and `embed` is passed as the embedding when the
# vector store is built.
llm, chat, embed = get_qwen_models()

### 2. 基本RAG系统的构成

### 2.1 引入必要的库和包

In [37]:
# 解析 Web 页面的库（用面向对象的方式来封装 HTML 页面）
import bs4
# hub 生态中心
from langchain import hub
# 引入 Chroma 向量库
from langchain_chroma import Chroma
# 在线加载网页
from langchain_community.document_loaders import WebBaseLoader
# 输出解析器
from langchain_core.output_parsers import StrOutputParser
# Chat prompt template used to build the RAG prompt
from langchain_core.prompts import ChatPromptTemplate
# 可执行的占位符
from langchain_core.runnables import RunnablePassthrough
# 文档切分器
from langchain_text_splitters import RecursiveCharacterTextSplitter

### 2.2 加载页面

In [38]:
# Configure a web loader for the blog post. The SoupStrainer passed as
# `parse_only` restricts HTML parsing to elements carrying one of the listed
# CSS classes (post body, title and header).
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header"),
        ),
    },
)

In [39]:
# Load the data: run the loader and keep the resulting documents in `docs`.
docs = loader.load()

### 2.3 文本分割

In [40]:
# Recursive, character-level text splitter.
#   chunk_size:    suggested (target) size of each chunk
#   chunk_overlap: overlap shared between neighbouring chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=200)
# Split the loaded documents into chunks for indexing.
splits = text_splitter.split_documents(docs)

### 2.4 向量化并入库

In [41]:
# Number of chunks produced by the splitter.
len(splits)

142

In [42]:
# Create the Chroma vector store from the first 6 chunks only; the remaining
# chunks are added afterwards in batches.
# Author's note: the underlying storage is SQLite; when no storage location is
# given, a temporary in-memory store is created.
vectorstore = Chroma.from_documents(documents=splits[:6], embedding=embed)

In [43]:
# Chunks not yet in the vector store (everything after the first 6).
my_splits = splits[6:]

In [44]:
# How many chunks are still to be added.
len(my_splits)

136

In [45]:
# Add the remaining chunks to the vector store in batches of BATCH_SIZE.
# The batch start offsets are derived from len(my_splits), so every chunk is
# indexed and no empty batch is ever submitted, regardless of how many chunks
# the page yields.
# NOTE(review): the batch size of 6 is presumably chosen to stay within an
# embedding-API request limit — confirm before changing it.
BATCH_SIZE = 6
for start in range(0, len(my_splits), BATCH_SIZE):
    my_docs = my_splits[start:start + BATCH_SIZE]
    vectorstore.add_documents(documents=my_docs)

### 2.5 RAG系统搭建

In [46]:
# Wrap the vector-store operations in a basic retriever (no extra options).
retriever = vectorstore.as_retriever()

In [47]:
# prompt = hub.pull("rlm/rag-prompt")

In [65]:
# The classic RAG-system prompt (the "A" — augmentation — step): a single
# human message with {question} and {context} placeholders.
# ChatPromptTemplate is imported together with the other dependencies in the
# notebook's main import cell.
prompt = ChatPromptTemplate.from_messages([
  ("human", """You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the question. 
If you don't know the answer, just say that you don't know. 
如果上下文中没有出现跟问题相关的信息，请直接回答不知道即可！
Use three sentences maximum and keep the answer concise.
Question: {question} 
Context: {context} 
Answer:""")
])

In [66]:
# Sanity check: fill the template with placeholder values and print the text
# of the first (and only) message.
print(prompt.invoke(input={"context": "我的参考上下文", "question":"我是问题"}).messages[0].content)

You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the question. 
If you don't know the answer, just say that you don't know. 
如果上下文中没有出现跟问题相关的信息，请直接回答不知道即可！
Use three sentences maximum and keep the answer concise.
Question: 我是问题 
Context: 我的参考上下文 
Answer:


In [67]:
def format_docs(docs):
    """Merge retrieved documents into one context string.

    Args:
        docs: iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined with a blank line (``"\\n\\n"``)
        between consecutive documents; an empty string when ``docs`` is empty.
    """
    separator = "\n\n"
    contents = [item.page_content for item in docs]
    return separator.join(contents)

In [68]:
# RAG chain, composed left to right with the `|` operator:
#   1. build the prompt inputs: "context" is the input piped through the
#      retriever and then format_docs; "question" comes from
#      RunnablePassthrough() (presumably the raw input, unchanged — confirm);
#   2. fill the prompt template;
#   3. send the prompt to the chat model;
#   4. parse the model output with StrOutputParser (the recorded results of
#      invoke() are plain strings).
rag_chain = (
    {"context": retriever | format_docs, 
     "question": RunnablePassthrough()}
    | prompt
    | chat
    | StrOutputParser()
)

In [69]:
# Run the RAG chain end to end on an English question.
rag_chain.invoke(input="What is Task Decomposition?")

'Task Decomposition is a method of breaking down complex tasks into simpler, more manageable sub-tasks or steps. This can be achieved through various approaches, including using language models with specific prompts, employing task-specific instructions, or incorporating human input. The technique helps in understanding and executing complex activities by making them easier to process and solve.'

In [70]:
# Ask a question unrelated to the indexed article. The prompt tells the model
# to say it does not know when the context is irrelevant, but the recorded
# output shows that it still answered the question.
rag_chain.invoke(input="中国的首都在哪里？")

'中国的首都是北京。'

In [71]:
# Baseline for comparison: the same question sent straight to the chat model,
# without the retriever or the RAG prompt.
chat.invoke(input="中国的首都在哪里？")

AIMessage(content='中国的首都是北京。', response_metadata={'model_name': 'qwen-max', 'finish_reason': 'stop', 'request_id': 'd44ddc45-6e40-99d6-80b9-52acafbc8448', 'token_usage': {'input_tokens': 13, 'output_tokens': 5, 'total_tokens': 18}}, id='run-7fce99f3-da6b-4904-9356-1a97a4f64039-0')