In [1]:
# 加载文档
from langchain_community.document_loaders import WebBaseLoader
loader = WebBaseLoader(
    web_path=("https://zh.wikipedia.org/wiki/黑神话：悟空"),
)
docs = loader.load()

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [3]:
# 文本分块
from langchain_text_splitters import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)

In [4]:
# 设置嵌入模型
from langchain_openai import OpenAIEmbeddings
embed_model = OpenAIEmbeddings()

In [5]:
# 设置嵌入模型，用开源模型替换OPENAI的嵌入模型
import torch
from langchain_community.embeddings import HuggingFaceEmbeddings
embed_model = HuggingFaceEmbeddings(
    model_name="BAAI/bge-small-zh",
    model_kwargs={'device': 'cuda:0' if torch.cuda.is_available() else 'cpu'},
    encode_kwargs={'normalize_embeddings': True}
)

  embed_model = HuggingFaceEmbeddings(
  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# 创建向量存储
all_splits = text_splitter.split_documents(docs)

from langchain_core.vectorstores import InMemoryVectorStore
vector_store = InMemoryVectorStore(embed_model)
vector_store.add_documents(all_splits)

['ee6afa91-6fa0-4276-8d12-4283f8d9c736',
 '18be8266-ebd5-44c5-bcb2-4156a1a64ab6',
 'f2a3011d-8708-4787-a9fb-ee620af763c5',
 '0fb2327a-a0c5-42be-aeb9-20577d0805be',
 '333cc735-2421-4f51-95d3-ae2e37e21e40',
 'f7519c3e-79bd-4921-b4a4-2a46052264f2',
 '54df40a3-7033-40fd-a9fe-f8b6b443ec31',
 'c823b6bf-641e-4192-aaf3-d2e6ca8778c3',
 '3c8fcc9a-c3ef-446a-816a-7b18560f3496',
 '4a7939b8-36b9-4007-b92b-d9739baed50a',
 'ef2c832c-45f9-4e69-956b-5c3e99f41834',
 'db6a6b44-2d7d-4689-a689-1e12964de4e4',
 'e87b25f2-b9fd-4c5f-a74c-5bb0bea7e88f',
 '05c7e900-5b69-4651-bcfd-b46cb078c8d7',
 'd734c4e2-015a-43bb-9dee-5d1462cd9c9f',
 'ab313784-9759-4e2a-ae2e-c94b787d893c',
 'dec9ab07-c014-4a8e-8014-38be80f229f4',
 'e29f7c39-d152-4517-9a43-09252336849a',
 'db9c655b-d101-4b59-9b3b-889a8cabac40',
 '333a5359-c2b2-452a-a978-85aef3d7f7b4',
 'fed5b5bb-8ac8-43b1-a21f-2f77ee67e733',
 '3e58223c-45a6-410b-80a5-87d3b314c063',
 'e2accdbf-d10c-4ccc-8d16-c6d7523fca71',
 'e053b7d3-a093-4b07-a4c7-5510f83199de',
 '85c09d3b-4b80-

In [7]:
# 构建用户查询的问题
question = "黑神话：悟空的游戏类型是什么？"

In [8]:
# 在向量存储中搜索相关文档，并准备上下文内容
retrieved_docs = vector_store.similarity_search(
    question,
    k=3
)
docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)


In [9]:
# 构建提示词模板
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_template(
    """基于以下上下文回答问题。如果上下文没有相关信息，请说
    "我无法从提供的上下文中找到相关内容"。
    上下文：{context}
    问题：{question}
    回答："""
)

In [11]:
# 使用大模型生成回答
from langchain_deepseek import ChatDeepSeek
llm = ChatDeepSeek(
    model="deepseek-chat",
    temperature=0.1,
    max_tokens=512,
    api_key="你的key"
)
answer = llm.invoke(
    prompt.format(
        question=question,
        context=docs_content
    )
)
print("\n 最终答案：",answer.content)


 最终答案： 根据提供的上下文，《黑神话：悟空》的游戏类型是**动作角色扮演游戏**（Action Role-Playing Game, ARPG）。
