# RAG using Upstage Document Parse and Groundedness Check
# 使用 Upstage 文档解析和事实核查的 RAG 示例

This example illustrates RAG using [Upstage](https://python.langchain.com/docs/integrations/providers/upstage/) Document Parse and Groundedness Check.
本示例展示了如何使用 [Upstage](https://python.langchain.com/docs/integrations/providers/upstage/) 的文档解析和事实核查功能来实现 RAG。

In [None]:
# 导入所需的类型和模块
from typing import List

from langchain_community.vectorstores import DocArrayInMemorySearch
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.runnables.base import RunnableSerializable
from langchain_upstage import (
    ChatUpstage,
    UpstageDocumentParseLoader,
    UpstageEmbeddings,
    UpstageGroundednessCheck,
)

# Chat model used to generate answers (constructed with no arguments).
model = ChatUpstage()

# PDF files to parse and index; replace the placeholders with real paths.
files = [
    "/PATH/TO/YOUR/FILE.pdf",
    "/PATH/TO/YOUR/FILE2.pdf",
]

# Parse the PDFs with the Upstage Document Parse loader, using the
# "element" split mode, and load the resulting documents.
loader = UpstageDocumentParseLoader(file_path=files, split="element")
docs = loader.load()

# Embed the loaded documents with the SOLAR embedding model, index them in
# an in-memory vector store, and expose that store as a retriever.
embeddings = UpstageEmbeddings(model="solar-embedding-1-large")
vectorstore = DocArrayInMemorySearch.from_documents(docs, embedding=embeddings)
retriever = vectorstore.as_retriever()

# Prompt that restricts the model to the supplied context; it expects the
# input variables `context` and `question`.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Converts the chat model's message output into a plain string.
output_parser = StrOutputParser()

# Question to answer; the same text drives retrieval and the prompt, so it
# is defined once instead of being repeated as a literal.
question = "How many parameters in SOLAR model?"

# Fetch the documents relevant to the question. `invoke` is the supported
# entry point; `get_relevant_documents` is deprecated.
retrieved_docs = retriever.invoke(question)

# Checker that judges whether an answer is supported by the given context.
groundedness_check = UpstageGroundednessCheck()

# The chain does not change between attempts, so build it once up front.
chain: RunnableSerializable = RunnablePassthrough() | prompt | model | output_parser

# Upper bound on regeneration attempts. Without a cap, a question the
# retrieved documents cannot answer would loop forever and issue API calls
# on every pass.
max_attempts = 5

groundedness = ""
for _attempt in range(max_attempts):
    # Generate an answer from the retrieved context.
    result = chain.invoke(
        {
            "context": retrieved_docs,
            "question": question,
        }
    )

    # Check the answer against the same context it was generated from.
    groundedness = groundedness_check.invoke(
        {
            "context": retrieved_docs,
            "answer": result,
        }
    )

    # Stop as soon as the answer is judged to be grounded.
    if groundedness == "grounded":
        break
else:
    # Every attempt was rejected: `result` holds the last (ungrounded)
    # answer and `groundedness` holds the last verdict.
    print(
        f"No grounded answer after {max_attempts} attempts "
        f"(last verdict: {groundedness!r})."
    )