## 1. 根据课堂RAG示例，完成外部文档导入并进行RAG检索的过程。
### 外部PDF文档：https://storage.googleapis.com/deepmind-media/Era-of-Experience%20/The%20Era%20of%20Experience%20Paper.pdf
### 使用 `langchain_community.document_loaders.PDFMinerLoader` 加载 PDF 文件。
### `docs = PDFMinerLoader(path).load()`



In [1]:
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv, find_dotenv
import os
from langchain_community.document_loaders import PDFMinerLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain import hub
from langchain_core.runnables import RunnablePassthrough


if __name__ == '__main__':
    # Script entry point: build (or reload) a FAISS index over the PDF, then
    # answer a single question through a retrieval-augmented generation chain.

    load_dotenv(find_dotenv())

    # Every artifact lives in one working directory. Deriving both paths from
    # a single base keeps the existence check, save and load targets in sync.
    base_dir = '/Users/peiqi/code/AiPremiumClass/李思佳/week15'
    pdf_path = os.path.join(base_dir, 'The Era of Experience Paper.pdf')
    index_dir = os.path.join(base_dir, 'local_save')

    # Chat model used for the final answer; credentials and endpoint are read
    # from the environment (populated by the .env file loaded above).
    llm = ChatOpenAI(
        model="gpt-4o-mini",
        api_key=os.environ['API_KEY'],
        base_url=os.environ['BASE_URL']
    )

    # Embedding model used both to build the index and to embed queries.
    embedding_model = OpenAIEmbeddings(
        api_key=os.environ['API_KEY'],
        base_url=os.environ['BASE_URL']
    )

    if not os.path.exists(index_dir):
        # No saved index yet: load the PDF into LangChain documents.
        loader = PDFMinerLoader(file_path=pdf_path)
        docs = loader.load()

        # Split the loaded documents into overlapping chunks before embedding.
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            separators=['\n\n', '\n', '']
        )
        splited_docs = splitter.split_documents(docs)

        # Embed every chunk and store the vectors in an in-memory FAISS index.
        vector_store = FAISS.from_documents(
            documents=splited_docs,
            embedding=embedding_model
        )

        # Persist the index so later runs skip loading, splitting and embedding.
        vector_store.save_local(index_dir)
        print('faiss数据库本地化保存成功')
    else:
        # SECURITY: allow_dangerous_deserialization=True lets FAISS unpickle
        # the saved docstore. Only point index_dir at a directory this script
        # wrote itself; never load an index obtained from an untrusted source.
        vector_store = FAISS.load_local(
            index_dir,
            embeddings=embedding_model,
            allow_dangerous_deserialization=True
        )

        print('加载faiss数据库本地化记录成功')

    # Retriever returning the 6 most similar chunks for a query.
    retriever = vector_store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 6}
    )

    def format_docs(docs):
        """Join the page_content of each retrieved document with blank lines."""
        return '\n\n'.join(doc.page_content for doc in docs)

    # Prompt template pulled from the LangChain hub.
    prompt = hub.pull("rlm/rag-prompt")

    # RAG chain: the question is passed through unchanged, while the same
    # question is also sent to the retriever and the hits are flattened into
    # the prompt's "context" slot.
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser())

    respons = rag_chain.invoke("What's the main actions and observations, please translate to Chinese")
    print(respons)



加载faiss数据库本地化记录成功




主要的动作和观察包括：代理人会在实际环境中自主行动，利用来自环境的信号和数据来调整其行为和学习方法。同时，代理人通过观察结果并相应更新原则，从而不断优化其理解以促进长期成功。通过这种方式，代理人能够从外部事件和信号中学习，而不仅仅依赖于人类的偏好。
