In [34]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from dotenv import load_dotenv, find_dotenv
from langchain_community.document_loaders import WebBaseLoader, PyMuPDFLoader, PDFMinerLoader

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

from langchain import hub

import os


In [27]:
# Populate the process environment from the .env file located by find_dotenv().
load_dotenv(find_dotenv())

# Not used in this cell; kept here so the names remain available to other cells.
from langchain_core.messages import HumanMessage, SystemMessage

# Resolve the endpoint settings once so both clients are guaranteed to share
# the same values. BASE_URL is mandatory: indexing os.environ raises KeyError
# here, before any client is constructed, instead of first building the chat
# client with base_url=None and only failing on the embeddings client.
api_key = os.getenv("API_KEY")
base_url = os.environ["BASE_URL"]

# Chat model reached through the OpenAI-compatible endpoint configured above.
llm = ChatOpenAI(
    model='glm-4-air-250414',
    api_key=api_key,
    base_url=base_url,
)

# Embedding model on the same endpoint.
# NOTE(review): chunk_size presumably caps how many texts go into one
# embeddings request -- confirm against the provider's batch limit.
embedding_model = OpenAIEmbeddings(
    model='embedding-3',
    api_key=api_key,
    base_url=base_url,
    chunk_size=64,
)

# Smoke test: embed a throwaway string and print the length of the vector.
x = embedding_model.embed_query("eeeee")
print(len(x))

2048


In [32]:


# Locations used by this cell: where the FAISS index is persisted, and the PDF
# it is built from.
INDEX_DIR = 'local_save'
SOURCE_PDF = 'The Era of Experience Paper.pdf'

# FAISS.save_local() writes "<index_name>.faiss" and "<index_name>.pkl"
# (index_name defaults to "index"). Check for those files rather than for the
# bare directory, so that an empty or half-written folder triggers a rebuild
# instead of a failing load.
index_is_saved = all(
    os.path.isfile(os.path.join(INDEX_DIR, file_name))
    for file_name in ('index.faiss', 'index.pkl')
)

if not index_is_saved:
    # Load the PDF into Document objects.
    pages = PDFMinerLoader(file_path=SOURCE_PDF).load()

    # Split the loaded documents into overlapping chunks.
    splitter = RecursiveCharacterTextSplitter(
        separators=['\n\n', '\n', ''],
        chunk_size=1000,
        chunk_overlap=100,
    )
    chunks = splitter.split_documents(pages)

    # Embed every chunk and build the vector store.
    vector_store = FAISS.from_documents(
        documents=chunks,
        embedding=embedding_model,
    )

    # Persist the vector store to disk so later runs can skip the embedding step.
    vector_store.save_local(INDEX_DIR)
    print('faiss数据库本地化保存成功！')
else:
    # SECURITY: load_local() unpickles data stored alongside the index, and
    # unpickling can execute arbitrary code. Only load index directories that
    # this notebook created itself.
    vector_store = FAISS.load_local(
        INDEX_DIR,
        embeddings=embedding_model,
        allow_dangerous_deserialization=True,
    )
    print('加载faiss数据库本地化记录成功！')

faiss数据库本地化保存成功！


In [33]:
# Search settings handed to the retriever.
# NOTE(review): "k" is presumably the number of chunks returned per query -- confirm.
retrieval_options = {"k": 2}

# Re-open the index persisted in 'local_save'. The explicit opt-in flag is
# required because loading deserializes stored data; only use it on index
# directories produced by this notebook.
vector_store = FAISS.load_local(
    'local_save',
    allow_dangerous_deserialization=True,
    embeddings=embedding_model,
)

# Expose the vector store through the retriever interface used by the chain.
retriever = vector_store.as_retriever(search_kwargs=retrieval_options)
def format_docs(docs):
    """Merge documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in input order, separated by a blank line.
        An empty iterable yields an empty string.
    """
    contents = []
    for document in docs:
        contents.append(document.page_content)
    separator = '\n\n'
    return separator.join(contents)

# Prompt template pulled from the LangChain hub.
prompt = hub.pull('rlm/rag-prompt')

# First stage of the chain: the incoming question is passed through unchanged
# under "question", and is also sent to the retriever, whose results are
# flattened into one string by format_docs under "context".
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Inputs -> prompt -> chat model -> plain-string output.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

# Run a single RAG query and show the answer.
question = "what is the era of experience?"
response = rag_chain.invoke(question)
print(response)



The era of experience is characterized by AI agents learning from vast, continuous streams of experiential data, breaking beyond human-centric limitations by grounding actions and rewards in the environment. It involves improving reinforcement learning concepts for long-term, autonomous interactions and developing new methods for reward functions, value estimation, and world modeling. This era aims to enable AI to surpass human knowledge and priors.
