In [13]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from dotenv import load_dotenv, find_dotenv
from langchain_community.document_loaders import WebBaseLoader, PyMuPDFLoader, PDFMinerLoader

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

from langchain import hub

import os


In [14]:
load_dotenv(find_dotenv())

llm = ChatOpenAI(
    model='glm-4-air-250414',
    api_key=os.getenv("API_KEY"),
    base_url=os.getenv("BASE_URL"),
)

embedding_model = OpenAIEmbeddings(
    model='embedding-3',
    api_key=os.getenv('API_KEY'),
    base_url=os.environ['BASE_URL'],
    chunk_size=64
)

from langchain_core.messages import HumanMessage, SystemMessage

x = embedding_model.embed_query("eeeee")
print(len(x))

2048


In [15]:


if not os.path.exists('local_save'):
    loader = PDFMinerLoader(file_path='The Era of Experience Paper.pdf')
    docs = loader.load()

    # TextSplitter实现加载后Document分割
    splitter = RecursiveCharacterTextSplitter(
        separators=['\n\n','\n',''],
        chunk_size=1000,
        chunk_overlap=100,
    )
    splited_docs = splitter.split_documents(docs)

    # 创建向量数据库（内存中）对chunk进行向量化和存储
    vector_store = FAISS.from_documents(
        documents=splited_docs,
        embedding=embedding_model,
    )
    # 向量数据库本地化存储
    vector_store.save_local('local_save')
    print('faiss数据库本地化保存成功！')
else:
    vector_store = FAISS.load_local(
        'local_save',
        embeddings=embedding_model,
        allow_dangerous_deserialization=True
    )
    print('加载faiss数据库本地化记录成功！')

加载faiss数据库本地化记录成功！


In [16]:
from langchain_tavily import TavilySearch
search_tool = TavilySearch(max_results=5, tavily_api_key=os.getenv('TAVILY_API_KEY'))


In [19]:
from langchain.agents import create_tool_calling_agent, AgentExecutor
from langchain_core.tools import create_retriever_tool

vector_store = FAISS.load_local(
    'local_save',
    embeddings=embedding_model,
    allow_dangerous_deserialization=True
)
retriever = vector_store.as_retriever(search_kwargs={"k":2})

retriever_tool = create_retriever_tool(
    retriever, "book_retriever",
    description="围绕《把时间当作朋友》核心内容，从心智力量、时间管理、自律坚持、兴趣与努力的关系、对成功学的反思等多方面，阐述了通过开启和管理心智、合理规划时间、克服懒惰、持续行动等实现个人成长与改变的理念。"
)
# prompt
prompt = hub.pull("hwchase17/openai-functions-agent")
tools = [search_tool,retriever_tool]
agent = create_tool_calling_agent(
    llm=llm, prompt=prompt, tools=tools)

# agent executor
executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True)

msgs = executor.invoke({"input":"what is the era of experience?"})

print(msgs['output'])





[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `tavily_search` with `{'query': 'what is the era of experience'}`


[0m[36;1m[1;3m{'query': 'what is the era of experience', 'follow_up_questions': None, 'answer': None, 'images': [], 'results': [{'url': 'https://medium.com/@la_boukouffallah/welcome-to-the-era-of-experience-028440180a86', 'title': 'Welcome to The Era of Experience - Medium', 'content': "The Era of Experience is a bold vision for AI's future. By moving beyond human data and learning through interaction with the world, AI could achieve", 'score': 0.9307171, 'raw_content': None}, {'url': 'https://bdtechtalks.substack.com/p/ais-era-of-experience', 'title': 'AI\'s "Era of Experience" - by Ben Dickson - TechTalks', 'content': 'The "Era of Experience" envisions AI\'s evolution beyond human data, emphasizing self-learning from real-world interactions. But challenges loom', 'score': 0.8687491, 'raw_content': None}, {'url': 'https://storage.googleapis.com

In [20]:
import requests

url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)

# 检查是否成功
if response.status_code == 200:
    with open("input.txt", "w", encoding="utf-8") as f:
        f.write(response.text)
    print("下载完成，已保存为 input.txt")
else:
    print(f"下载失败，状态码：{response.status_code}")

下载完成，已保存为 input.txt
