In [8]:
!pip install langchain_community langchain_openai langchainhub faiss-cpu pdfminer.six > /dev/null

In [9]:
import os

# Credentials are taken from the environment so real keys never need to be pasted
# into the notebook. The original placeholder strings remain as fallbacks so the
# cell still runs when the variables are unset.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY') or 'OPENAI_API_KEY'
# Endpoint handed to the ChatOpenAI / OpenAIEmbeddings clients as base_url.
BASE_URL = 'https://open.bigmodel.cn/api/paas/v4'
# Chat model name; the commented line below is an alternative to switch to.
MODEL_NAME = 'glm-4-plus'
# MODEL_NAME = 'glm-z1-flashx'
EMBEDDING_MODEL_NAME = 'embedding-3'
# Hub identifier of the agent prompt. The constant's spelling ("PROMT") is kept
# because other cells reference it by this name.
PROMT_NAME = 'hwchase17/openai-functions-agent'
TAVILY_API_KEY = os.environ.get('TAVILY_API_KEY') or 'TAVILY_API_KEY'

In [10]:
from langchain import hub

# Fetch the agent prompt template from the LangChain hub; the agent cell reuses `prompt`.
prompt = hub.pull(PROMT_NAME)

# Render the template once with sample values to inspect the messages it produces.
resp = prompt.invoke(dict(
    input="北京今天天气怎么样",
    agent_scratchpad=["tavily_search_results_json"],
))
print(resp)

messages=[SystemMessage(content='You are a helpful assistant', additional_kwargs={}, response_metadata={}), HumanMessage(content='北京今天天气怎么样', additional_kwargs={}, response_metadata={}), HumanMessage(content='tavily_search_results_json', additional_kwargs={}, response_metadata={})]




In [11]:
from langchain_openai import ChatOpenAI

# Chat client built from the notebook's config constants (model name, key, endpoint).
model = ChatOpenAI(
    base_url=BASE_URL,
    api_key=OPENAI_API_KEY,
    model=MODEL_NAME,
)

In [12]:
def get_search_tool():
    """Create the Tavily web-search tool used by the agent.

    The tool is configured with TAVILY_API_KEY and capped at 5 results per query.
    For a quick manual check, call ``.invoke('<query>')`` on the returned tool
    and print the response.

    Returns:
        A TavilySearchResults instance.
    """
    # Imported here so the dependency is only needed when the tool is requested.
    from langchain_community.tools.tavily_search import TavilySearchResults

    return TavilySearchResults(tavily_api_key=TAVILY_API_KEY, max_results=5)

In [13]:
from langchain_community.document_loaders import WebBaseLoader, PDFMinerLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.tools.retriever import create_retriever_tool
import os

def get_vector_store(web_path, embedding):
    """Load a document, split it into chunks, and index them in a new FAISS store.

    Args:
        web_path: Location of the source document. A value ending in '.pdf' is
            read with PDFMinerLoader; anything else goes through WebBaseLoader.
        embedding: Embedding object passed to ``FAISS.from_documents``.

    Returns:
        The FAISS vector store built from the split documents.
    """
    if web_path.endswith('.pdf'):
        loader = PDFMinerLoader(web_path)
    else:
        loader = WebBaseLoader(web_path)
    documents = loader.load()

    # Chunk size 2000 with an overlap of 100 between neighbouring chunks.
    text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=100, chunk_size=2000)
    chunks = text_splitter.split_documents(documents)

    # Report how many chunks are about to be embedded.
    print(len(chunks))
    return FAISS.from_documents(chunks, embedding)

def get_store_name(path):
    """Derive a local vector-store name from a file path or URL.

    The name is the last path segment with its final extension removed, e.g.
    ``'https://arxiv.org/pdf/2106.09685.pdf'`` -> ``'2106.09685'``.

    Compared with plain slicing on the last '/' and '.', this also:
      * ignores a URL's query string and fragment (``paper.pdf?download=1``),
      * ignores trailing slashes (``https://host/blog/post/`` -> ``'post'``),
      * keeps dot-files intact (``'.env'`` -> ``'.env'``) instead of returning ''.

    Args:
        path: File path or URL of the source document.

    Returns:
        The non-empty store name.

    Raises:
        ValueError: If no name can be derived (e.g. ``path`` is empty). An empty
            name would make the store save into the current directory and the
            "already cached" check would never succeed.
    """
    trimmed = path
    if '://' in trimmed:
        # Only URLs carry query strings / fragments; local file names may
        # legitimately contain '?' or '#', so leave those untouched.
        trimmed = trimmed.split('#', 1)[0].split('?', 1)[0]
    trimmed = trimmed.rstrip('/')

    base_name = trimmed[trimmed.rfind('/') + 1:]
    stem, dot, _extension = base_name.rpartition('.')
    # No dot at all, or only a leading dot: the whole segment is the name.
    store_name = stem if dot and stem else base_name

    if not store_name:
        raise ValueError(f'cannot derive a vector store name from path {path!r}')
    return store_name

def load_vector_store(store_name, web_path, embedding):
    """Return the FAISS store saved under ``store_name``, building it on first use.

    Args:
        store_name: Filesystem path of the saved store; also where a newly built
            store is written.
        web_path: Source document location, used only when the store must be built.
        embedding: Embedding object used for loading or building the store.

    Returns:
        The loaded or freshly built FAISS vector store.
    """
    if not os.path.exists(store_name):
        # Nothing cached yet: build the index from the source and persist it.
        built_store = get_vector_store(web_path, embedding)
        built_store.save_local(store_name)
        print('本地向量数据库保存成功', store_name)
        return built_store

    print('加载本地向量数据库', store_name)
    # NOTE(review): allow_dangerous_deserialization=True opts in to loading that
    # the library itself labels dangerous; only point this at stores written by
    # this notebook, never at files from an untrusted source.
    return FAISS.load_local(store_name, embedding, allow_dangerous_deserialization=True)

def get_retriever_tool(path, description):
    """Build a retriever tool backed by a locally cached FAISS index of ``path``.

    Args:
        path: File path or URL of the document to index; also determines the
            store name via ``get_store_name``.
        description: Tool description passed to ``create_retriever_tool``.

    Returns:
        The tool created by ``create_retriever_tool``; its retriever returns the
        top 5 matches per query.
    """
    embedding_model = OpenAIEmbeddings(
        api_key=OPENAI_API_KEY,
        base_url=BASE_URL,
        # Use the shared config constant rather than repeating the literal here.
        model=EMBEDDING_MODEL_NAME,
    )
    store_name = get_store_name(path)
    vector_store = load_vector_store(store_name, path, embedding_model)
    retriever = vector_store.as_retriever(search_kwargs={'k': 5})
    # NOTE(review): the tool name contains a space (and any dots from the store
    # name); some function-calling endpoints restrict tool names to
    # letters/digits/'_'/'-' — confirm the target endpoint accepts this.
    tool = create_retriever_tool(retriever, f'{store_name} retriever', description=description)
    return tool

In [17]:
from langchain.agents import AgentExecutor, create_tool_calling_agent

# Tools offered to the agent: web search plus a retriever over the LoRA paper.
# For a search-only run use instead: tools = [get_search_tool()]
tools = [
    get_search_tool(),
    get_retriever_tool(
        path='https://arxiv.org/pdf/2106.09685.pdf',
        description='大语言模型的低秩矩阵适应 LOW-RANK ADAPTATION OF LARGE LANGUAGE MODELS',
    ),
]

# Bind model, tools and prompt into an agent; verbose=True prints the call trace.
agent = create_tool_calling_agent(model, tools, prompt)
executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

# Left as the final expression so the notebook displays the result dict.
executor.invoke({"input": "什么是LOW-RANK ADAPTATION，查一下它的使用原理和范围是什么？"})

加载本地向量数据库 2106.09685


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `2106.09685 retriever` with `{'query': 'LOW-RANK ADAPTATION'}`


[0m[33;1m[1;3mLORA: LOW-RANK ADAPTATION OF LARGE LAN-
GUAGE MODELS

Phillip Wallis

Yelong Shen∗
Shean Wang

Edward Hu∗
Yuanzhi Li
Microsoft Corporation
{edwardhu, yeshe, phwallis, zeyuana,
yuanzhil, swang, luw, wzchen}@microsoft.com
yuanzhil@andrew.cmu.edu
(Version 2)

Lu Wang

Weizhu Chen

Zeyuan Allen-Zhu

1
2
0
2

t
c
O
6
1

]
L
C
.
s
c
[

2
v
5
8
6
9
0
.
6
0
1
2
:
v
i
X
r
a

ABSTRACT

An important paradigm of natural language processing consists of large-scale pre-
training on general domain data and adaptation to particular tasks or domains. As
we pre-train larger models, full ﬁne-tuning, which retrains all model parameters,
becomes less feasible. Using GPT-3 175B as an example – deploying indepen-
dent instances of ﬁne-tuned models, each with 175B parameters, is prohibitively
expensive. We propose Low-Rank Adaptation, or

{'input': '什么是LOW-RANK ADAPTATION，查一下它的使用原理和范围是什么？',
 'output': 'Low-Rank Adaptation (LoRA) 是一种用于自然语言处理模型的适应方法，特别适用于大规模预训练模型。其核心思想是在保持预训练模型权重不变的情况下，通过在每个Transformer层中注入可训练的低秩分解矩阵来减少下游任务中可训练参数的数量。这种方法的主要优点包括：\n\n1. **减少可训练参数**：相比于完全微调（fine-tuning）所有模型参数，LoRA显著减少了需要训练的参数数量。例如，在GPT-3 175B模型上，LoRA可以将可训练参数减少10,000倍，GPU内存需求减少3倍。\n\n2. **提高训练效率**：由于减少了参数数量，LoRA具有更高的训练吞吐量。\n\n3. **保持模型质量**：尽管减少了可训练参数，LoRA在RoBERTa、DeBERTa、GPT-2和GPT-3等模型上的表现与完全微调相当甚至更优。\n\n4. **无额外推理延迟**：与适配器（adapter）方法不同，LoRA不会引入额外的推理延迟。\n\n### 使用原理\n\nLoRA的工作原理基于以下观察：预训练语言模型在适应特定任务时，其权重的更新具有低“内在秩”。具体来说，对于预训练的权重矩阵 \\( W_0 \\)，其更新被约束为低秩分解的形式 \\( W_0 + \\Delta W = W_0 + BA \\)，其中 \\( B \\in \\mathbb{R}^{d \\times r} \\)，\\( A \\in \\mathbb{R}^{r \\times k} \\)，且秩 \\( r \\ll \\min(d, k) \\)。在训练过程中，\\( W_0 \\) 被冻结，而 \\( A \\) 和 \\( B \\) 包含可训练参数。\n\n### 应用范围\n\nLoRA适用于需要在大规模预训练模型上进行特定任务或领域适应的场景，尤其在这些模型参数量巨大、完全微调不切实际的情况下。具体应用包括但不限于：\n\n- **文本分类**：在预训练模型基础上适应特定分类任务。\n- **问答系统**： fine-tuning模型以更好地回答特定领域的问题。\n- **文本生成**：调整模型以生成特定风格的文本。