# Langchain集成

In [1]:
# restart kernel
from IPython.core.display import HTML
HTML("<script>Jupyter.notebook.kernel.restart()</script>")

In [2]:
import boto3
import pprint
from botocore.client import Config
from langchain.llms.bedrock import Bedrock
from langchain.retrievers.bedrock import AmazonKnowledgeBasesRetriever

pp = pprint.PrettyPrinter(indent=2)

kb_id = "VW9HMVFQT2" # replace it with your Knowledge base id.

bedrock_config = Config(connect_timeout=120, read_timeout=120, retries={'max_attempts': 0})
bedrock_client = boto3.client('bedrock-runtime')
bedrock_agent_client = boto3.client("bedrock-agent-runtime",
                              config=bedrock_config
                              )

model_kwargs_claude = {
    "temperature": 0,
    "top_k": 10,
    "max_tokens_to_sample": 3000
}

llm = Bedrock(model_id="anthropic.claude-v2",
              model_kwargs=model_kwargs_claude,
              client = bedrock_client,)

### Retrieve API:流程

从LangChain创建一个`AmazonKnowledgeBasesRetriever`对象,它将调用由Amazon Bedrock的Knowledge Bases提供的`Retreive API`,将用户查询转换为嵌入向量,搜索知识库,并返回相关结果,让您可以对基于语义搜索结果构建自定义工作流程。`Retreive API`的输出包括`retrieved text chunks`、源数据的`location type`和`URI`,以及检索的`scores`。

In [3]:

retriever = AmazonKnowledgeBasesRetriever(
        knowledge_base_id=kb_id,
        retrieval_config={"vectorSearchConfiguration": {"numberOfResults": 4}},
        # endpoint_url=endpoint_url,
        # region_name="us-east-1",
        # credentials_profile_name="<profile_name>",
    )
docs = retriever.get_relevant_documents(
        query="amazon 核心业务有哪些?"
    )
pp.pprint(docs)

[ Document(page_content='表2：AWS 核心经营指标   单位：百万美元 Q1-22 Q2-22 Q3-22 Q4-22 Q1-23   北美销售收入 69,244 74,430 78,843 93,363 76,881   YOY 8% 10% 20% 13% 11%   营业利润 -1568 -627 -412 -240 898   营业利润率 -2.3% -0.8% -0.5% -0.3% 1.2%   国际销售收入 28,759 27,065 27,720 34,463 29,123   YOY -6% -12% -5% -8% 1%   营业利润 -1281 -1771 -2466 -2228 -1247   营业利润率 -4.5% -6.5% -8.9% -6.5% -4.3%   AWS 18,441 19,739 20,538 21,378 21,354   YOY 37% 33% 27% 20% 16%   营业利润 6,518.00 5,715.00 5,403.00 5,205.00 5,123.00   营业利润率 35.3% 29.0% 26.3% 24.3% 24.0%   资料来源：公司公告、国信证券经济研究所整理   主要业务分析：   ①零售走出周期底部，利润率持续修复。总体零售板块营收稳步增长、利润持续   修复，逐渐走出疫情与加息周期宏观的影响。在本季度，北美地区的广义零售业   务同比上涨了 11%，这一增长速度超过了美国零售总收入的一季度增长率 7%。利   润率的增加主要是由于运营成本的减少。过去的一年里，亚马逊裁减了超过   70,000 名员工，并且停止了物流仓库基础设施以及在线医疗等次要业务。在今年   的第一季度，亚马逊继续大幅度削减了员工和基础设施等运营成本。', metadata={'location': {'type': 'S3', 's3Location': {'uri': 's3://smartcrawlerstack-knowledgebasebucketc011dd60-wgczt2xdmxhe/file_260cd4f8-e86d-4792-9741-8613e5b745d8.pdf'}}, 'score': 0.76235014}),
  Document(page_con

`score`: 你可以查看每个返回的文本块的相关分数,这个分数表示它与查询条件的相关程度,即它与查询条件的匹配程度有多紧密。

In [4]:
from langchain.prompts import PromptTemplate

PROMPT_TEMPLATE = """
    Human: You are a financial advisor AI system, and provides answers to questions by using fact based and statistical information when possible. 
    Use the following pieces of information to provide a concise answer to the question enclosed in <question> tags. 
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    <context>
    {context}
    </context>

    <question>
    {question}
    </question>

    The response should be specific and use statistics or numbers when possible.

    Assistant:"""
claude_prompt = PromptTemplate(template=PROMPT_TEMPLATE, 
                                input_variables=["context","question"])

In [5]:
# fetch context from the response
def get_contexts(docs):
    contexts = []
    for retrievedResult in docs: 
        contexts.append(retrievedResult.page_content)
    return contexts

In [6]:
contexts = get_contexts(docs)
pp.pprint(contexts)

[ '表2：AWS 核心经营指标   单位：百万美元 Q1-22 Q2-22 Q3-22 Q4-22 Q1-23   北美销售收入 69,244 '
  '74,430 78,843 93,363 76,881   YOY 8% 10% 20% 13% 11%   营业利润 -1568 -627 -412 '
  '-240 898   营业利润率 -2.3% -0.8% -0.5% -0.3% 1.2%   国际销售收入 28,759 27,065 27,720 '
  '34,463 29,123   YOY -6% -12% -5% -8% 1%   营业利润 -1281 -1771 -2466 -2228 '
  '-1247   营业利润率 -4.5% -6.5% -8.9% -6.5% -4.3%   AWS 18,441 19,739 20,538 '
  '21,378 21,354   YOY 37% 33% 27% 20% 16%   营业利润 6,518.00 5,715.00 5,403.00 '
  '5,205.00 5,123.00   营业利润率 35.3% 29.0% 26.3% 24.3% 24.0%   '
  '资料来源：公司公告、国信证券经济研究所整理   主要业务分析：   ①零售走出周期底部，利润率持续修复。总体零售板块营收稳步增长、利润持续   '
  '修复，逐渐走出疫情与加息周期宏观的影响。在本季度，北美地区的广义零售业   务同比上涨了 11%，这一增长速度超过了美国零售总收入的一季度增长率 '
  '7%。利   润率的增加主要是由于运营成本的减少。过去的一年里，亚马逊裁减了超过   70,000 '
  '名员工，并且停止了物流仓库基础设施以及在线医疗等次要业务。在今年   的第一季度，亚马逊继续大幅度削减了员工和基础设施等运营成本。',
  '表2：AWS 核心经营指标   单位：百万美元 Q1-22 Q2-22 Q3-22 Q4-22 Q1-23   北美销售收入 69,244 '
  '74,430 78,843 93,363 76,881   YOY 8% 10% 20% 13% 11%   营业利润 -1568 -627 -412 '
  '-240 898   营业利润率 -2.3% -0.8% 

### Initiate the user prompt and response via the LLM

Here, we are going to format our prompt using the context generated by the retrieve API as well as the user query to get the final response.

In [7]:
query = "amazon 核心业务有哪些?"
prompt = claude_prompt.format(context=contexts, 
                                 question=query)

In [8]:
response = llm(prompt)
pp.pprint(response)

(' 根据给定的内容,亚马逊的核心业务主要包括:\n'
 '\n'
 '1. 零售业务,包括线上自营零售、线下实体零售、第三方卖家服务、订阅服务和广告业务。\n'
 '\n'
 '2. 云服务业务 AWS,是亚马逊增长最快的业务板块。\n'
 '\n'
 '3. AI 技术应用,有望提升零售和云服务业务的效率和质量。')


## 将上面定义的检索器(retriever)和大型语言模型(LLM)与`RetrievalQA`链集成,构建问答应用程序。

In [9]:
from langchain.chains import RetrievalQA
qa = RetrievalQA.from_chain_type(
                                    llm=llm,
                                    chain_type="stuff",
                                    retriever=retriever,
                                    return_source_documents=True,
                                    chain_type_kwargs={"prompt": claude_prompt}
                                )

In [10]:
answer = qa(query)
print(answer['result'])

 根据上下文提供的信息,亚马逊的核心业务主要包括:

1. 零售业务,包括线上自营零售、线下实体零售、第三方卖家服务、订阅服务和广告业务。

2. 云服务业务 AWS,是亚马逊的第二大收入来源和利润支柱。

3. AI 技术应用,有望进一步提升零售和云服务业务的运营效率和盈利能力。

4. 根据第一季度财报,零售业务营收占比最大,其次是AWS云服务和第三方卖家服务。


In [13]:
import os

from langchain_community.document_loaders import CSVLoader
from langchain_community.document_loaders import BSHTMLLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import WikipediaLoader

from langchain_community.vectorstores import Chroma

from langchain_community.embeddings import BedrockEmbeddings
from langchain_community.chat_models import BedrockChat
from langchain.schema import HumanMessage, AIMessage, SystemMessage
from langchain.prompts import PromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
from langchain.chains import TransformChain, LLMChain, SimpleSequentialChain, SequentialChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.agents import load_tools, initialize_agent, AgentType
import boto3

import configparser

from langchain.prompts import PromptTemplate

PROMPT_TEMPLATE = """Use the following format:
Question: {question}
Thought: summarize based on Observation
Action: the action to take, should be one of: {tool_names}
Action Input: the input to the action
Observation: the result of the action
Thought: do not repeat Question and Thought, summarize based on Observation and ouput Final Answer
Final Answer: the final answer to the original input question
Please respect the order of the steps Thought/Action/Action Input/Observation
"""

agent_prompt = PromptTemplate(template=PROMPT_TEMPLATE, 
                                input_variables=["tool_names","question"])


# Konfigurationsdatei laden
# config = configparser.ConfigParser()
# config.read('config.ini')
# sections = config.sections()  
# print(sections)
session = boto3.Session()

def run_demo(session,prompt):
    bedrock = session.client('bedrock')
    bedrock_runtime = session.client('bedrock-runtime', region_name="us-east-1")

    model_id = "anthropic.claude-3-sonnet-20240229-v1:0"
    model_kwargs = { "temperature": 0.0, 'max_tokens_to_sample': 200 }

    # serpapi_api_key = config.get('serpapi', 'api_key')
    # Set SerpApi ApiKey
    os.environ["SERPAPI_API_KEY"] = 'a724eed26f083f0b890c7fcb907242e9b999bf2ed5d54172f4671550697f290d'# 这里需要改为自己的SERPAPI_API_KEY

    demo_agents_serpapi(bedrock_runtime, prompt=prompt)

    
def demo_agents_serpapi(bedrock_runtime, 
                        embedding_model_id : str = "amazon.titan-embed-text-v1", 
                        llm_model_id : str = "anthropic.claude-v2", 
                        llm_model_kwargs : dict = { "temperature": 0.0 },
                        prompt = ""):

    print("Call demo_agents_serpapi")

    embeddings = BedrockEmbeddings(
        client = bedrock_runtime,
        model_id = embedding_model_id
    )

    llm = BedrockChat(
        client = bedrock_runtime,
        model_id = llm_model_id,
        model_kwargs = llm_model_kwargs,
    )

    tools = load_tools(['serpapi', 'llm-math'], llm=llm)

    agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose = True, max_iterations = 5,handle_parsing_errors="Return summary according to Observation and complete search result")
    
    query = "amazon 核心业务有哪些?"
    prompt = agent_prompt.format(tool_names=tools,question=query)

    result = agent.run(input=prompt)


    print(result)

run_demo(session,query)

Call demo_agents_serpapi


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m Question: amazon 核心业务有哪些?
Thought: I will search for the answer to this question.
Action: Search
Action Input: amazon core business[0m
Observation: [36;1m[1;3m['What is Amazon\'s main business? Amazon divides its business into three segments: North America, International, and AWS (Amazon Web Services). "North America" ...', "The core elements of Amazon's business strategy are its broad product range, affordable prices, and quick shipping via an advanced logistics network. The ...", 'Amazon has five main business models, an eCommerce, a Cloud Platform with AWS, runs subscriptions with Prime, third-party sellers and it also ...', 'Although Amazon deals in numerous products and has set foot in multiple industries, its core business model is based on an ecommerce market ...', 'The business model canvas of Amazon works with several partners and external stakeholders to deliver the best possible service

In [14]:
prompt

"\n    Human: You are a financial advisor AI system, and provides answers to questions by using fact based and statistical information when possible. \n    Use the following pieces of information to provide a concise answer to the question enclosed in <question> tags. \n    If you don't know the answer, just say that you don't know, don't try to make up an answer.\n    <context>\n    ['表2：AWS 核心经营指标   单位：百万美元 Q1-22 Q2-22 Q3-22 Q4-22 Q1-23   北美销售收入 69,244 74,430 78,843 93,363 76,881   YOY 8% 10% 20% 13% 11%   营业利润 -1568 -627 -412 -240 898   营业利润率 -2.3% -0.8% -0.5% -0.3% 1.2%   国际销售收入 28,759 27,065 27,720 34,463 29,123   YOY -6% -12% -5% -8% 1%   营业利润 -1281 -1771 -2466 -2228 -1247   营业利润率 -4.5% -6.5% -8.9% -6.5% -4.3%   AWS 18,441 19,739 20,538 21,378 21,354   YOY 37% 33% 27% 20% 16%   营业利润 6,518.00 5,715.00 5,403.00 5,205.00 5,123.00   营业利润率 35.3% 29.0% 26.3% 24.3% 24.0%   资料来源：公司公告、国信证券经济研究所整理   主要业务分析：   ①零售走出周期底部，利润率持续修复。总体零售板块营收稳步增长、利润持续   修复，逐渐走出疫情与加息周期宏观的影响。在本季度，北美地区的广义零售业   务同比上涨