In [1]:
from langchain.llms import OpenAI
from langchain.chains import LLMRequestsChain, LLMChain
from langchain.callbacks import get_openai_callback
from utils import ask_agent, ask_chain

In [2]:
# Deterministic sampling (temperature 0) so repeated extractions are comparable.
# NOTE(review): 'gpt-3.5-turbo' is a chat model; LangChain's ChatOpenAI wrapper may
# be the intended entry point for it -- confirm against the installed version.
llm = OpenAI(
    model_name='gpt-3.5-turbo',
    temperature=0,
)



In [3]:
from langchain.prompts import PromptTemplate

# Prompt for pulling every item title out of an RSS feed.
#   {query}           -- the user's question, quoted inside the instruction.
#   {requests_result} -- the fetched page text, placed between >>> and <<<.
# Literal braces of the JSON example are doubled ({{ }}) so the template
# formatter does not treat them as placeholders.
# The example is strictly valid JSON (double-quoted strings, no trailing comma)
# so that the model is not nudged towards output json.loads() would reject.
template = """Between >>> and <<< are the raw search result text from provided RSS URL.
Extract the answer to the question '{query}' or say "not found" if the information is not contained, and summarize all the information.
>>> {requests_result} <<<
Use the following JSON format to include all the titles:
{{
  "titles": [
    "aaa",
    "bbb"
  ]
}}
Extracted:"""

# Wrap the raw template text; both placeholders used in the text are declared
# explicitly so a mismatch between text and variables is caught at build time.
PROMPT = PromptTemplate(
    template=template,
    input_variables=["query", "requests_result"],
)

In [4]:
# Request chain around an LLMChain built from the shared model and the RSS prompt.
# The inputs passed later carry only "query" and "url", so the chain is expected
# to supply the prompt's `requests_result` value from the fetched page.
chain = LLMRequestsChain(
    llm_chain=LLMChain(prompt=PROMPT, llm=llm),
)

In [5]:
# `question` fills the prompt's {query} slot; "url" names the RSS feed to read.
question = "What are all the titles in this RSS feed?"
inputs = dict(
    query=question,
    url="https://rss.nytimes.com/services/xml/rss/nyt/US.xml",
)


In [6]:
# Run the RSS chain through the local `ask_chain` helper (imported from utils).
# The recorded output of this cell lists token counts and the request cost.
response = ask_chain(chain, inputs)



Total Tokens: 2453
Prompt Tokens: 2229
Completion Tokens: 224
Successful Requests: 1
Total Cost (USD): $0.004906


In [7]:
# Show the text stored under the 'output' key of the chain response.
print(response['output'])

{
  "titles": [
    "Title 42 Is Gone, but Not the Conditions Driving Migrants to the U.S.",
    "‘A Daily Game of Russian Roulette’: Homeless in San Diego",
    "North Carolina Governor Vetoes Abortion Ban but Faces Override",
    "Behind the Heartbeat of Hawaii",
    "Abortion Showdown in North Carolina May Hinge on a Single Vote",
    "Nikki Haley Says Pledging a Federal Abortion Ban Wouldn’t Be ‘Honest’",
    "5 Moments That Defined Trump’s Record on Immigration",
    "Rules for Pentagon Use of Proxy Forces Shed Light on a Shadowy War Power",
    "Meet ‘Chonkosaurus,’ the Thick Snapping Turtle Stealing Hearts",
    "DeSantis Impresses Voters and Trolls Trump in Iowa Swing",
    "Before Title 42, Congress Failed to Overhaul Immigration Policy"
  ]
}

Summary: The RSS feed contains news articles on various topics such as immigration, homelessness, abortion, politics, and wildlife. The titles of the articles are provided in the raw search result text.


In [8]:
# Prompt (written in Chinese) for extracting the quarterly shareholder table of
# an A-share company from a Sina Finance page.
#   {requests_result} -- the fetched page content, placed between >>> and <<<.
# The first JSON block is the schema with the placeholder key "date_of_quarter";
# the second shows the same structure keyed by a concrete date.
# Literal JSON braces are doubled ({{ }}) to escape them from the formatter.
# Both examples are strictly valid JSON (no trailing comma after the last array
# element) so the model is shown output that a JSON parser will accept.
template = """在 >>> 和 <<< 之间是网页的返回的HTML内容。

网页是新浪财经A股上市公司的每季度股东信息表格。

请抽取参数请求的信息。每个截至日期作为JSON返回数据的date_of_quarter。因此，当表格中有多个截止日期时，返回数据应当包括所有的日期作为key。

>>> {requests_result} <<<
请使用如下的JSON格式返回数据
{{
  "date_of_quarter": [
    {{
      "holder_name": "a",
      "percentage": "50"
    }},
    {{
      "holder_name": "b",
      "percentage": "30"
    }}
  ]
}}

例如，截至日期为2023-03-31，JSON数据应该是如下形式:

{{
  "2023-03-31": [
    {{
      "holder_name": "a",
      "percentage": "50"
    }},
    {{
      "holder_name": "b",
      "percentage": "30"
    }}
  ]
}}
Extracted:"""

# The shareholder prompt has a single placeholder: the fetched page content.
PROMPT = PromptTemplate(
    template=template,
    input_variables=["requests_result"],
)

# Rebind `chain` to a request chain that uses the shareholder prompt with the
# same shared model.
chain = LLMRequestsChain(
    llm_chain=LLMChain(prompt=PROMPT, llm=llm),
)

In [9]:
# Only a URL is needed for this prompt; the path carries the stock id (600519).
inputs = dict(
    url="https://vip.stock.finance.sina.com.cn/corp/go.php/vCI_StockHolder/stockid/600519/displaytype/30.phtml",
)

In [10]:
# Run the shareholder chain through the local `ask_chain` helper (imported from utils).
# The recorded output of this cell lists token counts and the request cost.
response = ask_chain(chain, inputs)

Total Tokens: 4097
Prompt Tokens: 3089
Completion Tokens: 1008
Successful Requests: 1
Total Cost (USD): $0.008194


In [11]:
# Direct invocation (`response = chain(inputs)`) is left disabled here; the
# response printed below is the one produced by the ask_chain call in the
# previous cell.
print(response['output'])

{
  "2023-03-31": [
    {
      "holder_name": "中国贵州茅台酒厂(集团)有限责任公司",
      "percentage": "54.06"
    },
    {
      "holder_name": "香港中央结算有限公司",
      "percentage": "7.26"
    },
    {
      "holder_name": "贵州省国有资本运营有限责任公司",
      "percentage": "4.54"
    },
    {
      "holder_name": "贵州茅台酒厂(集团)技术开发有限公司",
      "percentage": "2.22"
    },
    {
      "holder_name": "中央汇金资产管理有限责任公司",
      "percentage": "0.83"
    },
    {
      "holder_name": "深圳市金汇荣盛财富管理有限公司-金汇荣盛三号私募证券投资基金",
      "percentage": "0.66"
    },
    {
      "holder_name": "珠海市瑞丰汇邦资产管理有限公司-瑞丰汇邦三号私募证券投资基金",
      "percentage": "0.65"
    },
    {
      "holder_name": "中国证券金融股份有限公司",
      "percentage": "0.64"
    },
    {
      "holder_name": "中国工商银行-上证50交易型开放式指数证券投资基金",
      "percentage": "0.39"
    },
    {
      "holder_name": "中国人寿保险股份有限公司-传统-普通保险产品-005L-CT001沪",
      "percentage": "0.32"
    }
  ],
  "2022-12-31": [
    {
      "holder_name": "中国贵州茅台酒厂(集团)有限责任公司",
      "percentage": "54"
    },
    {
      "holder_

In [None]:
# Prompt (written in Chinese) for extracting high-speed train services between
# the two stations named in the text from a Ctrip search-result page.
#   {requests_result} -- the fetched page content, placed between >>> and <<<.
# The first JSON block is the schema with the placeholder key "train_number";
# the second shows the same structure keyed by a concrete count ("2").
# Literal JSON braces are doubled ({{ }}) to escape them from the formatter.
# Both examples are balanced, strictly valid JSON: the stray extra closing brace
# after the second example and the trailing commas after the last array element
# have been removed so the model is shown output a JSON parser will accept.
template = """在 >>> 和 <<< 之间是网页的返回的HTML内容。

网页展示了在携程上搜索青岛北站和灌南站之间的高铁班次的结果。

请抽取参数请求的信息。将"青岛北"和"灌南"之间的班次数量作为JSON返回数据的train_number。因此，当有多个班次时，返回数据应当包括所有的班次作为key。
注意: JSON数据中的target和source这两个key必须为"青岛北"或者"灌南".

>>> {requests_result} <<<
请使用如下的JSON格式返回数据
{{
  "train_number": [
    {{
      "train_id": "D2913",
      "source": "青岛北",
      "target": "灌南",
      "source_time": "07:00",
      "target_time": "09:30",
      "duration": "2小时30分",
      "price": "￥99.5"
    }},
    {{
      "train_id": "G1553",
      "source": "灌南",
      "target": "青岛北",
      "source_time": "10:00",
      "target_time": "12:30",
      "duration": "2小时51分",
      "price": "￥110"
    }}
  ]
}}

例如，当青岛北到灌南之间班次数量为2时，JSON数据应该是如下形式:
{{
  "2": [
    {{
      "train_id": "D2913",
      "source": "青岛北",
      "target": "灌南",
      "source_time": "07:00",
      "target_time": "09:30",
      "duration": "2小时30分",
      "price": "￥99.5"
    }},
    {{
      "train_id": "G1553",
      "source": "灌南",
      "target": "青岛北",
      "source_time": "10:00",
      "target_time": "12:30",
      "duration": "2小时51分",
      "price": "￥110"
    }}
  ]
}}
Extracted:"""

# The train prompt has a single placeholder: the fetched page content.
PROMPT = PromptTemplate(
    template=template,
    input_variables=["requests_result"],
)

# Rebind `chain` to a request chain that uses the train prompt with the same
# shared model.
chain = LLMRequestsChain(
    llm_chain=LLMChain(prompt=PROMPT, llm=llm),
)

In [41]:
# Ctrip train search URL; dStation / aStation are the percent-encoded departure
# and arrival station names, dDate is the (hard-coded) travel date.
inputs = dict(
    url="https://trains.ctrip.com/webapp/train/list?ticketType=1&dStation=%E9%9D%92%E5%B2%9B%E5%8C%97&aStation=%E7%81%8C%E5%8D%97&dDate=2023-06-03&rDate=2023-06-11&trainsType=gaotie-dongche&hubCityName=&highSpeedOnly=0",
)

In [42]:
# Run the train-search chain through the local `ask_chain` helper (imported from utils).
# The recorded output of this cell lists token counts and the request cost.
response = ask_chain(chain, inputs)

Total Tokens: 4093
Prompt Tokens: 3293
Completion Tokens: 800
Successful Requests: 1
Total Cost (USD): $0.008186


In [43]:
# Show the text stored under the 'output' key of the chain response.
print(response['output'])

{
  "train_number": [
    {
      "train_id": "D2916",
      "source": "青岛北",
      "target": "灌南",
      "source_time": "07:40",
      "target_time": "10:30",
      "duration": "2小时50分",
      "price": "￥127"
    },
    {
      "train_id": "D2913",
      "source": "青岛北",
      "target": "灌南",
      "source_time": "08:02",
      "target_time": "10:30",
      "duration": "2小时28分",
      "price": "￥121"
    },
    {
      "train_id": "D2923",
      "source": "青岛西",
      "target": "灌南",
      "source_time": "08:53",
      "target_time": "11:07",
      "duration": "2小时14分",
      "price": "￥94"
    },
    {
      "train_id": "G1552",
      "source": "青岛",
      "target": "灌南",
      "source_time": "10:02",
      "target_time": "12:53",
      "duration": "2小时51分",
      "price": "￥129"
    },
    {
      "train_id": "G1553",
      "source": "青岛北",
      "target": "灌南",
      "source_time": "10:27",
      "target_time": "12:53",
      "duration": "2小时26分",
      "price": "￥123"
    },
    {