## chat_agent

In [1]:
import json

from ChatAgent import ChatgptAgent, ConversationAgent
# Create the agent, then a conversation object built from the agent's session.
# Both names are notebook-level globals reused by the cells below.
chat_agent = ChatgptAgent()
conversation = ConversationAgent(chat_agent.session)

In [2]:
# Smoke test: send a Chinese prompt ("tell a joke") through the agent directly.
chat_agent.ask_chat("说一个笑话", conversation)

2023-06-28 16:07:23,201 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "7a912333-13d2-466c-bf13-b55ddb2853c6", "author": {"role": "assistant", "name": n...


'当然！这是一个经典的笑话：\n\n有一天，小明去参加驾驶考试。考官问他：“小明，你在红灯时应该做什么？” \n\n小明思考片刻后回答：“嗯，我会按下收藏按钮。”\n\n考官一脸困惑地问：“收藏按钮？你说的是哪个按钮？”\n\n小明得意地回答：“是手机上的收藏按钮！每当我看到红灯亮起，我就会拿出手机，把这个美丽的瞬间收藏起来。”'

## 自定义LLM chat_agent_LLM

In [2]:
from typing import Any, List, Mapping, Optional
from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.base import LLM
class ChatAgentLLM(LLM):
    """LangChain LLM adapter that forwards every prompt to a ChatgptAgent.

    The agent and the conversation are stored as fields on the instance, and
    `_call` uses those fields, so the wrapper does not depend on notebook-level
    variables with particular names.
    """

    # Agent whose ask_chat() method performs the actual request.
    chat_agent: ChatgptAgent
    # Conversation object handed to ask_chat() together with each prompt.
    conversation: ConversationAgent

    @property
    def _llm_type(self) -> str:
        """Return the type label reported for this LLM implementation."""
        return "ChatAgent"

    def _call(
            self,
            prompt: str,
            stop: Optional[List[str]] = None,
            run_manager: Optional[CallbackManagerForLLMRun] = None,
            **kwargs: Any,
    ) -> str:
        """Send `prompt` to the wrapped agent and return its reply.

        Args:
            prompt: Fully rendered prompt text.
            stop: Accepted for interface compatibility; not used here.
            run_manager: Accepted for interface compatibility; not used here.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            Whatever `chat_agent.ask_chat(prompt, conversation)` returns.
        """
        # Use the instance's own agent/conversation rather than module globals.
        return self.chat_agent.ask_chat(prompt, self.conversation)

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {"chat_agent": self.chat_agent, "conversation": self.conversation}


# Wrap the agent and conversation so LangChain chains and parsers can use them as an LLM.
chat_agent_LLM = ChatAgentLLM(chat_agent=chat_agent, conversation=conversation)

## Langchain 包装的 OpenAI

In [4]:
from langchain.llms import OpenAI
# LangChain's OpenAI wrapper with temperature 0.0; a later cell runs the same
# datetime prompt through it for comparison with chat_agent_LLM.
openai = OpenAI(temperature=0.0)

## 测试 PydanticOutputParser

In [5]:
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field, validator

# Define your desired data structure.
class Joke(BaseModel):
    # NOTE: documented with comments rather than a class docstring on purpose;
    # a docstring would be emitted in the model schema that the output parser
    # embeds in the prompt, changing what is sent to the LLM.
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")

    # You can add custom validation logic easily with Pydantic.
    @validator("setup")
    def question_ends_with_question_mark(cls, field):
        """Reject a `setup` value that does not end with a question mark.

        Raises:
            ValueError: if `field` is empty or its last character is not "?".
        """
        # endswith() is safe on an empty string; indexing field[-1] would raise
        # IndexError, which is not reported as a validation error.
        if not field.endswith("?"):
            raise ValueError("Badly formed question!")
        return field


# And a query intended to prompt a language model to populate the data structure.
joke_query = "Tell me a joke."

# Set up a parser + inject instructions into the prompt template.
parser = PydanticOutputParser(pydantic_object=Joke)

In [6]:
from langchain.prompts import  PromptTemplate
# Prompt with one runtime variable (`query`); the parser's format instructions
# are bound up front as a partial variable.
prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)
# Render the prompt, send it through the custom LLM, then parse the reply.
# The parsed object is the cell's displayed result.
_input = prompt.format_prompt(query=joke_query)
output = chat_agent_LLM(_input.to_string())
parser.parse(output)

2023-06-28 16:08:02,828 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "f3fda041-ddaa-4c26-b8ac-3a90f4e26197", "author": {"role": "assistant", "name": n...


Joke(setup="Why don't scientists trust atoms?", punchline='Because they make up everything!')

## DatetimeOutputParser

In [11]:
from langchain.output_parsers import ResponseSchema, DatetimeOutputParser
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
# The parser is used here only as the source of format instructions for the prompt.
output_parser = DatetimeOutputParser()
# Prompt layout: the user's question followed by the parser's format instructions.
template = """Answer the users question:

{question}

{format_instructions}"""
# Rebinds the notebook-level `prompt`; `question` is the only remaining input.
prompt = PromptTemplate.from_template(
    template,
    partial_variables={"format_instructions": output_parser.get_format_instructions()},
)


In [12]:
# Run the datetime prompt through the custom LLM. `output_parser` is not
# invoked in this cell, so the displayed value is the reply text itself.
chain_chat_agent_LLM = LLMChain(prompt=prompt, llm=chat_agent_LLM)
chain_chat_agent_LLM.run("around when was bitcoin founded?")

2023-06-28 16:12:45,453 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "4e4fc82c-2d16-4ec8-ac40-ef347b296b80", "author": {"role": "assistant", "name": n...


'Bitcoin was founded on January 3, 2009. The corresponding datetime string is "2009-01-03T00:00:00.000000Z".'

In [10]:
# Same prompt and question, answered by the OpenAI wrapper for comparison.
chain_openai = LLMChain(prompt=prompt, llm=openai)
chain_openai.run("around when was bitcoin founded?")

'\n\n2008-01-03T18:15:05.000000Z'

## MyStructuredOutputParser

In [14]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
class MyStructuredOutputParser(StructuredOutputParser):
    """StructuredOutputParser variant that emits Chinese format instructions.

    Only the instruction text is customised; parsing is inherited unchanged.
    """

    # Instruction template (Chinese). It asks for a fenced ```json block whose
    # body is the per-schema lines substituted for {format}.
    my_structured_format_instructions = """输出必须是下面这种格式的 markdown code 片段格式，包括前面的 "```json" 和后面的 "```":

```json
{{
{format}
}}
```"""

    def _get_sub_string(self, schema: ResponseSchema) -> str:
        """Render one schema as a tab-indented `"name": type  // description` line."""
        return f'\t"{schema.name}": {schema.type}  // {schema.description}'

    def get_format_instructions(self) -> str:
        """Return the instruction text with one rendered line per response schema."""
        rendered_lines = map(self._get_sub_string, self.response_schemas)
        return self.my_structured_format_instructions.format(
            format="\n".join(rendered_lines)
        )
# Two output fields requested from the model. The descriptions are Chinese:
#   answer - the answer to the user's question
#   source - the quoted content the answer relies on (4+ sentences of context)
response_schemas = [
    ResponseSchema(name="answer", description="用户提问的回答"),
    ResponseSchema(name="source", description="回答用户提问所引用的内容，前后4句话以上")
]
# Build the customised parser from the schemas above.
my_structured_output_parser = MyStructuredOutputParser.from_response_schemas(response_schemas)

In [16]:
from langchain.prompts import PromptTemplate
format_instructions = my_structured_output_parser.get_format_instructions()
# Decode explicitly as UTF-8 so the Chinese article reads the same on every
# platform instead of depending on the locale's default encoding.
# NOTE(review): assumes the file on disk is UTF-8 encoded - confirm.
with open('downloadfiles/text_1.txt', encoding='utf-8') as f:
    doc = f.read()
# The article and the format instructions are bound as partial variables;
# only the question is supplied at format time.
prompt = PromptTemplate(
    template="根据'[[[ ]]]'包裹内容回答用户问题.\n[[[{context}]]]\n{format_instructions}\n{question}",
    input_variables=["question"],
    partial_variables={"format_instructions": format_instructions, "context": doc}
)
_input = prompt.format_prompt(question="文中老胡是如何反驳炒作“胡锡进商业版图”？")
output = chat_agent_LLM(_input.to_string())
# The parsed result is the cell's displayed result.
my_structured_output_parser.parse(output)

2023-06-28 16:26:30,790 - use_requests.py - 270 - ERROR - [Status Code] 403
2023-06-28 16:27:03,818 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "b791d77f-e930-4485-99f8-c04150dfacd6", "author": {"role": "assistant", "name": n...


{'answer': '老胡想说，那些炒作“胡锡进商业版图”的人应该大致了解老胡退休后的实际情形，也能搞明白那些环时下属或者参股公司至多是“环球时报商业版图”，而非“胡锡进商业版图”。但他们仍然搞这种有悖真实情况的炒作，我认为至少其中有一些人是心术不正的。',
 'source': '一些人因老胡进入股市而编造、炒作“胡锡进商业版图”，真是无聊至极。众所周知，老胡曾长期担任环球时报社总编辑、法人代表，但我2021年底退休，自然要同时退出在环球时报社和子公司的所有领导职务，与报社切断利益关系。报社虽聘我为“环球时报特约评论员”，但我也主动放弃了在各大平台对该称呼的使用，能改的都改了，避免一旦我说话不慎牵连环球时报。环球时报社未能及时对我担任职务的环时下属公司及参股公司做商业注册变更，应该是有流程上的技术原因。环时的同志今天上午对我说，他们会加速办理那些公司的商业注册变更。顺便说一句，老胡之前作为环时负责人在那些公司兼职期间，没有从那些公司获得一分钱的收入。'}

## Retry parser

In [25]:
from langchain.prompts import PromptTemplate

from langchain.output_parsers import (
    PydanticOutputParser,
    OutputFixingParser,
    RetryOutputParser,
    RetryWithErrorOutputParser
)
from pydantic import BaseModel, Field

In [32]:
# Prompt text in an "Action / Action Input" style.
# NOTE(review): no later visible cell reads `template`; the PromptTemplate
# created in the following cell passes its own inline string - confirm intent.
template = """Based on the user question, provide an Action and Action Input for what step should be taken.
{format_instructions}
Question: {query}
Response:"""
# Target model for the retry/fix demos. Documented with comments rather than a
# class docstring, since a docstring would be included in the model schema.
class Action(BaseModel):
    action: str = Field(description="action to take")
    action_input: str = Field(description="input to the action")
# Rebinds the notebook-level `parser` to produce Action objects.
parser = PydanticOutputParser(pydantic_object=Action)

In [33]:
# Rebinds `prompt` with the Action parser's format instructions bound as a
# partial variable; `query` is the only runtime input.
prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

In [37]:
# A reply that omits the `action_input` field declared on Action.
bad_response = '{"action": "search"}'
# Rendered prompt, kept so the retry parser in the next cell can use it.
prompt_value = prompt.format_prompt(query="who is leo di caprios gf?")
# The fixing parser is built from the parser and the LLM only; it is not given
# the prompt when asked to repair the bad reply.
fix_parser = OutputFixingParser.from_llm(parser=parser, llm=chat_agent_LLM)
fix_parser.parse(bad_response)

2023-06-28 16:39:05,889 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "6bcb1093-be4a-4b71-8ab0-036aeb6dff8b", "author": {"role": "assistant", "name": n...


Action(action='search', action_input='')

In [36]:
# Retry variant: unlike the fixing parser, it is called with the original
# prompt value as well as the bad reply.
# NOTE(review): relies on `bad_response` and `prompt_value` from the cell shown
# above, although this cell's execution count (36) is lower than that cell's (37).
retry_parser = RetryWithErrorOutputParser.from_llm(
    parser=parser, llm=chat_agent_LLM)

retry_parser.parse_with_prompt(bad_response, prompt_value)

2023-06-28 16:38:57,947 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "1cbd52a5-d47c-4565-8218-51df73153ecc", "author": {"role": "assistant", "name": n...


Action(action='search', action_input='who is leo di caprios gf?')

## Auto-fixing parser

In [38]:
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field, validator
from typing import List

In [39]:
# Target model for the auto-fixing demo: an actor name plus a list of film titles.
# Documented with comments rather than a class docstring, since a docstring
# would be included in the model schema.
class Actor(BaseModel):
    name: str = Field(description="name of an actor")
    film_names: List[str] = Field(description="list of names of films they starred in")

# NOTE(review): `actor_query` is not read by any later visible cell.
actor_query = "Generate the filmography for a random actor."

# Rebinds the notebook-level `parser` to produce Actor objects.
parser = PydanticOutputParser(pydantic_object=Actor)

In [40]:
# Uses single-quoted keys and strings, so it is not valid JSON (see the parse error below).
misformatted = "{'name': 'Tom Hanks', 'film_names': ['Forrest Gump']}"

In [41]:
from langchain.schema import OutputParserException

# This parse is expected to fail. Catch and print the error so the failure is
# still shown but "Restart & Run All" can continue past this cell.
try:
    parser.parse(misformatted)
except OutputParserException as parse_error:
    print(parse_error)

OutputParserException: Failed to parse Actor from completion {'name': 'Tom Hanks', 'film_names': ['Forrest Gump']}. Got: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)

In [42]:
from langchain.output_parsers import OutputFixingParser

# Wrap the Actor parser so a failed parse can be handed to the LLM for repair.
new_parser = OutputFixingParser.from_llm(parser=parser, llm=chat_agent_LLM)

In [43]:
# Same payload that failed with the plain parser, now routed through the fixing parser.
new_parser.parse(misformatted)

2023-06-28 16:50:04,446 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "4db1632d-0daa-4e14-932c-567737635278", "author": {"role": "assistant", "name": n...


Actor(name='Tom Hanks', film_names=['Forrest Gump'])

## Summarization 1

In [44]:
# Decode explicitly as UTF-8 so the Chinese article reads the same on every
# platform instead of depending on the locale's default encoding.
# NOTE(review): assumes the file on disk is UTF-8 encoded - confirm.
with open('downloadfiles/text_2.txt', encoding='utf-8') as f:
    some_text = f.read()
from langchain.text_splitter import CharacterTextSplitter

# Split on blank lines into chunks of at most 2300 characters (measured with
# len), with a 200-character overlap between neighbouring chunks.
text_splitter = CharacterTextSplitter(
    separator="\n\n",
    chunk_size=2300,
    chunk_overlap=200,
    length_function=len,
)
texts = text_splitter.create_documents([some_text])

In [46]:
from langchain.chains.summarize import load_summarize_chain

# Chinese summarization prompt: "Give a brief summary of the following text: ... Summary:".
prompt_template = """对下文进行一个简短的总结:


{text}


总结:"""
PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
# The same prompt is used for both the map step and the combine step;
# intermediate (per-chunk) results are returned alongside the final summary.
chain = load_summarize_chain(chat_agent_LLM, chain_type="map_reduce", return_intermediate_steps=True, map_prompt=PROMPT, combine_prompt=PROMPT)

chain({"input_documents": texts})

2023-06-28 17:00:42,176 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "a0afd1d1-44d8-4c25-a39c-6a493b32663c", "author": {"role": "assistant", "name": n...
2023-06-28 17:00:54,674 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "f1968d4d-7c89-4080-8399-41fbcc0fdbd0", "author": {"role": "assistant", "name": n...
2023-06-28 17:01:24,982 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "891e5cda-386b-42a7-a822-b73cf5ab6e36", "author": {"role": "assistant", "name": n...
2023-06-28 17:01:45,588 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "1c9a84c5-7060-4c38-a69b-96fc082e900f", "author": {"role": "assistant", "name": n...
Token indices sequence length is longer than the specified maximum sequence length for this model (2124 > 1024). Running this sequence through the model will result in indexing errors
2023-06-28 17:02:01,161 - use_requests.py - 252 - INFO - success in gett

{'input_documents': [Document(page_content='上海仍是上海吗？解封一年后，滴水不漏式文化与身心管治\n今日大上海，若要引进外国演出而能确保通过审查，最方便是找些没有歌词的DJ 派对，和只是弹奏的爵士乐。\n\n特约撰稿人 文強 發自上海 2023-06-20\n2023年5月30日，翻修后的上海电影艺术中心。摄：VCG/VCG via Getty Images\n      \n文强，作家，时尚传媒人，自香港到上海，超过十年沪漂。\n\n文化市场综合行政执法队，简称“文管”，它不是全新工种，但高调出现在上海。试想像：在各种文化及演出场合，总有专门人员穿著制服坐在席间，在场震摄，默默留意台上一字一句，又或者关心身边观众有没越轨。\n\n解封一年后，上海是否仍是那个上海？\n\n六月是上海国际电影节惯常举行的季节。今年许多选片回复到一票难求的热闹，映后交流时，观众席仍然满载渴望提问与表达的眼神。\n\n一年前并非这样，当时上海才刚从封城两个月的噩梦中醒来，人们惊魂未定，电影节宣布延期。现在，观众重返戏院了，今年电影节其中一部最惹来关注的作品，是在此作首映、上海人罗冬执导的《梅的白天与黑夜》，追踪一位年越70岁叫陈玉梅的上海老阿姨的日常生活与约会故事。\n\n影片近乎没差评，大家都为梅姨的生命力、直率与典型的上海式世故而着迷。阿姨太喜爱镜头了，以至本应是记录片，却看得像剧情片。有介绍提及，这是一部主要拍摄在“怡情前”的片子，故此也不奇怪，片中人都没戴口罩。有留言尝试纠正，说该是“疫情”，不是“怡情”。版主回复：“不用更正，我们故意的，那个正确的词已无法正确使用了，望周知。”\n\n从而又记起五月份笔者曾到过上海的医院，医护提到，虽然坊间不断有二次阳的病人，可是医院方已不会再视之为新型冠状病毒的案子来登记了。更甚的说法是：已经没有这一项了，所有同类的症状，皆不会再被界定为新型冠状病毒。从字面和数据意义上，疫情，新冠，在这里都没有了。\n\n难以想像，已经过了一年。后来和不少朋友的闲聊中，都会提到一个说法：对于一些经历，譬如上次见的某人，去过的某地，一般都起码是三年前的事了，而大家都总不觉得有那么久。\n\n人生中有三年像空白了。这三年去哪了？\n\n“那个正确的词已无法正确使用了，望周知。”\n\n《梅的白天与黑夜》的电影内容是追踪

## AnalyzeDocumentChain


### load_qa_chain

In [57]:
# Decode explicitly as UTF-8 rather than relying on the platform default.
# NOTE(review): assumes the file on disk is UTF-8 encoded - confirm.
with open("downloadfiles/text_4.txt", encoding="utf-8") as f:
    text = f.read()

In [61]:
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import AnalyzeDocumentChain
# Map-reduce QA chain with default prompts, wrapped in AnalyzeDocumentChain so
# it can be called with one raw string (`input_document`) instead of Documents.
qa_chain = load_qa_chain(chat_agent_LLM, chain_type="map_reduce")
qa_document_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)
qa_document_chain.run(input_document=text, question="Why Chinese urge caution?")

2023-06-28 17:44:02,758 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "45b727d0-c649-402d-a917-cce1d67f08db", "author": {"role": "assistant", "name": n...
2023-06-28 17:44:15,261 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "fb37e1f8-9d7e-49a9-8527-d806fa2c74ea", "author": {"role": "assistant", "name": n...
2023-06-28 17:44:22,623 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "0a72af9b-9317-47f6-98c4-1079ebcd1101", "author": {"role": "assistant", "name": n...
Token indices sequence length is longer than the specified maximum sequence length for this model (1995 > 1024). Running this sequence through the model will result in indexing errors
2023-06-28 17:44:43,098 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "f9ce8467-75ba-4396-85d3-d2ba176b0537", "author": {"role": "assistant", "name": n...


"Chinese urge caution for several reasons:\n\n1. The potential war with Taiwan could mean taking on a nuclear-armed superpower (referring to the United States), which poses significant risks and uncertainties.\n\n2. Chinese officials pay close attention to online opinion, and they are aware that there are influential netizens who oppose the idea of going to war. Even among ardent nationalists, there are fissures, and some urge caution or argue that military action may not be necessary.\n\n3. The setbacks faced by Russia during its invasion of Ukraine and the West's solidarity in response to it may have sobered some supporters of rapid steps towards reunification by force. The Chinese leadership may be cautious due to the unpredictability of the outcome and potential consequences.\n\n4. The supply of arms by the United States to Taiwan and its military buildup in the region are seen as challenges by some Chinese nationalists. To counter this, there are suggestions to strengthen China's 

### load_summarize_chain

In [60]:
from langchain.chains.summarize import load_summarize_chain
# Map-reduce summarization with default prompts over the same raw `text`.
summary_chain = load_summarize_chain(chat_agent_LLM, chain_type="map_reduce")
summarize_document_chain = AnalyzeDocumentChain(combine_docs_chain=summary_chain)
summarize_document_chain.run(input_document=text)

2023-06-28 17:43:13,703 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "3637556a-49e3-413e-b0c4-3be69fa4eb90", "author": {"role": "assistant", "name": n...
2023-06-28 17:43:25,479 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "41c46021-4145-4f1c-a420-c7836bb13c79", "author": {"role": "assistant", "name": n...
2023-06-28 17:43:32,836 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "36fc9adf-1d63-4d20-8c5f-046f237f3a0b", "author": {"role": "assistant", "name": n...
2023-06-28 17:43:43,485 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "6b5be2d5-f6ce-4db6-87e5-88b632128ed6", "author": {"role": "assistant", "name": n...


"Chinese nationalists show caution and divisions regarding a potential war with Taiwan, with some urging restraint and questioning the necessity of war. President Xi Jinping and Chinese officials may prefer caution due to risks and uncertainties, including confronting a nuclear-armed superpower and public support concerns. Recent surveys indicate relatively low support for immediate military action among the Chinese population. Russia's setbacks in Ukraine and Western response may have tempered support for rapid military action. Proposals to address challenges and conditions for war suggest significant obstacles and a need for more time. Radical nationalists advocating extreme measures have faced criticism. Some netizens express reservations about participating in a war, highlighting feelings of neglect and suggesting government officials' children should go first. Chinese officials may consider such sentiments."

## load_qa_chain 1

In [71]:
from langchain.text_splitter import CharacterTextSplitter
# Split on blank lines into chunks of at most 2200 characters (measured with
# len), with a 200-character overlap between neighbouring chunks.
text_splitter = CharacterTextSplitter(
    separator="\n\n",
    chunk_size=2200,
    chunk_overlap=200,
    length_function=len,
)
# Decode explicitly as UTF-8 so the Chinese document reads the same on every
# platform instead of depending on the locale's default encoding.
# NOTE(review): assumes the file on disk is UTF-8 encoded - confirm.
with open("downloadfiles/text_3.txt", encoding="utf-8") as f:
    text = f.read()
texts = text_splitter.create_documents([text])

In [72]:
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
# Map-step prompt (Chinese): given one section of a long document, return only
# the text relevant to the question, verbatim, without adding anything.
question_prompt_template = """使用长文档中的以下部分，查看是否有任何文本与回答问题相关。仅返回任何相关的原样原格式文本。你不要自己添加内容。

[[[{context}]]]

问题: {question}
原样文本："""
QUESTION_PROMPT = PromptTemplate(
    template=question_prompt_template, input_variables=["context", "question"]
)

# Combine-step prompt (Chinese): answer from the collected passages; reply
# "无法解答" (cannot answer) when the answer is unknown, and do not invent content.
combine_prompt_template = """利用下面的相关段落文本回答用户的问题，
如果你不知道答案就回复：无法解答。不要编造内容回复。

问题: {question}

[[[{summaries}]]]

答案:"""
COMBINE_PROMPT = PromptTemplate(
    template=combine_prompt_template, input_variables=["summaries", "question"]
)

# return_map_steps=True exposes the per-chunk results as 'intermediate_steps'
# in the output shown below.
load_qa_chain_map_reduce = load_qa_chain(chat_agent_LLM, chain_type="map_reduce", return_map_steps=True, question_prompt=QUESTION_PROMPT, combine_prompt=COMBINE_PROMPT)
query = "文中海上风电机组建造的最大难点是什么？"
load_qa_chain_map_reduce({"input_documents": texts, "question": query}, return_only_outputs=True)

2023-06-28 18:20:55,225 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "ca9dfc91-0f8f-46a1-bfd0-2c5b501a56f8", "author": {"role": "assistant", "name": n...
2023-06-28 18:20:58,880 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "797e2edf-b868-4c08-9ac6-8ba4293aca3e", "author": {"role": "assistant", "name": n...
2023-06-28 18:21:09,295 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "f0ff8de7-c44d-4ea1-a8f8-6ae2dae6ca22", "author": {"role": "assistant", "name": n...


{'intermediate_steps': ['“16MW机组吊装施工规程中面临“施工环境恶劣，海况复杂，施工安全风险高”、“风机容量大，施工难度大，设备选型难”、“大型吊装任务多，安全管控压力大”这三大难点。”',
  '"正因此该船建造难度远超普通船舶。"'],
 'output_text': '16MW海上风电机组建造的最大难点包括施工环境恶劣、海况复杂、施工安全风险高，风机容量大、施工难度大、设备选型难以及大型吊装任务多、安全管控压力大等。这些因素导致该机组的建造难度远超过普通船舶。'}

## 翻译

In [16]:
# Decode explicitly as UTF-8: the article contains non-ASCII punctuation, and
# the platform default encoding is not guaranteed to handle it.
# NOTE(review): assumes the file on disk is UTF-8 encoded - confirm.
with open('downloadfiles/text_5.txt', encoding='utf-8') as f:
    text = f.read()
from langchain.text_splitter import CharacterTextSplitter

# Split on blank lines into chunks of at most 1600 characters (measured with
# len), with no overlap between chunks.
text_splitter = CharacterTextSplitter(
    separator="\n\n",
    chunk_size=1600,
    chunk_overlap=0,
    length_function=len,
)
texts = text_splitter.create_documents([text])

In [12]:
# Example of the expected reply shape: a JSON array of English/Chinese pairs.
# It is injected into the prompt as a value, so its braces need no escaping.
json_format="""[
  {
    "English": "Hello, how are you doing today?",
    "Chinese": "你好，你今天好吗？"
  },
  {
    "English": "Thank you so much for your help!",
    "Chinese": "非常感谢你的帮助！"
  },
  {
    "English": "I'm sorry for the mistake I made.",
    "Chinese": "对于我犯的错误，我感到很抱歉。"
  }
]"""
# Prompt template with two placeholders: {json_format} (the example above) and
# {context} (the text to translate).
translate_prompt_template = """You are a translation engine;
Your first step should be to separate the text into paragraphs, then translate each paragraph into Chinese, response in the json output. Remember to escape characters when necessary.

examples:
```json
{json_format}
```

{context}

json output:
"""

In [13]:
from langchain.prompts import PromptTemplate
# Declare both placeholders, then pre-bind the JSON example so only `context`
# remains to be supplied per call.
prompt = PromptTemplate(template=translate_prompt_template,input_variables=["context","json_format"])
partial_prompt = prompt.partial(json_format=json_format)

In [14]:
from langchain.chains import LLMChain
# Chain that renders the partially bound translation prompt and sends it to the custom LLM.
translate_chain = LLMChain(llm=chat_agent_LLM,
                           prompt=partial_prompt)

In [15]:
# Translate each chunk in order and print the model's reply (the "text" key of
# the chain result). Dedicated loop names are used so that the notebook-level
# `text` variable (the full source document) is not overwritten by the last
# chunk, which would silently change what other cells see on re-run.
for chunk_doc in texts:
    translation = translate_chain(chunk_doc.page_content)
    print(translation["text"])

2023-06-28 20:56:34,139 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "8dbabdd3-fb14-4fc9-8273-2ddb51434825", "author": {"role": "assistant", "name": n...


[
  {
    "English": "Talking about AI in human terms is natural—but wrong",
    "Chinese": "以人类的方式谈论人工智能是自然的，但是是错误的"
  },
  {
    "English": "When it comes to artificial intelligence, metaphors are often misleading",
    "Chinese": "谈到人工智能时，隐喻常常是具有误导性的"
  },
  {
    "English": "A ghost coming out of a computer screen",
    "Chinese": "一个从计算机屏幕中走出来的幽灵"
  },
  {
    "English": "Jun 22nd 2023",
    "Chinese": "2023年6月22日"
  },
  {
    "English": "My love’s like a red, red rose. It is the east, and Juliet is the sun. Life is a highway, I wanna ride it all night long. Metaphor is a powerful and wonderful tool. Explaining one thing in terms of another can be both illuminating and pleasurable, if the metaphor is apt.",
    "Chinese": "我的爱情如同一朵红色的玫瑰。它是东方，朱丽叶是太阳。生活是一条公路，我想整夜驾驶。隐喻是一种强大而美妙的工具。如果隐喻恰当，用一种事物解释另一种事物可以既有启发性又令人愉悦。"
  },
  {
    "English": "But that “if” is important. Metaphors can be particularly helpful in explaining unfamiliar concepts: imagining the Einsteinian model of gravity (he

2023-06-28 20:57:11,818 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "4dcbabb3-c2f8-41a2-bf04-eeaa2e2fb06b", "author": {"role": "assistant", "name": n...


[
  {
    "English": "“Hallucinations” might be thought of as a forgiving euphemism. Your friendly local AI is just having a bit of a bad trip; leave him to sleep it off and he’ll be back to himself in no time. For the “lies” crowd, though, the humanizing metaphor is even more profound: the AI is not only thinking, but has desires and intentions. A lie, remember, is not any old false statement. It is one made with the goal of deceiving others. ChatGPT has no such goals at all.",
    "Chinese": "“幻觉”可以被视为一种宽容的委婉说法。您友好的本地人工智能只是有点糟糕的经历；让他休息一下，很快他就会恢复正常。然而，对于“谎言”群体来说，这种人性化的隐喻更加深刻：人工智能不仅在思考，而且有欲望和意图。请记住，谎言不是任何一个老旧的虚假陈述。它是为了欺骗他人而做出的陈述。ChatGPT根本没有这样的目标。"
  },
  {
    "English": "Humans’ tendency to anthropomorphize things they don’t understand is ancient, and may confer an evolutionary advantage. If, on spying a rustling in the bushes, you infer an agent (whether predator or spirit), no harm is done if you are wrong. If you assume there is nothing in the undergrowth and a leopard jumps out, y

2023-06-28 20:57:50,729 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "8786b628-ee70-428d-a07b-b0afa6139355", "author": {"role": "assistant", "name": n...


[
  {
    "English": "But AI is too important for loose language. If entirely avoiding human-like metaphors is all but impossible, writers should offset them, early, with some suitably bloodless phrasing. “An LLM is designed to produce text that reflects patterns found in its vast training data,” or some such explanation, will help readers take any later imagery with due skepticism. Humans have evolved to spot ghosts in machines. Writers should avoid ushering them into that trap. Better to lead them out of it.■",
    "Chinese": "但是对于AI来说，使用随意的语言是不可取的。如果完全避免人类化的隐喻几乎是不可能的，作者应该提前用一些适当冷淡的措辞来抵消它们。“一个LLM的设计目的是产生反映其庞大训练数据中的模式的文本”，或者类似的解释，将帮助读者对任何后来的意象保持适度的怀疑。人类进化出了察觉机器中的幽灵的能力。作者应该避免将读者引入这个陷阱。最好引导他们走出来。■"
  },
  {
    "English": "Read more from Johnson, our columnist on language:\nGestures are a subtle and vital form of communication (Jun 8th)\nAs it spreads across the world, who owns English? (May 25th)\nThe hazards of pronouncing foreign names on air (May 11th)",
    "Chinese": "更多关于我们的语言专栏作

## Retrieval QA

In [17]:
from chromadb.config import Settings
import chromadb
from langchain.embeddings import SentenceTransformerEmbeddings

# Sentence-transformers embedding model passed to the Chroma store below.
embeddings = SentenceTransformerEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")

No sentence-transformers model found with name /Volumes/Usir/.cache/torch/sentence_transformers/GanymedeNil_text2vec-large-chinese. Creating a new one with MEAN pooling.


In [18]:
from langchain.vectorstores import Chroma

import os

# Location of the persisted Chroma (duckdb+parquet) store. Set the
# CHROMA_PERSIST_DIR environment variable to use this notebook on another
# machine; the default is the path originally hard-coded here.
CHROMA_PERSIST_DIR = os.environ.get("CHROMA_PERSIST_DIR", "/Volumes/Usir/DB/chromadb")

# Open the existing collection, using the embedding model created above.
vectordb = Chroma(collection_name="NationalLawsAndRegulationsDatabase",
                  embedding_function=embeddings,
                  client=chromadb.Client(Settings(
                      chroma_db_impl="duckdb+parquet",
                      persist_directory=CHROMA_PERSIST_DIR
                  )))

In [21]:
# Question (Chinese): which bodies does the law make responsible for
# supervising noise pollution?
question = "法律规定哪些机构对噪声污染负有监管责任？"
# MMR retrieval restricted by a metadata filter to entries whose `status` is "有效".
retriever = vectordb.as_retriever(search_type="mmr", search_kwargs={"filter":{"status": "有效"}})
# Rebinds the notebook-level `texts` to the retrieved documents.
texts = retriever.get_relevant_documents(question)

from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
# Map-step prompt (Chinese): from the given legal text, return only the passages
# relevant to the question, verbatim, without adding anything.
question_prompt_template = """使用以下法律部分原文，查看是否有任何文本与回答问题相关。仅返回任何相关的原样原格式文本。你不要自己添加内容。

[[[{context}]]]

问题: {question}
原样文本："""
QUESTION_PROMPT = PromptTemplate(
    template=question_prompt_template, input_variables=["context", "question"]
)

# Combine-step prompt (Chinese): act as a lawyer and answer from the collected
# passages; reply "无法解答" (cannot answer) when unknown, and do not invent content.
combine_prompt_template = """扮演一个律师，利用下面的法律部分原文回答用户的问题，
如果你不知道答案就回复：无法解答。不要编造内容回复。

问题: {question}

[[[{summaries}]]]

答案:"""
COMBINE_PROMPT = PromptTemplate(
    template=combine_prompt_template, input_variables=["summaries", "question"]
)

# Rebinds QUESTION_PROMPT, COMBINE_PROMPT and load_qa_chain_map_reduce from the
# earlier QA section with the legal-domain prompts defined in this cell.
load_qa_chain_map_reduce = load_qa_chain(chat_agent_LLM, chain_type="map_reduce", return_map_steps=True, question_prompt=QUESTION_PROMPT, combine_prompt=COMBINE_PROMPT)
load_qa_chain_map_reduce({"input_documents": texts, "question": question}, return_only_outputs=True)

2023-06-28 21:13:52,782 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "9d06dd68-73ba-472a-a0ce-78f371a39ae1", "author": {"role": "assistant", "name": n...
2023-06-28 21:14:00,770 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "e39b72e8-bba5-4a3d-8b76-aea1a9d3c0ba", "author": {"role": "assistant", "name": n...
2023-06-28 21:14:41,523 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "0bf19a6b-b483-4abf-82fd-7c3df4d30e23", "author": {"role": "assistant", "name": n...
2023-06-28 21:14:50,638 - use_requests.py - 252 - INFO - success in getting  data
{"message": {"id": "907670b0-7f3b-40d8-9a3b-2129ec2ff826", "author": {"role": "assistant", "name": n...
Token indices sequence length is longer than the specified maximum sequence length for this model (2669 > 1024). Running this sequence through the model will result in indexing errors
2023-06-28 21:15:30,473 - use_requests.py - 252 - INFO - success in gett

{'intermediate_steps': ['根据提供的法律部分原文，以下机构对噪声污染负有监管责任：\n\n- 国务院生态环境主管部门（负责制定噪声监测和评价规范，组织声环境质量监测，发布全国声环境质量状况信息）\n- 地方人民政府生态环境主管部门（按照规定设置本行政区域声环境质量监测站，组织本行政区域声环境质量监测，向社会公布声环境质量状况信息）\n- 其他负有噪声污染防治监督管理职责的部门（参与约谈地区政府及其有关部门的主要负责人，进行现场检查，处理噪声污染相关举报等）\n\n需要注意的是，具体的监管责任可能根据地区和具体情况有所不同。因此，建议在特定情况下咨询当地相关法律法规以获取准确和具体的信息。',
  '生态环境主管部门、其他负有噪声污染防治监督管理职责的部门、县级以上人民政府市场监督管理部门、海关、县级以上地方人民政府住房和城乡建设主管部门。',
  '第七十条 对噪声敏感建筑物集中区域的社会生活噪声扰民行为，基层群众性自治组织、业主委员会、物业服务人应当及时劝阻、调解；劝阻、调解无效的，可以向负有社会生活噪声污染防治监督管理职责的部门或者地方人民政府指定的部门报告或者投诉，接到报告或者投诉的部门应当依法处理。\n第七十一条 违反本法规定，拒绝、阻挠监督检查，或者在接受监督检查时弄虚作假的，由生态环境主管部门或者其他负有噪声污染防治监督管理职责的部门责令改正，处二万元以上二十万元以下的罚款。\n第七十二条 违反本法规定，生产、进口、销售超过噪声限值的产品的，由县级以上人民政府市场监督管理部门、海关按照职责责令改正，没收违法所得，并处货值金额一倍以上三倍以下的罚款；情节严重的，报经有批准权的人民政府批准，责令停业、关闭。\n第七十三条 违反本法规定，建设单位建设噪声敏感建筑物不符合民用建筑隔声设计相关标准要求的，由县级以上地方人民政府住房和城乡建设主管部门责令改正，处建设工程合同价款百分之二以上百分之四以下的罚款。\n违反本法规定，建设单位在噪声敏感建筑物禁止建设区域新建与航空无关的噪声敏感建筑物的，由地方人民政府指定的部门责令停止违法行为，处建设工程合同价款百分之二以上百分之十以下的罚款，并报经有批准权的人民政府批准，责令拆除。\n第七十四条 违反本法规定，在噪声敏感建筑物集中区域新建排放噪声的工业企业的，由生态环境主管部门责令停止违法行为，处十

## 清空所有对话

In [None]:
# Per the section heading, this clears all conversations.
# NOTE(review): the cell has no recorded execution (In [None]); what exactly
# del_conversation_local() removes is not visible here - confirm before running.
chat_agent.del_conversation_local()

## 关闭对话

In [22]:
# Call quit() on the agent stored on the LLM wrapper to end the session.
chat_agent_LLM.chat_agent.quit()

2023-06-28 21:55:08,126 - auth_handler.py - 70 - INFO - success in saving chatgpt cookies.json
2023-06-28 21:55:09,006 - use_requests.py - 174 - ERROR - <class 'exceptions.Requests403Error'> e
