In [1]:
import getpass
import os

from langchain_openai import ChatOpenAI

# Demonstrates a chat session: the conversation is recorded so that the model
# can hold a multi-turn dialogue.

model = ChatOpenAI(model="gpt-3.5-turbo")


# Enable LangSmith tracing. The API key is a secret, so it is taken from the
# environment (or prompted for when it is missing) instead of being written
# into the notebook.
# NOTE(review): the key that used to be hardcoded on this line has been exposed
# and should be revoked.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
if not os.environ.get("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LangSmith API key: ")



from langchain_core.messages import HumanMessage

# Two independent calls: each one only receives the single message passed to it,
# so nothing from the first call is available to the second.
for text in ("Hi! I'm Bob", "What's my name???"):
    print(model.invoke([HumanMessage(content=text)]))



# Pass the earlier turns of the conversation to the model explicitly as context
from langchain_core.messages import AIMessage

# The whole exchange so far, oldest message first, ending with the new question.
conversation = [
    HumanMessage(content="Hi! I'm Bob"),
    AIMessage(content="Hello Bob! How can I assist you today?"),
    HumanMessage(content="What's my name?"),
]

result2 = model.invoke(conversation)

print(result2)


from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# In-memory mapping from session id to that session's chat history.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history registered under ``session_id``.

    The first request for an unknown id creates an empty ``ChatMessageHistory``,
    records it in ``store`` and returns it; later requests for the same id get
    that same object back.
    """
    try:
        return store[session_id]
    except KeyError:
        history = ChatMessageHistory()
        store[session_id] = history
        return history


# Wrap the bare model so that its calls go through the per-session history
# returned by get_session_history.
with_message_history = RunnableWithMessageHistory(model, get_session_history)

# The session id selects which stored history a call uses.
config = {"configurable": {"session_id": "abc2"}}

# Same session for both turns, so the second question can rely on the first.
for text in ("Hi! I'm Bob", "What's my name?"):
    response = with_message_history.invoke(
        [HumanMessage(content=text)],
        config=config,
    )
    print(response.content)



content='Hello Bob! How can I assist you today?' response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 12, 'total_tokens': 22}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-bfdeb9e1-a79d-4754-8d9d-c758eabb39e8-0' usage_metadata={'input_tokens': 12, 'output_tokens': 10, 'total_tokens': 22}
content="I'm sorry, I do not know your name. Could you please tell me?" response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 12, 'total_tokens': 29}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-692e778c-6232-46d4-b843-1b00a2d5323f-0' usage_metadata={'input_tokens': 12, 'output_tokens': 17, 'total_tokens': 29}


Parent run 19ba6130-6c26-4b10-8d32-6560f3e15924 not found for run c0e547df-6e5f-48cd-a571-1a49b45738b2. Treating as a root run.


content='Your name is Bob. How can I help you today, Bob?' response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 35, 'total_tokens': 49}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-fcf54ace-ede1-4300-a3c2-2d28e626874c-0' usage_metadata={'input_tokens': 35, 'output_tokens': 14, 'total_tokens': 49}


Parent run 7fe5eea1-34ea-4924-b3e0-ae8f3e4473d7 not found for run 433231e7-cdf8-4a63-9740-f4dff42f3638. Treating as a root run.


Hello Bob! How can I assist you today?
Your name is Bob.


In [2]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# A fixed system instruction followed by a placeholder that is filled from the
# "messages" entry of the input.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant. Answer all questions to the best of your ability."),
    MessagesPlaceholder(variable_name="messages"),
])

# Feed the rendered prompt into the chat model.
chain = prompt | model

In [3]:
# The chain takes a dict; its "messages" entry fills the prompt's placeholder.
response = chain.invoke(
    {
        "messages": [HumanMessage(content="hi! I'm bob")],
    }
)

response.content

'Hello, Bob! How can I assist you today?'

In [None]:
# Wrap the prompt | model chain (instead of the bare model) with session history.
with_message_history = RunnableWithMessageHistory(
    chain,
    get_session_history,
)
# Use a session id that has not been used earlier in this notebook.
config = dict(configurable=dict(session_id="abc5"))

In [4]:
# First turn of the session selected by `config`: the user introduces themselves.
response = with_message_history.invoke([HumanMessage(content="Hi! I'm Jim")], config=config)

response.content

Parent run 483f4184-9343-4214-8b94-003aa81d06bf not found for run b67d8082-1b8d-4328-b8cb-cabed9069a3a. Treating as a root run.


'Hello Jim! How can I assist you today?'

In [5]:
# Second turn with the same `config`, asking about the name given in the first turn.
response = with_message_history.invoke([HumanMessage(content="What's my name?")], config=config)

response.content

Parent run d0e5174c-6a76-4cc7-a502-80a6620176f4 not found for run 7fce049f-5331-4005-8919-52fc849edd84. Treating as a root run.


'Your name is Jim.'

In [8]:
# Same layout as the earlier prompt, but the system instruction now has a
# {language} variable that must be supplied with every call.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant. Answer all questions to the best of your ability in {language}."),
    MessagesPlaceholder(variable_name="messages"),
])

# Rebuild the chain so that it uses the new prompt.
chain = prompt | model

In [9]:
# Both prompt inputs are required now: the messages and the reply language.
response = chain.invoke(
    {
        "messages": [HumanMessage(content="hi! I'm bob")],
        "language": "Spanish",
    }
)

response.content

'¡Hola, Bob! ¿En qué puedo ayudarte hoy?'

In [12]:
# The chain input is now a dict with several keys, so name the key that holds
# the messages.
with_message_history = RunnableWithMessageHistory(chain, get_session_history, input_messages_key="messages")

In [13]:
# Session id for the conversation held through the language-aware chain.
config = dict(configurable=dict(session_id="abc11"))

In [14]:
# The input dict carries the new message plus the language for the system prompt.
response = with_message_history.invoke(
    {
        "messages": [HumanMessage(content="hi! I'm todd")],
        "language": "Spanish",
    },
    config=config,
)

response.content

Parent run 123d00a8-2c97-49ce-88ec-f5014619d21d not found for run 1f6ab7b1-bb94-4d67-9842-0b175d54b710. Treating as a root run.


'¡Hola Todd! ¿En qué puedo ayudarte hoy?'

管理对话
构建聊天机器人时需要理解的一个重要概念是如何管理对话历史记录。如果不加以管理，消息列表将无限增长，并可能溢出 LLM 的上下文窗口。因此，添加一个限制传入消息大小的步骤非常重要。

重要的是，这一步必须放在提示模板之前、从消息历史记录中加载先前的消息之后执行。



In [15]:
from langchain_core.runnables import RunnablePassthrough


def filter_messages(messages, k=10):
    """Return the last ``k`` entries of ``messages``.

    Args:
        messages: A sliceable sequence of messages, oldest first.
        k: Maximum number of trailing messages to keep. Must be >= 0.

    Returns:
        A slice of ``messages`` holding at most ``k`` trailing entries; an
        empty slice when ``k`` is 0.

    Raises:
        ValueError: If ``k`` is negative.
    """
    if k < 0:
        raise ValueError("k must be non-negative")
    if k == 0:
        # messages[-0:] is messages[0:], i.e. everything, so handle 0 explicitly.
        return messages[:0]
    return messages[-k:]


# First step: replace the "messages" entry of the input with its trimmed version
# before the prompt sees it.
trim_history = RunnablePassthrough.assign(
    messages=lambda inputs: filter_messages(inputs["messages"]),
)

chain = trim_history | prompt | model

In [16]:
# A ten-message sample conversation, alternating human and AI turns.
messages = [
    (HumanMessage if speaker == "human" else AIMessage)(content=text)
    for speaker, text in (
        ("human", "hi! I'm bob"),
        ("ai", "hi!"),
        ("human", "I like vanilla ice cream"),
        ("ai", "nice"),
        ("human", "whats 2 + 2"),
        ("ai", "4"),
        ("human", "thanks"),
        ("ai", "no problem!"),
        ("human", "having fun?"),
        ("ai", "yes!"),
    )
]

In [17]:
# Sample conversation plus one new question: eleven messages go into the chain.
response = chain.invoke(
    {"messages": [*messages, HumanMessage(content="what's my name?")], "language": "English"}
)
response.content

"I'm sorry, I don't know your name."

上面的调用共传入 11 条消息，而 `filter_messages` 只保留最后 10 条，因此包含名字的第一条消息被丢弃了。但如果我们询问的是最近十条消息中包含的信息，它仍然能够回答：

In [18]:
# Same eleven-message input, but this question refers to a message that is
# still among the last ten.
response = chain.invoke(
    {"messages": [*messages, HumanMessage(content="what's my fav ice cream")], "language": "English"}
)
response.content

'You mentioned that you like vanilla ice cream.'

In [20]:
# Wrap the trimming chain with session history; the messages live under the
# "messages" key of the input dict.
with_message_history = RunnableWithMessageHistory(chain, get_session_history, input_messages_key="messages")

# Session id for this conversation.
config = dict(configurable=dict(session_id="abc20"))

In [21]:
# Send the sample conversation plus a new question through the history-aware chain.
response = with_message_history.invoke(
    {"messages": [*messages, HumanMessage(content="whats my name?")], "language": "English"},
    config=config,
)

response.content

Parent run 0cf66666-1b1c-4f63-83a4-b9af9e1839f3 not found for run f12df591-43c7-4e73-a157-b0fb5d8fff6e. Treating as a root run.


"I'm sorry, I don't have access to that information."

现在我们有了一个可以正常工作的聊天机器人。但是，对聊天机器人应用而言，一个非常重要的用户体验考量是流式传输。LLM 有时需要一段时间才能响应，因此为了改善用户体验，大多数应用都会在每个 token 生成时就将其流式返回，让用户能够看到进度。

事实上，做到这一点非常简单！

所有链都公开一个 `.stream` 方法，带有消息历史记录的链也不例外。我们只需调用该方法即可获得流式响应。

In [23]:
config = dict(configurable=dict(session_id="abc15"))

# Iterate over the chunks yielded by .stream and print each chunk's content,
# using "|" as a visible separator between chunks.
for r in with_message_history.stream(
    {"messages": [HumanMessage(content="hi! I'm todd. tell me a joke")], "language": "English"},
    config=config,
):
    print(r.content, end="|")

Parent run 9a828699-02a9-4cd1-b7bc-17a3212f478a not found for run f3f6f24f-91a9-4ded-b356-713dd0c25a6a. Treating as a root run.


|Hi| Todd|!| Here|'s| another| joke| for| you|:

|What| do| you| call| fake| spaghetti|?

|An| imp|asta|!| 😄||