In [1]:
import os
from langchain.chat_models import AzureChatOpenAI

In [2]:
# Azure OpenAI connection settings.
# Each value is read from an environment variable so that real secrets never
# have to be saved inside the notebook; the original placeholder literals are
# kept as fallbacks for when a variable is not set.
apikey = os.environ.get("AZURE_OPENAI_API_KEY", 'key')
endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT", 'endpoint')
deployname = os.environ.get("AZURE_OPENAI_DEPLOYMENT_NAME", 'gpt-4-2-teste')
apiversion = os.environ.get("OPENAI_API_VERSION", '2024-08-01-preview')

In [3]:
# Build the LangChain chat model from the connection settings defined above
model = AzureChatOpenAI(
    azure_endpoint=endpoint,
    api_version=apiversion,
    api_key=apikey,
    model=deployname,
)

  model = AzureChatOpenAI(


### Langchain

In [10]:
from langchain.chat_models import AzureChatOpenAI
from langchain.schema import HumanMessage, SystemMessage

In [11]:
# Call the chat model directly with a single human message
model.invoke(
    [
        HumanMessage(content="Hi! I'm Will"),
    ]
)

AIMessage(content='Hello Will! How can I assist you today?', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 12, 'total_tokens': 22, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'gpt-4-turbo-2024-04-09', 'system_fingerprint': 'fp_5603ee5e2e', 'finish_reason': 'stop', 'logprobs': None}, id='run-c6a73595-ed58-4153-ae8a-3d2112083c0a-0')

In [12]:
# Each direct call is independent: nothing from the previous call is passed in here
model.invoke(
    [
        HumanMessage(content="What's my name?"),
    ]
)

AIMessage(content="I'm sorry, I don't have access to personal data like names unless you tell me. How can I assist you today?", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 26, 'prompt_tokens': 12, 'total_tokens': 38, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'gpt-4-turbo-2024-04-09', 'system_fingerprint': 'fp_5603ee5e2e', 'finish_reason': 'stop', 'logprobs': None}, id='run-39afaf74-4ca7-4806-b389-e6a2d7475fab-0')

In [13]:
#conversation history
from langchain_core.messages import AIMessage

# Pass the earlier exchange along with the new question so the model sees the whole conversation
model.invoke([
    HumanMessage(content="Hi! I'm Will"),
    AIMessage(content="Hello Will! How can I assist you today?"),
    HumanMessage(content="What's my name?"),
])

AIMessage(content='Your name is Will. How can I help you further?', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 12, 'prompt_tokens': 35, 'total_tokens': 47, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'gpt-4-turbo-2024-04-09', 'system_fingerprint': 'fp_5603ee5e2e', 'finish_reason': 'stop', 'logprobs': None}, id='run-0cace34e-f6cd-49cc-b99e-257ef1766c85-0')

#### Message persistence

In [14]:
# Requires langgraph. If it is missing, install it into the kernel's environment with: %pip install langgraph

In [15]:
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph

# Start a new graph whose state schema is MessagesState
workflow = StateGraph(MessagesState)


# Define the function that calls the model
def call_model(state: MessagesState):
    """Invoke the chat model on the state's messages and return its reply as a state update."""
    reply = model.invoke(state["messages"])
    return {"messages": reply}


# Register the single "model" node and connect the graph's START to it
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# Compile the graph with a MemorySaver checkpointer so conversations are remembered
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [16]:
# A config is passed into the runnable on every call. Its thread_id identifies
# the conversation, which lets a single application support multiple
# conversation threads (a common requirement when it has multiple users).
config = {
    "configurable": {
        "thread_id": "abc123",
    },
}

In [17]:
# First turn on this thread: introduce ourselves
query = "Hi! I'm Will."
input_messages = [HumanMessage(query)]

output = app.invoke(
    {"messages": input_messages},
    config,
)
# The returned state contains all messages; print only the latest one
output["messages"][-1].pretty_print()


Hello Will! How can I assist you today?


In [18]:
# Second turn on the same thread (same config as the previous call)
query = "What's my name?"
input_messages = [HumanMessage(query)]

output = app.invoke(
    {"messages": input_messages},
    config,
)
output["messages"][-1].pretty_print()


Your name is Will.


The chatbot now remembers things about us. If we change the config to reference a different thread_id, we can see that it starts the conversation fresh.

In [19]:
# Use a different thread_id; `query` is reused from the previous cell
config = {"configurable": {"thread_id": "abc234"}}
input_messages = [HumanMessage(query)]

output = app.invoke(
    {"messages": input_messages},
    config,
)
output["messages"][-1].pretty_print()


As an AI, I don't have access to your personal information including your name. How can I assist you today?


In [20]:
# Switching back to the first thread_id resumes the original conversation:
# the MemorySaver checkpointer still holds that thread's messages
# (in memory, not in a database).
config = {"configurable": {"thread_id": "abc123"}}
input_messages = [HumanMessage(query)]

output = app.invoke(
    {"messages": input_messages},
    config,
)
output["messages"][-1].pretty_print()


Your name is Will. How can I help you today?


### Async Function

In [21]:
# Asynchronous version of the graph node
async def call_model(state: MessagesState):
    """Await the chat model on the state's messages and return its reply as a state update."""
    reply = await model.ainvoke(state["messages"])
    return {"messages": reply}


# Define graph as before:
workflow = StateGraph(state_schema=MessagesState)
workflow.add_edge(START, "model")
workflow.add_node("model", call_model)
app = workflow.compile(checkpointer=MemorySaver())

# Async invocation:
output = await app.ainvoke({"messages": input_messages}, config)
output["messages"][-1].pretty_print()


I'm sorry, but I don't have access to your personal information, including your name. How can I assist you today?


## Prompt Templates

Prompt Templates help to turn raw user information into a format that the LLM can work with. In this case, the raw user input is just a message, which we are passing to the LLM. 

In [22]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

In [23]:
'''
First, let's add in a system message with some custom instructions (but still taking messages as input). 
Next, we'll add in more input besides just the messages.
'''


"\nFirst, let's add in a system message with some custom instructions (but still taking messages as input). \nNext, we'll add in more input besides just the messages.\n"

In [24]:
# Prompt = a fixed system instruction followed by the conversation messages
prompt_template = ChatPromptTemplate.from_messages([
    ("system", "You talk like a pirate. Answer all questions to the best of your ability."),
    MessagesPlaceholder(variable_name="messages"),
])

In [25]:
# Rebuild the application so that it uses the prompt template
workflow = StateGraph(MessagesState)


def call_model(state: MessagesState):
    """Render the prompt template from the state, invoke the model, and return its reply."""
    reply = model.invoke(prompt_template.invoke(state))
    return {"messages": reply}


# Register the "model" node and connect the graph's START to it
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# Compile with a new MemorySaver checkpointer
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [26]:
# New thread for the pirate-prompt application
config = {"configurable": {"thread_id": "abc345"}}

query = "Hi! I'm Will."
input_messages = [HumanMessage(query)]

output = app.invoke(
    {"messages": input_messages},
    config,
)
output["messages"][-1].pretty_print()


Ahoy there, Will! How can I be assistin' ye today, matey?


In [27]:
# Follow-up question on the same thread
query = "What is my name?"
input_messages = [HumanMessage(query)]

output = app.invoke(
    {"messages": input_messages},
    config,
)
output["messages"][-1].pretty_print()


Yer name be Will, ye scallywag! What brings ye to these waters?


In [28]:
# A slightly richer prompt: the system message now has a {language} variable
prompt_template = ChatPromptTemplate.from_messages([
    (
        "system",
        "You are a helpful assistant. Answer all questions to the best of your ability in {language}.",
    ),
    MessagesPlaceholder(variable_name="messages"),
])

In [29]:
from typing import Sequence

from langchain_core.messages import BaseMessage
from langgraph.graph.message import add_messages
from typing_extensions import Annotated, TypedDict


In [30]:
# The application state now carries two values: the messages and the language.

class State(TypedDict):
    """Graph state holding the conversation messages and the reply language."""

    # Message sequence, annotated with the add_messages reducer
    messages: Annotated[Sequence[BaseMessage], add_messages]
    # Value substituted for {language} in the prompt template
    language: str


# New graph that uses the custom State schema
workflow = StateGraph(State)


def call_model(state: State):
    """Render the prompt template from the state, invoke the model, and return its reply in a list."""
    reply = model.invoke(prompt_template.invoke(state))
    return {"messages": [reply]}


# Register the "model" node and connect the graph's START to it
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# Compile with a new MemorySaver checkpointer
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)


In [31]:
# New thread; the language is supplied together with the messages
config = {"configurable": {"thread_id": "abc456"}}
language = "Portuguese"

query = "Hi! I'm Will."
input_messages = [HumanMessage(query)]

output = app.invoke({"messages": input_messages, "language": language}, config)
output["messages"][-1].pretty_print()


Olá Will! Como posso ajudar você hoje?


In [32]:
# The entire state is persisted, so parameters such as language can be
# omitted when no change is desired.
query = "What is my name?"
input_messages = [HumanMessage(query)]

output = app.invoke({"messages": input_messages}, config)
output["messages"][-1].pretty_print()


Seu nome é Will. Como posso ajudá-lo hoje?


### Managing Conversation History

One important concept to understand when building chatbots is how to manage conversation history. If left unmanaged, the list of messages will grow without bound and may overflow the context window of the LLM.

LangChain comes with a few built-in helpers for managing a list of messages. In this case we'll use the `trim_messages` helper to reduce how many messages we send to the model. The trimmer lets us specify how many tokens we want to keep, along with other parameters such as whether to always keep the system message and whether to allow partial messages.

In [33]:
from langchain_core.messages import SystemMessage, trim_messages

# Trimmer that reduces a message list to its most recent part
trimmer = trim_messages(
    strategy="last",        # keep messages from the end of the history
    max_tokens=65,          # token budget for what is kept
    token_counter=model,    # the chat model is used to count tokens
    include_system=True,    # always keep the system message
    start_on="human",       # the kept history starts on a human message
    allow_partial=False,    # do not keep partial messages
)

# Sample conversation: one system message followed by human/AI exchanges
messages = [
    SystemMessage(content="you're a good assistant"),
    HumanMessage(content="hi! I'm will"), AIMessage(content="hi!"),
    HumanMessage(content="I like vanilla ice cream"), AIMessage(content="nice"),
    HumanMessage(content="whats 2 + 2"), AIMessage(content="4"),
    HumanMessage(content="thanks"), AIMessage(content="no problem!"),
    HumanMessage(content="having fun?"), AIMessage(content="yes!"),
]

# Display the messages that remain after trimming
trimmer.invoke(messages)

[SystemMessage(content="you're a good assistant", additional_kwargs={}, response_metadata={}),
 HumanMessage(content='whats 2 + 2', additional_kwargs={}, response_metadata={}),
 AIMessage(content='4', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='thanks', additional_kwargs={}, response_metadata={}),
 AIMessage(content='no problem!', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='having fun?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='yes!', additional_kwargs={}, response_metadata={})]

In [34]:
# To use the trimmer in the chain, run it on the messages before they are passed to the prompt.

workflow = StateGraph(State)


def call_model(state: State):
    """Trim the state's messages, render the prompt, invoke the model, and return its reply in a list."""
    # Only the trimmed messages are sent on; the language is taken from the state unchanged
    prompt_inputs = {
        "messages": trimmer.invoke(state["messages"]),
        "language": state["language"],
    }
    reply = model.invoke(prompt_template.invoke(prompt_inputs))
    return {"messages": [reply]}


# Register the "model" node and connect the graph's START to it
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# Compile with a new MemorySaver checkpointer
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [35]:
# Now, if we ask the model for our name, it will not know it, because that
# part of the chat history has been trimmed away:
config = {"configurable": {"thread_id": "abc567"}}
language = "English"

query = "What is my name?"
input_messages = messages + [HumanMessage(query)]

output = app.invoke({"messages": input_messages, "language": language}, config)
output["messages"][-1].pretty_print()


You haven't mentioned your name, so I don't know it. What would you like me to call you?


In [36]:
# But if we ask about information that is in the last few messages, it remembers:

config = {"configurable": {"thread_id": "abc678"}}
language = "English"

query = "What math problem did I ask?"
input_messages = messages + [HumanMessage(query)]

output = app.invoke({"messages": input_messages, "language": language}, config)
output["messages"][-1].pretty_print()



You asked "What's 2 + 2?"


## Streaming

Agora temos um chatbot funcional. No entanto, uma consideração de UX realmente importante para aplicativos de chatbot é o streaming. Às vezes, os LLMs podem demorar um pouco para responder e, portanto, para melhorar a experiência do usuário, uma coisa que a maioria dos aplicativos faz é transmitir de volta cada token conforme ele é gerado. Isso permite que o usuário veja o progresso.


Por padrão, `.stream` em nosso aplicativo LangGraph transmite etapas do aplicativo -- neste caso, a única etapa da resposta do modelo. Definir `stream_mode="messages"` nos permite transmitir tokens de saída em vez disso.

In [37]:
# New thread for the streaming demo
config = {"configurable": {"thread_id": "abc789"}}
language = "Portuguese"

query = "Hi I'm Will, please tell me a joke."
input_messages = [HumanMessage(query)]

# stream_mode="messages" yields (chunk, metadata) pairs
token_stream = app.stream(
    {"messages": input_messages, "language": language},
    config,
    stream_mode="messages",
)
for chunk, metadata in token_stream:
    # Skip anything that is not a model response
    if not isinstance(chunk, AIMessage):
        continue
    print(chunk.content, end="|")

|Ol|á| Will|!| Cl|aro|,| aqui| vai| uma| pi|ada| para| você|:

|Por| que| o| livro| de| mat|em|ática| estava| tr|iste|?

|Por|que| ele| tinha| muit|os| problemas|.||