# Lesson 4: Persistence and Streaming

In [1]:
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment
# (presumably the API keys the OpenAI and Tavily clients read — TODO confirm).
# The return value is bound to `_` so the cell displays no output.
_ = load_dotenv()

In [2]:
from langgraph.graph import StateGraph, END
from typing import TypedDict, Annotated
import operator
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, ToolMessage
from langchain_openai import ChatOpenAI
from langchain_tavily import TavilySearch

In [3]:
# Web-search tool handed to the agent; each search is capped at two results.
tool = TavilySearch(max_results=2)

In [4]:
class AgentState(TypedDict):
    """State carried through the agent graph: the conversation so far."""
    # `operator.add` is attached as the reducer for this key, so the message
    # lists returned by graph nodes are concatenated onto the existing list
    # instead of replacing it.
    messages: Annotated[list[AnyMessage], operator.add]

In [5]:
from langgraph.checkpoint.sqlite import SqliteSaver
import sqlite3

# Open an in-memory SQLite database to back the checkpointer. Nothing is
# written to disk, so saved state is lost when the kernel restarts.
# check_same_thread=False only disables sqlite3's same-thread check so the
# connection may be used from other threads; it does not by itself make
# concurrent access safe.
conn = sqlite3.connect(":memory:", check_same_thread=False)
memory = SqliteSaver(conn)

In [6]:
class Agent:
    """Tool-calling agent built as a two-node LangGraph state machine.

    The graph alternates between an ``llm`` node (asks the model what to do
    next) and an ``action`` node (runs the tools the model requested) until
    the model replies without requesting any tool call.
    """

    def __init__(self, model, tools, checkpointer, system=""):
        """Build and compile the agent graph.

        Args:
            model: Chat model exposing ``bind_tools``; the tool-bound model is
                what the ``llm`` node invokes.
            tools: Iterable of tools, each with a ``name`` attribute and an
                ``invoke`` method.
            checkpointer: Passed to ``graph.compile`` so graph state is saved
                between runs.
            system: Optional system prompt prepended to every model call.
        """
        self.system = system
        graph = StateGraph(AgentState)
        graph.add_node("llm", self.call_openai)
        graph.add_node("action", self.take_action)
        # After the model answers: run tools if it asked for any, otherwise stop.
        graph.add_conditional_edges("llm", self.exists_action, {True: "action", False: END})
        graph.add_edge("action", "llm")
        graph.set_entry_point("llm")
        self.graph = graph.compile(checkpointer=checkpointer)
        self.tools = {t.name: t for t in tools}
        self.model = model.bind_tools(tools)

    def call_openai(self, state: AgentState):
        """Invoke the model on the conversation and return its reply.

        The system prompt (when set) is prepended for this call only; it is
        not written back into the state.

        Returns:
            A state update ``{'messages': [reply]}``.
        """
        messages = state['messages']
        if self.system:
            messages = [SystemMessage(content=self.system)] + messages
        message = self.model.invoke(messages)
        return {'messages': [message]}

    def exists_action(self, state: AgentState):
        """Return True when the latest message requests at least one tool call."""
        result = state['messages'][-1]
        return len(result.tool_calls) > 0

    def take_action(self, state: AgentState):
        """Run every tool call requested by the latest message.

        Returns:
            A state update ``{'messages': [ToolMessage, ...]}`` containing one
            ``ToolMessage`` per requested call, in request order.
        """
        tool_calls = state['messages'][-1].tool_calls
        results = []
        for t in tool_calls:
            print(f"Calling: {t}")
            if t['name'] not in self.tools:
                # The model asked for a tool that was never registered. Answer
                # the call with a retry hint instead of raising KeyError, so
                # the run continues and the model can correct itself.
                print("\n ....bad tool name....")
                result = "bad tool name, retry"
            else:
                result = self.tools[t['name']].invoke(t['args'])
            results.append(ToolMessage(tool_call_id=t['id'], name=t['name'], content=str(result)))
        print("Back to the model!")
        return {'messages': results}

In [7]:
# System prompt for the agent. The trailing backslashes are line continuations
# inside the string literal, so the sentences form one line of text.
prompt = """You are a smart research assistant. Use the search engine to look up information. \
You are allowed to make multiple calls (either together or in sequence). \
Only look up information when you are sure of what you want. \
If you need to look up some information before asking a follow up question, you are allowed to do that!
"""
model = ChatOpenAI(model="gpt-4o")
# Pass the SQLite-backed `memory` as the checkpointer so graph state is saved
# between calls.
abot = Agent(model, [tool], system=prompt, checkpointer=memory)

In [8]:
# First user turn of the conversation.
messages = [HumanMessage(content="What is the weather in sf?")]

In [9]:
# Run config: thread_id is the key the checkpointer uses to tell one
# conversation's saved state from another's.
thread = {"configurable": {"thread_id": "1"}}

In [10]:
# Stream the run one graph step at a time and print the messages that each
# node contributed in that step.
for step in abot.graph.stream({"messages": messages}, thread):
    for node_update in step.values():
        print(node_update['messages'])

[AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_c6ppUbGmL6f7s9MEHB01IZ6J', 'function': {'arguments': '{"query":"current weather in San Francisco","search_depth":"basic","topic":"general"}', 'name': 'tavily_search'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 29, 'prompt_tokens': 1340, 'total_tokens': 1369, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_07871e2ad8', 'id': 'chatcmpl-Bul8HOCIAcsusGG5fkhUWIwhVO5bw', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--b004511e-dc77-4c8c-9aff-4250b5485448-0', tool_calls=[{'name': 'tavily_search', 'args': {'query': 'current weather in San Francisco', 'search_depth': 'basic', 'topic': 'general'}, 'id': 'call_c6ppUbGmL6f7

In [11]:
# Follow-up question sent on the same thread id ("1") as the previous cell.
thread = {"configurable": {"thread_id": "1"}}
messages = [HumanMessage(content="What about in la?")]
for step in abot.graph.stream({"messages": messages}, thread):
    for node_update in step.values():
        print(node_update)

{'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_bIonporeyFIArLh1JLwjAI8q', 'function': {'arguments': '{"query":"current weather in Los Angeles","search_depth":"basic","topic":"general"}', 'name': 'tavily_search'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 29, 'prompt_tokens': 2020, 'total_tokens': 2049, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 1920}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_07871e2ad8', 'id': 'chatcmpl-Bul8MYhwwdOqLpdyNuWCfycWcaqaq', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--44ad69df-dbef-4b8f-b5c3-061acd895d8b-0', tool_calls=[{'name': 'tavily_search', 'args': {'query': 'current weather in Los Angeles', 'search_depth': 'basic', 'topic': 'general'}, 'id': 'call_

In [12]:
# A question that only makes sense with the earlier turns; still thread "1".
thread = {"configurable": {"thread_id": "1"}}
messages = [HumanMessage(content="Which one is warmer?")]
for step in abot.graph.stream({"messages": messages}, thread):
    for node_update in step.values():
        print(node_update)

{'messages': [AIMessage(content="Los Angeles is currently warmer than San Francisco. Los Angeles has a temperature of about 22.8°C (73.0°F), while San Francisco's temperature is approximately 18.3°C (64.9°F).", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 46, 'prompt_tokens': 2715, 'total_tokens': 2761, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 2688}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_07871e2ad8', 'id': 'chatcmpl-Bul8RBMe9pXzN31bYEJU1T7mYvTDI', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--0e5fcb93-9af1-4385-848b-4220534795ac-0', usage_metadata={'input_tokens': 2715, 'output_tokens': 46, 'total_tokens': 2761, 'input_token_details': {'audio': 0, 'cache_read': 2688}, 'output_token_details': {'audio': 0, 'reasoning': 0}})]}


In [13]:
# Same question, but on a different thread id ("2") from the cells above.
thread = {"configurable": {"thread_id": "2"}}
messages = [HumanMessage(content="Which one is warmer?")]
for step in abot.graph.stream({"messages": messages}, thread):
    for node_update in step.values():
        print(node_update)

{'messages': [AIMessage(content='Could you please provide more context or specify what two or more things you want to compare to determine which one is warmer? For example, are you asking about specific locations, materials, clothing, or something else?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 43, 'prompt_tokens': 1338, 'total_tokens': 1381, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 1280}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_07871e2ad8', 'id': 'chatcmpl-Bul8S33EmwvSuDIwzTkVw3uJGkefv', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--31269812-3c42-459f-90bb-39aac16c4d68-0', usage_metadata={'input_tokens': 1338, 'output_tokens': 43, 'total_tokens': 1381, 'input_token_details': {'audio': 0, 'cache_read': 1280}, 'output_toke

## Streaming tokens

In [15]:
from langgraph.checkpoint.sqlite.aio import AsyncSqliteSaver
import aiosqlite

# Create an async SQLite connection properly
async_memory = None

async def setup_async_memory():
    global async_memory
    conn = await aiosqlite.connect(":memory:")
    async_memory = AsyncSqliteSaver(conn)
    return async_memory

# Set up the async memory
import asyncio
async_memory = await setup_async_memory()
abot = Agent(model, [tool], system=prompt, checkpointer=async_memory)

In [16]:
messages = [HumanMessage(content="What is the weather in SF?")]
thread = {"configurable": {"thread_id": "4"}}

# Using async streaming with token-level streaming
async for event in abot.graph.astream_events({"messages": messages}, thread, version="v1"):
    kind = event["event"]
    if kind == "on_chat_model_stream":
        content = event["data"]["chunk"].content
        if content:
            # Empty content in the context of OpenAI means
            # that the model is asking for a tool to be invoked.
            # So we only print non-empty content
            print(content, end="|")

Calling: {'name': 'tavily_search', 'args': {'query': 'current weather San Francisco'}, 'id': 'call_1IK6bpjCekRBC8tmbJllWCNT', 'type': 'tool_call'}
Back to the model!
Back to the model!
The| current| weather| in| San| Francisco| is| partly| cloudy| with| a| temperature| of| |18|.|3|°C| (|64|The| current| weather| in| San| Francisco| is| partly| cloudy| with| a| temperature| of| |18|.|3|°C| (|64|.|9|°F|).| The| wind| is| blowing|.|9|°F|).| The| wind| is| blowing| from| the| west|-s|outh|west| at| |9|.|8| mph| (|15| from| the| west|-s|outh|west| at| |9|.|8| mph| (|15|.|8| k|ph|),| and| the| humidity| is| |68|%.| Visibility| is|.|8| k|ph|),| and| the| humidity| is| |68|%.| Visibility| is| around| |16| km| (|9| miles|),| around| |16| km| (|9| miles|),| and| there| is| no| precipitation| at| the| moment|.| The| UV| index| is| |8|.|9|,| indicating| a| high| level| of| ultraviolet| and| there| is| no| precipitation| at| the| moment|.| The| UV| index| is| |8|.|9|,| indicating| a| high| level| o