In [3]:
from dotenv import load_dotenv
import os
_ = load_dotenv()

from langgraph.graph import StateGraph, END
from typing import TypedDict, Annotated
import operator
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, ToolMessage
from langchain_openai import ChatOpenAI
from langchain_community.tools.tavily_search import TavilySearchResults

# Connection settings for the hosted model endpoint. `load_dotenv()` above has
# already run, so the token may come from a local .env file or the real
# environment; a missing GITHUB_TOKEN fails fast here with a KeyError.
token = os.environ["GITHUB_TOKEN"]
endpoint = "https://models.github.ai/inference"
model = "openai/gpt-4.1"

# Search tool handed to the agent; `max_results=2` caps what each query returns.
tool = TavilySearchResults(max_results=2)


In [4]:
class AgentState(TypedDict):
    """Graph state shared by every node: the running list of conversation messages."""
    # `operator.add` is attached as the reducer annotation; per LangGraph's reducer
    # convention, the message lists returned by nodes are concatenated onto the
    # existing history instead of replacing it.
    messages: Annotated[list[AnyMessage], operator.add]

from langgraph.checkpoint.sqlite import SqliteSaver

# NOTE(review): this binds `memory` to whatever `from_conn_string` returns without
# entering it. The usage cell further down wraps the same call in a `with` block
# and rebinds `memory` there, so this module-level value looks unused — confirm
# and remove if so.
memory = SqliteSaver.from_conn_string(":memory:")

class Agent:
    """Tool-calling agent implemented as a two-node graph loop.

    The compiled graph starts at the ``llm`` node. After each model reply,
    ``exists_action`` routes to the ``action`` node when the reply requests
    tool calls and to ``END`` otherwise; ``action`` always hands control back
    to ``llm``.

    Attributes:
        system: Optional system prompt prepended to every model call.
        tools: Mapping of tool name -> tool object used to dispatch tool calls.
        model: The chat model with ``tools`` bound to it.
        graph: The compiled graph (callers use ``graph.stream`` /
            ``graph.astream_events``).
    """

    def __init__(self, model, tools, checkpointer, system=""):
        """Wire up the graph and bind the tools to the model.

        Args:
            model: Chat model exposing ``bind_tools`` and ``invoke``.
            tools: Iterable of tool objects, each with ``name`` and ``invoke``.
            checkpointer: Checkpointer passed straight to ``graph.compile``.
            system: Optional system prompt; an empty string disables it.
        """
        self.system = system
        # Set the attributes the node callbacks read before compiling the graph,
        # so the instance is fully initialised by the time the graph exists.
        self.tools = {t.name: t for t in tools}
        self.model = model.bind_tools(tools)

        graph = StateGraph(AgentState)
        graph.add_node("llm", self.call_openai)
        graph.add_node("action", self.take_action)
        graph.add_conditional_edges("llm", self.exists_action, {True: "action", False: END})
        graph.add_edge("action", "llm")
        graph.set_entry_point("llm")
        self.graph = graph.compile(checkpointer=checkpointer)

    def call_openai(self, state: AgentState):
        """Invoke the model on the conversation so far.

        Returns:
            A state update ``{'messages': [reply]}`` holding the model's reply.
        """
        messages = state['messages']
        if self.system:
            # Prepend on a fresh list; the stored history is left untouched.
            messages = [SystemMessage(content=self.system)] + messages
        message = self.model.invoke(messages)
        return {'messages': [message]}

    def exists_action(self, state: AgentState):
        """Return True when the most recent message requests at least one tool call."""
        last_message = state['messages'][-1]
        # A message without a `tool_calls` attribute (or with an empty one) means
        # there is nothing to execute, so route to END instead of raising.
        return bool(getattr(last_message, "tool_calls", None))

    def take_action(self, state: AgentState):
        """Execute every tool call requested by the most recent message.

        Returns:
            A state update ``{'messages': [...]}`` with one ``ToolMessage`` per
            requested call, in request order.
        """
        tool_calls = state['messages'][-1].tool_calls
        results = []
        for t in tool_calls:
            print(f"Calling: {t}")
            selected = self.tools.get(t['name'])
            if selected is None:
                # The model asked for a tool that was never registered. Answer the
                # call with a retry hint instead of raising KeyError, so every
                # requested call id still receives a ToolMessage and the loop
                # can continue.
                print("\n ....bad tool name....")
                result = "bad tool name, retry"
            else:
                result = selected.invoke(t['args'])
            results.append(ToolMessage(tool_call_id=t['id'], name=t['name'], content=str(result)))
        print("Back to the model!")
        return {'messages': results}



In [None]:
# Run the agent inside the checkpointer's `with` block so that `memory` is the
# object yielded by the context manager for the whole run.
with SqliteSaver.from_conn_string(":memory:") as memory:
    prompt = """You are a smart research assistant. Use the search engine to look up information. \
    You are allowed to make multiple calls (either together or in sequence). \
    Only look up information when you are sure of what you want. \
    If you need to look up some information before asking a follow up question, you are allowed to do that!
    """
    # Chat model served from `endpoint` and authenticated with `token`.
    # NOTE: this rebinds `model`, which held the model-id string until now.
    model = ChatOpenAI(
        base_url=endpoint,
        api_key=token,
        model="openai/gpt-4.1",
    )

    abot = Agent(model, [tool], system=prompt, checkpointer=memory)

    # Stream the compiled graph; each event is a dict whose values are state
    # updates carrying a 'messages' list, printed as they arrive.
    messages = [HumanMessage(content="What is the weather in sf?")]
    # presumably `thread_id` selects which stored conversation the checkpointer
    # resumes — confirm against the LangGraph checkpointer docs
    thread = {"configurable": {"thread_id": "1"}}

    for event in abot.graph.stream({"messages": messages}, thread):
        for v in event.values():
            print(v['messages'])

    # Disabled follow-up: same thread id ("1") as the question above.
    # messages = [HumanMessage(content="Which one is warmer?")]
    # thread = {"configurable": {"thread_id": "1"}}
    # for event in abot.graph.stream({"messages": messages}, thread):
    #     for v in event.values():
    #         print(v)

    # Disabled follow-up: same question on a different thread id ("2").
    # messages = [HumanMessage(content="Which one is warmer?")]
    # thread = {"configurable": {"thread_id": "2"}}
    # for event in abot.graph.stream({"messages": messages}, thread):
    #     for v in event.values():
    #         print(v['messages'])

[AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_NI2gUyiNBpNiiJUdhzqQ5r0R', 'function': {'arguments': '{"query":"current weather in San Francisco"}', 'name': 'tavily_search_results_json'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 23, 'prompt_tokens': 155, 'total_tokens': 178, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4.1-2025-04-14', 'system_fingerprint': 'fp_b663f05c2c', 'id': 'chatcmpl-ByuFWN7rKwiNYD1WR8uyMaE8KCpsc', 'service_tier': None, 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--d139b789-4e18-4efa-9895-6d6a47a8f05e-0', tool_calls=[{'name': 'tavily_search_results_json', 'args': {'query': 'current weather in San Francisco'}, 'id': 'call_NI2gUyiNBpNiiJUdhzqQ5r0R', 'type': 'tool_call'}], usage_metadata={'input_token

In [6]:
from langgraph.checkpoint.sqlite.aio import AsyncSqliteSaver

async with AsyncSqliteSaver.from_conn_string(":memory:") as memory:

    abot = Agent(model, [tool], system=prompt, checkpointer=memory)

    messages = [HumanMessage(content="What is the weather in SF?")]

    thread = {"configurable": {"thread_id": "4"}}

    async for event in abot.graph.astream_events({"messages": messages}, thread, version="v1"):
        kind = event["event"]
        if kind == "on_chat_model_stream":
            content = event["data"]["chunk"].content
            if content:
                # Empty content in the context of OpenAI means
                # that the model is asking for a tool to be invoked.
                # So we only print non-empty content
                print(content, end="|")

Calling: {'name': 'tavily_search_results_json', 'args': {'query': 'current weather in San Francisco'}, 'id': 'call_JALlVNYm0lk74EsJRlYJSNaN', 'type': 'tool_call'}
Back to the model!
The| current| weather| in| San| Francisco| is| generally| mild| and| partly| cloudy|.| Day|time| temperatures| are| typically| around| |19|°C|-|21|°C| (|66|°F|-|70|°F|),| with| lows| near| |13|°C| (|55|°F|).| There| may| be| some| mist| or| over|cast| in| the| mornings|,| but| it| often| clears| up| to| be| partly| cloudy| or| sunny| during| the| day|.

|Would| you| like| a| live| weather| update| or| a| forecast| for| a| specific| day|?|