In [1]:
# Ref:
# YouTube Video
# Sam Witteveen
# Creating an AI Agent with LangGraph Llama 3 & Groq
# https://www.youtube.com/watch?v=lvQ96Ssesfk

In [2]:
# Install the notebook's dependencies.
# `%pip` is used instead of `!pip` so the packages are installed into the
# environment of the kernel that is running this notebook.
%pip -q install groq
%pip -q install tavily-python
%pip -q install -U langchain langgraph

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m103.5/103.5 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m974.0/974.0 kB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.6/88.6 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m314.7/314.7 kB[0m [31m31.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━

In [3]:
# Show the installed langgraph package metadata (name, version, location).
!pip show langgraph

Name: langgraph
Version: 0.0.66
Summary: langgraph
Home-page: https://www.github.com/langchain-ai/langgraph
Author: 
Author-email: 
License: MIT
Location: /usr/local/lib/python3.10/dist-packages
Requires: langchain-core
Required-by: 


In [4]:
import json
import os
from google.colab import userdata


# Set API keys as environment variables

In [5]:
#os.environ["GROQ_API_KEY"] = userdata.get('GROQ_API_KEY')
#os.environ["TAVILY_API_KEY"] = userdata.get('TAVILY_API_KEY')

# Define the API clients

In [6]:
from groq import Groq
from tavily import TavilyClient

# Both SDK clients receive their API key directly from google.colab's
# `userdata` store, looked up by secret name.
groq_client = Groq(api_key=userdata.get('GROQ_API_KEY'))
tavily_client = TavilyClient(
    api_key=userdata.get('TAVILY_API_KEY'),
)


# What is the objective?

Create a ReAct workflow to answer user questions.<br>
The input is a user question<br>
The output is the answer to the user question.

# Draw the graph
- Give each function a number
- Give each edge a number

# List the inputs and graph functions
Each point in the graph is just a function.<br>
There are node functions and conditional edge functions.<br>
The inputs are passed to the graph at the start (as a dict).<br>
They automatically initialize the values in the state.

Inputs
- system_message
- user_query
- num_steps

Functions
1. initialize_message_history_1 (node)
2. query_llm_2 (node) (llm)
3. route_to_websearch_or_answer_3 (cond edge)
4. run_web_search_4 (node) (tavily)
5. save_final_answer_5 (node)
6. print_the_state_6 (node)

# Helper functions

In [7]:
def write_markdown_file(content, filename):
  """Writes the given content to ``<filename>.md`` as UTF-8 text.

  The ``.md`` extension is appended automatically. An existing file with
  the same name is overwritten.

  Args:
    content: The string content to write to the file.
    filename: The filename (without extension) to save the file as.
  """
  # Pin the encoding so text containing non-ASCII characters is written the
  # same way on every platform instead of depending on the locale default.
  with open(f"{filename}.md", "w", encoding="utf-8") as f:
    f.write(content)


In [8]:
def initialize_message_history(system_message, first_user_query):
    """
    Build the opening two-message chat transcript.
    Args:
        system_message (str): The ReAct system message
        first_user_query (str): The first question from the user
    Returns:
        list of dicts: a "system" message followed by a "user" message,
        each with "role" and "content" keys.
    """

    print("---INITIALIZE MESSAGE HISTORY---")

    # (role, content) pairs in the order they appear in the transcript
    opening_turns = (
        ("system", system_message),
        ("user", first_user_query),
    )

    return [{"role": role, "content": text} for role, text in opening_turns]



# Set up the LLM

In [9]:
def make_llm_api_call(message_history, model="llama3-70b-8192"):

    """
    Makes a chat-completion call to a model hosted on Groq.
    Args:
        message_history (List of dicts): The message history, each entry
            having "role" and "content" keys
        model (str): The Groq model id to query. Defaults to the Llama3 70B
            id this notebook was written with; pass a different id to switch
            models without editing this function.
    Returns:
        response_text: (str): The text content of the first choice
            in the response
    """

    response = groq_client.chat.completions.create(
        messages=message_history,
        model=model,
    )

    # Only the first choice is used.
    return response.choices[0].message.content


# Example

message_history = [
    {"role": "system", "content": "Your name is Molly."},
    {"role": "user", "content": "What's your name?"},
]

# Quick check that the model follows the system message.
response = make_llm_api_call(message_history)

print(response)

My name is Molly!


# Set up the tools

In [10]:
def run_tavily_search(query, num_results=5):

    """
    Run a web search through the Tavily client.
    Args:
        query (str): The search query text
        num_results (int): Maximum number of results to request
    Returns:
        The value returned by `tavily_client.search`, passed through
        unchanged.
    """

    # For basic search:
    return tavily_client.search(query=query, max_results=num_results)



# Example

query = "How much does a bulldog weigh?"

# Request only two results for this demo call.
results = run_tavily_search(query, num_results=2)

# Author's note: use this str output in the system message example below
# (instead of the Eisenhower example).
print(results)

{'query': 'How much does a bulldog weigh?', 'follow_up_questions': None, 'answer': None, 'images': None, 'results': [{'title': 'English Bulldog Growth and Weight Chart (Male & Female)', 'url': 'https://www.k9web.com/breeds/english-bulldog-growth-chart/', 'content': 'Male two-month-old Bulldogs will weigh between 9 and 12 pounds (4 and 5.4 kg), while females should weigh 7 and 10 pounds (3.1 and 4.5 kg). ... If your dog seems to be putting on too much weight too quickly, you may consider taking him to the vet to rule out common health problems such as hypothyroidism, leading to excessive weight gain. 2 ...', 'score': 0.93549, 'raw_content': None}, {'title': 'English Bulldog Growth & Weight Chart: Everything You Need To ... - Pawlicy', 'url': 'https://www.pawlicy.com/blog/english-bulldog-growth-and-weight-chart/', 'content': 'According to Care.com, puppies reach about 75% of their adult height at six months old. This will be around 10-13 inches tall for a male English Bulldog and approxi

# Simple ReAct example

In [11]:
# ReAct Example

# ReAct prompt for the simple demo (two made-up actions, no real tools).
# NOTE(review): the example session inside this prompt is not strict JSON
# (a "." follows the Action value, PAUSE/DONE are unquoted, and there are
# trailing commas). In this cell the model's reply is only printed, not parsed.
system_message = """
You run in a loop of Thought, Action, PAUSE, Observation.
At the end of the loop you output an Answer
Use Thought to describe your thoughts about the question you have been asked.
Use Action to run one of the actions available to you - then return PAUSE.
Observation will be the result of running those actions.
Output your response as a JSON string.

Your available actions are:

calculate:
e.g. calculate: 4 * 7 / 3
Runs a calculation and returns the number - uses Python so be sure to use floating point syntax if necessary

average_dog_weight:
e.g. average_dog_weight: Collie
returns average weight of a dog when given the breed

Example session:

Question: How much does a Bulldog weigh?
{
    "Thought": "I should look the dogs weight using average_dog_weight",
    "Action": "average_dog_weight: Bulldog".
    "Status": PAUSE,
}

You will be called again with this:

Observation: A Bulldog weights 51 lbs

You then output:
{
    Answer: A bulldog weights 51 lbs,
    Status: DONE,
}
""".strip()

user_query = "How much does a toy poodle weigh?"

# Conversation right after the user asks the question.
message_history1 = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": user_query},
]

# The same conversation one round later: a hand-written assistant turn that
# requests an action, followed by a hand-written observation from the user.
message_history2 = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": user_query},
    {
        "role": "assistant",
        "content": '{\n    "Thought": "I should look up the average weight of a toy poodle",\n    "Action": "average_dog_weight: Toy Poodle",\n    "Status": PAUSE,\n}',
    },
    {"role": "user", "content": "a toy poodles average weight is 7 lbs"},
]

response = make_llm_api_call(message_history2)

print(response)

{
    "Answer": "A toy poodle weighs 7 lbs",
    "Status": DONE,
}


# Set up the system message
Set up the system message and test the performance of the LLM.

In [12]:

# ReAct prompt for the web-search agent.
# The replies produced under this prompt are parsed with json.loads further
# down, so both example objects in the prompt are kept as valid JSON.
system_message = """
You run in a loop of Thought, Action, PAUSE, Observation.
At the end of the loop you output an Answer
Use Thought to describe your thoughts about the question you have been asked.
Use Action to run one of the actions available to you - then return PAUSE.
Observation will be the result of running those actions.
Output your response as a JSON string.

Your available actions are:

run_web_search:
e.g. run_web_search: average weight of a Collie
returns web search info relating to Collies.

Break the query down into separate questions and search only one question at a time.
You are only allowed to search one topic at a time.
You are allowed to make multiple web searches (but not together, only in sequence).

Example session:

Question: How much does a Bulldog weigh?
{
    "Thought": "I should run a web search for the average weight of a bulldog",
    "Action": "run_web_search: Average weight of a Bulldog",
    "Status": "PAUSE"
}

You will be called again with this:

Observation: Web search results relating to Bulldogs

You then output:
{
    "Thought": "I have all the information I need to answer the question.",
    "Answer": "A bulldog weights 51 lbs",
    "Status": "DONE"
}
""".strip()





# Example

user_query = "What was Dwight Eisenhower's presidential campaign slogan?"

#user_query = "What are the currencies of Thailand and Indonesia?"

message_history = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": user_query},
]

# Round 1: ask the model, then record its reply in the transcript
response1 = make_llm_api_call(message_history)
messsage = {"role": "assistant", "content": response1}
message_history.append(messsage)

# Parse the reply once and pull out both the search text and the status
json_response = json.loads(response1)
search_text = json_response['Action'].replace('run_web_search:', "").strip()
status = json_response['Status']
print(status)

# Run the tavily search and feed the results back as a user message
search_results = run_tavily_search(search_text, num_results=10)
messsage = {"role": "user", "content": str(search_results)}
message_history.append(messsage)

# Round 2: the model now sees the search results
response2 = make_llm_api_call(message_history)

json_response = json.loads(response2)
status = json_response['Status']
print(status)

print(response1)
print(response2)

PAUSE
DONE
{
    "Thought": "I'm not familiar with Dwight Eisenhower's presidential campaign slogan, I should run a web search to find the answer",
    "Action": "run_web_search: Dwight Eisenhower presidential campaign slogan",
    "Status": "PAUSE"
}
{
    "Thought": "I have all the information I need to answer the question.",
    "Answer": "Dwight Eisenhower's presidential campaign slogan was 'I Like Ike'.",
    "Status": "DONE"
}


# Define the inputs
These inputs are passed into the graph at the start.<br>
They initialize the variables in the state.

Inputs
- system_message
- user_query
- num_steps

In [13]:
# Inputs to initialize state variables

# inputs = {"system_message": system_message, "user_query": user_query, "num_steps": 0}

# Define the graph functions
Each point in the graph is just a function.<br>
There are node functions and conditional edge functions.

Functions
1. initialize_message_history_1 (node)
2. query_llm_2 (node) (llm)
3. route_to_websearch_or_answer_3 (cond edge)
4. run_web_search_4 (node) (tavily)
5. save_final_answer_5 (node)
6. print_the_state_6 (node)

In [14]:
def initialize_message_history_1(state):

    """
    Graph node 1: seed the message history.

    Reads 'num_steps', 'system_message' and 'user_query' from the state and
    returns a dict with a two-entry 'message_history' (system message, then
    user query) and 'num_steps' increased by one.
    """

    print("---INITIALIZE MESSAGE HISTORY---")

    # Count this node as one step
    step_count = int(state['num_steps']) + 1

    seed_turns = [
        {"role": "system", "content": state['system_message']},
        {"role": "user", "content": state['user_query']},
    ]

    for note in (
        "Added system message.",
        "Added user query.",
        "Message history initialized.",
    ):
        print(note)

    # Update the state
    return {"message_history": seed_turns, "num_steps": step_count}



In [15]:
def query_llm_2(state):

    """
    Graph node 2: send the message history to the LLM and record its reply.

    Reads 'num_steps' and 'message_history' from the state.
    Returns a dict with:
        message_history: a new list, the previous history plus the
            assistant reply
        llm2_response: the raw reply text from make_llm_api_call
        num_steps: the previous value increased by one
    """

    print("---MAKE LLM API CALL---")

    # Increment the steps
    num_steps = int(state['num_steps']) + 1

    # Get the message history.
    # Note that the user search query is already included
    prior_history = state['message_history']

    # Run the llm
    # The prompt asks for a JSON string; the text is passed on unparsed here
    response = make_llm_api_call(prior_history)

    print("Response:", response)

    # Add the assistant's response to the message history.
    # A new list is built so the list held in the incoming state is not
    # modified in place; the update is communicated via the return value.
    message_history = [*prior_history, {"role": "assistant", "content": response}]

    print("Updated message history - assistant")

    # Update the state
    return {"message_history": message_history,  "llm2_response": response, "num_steps": num_steps}




In [16]:
def route_to_websearch_or_answer_3(state):

    """
    Route to web search or to the final answer.

    The latest LLM reply is parsed as JSON and its 'Status' field decides
    the route. The comparison ignores surrounding whitespace and letter case.
    Args:
        state (dict): The current graph state; 'llm2_response' is read
    Returns:
        str: "to_web_search" for status PAUSE,
             "to_final_answer" for status DONE
    Raises:
        ValueError: if the status is neither PAUSE nor DONE (previously this
            case fell through and returned None, which is not a route key).
            json.loads also raises a ValueError subclass on malformed JSON.
        KeyError: if the reply has no 'Status' field
    """

    print("---ROUTE TO WEB SEARCH OR ANSWER---")

    # Get the last llm node 2 response from the state
    llm2_response = state["llm2_response"]

    # Extract the status
    json_response = json.loads(llm2_response)
    status = json_response['Status']
    print("Status:", status)

    # Tolerate replies such as "done" or " PAUSE "
    normalized_status = str(status).strip().upper()

    if normalized_status == 'PAUSE':
        print("Routing to web search.")
        return "to_web_search"

    if normalized_status == 'DONE':
        print("Routing to final amswer")
        return "to_final_answer"

    raise ValueError(
        f"Unexpected Status {status!r} in LLM response; expected 'PAUSE' or 'DONE'."
    )

In [17]:
def run_web_search_4(state):

    """
    Graph node 4: run the web search the LLM asked for.

    Reads 'num_steps', 'llm2_response' and 'message_history' from the state.
    The search text is the 'Action' field of the parsed LLM reply with the
    'run_web_search:' marker removed.
    Returns a dict with:
        message_history: a new list, the previous history plus a user
            message holding the search results converted to a string
        num_steps: the previous value increased by one
    """

    print("---RUNNING WEB SEARCH---")
    # Increment the steps
    num_steps = int(state['num_steps']) + 1

    # Get the last llm node 2 response from the state
    llm2_response = state["llm2_response"]
    prior_history = state["message_history"]

    # Extract the search text
    json_response = json.loads(llm2_response)
    search_text = json_response['Action'].replace('run_web_search:', "").strip()

    print("Search text:", search_text)

    # Run the tavily search and convert the results to a string
    web_search_results = str(run_tavily_search(search_text, num_results=5))

    # Add the search results to the message history.
    # A new list is built so the list held in the incoming state is not
    # modified in place; the update is communicated via the return value.
    message_history = [*prior_history, {"role": "user", "content": web_search_results}]

    print("Updated message history - user, search results")

    # Update the state
    return {"message_history": message_history, "num_steps":num_steps}

In [18]:
def save_final_answer_5(state):

    """
    Graph node 5: pull the answer out of the last LLM reply and save it.

    Reads 'num_steps' and 'llm2_response' from the state, parses the reply
    as JSON, and passes its 'Answer' field (as a string) to
    write_markdown_file under the name "final_answer".
    Returns a dict with 'final_answer' and 'num_steps' increased by one.
    """

    print("---EXTRACTING AND SAVING FINAL ANSWER---")
    step_count = state['num_steps'] + 1

    # Extract the final answer
    final_text = json.loads(state["llm2_response"])['Answer']

    print("Final answer:", final_text)

    # Save the answer to a file
    write_markdown_file(str(final_text), "final_answer")

    # Update the state
    return {"final_answer": final_text, "num_steps": step_count}

In [19]:
def print_the_state_6(state):

    """
    Graph node 6: print a short summary of the state.

    Prints the 'llm2_response', 'final_answer' and 'num_steps' entries.
    Returns None, so the state is not updated.
    """

    print("---STATE PRINTER---")

    # (label, state key) pairs, printed in this order
    summary_fields = (
        ("Last llm response", 'llm2_response'),
        ("Final answer", 'final_answer'),
        ("Num steps", 'num_steps'),
    )
    for label, key in summary_fields:
        print(f"{label}: {state[key]} \n")

    return


# Define the state

The functions take the state as input.<br>
The functions usually return a dict that automatically updates the state.<br>
Therefore, the variables in the state need to correspond to variables that the functions output.

In [20]:
from langchain.schema import Document
from langgraph.graph import END, StateGraph

In [21]:
from typing_extensions import TypedDict
from typing import List

### State

class GraphState(TypedDict):
    """
    Represents the state of the graph.

    The node functions read these keys from the state and return dicts
    whose keys are a subset of them.

    Attributes:
        system_message: the ReAct system message (supplied as an input)
        user_query: the question from the user (supplied as an input)
        message_history: list of llm chat messages, each a dict with
            "role" and "content" keys
        llm2_response: the latest llm response text, as returned by query_llm_2
        final_answer: final llm answer to the user query, set by
            save_final_answer_5
        num_steps: number of steps; nodes that return it add one
    """

    system_message : str
    user_query : str
    message_history : List[dict]
    llm2_response : str
    final_answer : str
    num_steps : int


# Build the graph

1. initialize_message_history_1 (node)
2. query_llm_2 (node) (llm)
3. route_to_websearch_or_answer_3 (cond edge)
4. run_web_search_4 (node) (tavily)
5. save_final_answer_5 (node)
6. print_the_state_6 (node)

## Initialize the graph

In [22]:
# Create the graph builder, using GraphState as the state schema.
workflow = StateGraph(GraphState)

## Define the nodes

In [23]:
# Register every node under the name of its function, in graph order.
# (The router, function 3, is a conditional edge and is not added as a node.)
for node_fn in (
    initialize_message_history_1,
    query_llm_2,
    run_web_search_4,
    save_final_answer_5,
    print_the_state_6,
):
    workflow.add_node(node_fn.__name__, node_fn)

## Define the edges

In [24]:
# INPUT

# e-0: execution starts at node 1
workflow.set_entry_point("initialize_message_history_1")

# e-1
workflow.add_edge("initialize_message_history_1", "query_llm_2")

# e-2: after each LLM call the router's return value selects the next node
route_targets = {
    "to_web_search": "run_web_search_4",  # e-3
    "to_final_answer": "save_final_answer_5",  # e-5
}
workflow.add_conditional_edges(
    "query_llm_2",
    route_to_websearch_or_answer_3,
    route_targets,
)

# Remaining unconditional edges
for source_node, target_node in (
    ("run_web_search_4", "query_llm_2"),  # e-4: search results go back to the LLM
    ("save_final_answer_5", "print_the_state_6"),  # e-6
    ("print_the_state_6", END),  # e-7
):
    workflow.add_edge(source_node, target_node)

# END

## Compile the graph

In [25]:
# Compile the workflow definition into the runnable `app` invoked below
app = workflow.compile()

# Run the system

In [26]:
# Question for this run; it replaces the example query assigned earlier.
user_query = "What are the currencies of South Africa, Thailand and Indonesia?"

#user_query = " Which actors won the best actress oscar in 2000 and 2001?"

# Initial state values. num_steps starts at 0; each node that returns it adds 1.
inputs = {"system_message": system_message, "user_query": user_query, "num_steps": 0}

In [27]:
# Run the graph with the initial inputs.
# The progress messages printed by the node functions are displayed when this cell is run.

output = app.invoke(inputs)

---INITIALIZE MESSAGE HISTORY---
Added system message.
Added user query.
Message history initialized.
---MAKE LLM API CALL---
<class 'list'>
Response: {
    "Thought": "I need to find the currencies of three countries. I'll start with South Africa.",
    "Action": "run_web_search: Currency of South Africa",
    "Status": "PAUSE"
}
Updated message history - assistant
---ROUTE TO WEB SEARCH OR ANSWER---
Status: PAUSE
Routing to web search.
---RUNNING WEB SEARCH---
Search text: Currency of South Africa
Updated message history - user, search results
---MAKE LLM API CALL---
<class 'list'>
Response: {
    "Thought": "I have the information about the currency of South Africa. It's the South African rand (ZAR). Now, I need to find the currencies of Thailand and Indonesia.",
    "Action": "run_web_search: Currency of Thailand",
    "Status": "PAUSE"
}
Updated message history - assistant
---ROUTE TO WEB SEARCH OR ANSWER---
Status: PAUSE
Routing to web search.
---RUNNING WEB SEARCH---
Search text

In [28]:
# Display the final answer

print(output['final_answer'])

The currencies are: South African rand (ZAR) for South Africa, Thai baht (THB) for Thailand, and Indonesian rupiah (IDR) for Indonesia.


In [29]:
# Check that the markdown file (final_answer.md) has been created
# in the current working directory

!ls

final_answer.md  sample_data


In [30]:
# Read the contents of the file; it should match the final answer printed above

!cat final_answer.md

The currencies are: South African rand (ZAR) for South Africa, Thai baht (THB) for Thailand, and Indonesian rupiah (IDR) for Indonesia.