In [1]:
# Ref:
# YouTube Video
# Sam Witteveen
# Creating an AI Agent with LangGraph Llama 3 & Groq
# https://www.youtube.com/watch?v=lvQ96Ssesfk

In [1]:
!pip -q install groq
!pip -q install tavily-python
!pip -q install -U langchain langgraph

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m103.5/103.5 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m974.0/974.0 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.6/88.6 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m314.7/314.7 kB[0m [31m26.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━

In [2]:
!pip show langgraph

Name: langgraph
Version: 0.0.66
Summary: langgraph
Home-page: https://www.github.com/langchain-ai/langgraph
Author: 
Author-email: 
License: MIT
Location: /usr/local/lib/python3.10/dist-packages
Requires: langchain-core
Required-by: 


In [3]:
import json
import os
from google.colab import userdata


# Set API keys as environment variables

In [5]:
#os.environ["GROQ_API_KEY"] = userdata.get('GROQ_API_KEY')
#os.environ["TAVILY_API_KEY"] = userdata.get('TAVILY_API_KEY')

# Define the API clients

In [4]:
from groq import Groq
from tavily import TavilyClient

# Build the two API clients used throughout the notebook. Each key is looked up
# through google.colab's userdata helper rather than being written in the notebook.
groq_client = Groq(api_key=userdata.get('GROQ_API_KEY'))

tavily_client = TavilyClient(
    api_key=userdata.get('TAVILY_API_KEY'),
)


# What is the objective?

The objective is to answer a multi-part question from a user.

Use a keyword creation agent instead of a ReAct pattern to answer user questions.<br>
The input is a user question.<br>
The output is the answer to the user question.

# Draw the graph
- Give each function a number
- Give each edge a number

# List the inputs and graph functions
Each point in the graph is just a function.<br>
There are node functions and conditional edge functions.<br>
The inputs are passed to the graph at the start (as a dict).<br>
They automatically initialize the values in the state.

Inputs
- keywords_system_message
- final_answer_system_message
- user_query
- num_steps

Functions
1. query_keywords_llm_1 (node) (llm)
2. run_web_search_2 (node) (tavily)
3. query_final_answer_llm_3 (node) (llm)
4. save_final_answer_4 (node)
5. print_the_state_5 (node)

# Helper functions

In [5]:
def write_markdown_file(content, filename):
  """Writes the given content as a markdown file to the local directory.

  The file is always written as UTF-8, so text containing non-ASCII
  characters is saved the same way regardless of the platform's default
  encoding. An existing file with the same name is overwritten.

  Args:
    content: The string content to write to the file.
    filename: The filename to save the file as, without the ".md"
      extension (it is appended here).
  """
  # Explicit encoding: open() otherwise falls back to a locale-dependent default
  with open(f"{filename}.md", "w", encoding="utf-8") as f:
    f.write(content)


In [6]:
def create_message_history(system_message, user_input):
    """
    Build the two-message conversation that is sent to the chat model.

    Args:
        system_message (str): Text placed in the "system" message.
        user_input (str): Text placed in the "user" message.
    Returns:
        A list of two dicts in OpenAI chat format: the system message
        followed by the user message.
    """
    # (role, content) pairs in the order they must appear in the history
    turns = (("system", system_message), ("user", user_input))

    return [{"role": role, "content": text} for role, text in turns]



# Set up the LLM

In [7]:
def make_llm_api_call(message_history, model="llama3-70b-8192"):

    """
    Makes a chat-completion call to a model hosted on Groq.
    Args:
        message_history (List of dicts): The message history, in the
            system/user format built by create_message_history
        model (str): The Groq model id to query. Defaults to
            "llama3-70b-8192", the model this notebook previously had
            hard-coded, so existing one-argument calls behave as before.
    Returns:
        response_text: (str): The text response from the LLM
    """

    response = groq_client.chat.completions.create(
                        messages=message_history,
                        model=model,
                    )

    # Only the first choice is used; its message content is the reply text
    response_text = response.choices[0].message.content

    return response_text


# Example: smoke-test the LLM helper with a trivial system prompt

system_message = "Your name is Molly."
user_message = "What's your name?"

# Wrap the two strings in the system/user message list
message_history = create_message_history(system_message, user_message)

# Send the messages to the model and get back the reply text
response = make_llm_api_call(message_history)

print(response)

My name is Molly!


# Set up the tools

In [8]:
def run_tavily_search(query, num_results=5):
    """
    Run a basic Tavily web search for a single query.

    Args:
        query (str): The search text sent to Tavily
        num_results (int): Forwarded to Tavily as max_results
    Returns:
        Whatever tavily_client.search returns. The example output printed
        in this notebook is a dict with keys such as 'query' and 'results'.
    """
    return tavily_client.search(query=query, max_results=num_results)



# Example: run one search and inspect what comes back

query = "How much does a bulldog weigh?"

# Limit this search to two results
results = run_tavily_search(query, num_results=2)

# Author notes:
# Use this str output in the system message example below
# Use this instead of the Eisenhower example
print(results)

{'query': 'How much does a bulldog weigh?', 'follow_up_questions': None, 'answer': None, 'images': None, 'results': [{'title': 'English Bulldog Growth & Weight Chart: Everything You Need To ... - Pawlicy', 'url': 'https://www.pawlicy.com/blog/english-bulldog-growth-and-weight-chart/', 'content': 'According to Care.com, puppies reach about 75% of their adult height at six months old. This will be around 10-13 inches tall for a male English Bulldog and approximately 9-11 inches tall for a female English Bulldog. As for weight, a 6-month-old male English Bulldog will weigh about 33 to 37 pounds, while a 6-month-old female English Bulldog ...', 'score': 0.94611, 'raw_content': None}, {'title': 'English Bulldog Growth and Weight Chart (Male & Female)', 'url': 'https://www.k9web.com/breeds/english-bulldog-growth-chart/', 'content': 'Male two-month-old Bulldogs will weigh between 9 and 12 pounds (4 and 5.4 kg), while females should weigh 7 and 10 pounds (3.1 and 4.5 kg). ... If your dog seems

# Set up the system messages

Set up the system message and test the performance of the LLM.

In [12]:
# System prompt for the keyword-generation LLM call. It asks for a JSON object
# with a single 'keywords' key; the code that consumes the reply parses it with
# json.loads and reads that key.
keywords_system_message = """
You are a master at working out the best keywords and phrases to search for in a web search to get the best info for the customer.

You will be given a USER_QUERY. Work out the best search keywords that will find the best
info for helping to answer the user query.

Return a JSON with a single key 'keywords' with a list of no more than 5 keywords or phrases, and no premable or explaination.
"""


# Example: try the keywords prompt on a sample question

# NOTE: this first question is overwritten by the assignment right after it,
# so only the Eisenhower question is actually sent to the model.
user_message = "What are the currencies of South Africa and Thailand?"

user_message = "What was Dwight Eisenhowers presidential campaign slogan?"

message_history = create_message_history(keywords_system_message, user_message)

response = make_llm_api_call(message_history)

print(response)

{ "keywords" : ["Dwight Eisenhower", "presidential campaign", "slogan", "I Like Ike", "Eisenhower campaign history"] }


In [53]:
# System prompt for the final-answer LLM call. It asks for a JSON string with a
# single 'final_answer' key, which is the key the consuming code reads back
# after json.loads.
final_answer_system_message = """
You are the Final Answer Agent. Take the USER_QUERY and the RESEARCH_INFO from the research agent and \
write a helpful response in a thoughtful and friendly way.

You never make up information that hasn't been provided in the research_info.

Return the response as a JSON string with a single key 'final_answer' and no premable or explaination.
"""


# Example: run the keywords -> web search -> final answer pipeline by hand

# Get the search keywords
user_query = "What is the currency of South Africa and Thailand?"
message_history = create_message_history(keywords_system_message, user_query)

response = make_llm_api_call(message_history)

# The keywords prompt asks for JSON with a single 'keywords' key
response = json.loads(response)
keywords_list = response['keywords']

# Run searches using the keywords
research_info_list = []

for query in keywords_list:

    results = run_tavily_search(query, num_results=2)
    research_info_list.append(results)


# Get the final answer

# Named final_answer_input rather than `input` so that Python's built-in
# input() is not shadowed for the rest of the kernel session.
final_answer_input = f"""
USER_QUERY: {user_query}
RESEARCH_INFO: {research_info_list}
"""

message_history = create_message_history(final_answer_system_message, final_answer_input)

response = make_llm_api_call(message_history)

# The final-answer prompt asks for JSON with a single 'final_answer' key
response = json.loads(response)
final_answer = response['final_answer']

# Bare last expression so the notebook displays the answer
final_answer

'The currency of South Africa is the South African Rand (ZAR) and the currency of Thailand is the Thai Baht (THB).'

# Define the inputs
These inputs are passed into the graph at the start.<br>
They initialize the variables in the state.

Inputs
- keywords_system_message
- final_answer_system_message
- user_query
- research_info_list (not part of the initial inputs dict; it is filled in by run_web_search_2)
- num_steps

In [13]:
# Inputs to initialize state variables

# inputs = {"keywords_system_message": keywords_system_message,
# "final_answer_system_message": final_answer_system_message,
# "user_query": user_query,
# "num_steps": 0}

# Define the graph functions
Each point in the graph is just a function.<br>
There are node functions and conditional edge functions.

Functions
1. query_keywords_llm_1 (node) (llm)
2. run_web_search_2 (node) (tavily)
3. query_final_answer_llm_3 (node) (llm)
4. save_final_answer_4 (node)
5. print_the_state_5 (node)

In [26]:
def query_keywords_llm_1(state):
    """
    Graph node 1: ask the LLM for search keywords for the user's question.

    Reads 'num_steps', 'user_query' and 'keywords_system_message' from the
    state, sends the system message and query to the LLM, and parses the
    reply as JSON.

    Returns:
        dict: state update with 'keywords_list' (the value under the
        reply's 'keywords' key) and 'num_steps' (previous value plus one).
    """
    print("---MAKE KEYWORDS LLM API CALL---")

    # Count this node as one more step
    step_count = int(state['num_steps']) + 1

    question = state['user_query']
    system_prompt = state['keywords_system_message']

    raw_reply = make_llm_api_call(
        create_message_history(system_prompt, question)
    )

    # The reply is expected to be JSON text with a 'keywords' key
    keywords_list = json.loads(raw_reply)['keywords']

    print('Keyword generation complete.')
    print(keywords_list)

    return {"keywords_list": keywords_list, "num_steps": step_count}




In [58]:
def run_web_search_2(state):
    """
    Graph node 2: run one Tavily search per keyword.

    Reads 'num_steps' and 'keywords_list' from the state and calls
    run_tavily_search for every keyword, asking for two results each.

    Returns:
        dict: state update with 'research_info_list' (one search response
        per keyword, in keyword order) and 'num_steps' (previous value
        plus one).
    """
    print("---RUN WEB SEARCHES - TAVILY---")

    # Count this node as one more step
    step_count = int(state['num_steps']) + 1

    keywords_list = state["keywords_list"]

    print(type(keywords_list))
    print("Search keywords:", keywords_list)

    # One search per keyword, collected in the same order
    research_info_list = [
        run_tavily_search(keyword, num_results=2)
        for keyword in keywords_list
    ]

    print("Web research complete.")

    return {"research_info_list": research_info_list, "num_steps": step_count}




In [42]:
def query_final_answer_llm_3(state):
    """
    Graph node 3: ask the LLM to write the final answer from the research.

    Reads 'num_steps', 'user_query', 'final_answer_system_message' and
    'research_info_list' from the state, sends them to the LLM, prints the
    raw reply and parses it as JSON.

    Returns:
        dict: state update with 'final_answer' (the value under the
        reply's 'final_answer' key) and 'num_steps' (previous value plus
        one).
    """
    print("---MAKE FINAL ANSWER LLM API CALL---")

    # Count this node as one more step
    step_count = int(state['num_steps']) + 1

    question = state['user_query']
    system_prompt = state['final_answer_system_message']
    research = state["research_info_list"]

    # User message: the question followed by the collected search results
    user_prompt = (
        "\n"
        f"    USER_QUERY: {question}\n"
        f"    RESEARCH_INFO: {research}\n"
        "    "
    )

    raw_reply = make_llm_api_call(
        create_message_history(system_prompt, user_prompt)
    )

    print(raw_reply)

    # The reply is expected to be JSON text with a 'final_answer' key
    final_answer = json.loads(raw_reply)['final_answer']

    return {"final_answer": final_answer, "num_steps": step_count}




In [29]:
def save_final_answer_4(state):
    """
    Graph node 4: print the final answer and save it to final_answer.md.

    Reads 'num_steps' and 'final_answer' from the state. The answer is
    converted with str() before being written through write_markdown_file.

    Returns:
        dict: state update with 'num_steps' (previous value plus one).
    """

    print("---SAVING FINAL ANSWER---")

    # Coerce with int() before incrementing, as the other node functions do
    num_steps = int(state['num_steps'])
    num_steps += 1

    # Extract the final answer
    final_answer = state["final_answer"]

    print("Final answer:", final_answer)

    # Save the answer to a file
    write_markdown_file(str(final_answer), "final_answer")

    print("Final answer saved to a file.")

    # Update the state
    return {"num_steps": num_steps}

In [30]:
def print_the_state_5(state):
    """
    Graph node 5: print the final answer and the step count from the state.

    Returns None, so this node contributes no state update.
    """
    print("---STATE PRINTER---")

    # (label, state key) pairs, printed in this order
    fields = (("Final answer", 'final_answer'), ("Num steps", 'num_steps'))
    for label, key in fields:
        print(f"{label}: {state[key]} \n")

    return None


# Define the state

The functions take the state as input.<br>
The functions usually return a dict that automatically updates the state.<br>
Therefore, the variables in the state need to correspond to variables that the functions output.

In [32]:
from langchain.schema import Document
from langgraph.graph import END, StateGraph

In [31]:
from typing_extensions import TypedDict
from typing import List

### State

class GraphState(TypedDict):
    """
    Represents the state of the graph.

    Attributes:
        keywords_system_message: the keywords llm system message
        final_answer_system_message: the final answer llm system message
        user_query: the question from the user
        keywords_list: a list of search keywords and phrases
        research_info_list: a list of Tavily search responses, one per
            keyword searched (each response is a dict, not a string)
        final_answer: the final answer
        num_steps: number of steps
    """

    keywords_system_message : str
    final_answer_system_message : str
    user_query : str
    keywords_list : List[str]
    # Each element is the response object appended by the web search node
    research_info_list : List[dict]
    final_answer : str
    num_steps : int


# Build the graph

1. query_keywords_llm_1 (node) (llm)
2. run_web_search_2 (node) (tavily)
3. query_final_answer_llm_3 (node) (llm)
4. save_final_answer_4 (node)
5. print_the_state_5 (node)

## Initialize the graph

In [60]:
# Start a new StateGraph, passing GraphState as its state definition
workflow = StateGraph(GraphState)

## Define the nodes

In [61]:
# Node name -> node function, registered in the order the steps are numbered
graph_nodes = {
    "query_keywords_llm_1": query_keywords_llm_1,
    "run_web_search_2": run_web_search_2,
    "query_final_answer_llm_3": query_final_answer_llm_3,
    "save_final_answer_4": save_final_answer_4,
    "print_the_state_5": print_the_state_5,
}

for node_name, node_function in graph_nodes.items():
    workflow.add_node(node_name, node_function)

## Define the edges

In [62]:
# INPUT

# e-1: the graph starts at the keywords node
workflow.set_entry_point("query_keywords_llm_1")

# e-2 .. e-6: the remaining edges run in a straight line through the nodes and
# finish at END. Each entry is (source node, destination node).
numbered_edges = [
    ("query_keywords_llm_1", "run_web_search_2"),          # e-2
    ("run_web_search_2", "query_final_answer_llm_3"),      # e-3
    ("query_final_answer_llm_3", "save_final_answer_4"),   # e-4
    ("save_final_answer_4", "print_the_state_5"),          # e-5
    ("print_the_state_5", END),                            # e-6
]

for source_node, destination_node in numbered_edges:
    workflow.add_edge(source_node, destination_node)

# END

## Compile the graph

In [63]:
# Compile the workflow into `app`, the object that is invoked to run the graph
app = workflow.compile()

# Run the system

In [70]:
user_query = "Who was President Obama's chief of staff and who was his vice president?"

# Initial values handed to the graph: both system prompts, the question,
# and a step counter starting at zero.
inputs = dict(
    keywords_system_message=keywords_system_message,
    final_answer_system_message=final_answer_system_message,
    user_query=user_query,
    num_steps=0,
)

In [71]:
# Run the compiled graph with the initial inputs.
# Each node prints its progress, so those messages are displayed when this cell is run.
# The returned value is kept in `output` so the final answer can be read from it afterwards.

output = app.invoke(inputs)

---MAKE KEYWORDS LLM API CALL---
Keyword generation complete.
['Barack Obama staff', "Obama's chiefs of staff", 'Joe Biden VP', 'Obama administration officials', 'White House staff Obama era']
---RUN WEB SEARCHES - TAVILY---
<class 'list'>
Search keywords: ['Barack Obama staff', "Obama's chiefs of staff", 'Joe Biden VP', 'Obama administration officials', 'White House staff Obama era']
Web research complete.
---MAKE FINAL ANSWER LLM API CALL---
{"final_answer": "President Obama's chief of staff was Rahm Emanuel, William Daley, Jacob Lew, and Denis McDonough, serving in that order. His vice president was Joe Biden."}
---SAVING FINAL ANSWER---
Final answer: President Obama's chief of staff was Rahm Emanuel, William Daley, Jacob Lew, and Denis McDonough, serving in that order. His vice president was Joe Biden.
Final answer saved to a file.
---STATE PRINTER---
Final answer: President Obama's chief of staff was Rahm Emanuel, William Daley, Jacob Lew, and Denis McDonough, serving in that orde

In [72]:
# Display the final answer returned by the graph

print(output['final_answer'])

President Obama's chief of staff was Rahm Emanuel, William Daley, Jacob Lew, and Denis McDonough, serving in that order. His vice president was Joe Biden.


In [73]:
# Check that the markdown file has been created

!ls

final_answer.md  sample_data


In [74]:
# Read the contents of the file

!cat final_answer.md

President Obama's chief of staff was Rahm Emanuel, William Daley, Jacob Lew, and Denis McDonough, serving in that order. His vice president was Joe Biden.