In [1]:

!pip install -U langchain_community tiktoken langchain-openai langchain-ollama langchain-chroma langchain-cohere langchainhub chromadb langchain langgraph

Collecting langchain_community
  Downloading langchain_community-0.3.23-py3-none-any.whl.metadata (2.5 kB)
Collecting langchain-openai
  Downloading langchain_openai-0.3.16-py3-none-any.whl.metadata (2.3 kB)
Collecting chromadb
  Downloading chromadb-1.0.8-cp39-abi3-macosx_11_0_arm64.whl.metadata (6.9 kB)
Collecting langchain
  Downloading langchain-0.3.25-py3-none-any.whl.metadata (7.8 kB)
Collecting langgraph
  Downloading langgraph-0.4.3-py3-none-any.whl.metadata (7.9 kB)
Collecting langchain-core<1.0.0,>=0.3.56 (from langchain_community)
  Downloading langchain_core-0.3.59-py3-none-any.whl.metadata (5.9 kB)
Downloading langchain_community-0.3.23-py3-none-any.whl (2.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m20.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading langchain_openai-0.3.16-py3-none-any.whl (62 kB)
Downloading langchain-0.3.25-py3-none-any.whl (1.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m 

storing some documents

In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings
from langchain_openai import OpenAIEmbeddings

### from langchain_cohere import CohereEmbeddings

# Set embeddings
embd = OllamaEmbeddings(model="llama3.2:latest")

# Docs to index
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# Load
docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

print(f"Loaded {len(docs_list)} documents from {len(urls)} URLs.")

# Split
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500, chunk_overlap=0
)
doc_splits = text_splitter.split_documents(docs_list)

print(f"Split into {len(doc_splits)} chunks.")

# Add to vectorstore
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma",
    embedding=embd,
)

print(f"Indexed {len(doc_splits)} chunks into vectorstore.")

retriever = vectorstore.as_retriever()

Loaded 3 documents from 3 URLs.
Split into 88 chunks.
Indexed 88 chunks into vectorstore.


# Building Nodes

In [5]:
from typing import Literal

from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import AzureChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain import hub

from pydantic import BaseModel, Field

In [6]:
llm = AzureChatOpenAI(
    deployment_name="gpt-4-2",
)

### Retrieval Grader

In [7]:
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""

    binary_score: str = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )


# LLM with function call
structured_llm_grader = llm.with_structured_output(GradeDocuments)

# Prompt
system = """You are a grader assessing relevance of a retrieved document to a user question. \n 
    If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.
    give the answer in single word 'yes' or 'no' \n
    """
grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
    ]
)

retrieval_grader = grade_prompt | llm
question = "how agent uses tools ?"
docs = retriever.invoke(question)
print(f"Retrieved {len(docs)} documents.")
doc_txt = docs[0].page_content
print(retrieval_grader.invoke({"question": question, "document": doc_txt}))

Retrieved 4 documents.
content='yes' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 1, 'prompt_tokens': 169, 'total_tokens': 170, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'gpt-4-turbo-2024-04-09', 'system_fingerprint': 'fp_5603ee5e2e', 'id': 'chatcmpl-BW4Qp2rE5Fvg1jUwOU69dr7JKwehX', 'service_tier': None, 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'protected_material_code': {'filtered': False, 'detected': False}, 'protected_material_text': {'filtered': False, 'detected': False}, 'self_harm': {'filte

### Generator

In [8]:
### Generate

# Prompt
prompt = hub.pull("rlm/rag-prompt")

# Post-processing
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Chain
rag_chain = prompt | llm | StrOutputParser()

# Run
generation = rag_chain.invoke({"context": docs, "question": question})
print(generation)



Agents use tools by learning to call external APIs for extra information that the model may not inherently possess in its pre-trained weights. These tools can include capabilities for current information gathering, code execution, and access to proprietary databases among others. The agent uses these tools to augment its abilities and manage tasks that require external data or specialized processing.


### Hallucination Grader

In [9]:
# Data model
class GradeHallucinations(BaseModel):
    """Binary score for hallucination present in generation answer."""

    binary_score: str = Field(
        description="Answer is grounded in the facts, 'yes' or 'no'"
    )


# LLM with function call
structured_llm_grader = llm.with_structured_output(GradeHallucinations)

# Prompt
system = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n 
     Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts. \n
     give the answer in single word 'yes' or 'no' """
hallucination_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Set of facts: \n\n {documents} \n\n LLM generation: {generation}"),
    ]
)

hallucination_grader = hallucination_prompt | llm
hallucination_grader.invoke({"documents": docs, "generation": generation})

AIMessage(content='Yes', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 1, 'prompt_tokens': 2953, 'total_tokens': 2954, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'gpt-4-turbo-2024-04-09', 'system_fingerprint': 'fp_5603ee5e2e', 'id': 'chatcmpl-BW4RpKbwdAYFXRMZoQYU3Iw03ZZl1', 'service_tier': None, 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'protected_material_code': {'filtered': False, 'detected': False}, 'protected_material_text': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': Fal

### answer grader

In [10]:
# Data model
class GradeAnswer(BaseModel):
    """Binary score to assess answer addresses question."""

    binary_score: str = Field(
        description="Answer addresses the question, 'yes' or 'no'"
    )


# LLM with function call
structured_llm_grader = llm.with_structured_output(GradeAnswer)

# Prompt
system = """You are a grader assessing whether an answer addresses / resolves a question \n 
     Give a binary score 'yes' or 'no'. Yes' means that the answer resolves the question. \n
     give the answer in single word 'yes' or 'no' """
answer_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "User question: \n\n {question} \n\n LLM generation: {generation}"),
    ]
)

answer_grader = answer_prompt | llm
answer_grader.invoke({"question": question, "generation": generation})

AIMessage(content='yes', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 1, 'prompt_tokens': 144, 'total_tokens': 145, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'gpt-4-turbo-2024-04-09', 'system_fingerprint': 'fp_5603ee5e2e', 'id': 'chatcmpl-BW4RqxGt87VKmykl9rd8A8M4Cbyte', 'service_tier': None, 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'protected_material_code': {'filtered': False, 'detected': False}, 'protected_material_text': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False

### Question Re-writer

In [11]:
# LLM

# Prompt
system = """You a question re-writer that converts an input question to a better version that is optimized \n 
     for vectorstore retrieval. Look at the input and try to reason about the underlying semantic intent / meaning. \n
     return question only.
     """
re_write_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        (
            "human",
            "Here is the initial question: \n\n {question} \n Formulate an improved question such that it can be used for sematic document retrival.",
        ),
    ]
)

question_rewriter = re_write_prompt | llm | StrOutputParser()
question_rewriter.invoke({"question": question})

'How do agents utilize tools in operations?'

## Constructing Graph

In [12]:
from typing import List, Annotated, Optional
from typing_extensions import TypedDict

from langchain_core.messages import HumanMessage, SystemMessage


### defining graph state

In [13]:
from langgraph.graph.message import add_messages

class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        question: question
        generation: LLM generation
        documents: list of documents
    """
    messages: Annotated[list, add_messages]
    chat_router:  Optional[str]
    question: Optional[str]
    generation: Optional[str]
    documents: List[str]

### Tools

In [14]:
def multiply(a: float, b: float) -> float:
    """Multiply a and b.
    
    Args:
        a: First float
        b: Second float
    """
    return a * b

def add(a: float, b: float) -> float:
    """Add a and b.
    
    Args:
        a: First float
        b: Second float
    """
    return a + b

def divide(a: float, b: float) -> float:
    """Divide a by b.
    
    Args:
        a: First float
        b: Second float
    """
    return a / b

### defining graph nodes

chat node

In [15]:


def chat_router(state: GraphState) -> GraphState:
    """
    Chat router function to process the state and return a response.

    Args:
        state: GraphState object containing the state of the graph.

    Returns:
        str: The response from the chat.
    """

    system_prompt = """\
    You are the Intelligent Business-Document Assistant.  
    You will be given the entire chat history (a list of {"role","content"} messages). 
    Also you are equipped with following tools:

    def add(a: float, b: float)
    - adds a and b

    def multipy(a: float, b: float)
    - multiplies a and b

    def divide(a: float, b: float)
    - divides a by b
    - ensure b is not 0

    Your job is to look at the most recent user request in context and choose exactly one of three actions:

    1. retrieve
    - You need new facts from the documents.  
    - only invoke this if and only if most recent request needs retireval.
    - reply with single word "retrieve"

    2. tool 
    - You have enough document data, but need to run a tool.
    - reply with single word "tool".

    3. respond
    - if any further reterival and tool calling is not required and if it seems like assistant has not responded entirely then and only then reply with a single word "respond".

    4. end
    - if assistant has responded one time and no further processing is required then reply with a single word "end".

    **Important:**  
    - give answer in one word only. 
    """

    sysmsg = SystemMessage(system_prompt)
    print(state["messages"])
    messages = state["messages"]


    route_ans = llm.invoke([sysmsg] + messages)
    print(f"---ROUTING TO {route_ans.content}---")
    updated_state = {
        **state,
        "chat_router": route_ans.content,
    }

    # print("updated state in chat_router: ",updated_state)

    return updated_state


In [16]:
from langgraph.prebuilt import tools_condition

def decide_betn_respond_retrieve_toolcall(state: GraphState) -> str:
    
    # print("state after routing generation: ",state)
    
    route_ans = state["chat_router"]

    if route_ans == "respond":
        return "respond"
    elif route_ans == 'tool':
        return tools_condition(state)
    elif route_ans == 'retrieve':
        return "retrieve"
    elif route_ans == "end":
        return "end"
    else:
        return "respond"

responder node 

In [17]:
def responder(state: GraphState):
    return{
        **state,
        "messages":[llm.invoke(state["messages"])]
    }

tool node

In [18]:
from langgraph.prebuilt import ToolNode

tools = [multiply, add, divide]
tools_node = ToolNode(tools)

retreival node

In [19]:
def retrieve(state: GraphState) -> GraphState:
    """
    Retrieve documents

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, documents, that contains retrieved documents
    """
    print("---RETRIEVE---")
    recent_message = state["messages"][-1]
    question = recent_message.content
    print(f"Question: {question}")

    # Retrieval
    documents = retriever.invoke(question)
    return {**state, "documents": documents, "question": question}

generation node

In [20]:
def generate(state: GraphState) -> GraphState:
    """
    Generate answer

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, generation, that contains LLM generation
    """
    print("---GENERATE---")
    question = state["question"]
    documents = state["documents"]
    prompt =  hub.pull("rlm/rag-prompt")
    print(prompt)
    # documents = "\n\n".join(doc.page_content for doc in documents)
    # print("final formatted documents: ",documents)
    formatted_prompt = prompt.format(context = documents, question = question)
    print("final formatted prompt for generation: ",formatted_prompt)
    messages = state["messages"] + [HumanMessage(formatted_prompt)]
    ai_message = llm.invoke(messages)

    # RAG generation
    # generation = rag_chain.invoke({"context": documents, "question": question})
    
    return {
        **state, 
        "documents": documents, 
        "question": question, 
        "generation": ai_message.content, 
        "messages": messages + [ai_message],
    }


document grading node

In [21]:
def grade_documents(state: GraphState) -> GraphState:
    """
    Determines whether the retrieved documents are relevant to the question.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Updates documents key with only filtered relevant documents
    """

    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
    question = state["question"]
    documents = state["documents"]

    # Score each doc
    filtered_docs = []
    for d in documents:
        score = retrieval_grader.invoke(
            {"question": question, "document": d.page_content}
        )
        # grade = score.binary_score
        grade = score.content
        if grade == "yes":
            print("---GRADE: DOCUMENT RELEVANT---")
            filtered_docs.append(d)
        else:
            print("---GRADE: DOCUMENT NOT RELEVANT---")
            continue
    return {"documents": filtered_docs, "question": question}

query transformation node

In [22]:
def transform_query(state: GraphState) -> GraphState:
    """
    Transform the query to produce a better question.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Updates question key with a re-phrased question
    """

    print("---TRANSFORM QUERY---")
    question = state["question"]
    documents = state["documents"]

    # Re-write question
    better_question = question_rewriter.invoke({"question": question})
    return {"documents": documents, "question": better_question}

decision node to generate answer or transform query to retrive more documents

In [23]:
def decide_to_generate(state: GraphState) -> str:
    """
    Determines whether to generate an answer, or re-generate a question.

    Args:
        state (dict): The current graph state

    Returns:
        str: Binary decision for next node to call
    """

    print("---ASSESS GRADED DOCUMENTS---")
    state["question"]
    filtered_documents = state["documents"]

    if not filtered_documents:
        # All documents have been filtered check_relevance
        # We will re-generate a new query
        print(
            "---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---"
        )
        return "transform_query"
    else:
        # We have relevant documents, so generate answer
        print("---DECISION: GENERATE---")
        return "generate"

decision node to whether generated answer has grounded truth in documents and answers the question.

In [24]:
def grade_generation_v_documents_and_question(state: GraphState) -> str:
    """
    Determines whether the generation is grounded in the document and answers question.

    Args:
        state (dict): The current graph state

    Returns:
        str: Decision for next node to call
    """

    print("---CHECK HALLUCINATIONS---")
    question = state["question"]
    documents = state["documents"]
    generation = state["generation"]

    score = hallucination_grader.invoke(
        {"documents": documents, "generation": generation}
    )
    # grade = score.binary_score
    grade = score.content

    # Check hallucination
    if grade == "yes":
        print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
        # Check question-answering
        print("---GRADE GENERATION vs QUESTION---")
        score = answer_grader.invoke({"question": question, "generation": generation})
        # grade = score.binary_score
        grade = score.content
        if grade == "yes":
            print("---DECISION: GENERATION ADDRESSES QUESTION---")
            return "useful"
        else:
            print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
            return "not useful"
    else:
        print("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
        return "not supported"

## Compiling Graph State

In [25]:
from langgraph.graph import END, StateGraph, START
from pprint import pprint
from langgraph.checkpoint.memory import MemorySaver

In [26]:
workflow = StateGraph(GraphState)

adding nodes to graph

In [27]:
workflow.add_node("chat", chat_router)
workflow.add_node("responder", responder)
workflow.add_node("tools", tools_node)

#nodes related to retrieval
workflow.add_node("retrieve", retrieve)  # retrieve
workflow.add_node("grade_documents", grade_documents)  # grade documents
workflow.add_node("generate", generate)  # generate
workflow.add_node("transform_query", transform_query)  # transform_query

<langgraph.graph.state.StateGraph at 0x11baea4b0>

In [28]:
workflow.add_edge(START, "chat")

workflow.add_conditional_edges(
    "chat",
    decide_betn_respond_retrieve_toolcall,
    {
        "respond":"responder",
        "tools":"tools",
        "retrieve":"retrieve",
        "end":END,
    }
)

workflow.add_edge("tools", "chat")
workflow .add_edge("responder", "chat")

workflow.add_edge("retrieve", "grade_documents")


workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {
        "transform_query": "transform_query",
        "generate": "generate",
    },
)

workflow.add_edge("transform_query", "retrieve")

workflow.add_conditional_edges(
    "generate",
    grade_generation_v_documents_and_question,
    {
        "not supported": "generate",
        "useful": "chat",
        "not useful": "transform_query",
    },
)

<langgraph.graph.state.StateGraph at 0x11baea4b0>

adding memory to agent

In [29]:
config = {"configurable": {"thread_id": "1"}}

In [30]:
memory = MemorySaver()
graph = workflow.compile(checkpointer=memory)

app = workflow.compile()

pictorial form of graph

In [31]:
from PIL import Image
from io import BytesIO

graph_image = Image.open(BytesIO(app.get_graph(xray=True).draw_mermaid_png())) 
graph_image.show()

## sample generation

In [29]:
messages = [HumanMessage("how ai agent uses tools ?")]
messages = app.invoke({"messages":messages})

[HumanMessage(content='how ai agent uses tools ?', additional_kwargs={}, response_metadata={}, id='cce63e2b-b220-46a0-b9aa-0682a30a7850')]
---ROUTING TO retrieve---
---RETRIEVE---
Question: how ai agent uses tools ?
---CHECK DOCUMENT RELEVANCE TO QUESTION---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
---DECISION: GENERATE---
---GENERATE---




input_variables=['context', 'question'] input_types={} partial_variables={} metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'} messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]
final formatted prompt for generation:  Human: You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: how ai agent

In [None]:
inputs  = {
    "question" : "agent memory",
}

for output in app.stream(inputs):
    for key, value in output.items():
        # Node
        pprint(f"Node '{key}':")
        # Optional: print full state at each node
        # pprint.pprint(value["keys"], indent=2, width=80, depth=None)
    pprint("\n---\n")

pprint(value["generation"])