In [1]:
!pip install langgraph langchain langchain-openai pydantic

Collecting langgraph
  Downloading langgraph-0.6.6-py3-none-any.whl.metadata (6.8 kB)
Collecting langchain-openai
  Downloading langchain_openai-0.3.32-py3-none-any.whl.metadata (2.4 kB)
Collecting langgraph-checkpoint<3.0.0,>=2.1.0 (from langgraph)
  Downloading langgraph_checkpoint-2.1.1-py3-none-any.whl.metadata (4.2 kB)
Collecting langgraph-prebuilt<0.7.0,>=0.6.0 (from langgraph)
  Downloading langgraph_prebuilt-0.6.4-py3-none-any.whl.metadata (4.5 kB)
Collecting langgraph-sdk<0.3.0,>=0.2.2 (from langgraph)
  Downloading langgraph_sdk-0.2.6-py3-none-any.whl.metadata (1.5 kB)
Collecting ormsgpack>=1.10.0 (from langgraph-checkpoint<3.0.0,>=2.1.0->langgraph)
  Downloading ormsgpack-1.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.7/43.7 kB[0m [31m391.3 kB/s[0m eta [36m0:00:00[0m
Downloading langgraph-0.6.6-py3-none-any.whl (153 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [7]:
!pip install -q langchain-groq

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/134.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/134.9 kB[0m [31m1.5 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━[0m [32m61.4/134.9 kB[0m [31m843.0 kB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━[0m [32m122.9/134.9 kB[0m [31m1.4 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.9/134.9 kB[0m [31m1.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [17]:
import os
# Resolve the Groq API key: try the Colab secret store first, then the
# GROQ_API_KEY environment variable.
try:
    from google.colab import userdata
    GROQ_API_KEY = userdata.get("GROQ_API_KEY") or os.getenv("GROQ_API_KEY")
except Exception:
    # Any failure above (e.g. google.colab cannot be imported) falls back to
    # the environment variable only.
    GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# NOTE(review): GROQ_API_KEY is None at this point when neither source
# provides a value; nothing here validates it before building the client.

from langchain_groq import ChatGroq
# Shared chat model instance; the node functions in this notebook call
# `llm.invoke(...)` / `llm.bind_tools(...)` on it.
llm = ChatGroq(
    model="llama-3.1-8b-instant",
    api_key=GROQ_API_KEY,
    temperature=0.4,
)

In [18]:
from typing import TypedDict, Literal, List, Optional
from langgraph.graph import StateGraph, END
from langgraph.prebuilt import ToolNode
from langchain_openai import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
from langchain.tools import tool
from IPython.display import Markdown, display

In [None]:
# -----------------------------
# 0) Real-time user input (Notebook/Console)
# -----------------------------
def get_user_profile():
    """Interactively collect the blog brief from the user via ``input()``.

    Prompts, in order, for topic, audience, tone, target word count and
    extra instructions. Every answer is stripped of surrounding whitespace.

    Returns:
        dict: keys ``topic``, ``audience``, ``tone``, ``target_words`` and
        ``instructions``. ``tone`` is lower-cased and defaults to
        ``"friendly"`` when left blank. ``target_words`` is a positive int
        and defaults to 900 when the answer is blank, not a plain decimal
        number, or zero.
    """
    default_words = 900

    print("▶ Blog Generator — provide a few details.")
    topic = input("Topic: ").strip()
    audience = input("Audience: ").strip()
    # Lower-case so answers such as "Technical" match the lowercase tone
    # values used elsewhere in the notebook.
    tone = input("Tone (casual/friendly/professional/technical): ").strip().lower() or "friendly"

    raw_words = input("Target word count (e.g., 900): ").strip()
    # str.isdigit() also accepts characters like "²" that int() rejects with
    # ValueError; str.isdecimal() only accepts what int() can parse.
    words = int(raw_words) if raw_words.isdecimal() else default_words
    if words <= 0:
        # A zero-word target is meaningless for the writer; use the default.
        words = default_words

    notes = input("Any extra instructions? ").strip()
    return dict(topic=topic, audience=audience, tone=tone, target_words=words, instructions=notes)

def ask_user_approval(suggestion: str) -> bool:
    """Show the supervisor's notes and ask the user whether to finalize.

    Args:
        suggestion: Text printed under the "SUPERVISOR NOTE" banner.

    Returns:
        True when the reply, trimmed and lower-cased, begins with "y";
        False for anything else (including an empty reply).
    """
    # One print call, two lines: the banner (preceded by a blank line) and the notes.
    print("\n— SUPERVISOR NOTE —", suggestion, sep="\n")
    reply = input("Approve and finalize? (y/n): ")
    normalized = reply.strip().lower()
    return normalized.startswith("y")

In [19]:
# -----------------------------
# 2) Tools as functions (callable by LLM)
# -----------------------------
@tool
def keyword_suggest(topic: str) -> List[str]:
    """Suggest 6-10 SEO-friendly keywords for the given blog topic."""
    # NOTE(review): the docstring above is left verbatim; presumably @tool
    # exposes it to the model as the tool description — confirm before editing.
    # The list is a fixed placeholder: `topic` is accepted but not consulted.
    return [
        "RAG",
        "retrieval augmented generation",
        "vector database",
        "semantic search",
        "LLM",
        "content strategy",
        "blog workflow",
        "prompting",
        "citations",
        "hallucinations",
    ]

@tool
def quick_facts(topic: str) -> str:
    """Produce concise background notes and 5–8 facts (no URLs). Mark uncertainty with [verify]."""
    # NOTE(review): the docstring above is left verbatim; presumably @tool
    # exposes it to the model as the tool description — confirm before editing.
    # Four fixed bullet lines; only the first one interpolates `topic`.
    bullets = (
        f"- {topic} often uses a pipeline: splitter → embeddings → vector DB → retriever → prompt → LLM.",
        "- Benefits: fewer hallucinations, fresher knowledge, traceability.",
        "- Watch-outs: chunking strategy, prompt grounding, evals.",
        "- Blogging: outline → questions → retrieve → cite quotes where possible [verify].",
    )
    # Every bullet, including the last, is terminated by a newline.
    return "".join(line + "\n" for line in bullets)

# Registry of the tool functions defined above; passed to ToolNode when the
# model's tool calls are executed.
TOOLS = [keyword_suggest, quick_facts]

In [20]:
# -----------------------------
# 3) Shared State
# -----------------------------
class BlogState(TypedDict, total=False):
    """Shared state dictionary handed from node to node in the blog graph.

    ``total=False`` makes every key optional, so a run can start from an
    empty state and each node adds the keys it produces.
    """
    # user input (filled by the intake step when missing)
    topic: str
    audience: str
    tone: Literal["casual","friendly","professional","technical"]
    target_words: int
    instructions: Optional[str]

    # artifacts (each written by the node of the matching stage)
    ideas: str  # idea bullets / research summary
    outline: str  # markdown outline
    draft: str  # current blog draft
    supervisor_notes: str  # review notes shown to the user
    final: str  # final package produced by the finalizer
    keywords: List[str]  # keywords gathered during the ideas stage

    # control (read by the router after the supervisor node)
    decision: Literal["revise","approve"]


In [21]:
# -----------------------------
# 4) Nodes
# -----------------------------
def intake(state: BlogState) -> BlogState:
    """Collect real-time inputs from the user (first node).

    If ``topic``, ``audience``, ``tone`` and ``target_words`` are all already
    present in ``state``, the state is returned unchanged and the user is not
    prompted. Otherwise ``get_user_profile()`` is called and its answers are
    merged over the existing state.

    The incoming ``state`` is never mutated; like the other nodes, a new dict
    is returned when anything changes.
    """
    required_keys = ("topic", "audience", "tone", "target_words")
    if all(key in state for key in required_keys):
        return state
    # Build a fresh dict instead of state.update(...) so the caller's object
    # is left untouched.
    return {**state, **get_user_profile()}


def _parse_keyword_payload(content) -> List[str]:
    """Turn a keyword_suggest tool-message payload into a list of strings.

    Accepts either an actual list or a JSON-encoded list (tool results are
    commonly serialized to a JSON string before being placed in the message
    content). Anything else yields an empty list.
    """
    import json  # local import: keeps this block self-contained

    if isinstance(content, str):
        try:
            content = json.loads(content)
        except ValueError:  # json.JSONDecodeError is a ValueError
            return []
    if isinstance(content, list):
        return [item for item in content if isinstance(item, str)]
    return []


def ideas_researcher(state: BlogState) -> BlogState:
    """Node 1: Generate ideas & quick research notes (tool-calling allowed).

    Reads ``topic``, ``audience`` and ``tone`` from ``state`` and asks the
    tool-enabled model for idea bullets. If the reply contains tool calls,
    they are executed through ``ToolNode``, the results are appended to the
    conversation, and the plain model is asked to summarize them.

    Returns:
        A new state dict with ``ideas`` (model text) and ``keywords`` (the
        keyword list returned by ``keyword_suggest``, or ``[]`` when that
        tool was not called or its output could not be parsed).
    """
    llm_with_tools = llm.bind_tools(TOOLS)
    system_msg = SystemMessage(content="You are an idea researcher. Produce concise idea bullets and optional keyword/tool calls.")
    user_msg = HumanMessage(content=f"Topic: {state['topic']}\nAudience: {state['audience']}\nTone: {state['tone']}")
    first_reply = llm_with_tools.invoke([system_msg, user_msg])
    messages = [system_msg, user_msg, first_reply]

    keywords = []
    if getattr(first_reply, "tool_calls", None):
        # Execute any tool calls and feed their outputs back to the model.
        tool_results = ToolNode(TOOLS).invoke({"messages": messages})
        tool_messages = tool_results["messages"]
        messages += tool_messages
        messages.append(HumanMessage(content="Summarize insights and list keywords from tool outputs."))
        ideas = llm.invoke(messages).content

        # Collect keywords returned by keyword_suggest. The message content
        # may be a JSON string rather than a list, so it is parsed instead of
        # being type-checked; the last non-empty result wins.
        for tool_msg in tool_messages:
            if getattr(tool_msg, "name", "") == "keyword_suggest":
                parsed = _parse_keyword_payload(tool_msg.content)
                if parsed:
                    keywords = parsed
    else:
        ideas = first_reply.content
    return {**state, "ideas": ideas, "keywords": keywords}

def outliner(state: BlogState) -> BlogState:
    """Node 2: Turn ideas into a structured outline (H2/H3).

    Sends the ``ideas`` text and the comma-joined ``keywords`` (empty when the
    key is absent) to the model and stores the reply text under ``outline``
    in a copy of the state.
    """
    ideas_text = state["ideas"]
    keyword_line = ", ".join(state.get("keywords", []))
    prompt = [
        SystemMessage(content="You are an expert outliner. Use markdown H2/H3; include intro & conclusion."),
        HumanMessage(content=f"Ideas:\n{ideas_text}\nKeywords: {keyword_line}"),
    ]
    reply = llm.invoke(prompt)

    updated = dict(state)
    updated["outline"] = reply.content
    return updated

def writer(state: BlogState) -> BlogState:
    """Node 3: Write the draft from the outline, or revise an existing draft.

    First pass: the prompt contains topic, audience, tone, target word count,
    outline and any extra instructions.

    Revision pass: when ``decision`` is ``"revise"`` and a previous ``draft``
    exists, the previous draft and (if present) the ``supervisor_notes`` are
    appended to the prompt so the rewrite actually addresses the feedback
    instead of regenerating blindly from the outline.

    Returns:
        A new state dict with ``draft`` set to the model's reply text.
    """
    system_msg = SystemMessage(content="You are a blog writer. Produce a cohesive draft with headings, TL;DR, and clear paragraphs.")
    brief = (
        f"Topic: {state['topic']}\nAudience: {state['audience']}\nTone: {state['tone']}\n"
        f"Target words: {state['target_words']}\nOutline:\n{state['outline']}\n"
        f"Extra instructions: {state.get('instructions','')}"
    )

    previous_draft = state.get("draft")
    if state.get("decision") == "revise" and previous_draft:
        # Revision loop: give the model what it wrote and what to fix.
        brief += f"\n\nPrevious draft to revise:\n{previous_draft}"
        review_notes = state.get("supervisor_notes")
        if review_notes:
            brief += f"\n\nSupervisor notes to address:\n{review_notes}"

    draft = llm.invoke([system_msg, HumanMessage(content=brief)]).content
    return {**state, "draft": draft}

def supervisor(state: BlogState) -> BlogState:
    """Node 4: Review the draft, then let the USER decide 'revise' or 'approve'.

    The model reviews ``state['draft']`` and returns notes. Those notes are
    shown to the user through ``ask_user_approval``; the user's answer alone
    determines ``decision`` (the model's own verdict is advisory and only
    appears inside the notes text).

    Returns:
        A new state dict with ``supervisor_notes`` (the review text) and
        ``decision`` (``"approve"`` if the user approved, else ``"revise"``).
    """
    system_msg = SystemMessage(content=(
        "You are a strict supervisor. Give 3–6 concrete improvement notes if needed; else say APPROVED."
    ))
    review_request = HumanMessage(content=f"Evaluate this draft for quality, tone, structure, coherence:\n\n{state['draft']}")
    notes = llm.invoke([system_msg, review_request]).content

    # Human in the loop: the user's answer is the decision.
    decision = "approve" if ask_user_approval(notes) else "revise"

    return {**state, "supervisor_notes": notes, "decision": decision}

def finalizer(state: BlogState) -> BlogState:
    """Node 5: Produce final package (title, meta, tags, slug, final markdown).

    Sends the draft, the supervisor notes (empty string when absent) and the
    tone to the model and stores the reply text under ``final`` in a copy of
    the state.
    """
    instructions = SystemMessage(content=(
        "You are a content ops specialist. Produce: Title (<=60 chars), Meta (<=160 chars), "
        "Slug, 3–5 tags, and Final Markdown body. Keep the user's tone."
    ))
    # Newline-joined sections: draft, notes, then the tone line.
    sections = (
        "Draft:",
        f"{state['draft']}",
        "Notes:",
        f"{state.get('supervisor_notes','')}",
        f"Tone: {state['tone']}",
    )
    request = HumanMessage(content="\n".join(sections))
    reply = llm.invoke([instructions, request])

    packaged = dict(state)
    packaged["final"] = reply.content
    return packaged


In [22]:
# -----------------------------
# 5) Graph wiring with conditional loop
# -----------------------------
# Build the workflow graph over the shared BlogState schema.
graph = StateGraph(BlogState)

# Register one node per pipeline stage (name -> node function).
graph.add_node("intake", intake)
graph.add_node("ideas_researcher", ideas_researcher)
graph.add_node("outliner", outliner)
graph.add_node("writer", writer)
graph.add_node("supervisor", supervisor)
graph.add_node("finalizer", finalizer)

# Linear path: intake -> ideas_researcher -> outliner -> writer -> supervisor.
graph.set_entry_point("intake")
graph.add_edge("intake", "ideas_researcher")
graph.add_edge("ideas_researcher", "outliner")
graph.add_edge("outliner", "writer")
graph.add_edge("writer", "supervisor")

# conditional edge from supervisor → writer OR finalizer
def route_after_supervisor(state: BlogState) -> str:
    """Choose the node that follows the supervisor.

    Returns "writer" when ``state['decision']`` is "revise"; in every other
    case (including a missing decision) returns "finalizer".
    """
    if state.get("decision") == "revise":
        return "writer"
    return "finalizer"

# The router's return value is looked up in this mapping to pick the next
# node: back to the writer (revision loop) or on to the finalizer.
graph.add_conditional_edges("supervisor", route_after_supervisor, {"writer": "writer", "finalizer": "finalizer"})
# The finalizer is the last step of a run.
graph.add_edge("finalizer", END)

# Compile the wired graph into the runnable object invoked in the next cell.
app = graph.compile()

In [24]:
# -----------------------------
# 6) Run & display final blog
# -----------------------------
# Start from an empty state so the intake node prompts for the brief.
initial = BlogState()  # empty: intake will ask the user in real-time
# Run the whole graph; this blocks on the interactive input() prompts.
final_state = app.invoke(initial)

# Render a heading, then the generated package stored under "final", as Markdown.
display(Markdown("## Final Blog Output"))
display(Markdown(final_state["final"]))


▶ Blog Generator — provide a few details.
Topic: Introduction to RAG evaluation
Audience: 
Tone (casual/friendly/professional/technical): technical
Target word count (e.g., 900): 500
Any extra instructions? 

— SUPERVISOR NOTE —
APPROVED

The draft is well-structured, coherent, and effectively communicates the concept of Retrieval Augmented Generation (RAG) evaluation. Here are some minor suggestions for improvement:

1. **Consider adding a section on limitations**: While the draft highlights the benefits of RAG evaluation, it would be beneficial to discuss potential limitations or challenges associated with implementing RAG. This could include limitations in data quality, scalability, or the need for specialized expertise.
2. **Use more descriptive headings**: While the headings are clear, they could be more descriptive and engaging. For example, instead of "Benefits of RAG Evaluation," consider "Unlocking the Power of RAG: Reduced Hallucinations, Fresher Knowledge, and Improved Trace

## Final Blog Output

**Title:** Unlocking the Power of Retrieval Augmented Generation (RAG) Evaluation
================================================================

**Meta:** Discover the benefits and key components of RAG evaluation, a cutting-edge approach to content generation that combines retrieval and generation.

**Slug:** retrieval-augmented-generation-evaluation

**Tags:** RAG, Retrieval Augmented Generation, Content Generation, Large Language Models, Vector Databases, Hallucinations, Traceability

### Introduction to Retrieval Augmented Generation (RAG) Evaluation

Retrieval Augmented Generation (RAG) is a powerful approach to content generation that combines the strengths of retrieval and generation. By leveraging a vector database and a Large Language Model (LLM), RAG enables the creation of high-quality content while reducing hallucinations and improving traceability. This evaluation framework is essential for developers and content creators who want to harness the full potential of RAG.

### Key Components of RAG Evaluation

#### 1. Pipeline Overview

The RAG pipeline involves several key components:

* **Splitting Data**: Divide input data into manageable chunks for processing. This is crucial for efficient processing and reduces the computational overhead.
* **Generating Embeddings**: Create vector representations of input data. These embeddings serve as a common language for the vector database and the LLM.
* **Creating a Vector Database**: Store embeddings in a database for efficient retrieval. This allows for fast lookup and retrieval of relevant information.
* **Retrieving Information**: Use the vector database to retrieve relevant information. This is where the retrieval component of RAG comes into play.
* **Using a Large Language Model (LLM)**: Generate final content using the retrieved information and LLM. This is where the generation component of RAG comes into play.

#### 2. Benefits of RAG Evaluation

RAG evaluation offers several benefits, including:

* **Reduced Hallucinations**: RAG reduces the likelihood of generated content being incorrect or misleading. This is because RAG relies on the retrieval of existing knowledge, rather than generating content from scratch.
* **Access to Fresher Knowledge**: RAG enables the use of up-to-date information and knowledge. This is because the vector database can be updated in real-time, allowing for the incorporation of new information.
* **Improved Traceability**: RAG provides a clear audit trail of the content generation process. This makes it easier to identify the sources of information and the reasoning behind the generated content.

#### 3. Key Considerations for RAG Evaluation

When implementing RAG evaluation, there are several key considerations to keep in mind:

* **Choosing an Effective Chunking Strategy**: Divide input data into optimal chunks for processing. This can significantly impact the performance and efficiency of the RAG pipeline.
* **Ensuring Prompt Grounding**: Ensure that the LLM is properly grounded in the input data. This is crucial for generating high-quality content that is relevant to the input data.
* **Evaluating the Performance of the System**: Regularly assess the quality and accuracy of generated content. This can help identify areas for improvement and optimize the RAG pipeline.

### Application of RAG Evaluation

RAG evaluation has a wide range of applications, including:

#### 1. Content Strategy

* **Blog Workflow**: Use RAG to generate high-quality blog posts. This can help reduce the workload of content creators and improve the consistency of blog posts.
* **Prompting**: Use RAG to generate effective prompts for LLMs. This can help improve the quality and relevance of generated content.
* **Citations**: Use RAG to generate accurate citations and references. This can help improve the credibility and reliability of generated content.

### Limitations and Future Directions

While RAG evaluation offers numerous benefits, there are several limitations and challenges associated with its implementation. These include:

* **Data Quality**: RAG relies on high-quality data to generate accurate and relevant content. Poor data quality can lead to hallucinations and decreased performance.
* **Scalability**: RAG can be computationally intensive, particularly when dealing with large datasets. This can make it challenging to scale RAG to meet the needs of large organizations.
* **Specialized Expertise**: RAG requires specialized expertise in areas such as natural language processing, vector databases, and LLMs. This can make it challenging to implement RAG without significant investment in training and resources.

### Conclusion
================================================================

In conclusion, RAG evaluation is a powerful approach to content generation that offers numerous benefits, including reduced hallucinations, access to fresher knowledge, and improved traceability. By understanding the key components of RAG evaluation, including the pipeline overview, benefits, and key considerations, content creators and developers can effectively leverage RAG to generate high-quality content.

**TL;DR**: RAG evaluation is a powerful approach to content generation that combines the strengths of retrieval and generation. By leveraging a vector database and a Large Language Model (LLM), RAG enables the creation of high-quality content while reducing hallucinations and improving traceability.

**Target Words: 500**

**Word Count: 517**