In [1]:
from langchain_core.messages import convert_to_messages


def pretty_print_message(message, indent=False):
    """Print one message using its own ``pretty_repr(html=True)`` rendering.

    Args:
        message: Any object exposing ``pretty_repr(html=...)`` that returns a string.
        indent: When true, every line of the rendering is prefixed with a tab.
    """
    rendered = message.pretty_repr(html=True)
    if indent:
        # Prefix the first line, then every line that follows a newline.
        rendered = "\t" + rendered.replace("\n", "\n\t")
    print(rendered)


def pretty_print_messages(update, last_message=False):
    """Print the messages carried by one streamed graph update.

    Args:
        update: Either a mapping of ``node_name -> node_update`` or a
            ``(namespace, mapping)`` tuple. Each ``node_update`` is indexed
            with ``"messages"``.
        last_message: When true, only the final message of each node is printed.

    A tuple whose namespace is empty is skipped entirely; any other tuple is
    labelled as a subgraph update and its output is tab-indented.
    """
    from_subgraph = isinstance(update, tuple)
    if from_subgraph:
        namespace, update = update
        # skip parent graph updates in the printouts
        if len(namespace) == 0:
            return

        # Keep only the part of the last namespace entry before the first ":".
        graph_id = namespace[-1].partition(":")[0]
        print(f"Update from subgraph {graph_id}:")
        print("\n")

    label_prefix = "\t" if from_subgraph else ""
    for node_name, node_update in update.items():
        print(label_prefix + f"Update from node {node_name}:")
        print("\n")

        all_messages = convert_to_messages(node_update["messages"])
        shown = all_messages[-1:] if last_message else all_messages
        for msg in shown:
            pretty_print_message(msg, indent=from_subgraph)
        print("\n")

In [None]:
import os
import re
from typing import List, Dict
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from langchain.agents import Tool, initialize_agent
from langchain.prompts import PromptTemplate
from langchain.tools import tool
from langchain_groq import ChatGroq
from langgraph.graph import END, StateGraph
from langsmith import traceable
from langsmith.wrappers import wrap_openai

# --- LangSmith tracing configuration ---
# LangSmith picks up its settings from environment variables, so assigning
# plain Python globals alone has no effect on tracing. The globals are kept
# for anything that reads them, and the values are exported to the process
# environment. `setdefault` lets values already set in the shell win.
# NOTE(review): tracing also needs an API key (LANGSMITH_API_KEY); supply it
# via the environment, never hard-code it in the notebook.
LANGSMITH_TRACING = "true"
LANGSMITH_ENDPOINT = "https://api.smith.langchain.com"
LANGSMITH_PROJECT = "news-agent"

os.environ.setdefault("LANGSMITH_TRACING", LANGSMITH_TRACING)
os.environ.setdefault("LANGSMITH_ENDPOINT", LANGSMITH_ENDPOINT)
os.environ.setdefault("LANGSMITH_PROJECT", LANGSMITH_PROJECT)


# --- Scraper configuration ---
# Only headlines whose author label is in this set are kept.
TARGET_AUTHORS = {
    "HLInvest"
}

# Date heading to keep, in the same "D Month YYYY" form the page uses.
TARGET_DATE = "25 July 2025"  # Change as needed
HEADERS = {"User-Agent": "Mozilla/5.0 (compatible; NewsScraper/1.0)"}
BASE_URL = "https://klse.i3investor.com"
IMAGE_FOLDER = "technical_charts"


# -- 2.1 Headline Extraction Agent --
# Tool: scrape the research-headline listing and keep the entries written by
# one of TARGET_AUTHORS under the TARGET_DATE heading.
#
# Returns a dict {"paragraph": str}: either "Today's headlines:" followed by
# one "Title: ... (Author: ..., Date: ..., URL: ...)" line per match, or
# "No headlines found for today." when nothing matched.
# Raises requests.HTTPError for a non-2xx response (via raise_for_status).
#
# NOTE: the docstring below is left untouched on purpose; `@tool` uses the
# function docstring as the tool description shown to the model.
@tool
def parse_headlines_agent():
    """
    Getting the news headlines from the i3investor blog page.
    """
    url = f"{BASE_URL}/web/headline/blog?type=research"
    resp = requests.get(url, headers=HEADERS, timeout=10)
    resp.raise_for_status()

    soup = BeautifulSoup(resp.text, "html.parser")
    container = soup.select_one("#news-blog")
    results = []

    # If the listing container is absent (layout change, error page served
    # with 200), report "no headlines" rather than crashing on None.
    children = container.find_all(recursive=False) if container is not None else []

    # Date headings and entry lists are siblings, so remember the most recent
    # heading and apply it to the lists that follow it.
    current_date = None
    for el in children:
        h5 = el.select_one("h5")
        if h5:
            match = re.search(r"\d{1,2} \w+ \d{4}", h5.text)
            if match:
                current_date = match.group(0)
            continue

        if el.name != "ul" or "ms-4" not in el.get("class", []):
            continue

        # NOTE(review): only the first <li> of each list is inspected —
        # confirm the page really has one entry per <ul>.
        li = el.select_one("li")
        if not li:
            continue
        a_tag = li.find("a", href=True)
        subtitle = li.select_one("span.subtitle a")
        if not a_tag or not subtitle:
            continue

        author = subtitle.text.strip()
        if author not in TARGET_AUTHORS or current_date != TARGET_DATE:
            continue

        # urljoin leaves absolute links alone and resolves relative ones
        # against the site root.
        full_url = urljoin(BASE_URL, a_tag["href"])

        results.append(
            f"Title: {a_tag.text.strip()} (Author: {author}, Date: {current_date}, URL: {full_url})"
        )

    if results:
        paragraph = "Today's headlines:\n" + "\n".join(results)
    else:
        paragraph = "No headlines found for today."
    return {"paragraph": paragraph}

# -- 2.2 Blog Parser Agent (content structuring) --
# Tool: download one blog post and extract its text and image filenames.
#
# Args:
#   item: dict that must contain a "url" key; other keys are passed through.
# Returns:
#   A new dict with the keys of `item` plus
#     "content": newline-joined text of every <h3>/<p>/<li> inside
#                #blogcontent ("" when that element is missing), and
#     "images":  filenames (last path segment of each <img src>) found inside
#                #blogcontent ([] when that element is missing).
#   The caller's dict is not modified.
# Raises:
#   KeyError if "url" is absent; requests.HTTPError for a non-2xx response.
#
# NOTE: the docstring below is left untouched on purpose; `@tool` uses the
# function docstring as the tool description shown to the model.
# NOTE(review): the URL is supplied by the model — consider restricting it to
# BASE_URL before fetching.
@tool
def parse_blog_content_agent(item: Dict) -> Dict:
    """Fetches and parses blog post content from url and extracts clean text and images."""
    # Same headers and error handling as the headline tool, so an error page
    # is reported as a failure instead of being parsed as an empty post.
    resp = requests.get(item["url"], headers=HEADERS, timeout=10)
    resp.raise_for_status()

    soup = BeautifulSoup(resp.text, "html.parser")
    content_div = soup.select_one("#blogcontent")

    # Work on a copy and always populate both keys so callers get one shape.
    result = dict(item)
    if content_div is None:
        result["content"] = ""
        result["images"] = []
        return result

    paragraphs = [tag.get_text(strip=True) for tag in content_div.find_all(["h3", "p", "li"])]
    result["content"] = "\n".join(paragraphs)
    # Save only filenames
    result["images"] = [
        img["src"].split("/")[-1]
        for img in content_div.find_all("img")
        if img.get("src")
    ]
    return result



# Chat model shared by the supervisor and both worker agents.
llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0,
    # Limits are left at None and retries at 0.
    # NOTE(review): presumably this means "no cap" and "fail on the first
    # error" — confirm against the ChatGroq parameter reference.
    max_tokens=None,
    timeout=None,
    max_retries=0,
)

In [None]:
from langgraph.prebuilt import create_react_agent


# NOTE(review): each assignment below rebinds the name of the tool function it
# wraps, so after this cell the names refer to agents, not tools. Re-running
# this cell without first re-running the tool-definition cell would pass an
# agent in as its own tool — verify before relying on re-execution.

HEADLINES_AGENT_PROMPT = (
    "You are a headlines research (including title, author, urls, date) agent.\n\n"
    "INSTRUCTIONS:\n"
    "- Assist ONLY getting the headlines tasks, DO NOT do any deep crawling\n"
    "- After you're done with your tasks, respond to the supervisor directly\n"
    "- Respond ONLY with the results of your work, do NOT include ANY other text."
)

BLOG_CONTENT_AGENT_PROMPT = (
    "You are a web-scraping for detailed blog page agent. DO NOT use this if user only request to know the headlines\n\n"
    "INSTRUCTIONS:\n"
    "- Assist ONLY with extracting blog post content from url and extracts clean text and images tasks\n"
    "- After you're done with your tasks, respond to the supervisor directly\n"
    "- Respond ONLY with the results of your work, do NOT include ANY other text."
)

# Worker agent that only lists headlines.
parse_headlines_agent = create_react_agent(
    name="parse_headlines_agent",
    model=llm,
    tools=[parse_headlines_agent],
    prompt=HEADLINES_AGENT_PROMPT,
)

# Worker agent that fetches and structures a single blog post.
parse_blog_content_agent = create_react_agent(
    name="parse_blog_content_agent",
    model=llm,
    tools=[parse_blog_content_agent],
    prompt=BLOG_CONTENT_AGENT_PROMPT,
)



In [15]:
from typing import Annotated
from langchain_core.tools import tool, InjectedToolCallId
from langgraph.prebuilt import InjectedState
from langgraph.graph import StateGraph, START, MessagesState
from langgraph.types import Command


def create_handoff_tool(*, agent_name: str, description: str | None = None):
    """Build a tool that routes control to the graph node ``agent_name``.

    Args:
        agent_name: Name of the target node; the tool is named
            ``transfer_to_<agent_name>``.
        description: Tool description; defaults to "Ask <agent_name> for help."
            when omitted or empty.

    Returns:
        The decorated tool. When invoked it returns a ``Command`` addressed to
        the parent graph that jumps to ``agent_name`` and carries the current
        state with a confirmation tool message appended to ``messages``.
    """
    name = f"transfer_to_{agent_name}"
    if not description:
        description = f"Ask {agent_name} for help."

    # `state` and `tool_call_id` are annotated as injected values.
    @tool(name, description=description)
    def handoff_tool(
        state: Annotated[MessagesState, InjectedState],
        tool_call_id: Annotated[str, InjectedToolCallId],
    ) -> Command:
        # Tool-role reply tied to the originating tool call.
        ack = dict(
            role="tool",
            content=f"Successfully transferred to {agent_name}",
            name=name,
            tool_call_id=tool_call_id,
        )
        history = state["messages"] + [ack]
        return Command(
            graph=Command.PARENT,
            goto=agent_name,
            update={**state, "messages": history},
        )

    return handoff_tool


# Handoffs: one transfer tool per worker agent.
assign_to_parse_headlines_agent = create_handoff_tool(
    description="Assign task to a parse_headlines agent.",
    agent_name="parse_headlines_agent",
)

assign_to_parse_blog_content_agent = create_handoff_tool(
    description="Assign task to a parse_blog_content agent.",
    agent_name="parse_blog_content_agent",
)

SUPERVISOR_PROMPT = (
    "You are a supervisor managing two agents:\n"
    "- a news headlines agent. Assign headlines research (including title, author, urls, date) tasks to this agent\n"
    "- a blog content agent. Assign extracting detailed blog content tasks to this agent\n"
    "Assign work to one agent at a time, do not call agents in parallel.\n"
    "After all agents have completed their tasks, answer the user's question using the collected outputs."
)

# The supervisor's only tools are the two handoffs.
supervisor_handoff_tools = [
    assign_to_parse_headlines_agent,
    assign_to_parse_blog_content_agent,
]

supervisor_agent = create_react_agent(
    name="supervisor",
    model=llm,
    tools=supervisor_handoff_tools,
    prompt=SUPERVISOR_PROMPT,
)

from langgraph.graph import END

# Define the multi-agent supervisor graph
graph_builder = StateGraph(MessagesState)

# NOTE: `destinations` is only needed for visualization and doesn't affect runtime behavior
graph_builder.add_node(
    supervisor_agent,
    destinations=("parse_headlines_agent", "parse_blog_content_agent", END),
)
graph_builder.add_node(parse_headlines_agent)
graph_builder.add_node(parse_blog_content_agent)

graph_builder.add_edge(START, "supervisor")
# always return back to the supervisor
graph_builder.add_edge("parse_headlines_agent", "supervisor")
graph_builder.add_edge("parse_blog_content_agent", "supervisor")

supervisor = graph_builder.compile()

# Optional: render the graph.
# from IPython.display import display, Image

# display(Image(supervisor.get_graph().draw_mermaid_png()))

# Single-turn conversation used to drive the graph.
initial_state = {
    "messages": [
        {
            "role": "user",
            "content": "What is the news headlines for 25 July 2025?",
        }
    ]
}

# Print only the newest message of every streamed update.
for chunk in supervisor.stream(initial_state):
    pretty_print_messages(chunk, last_message=True)

# `chunk` still holds the last streamed update here; it is read under the
# "supervisor" key.
final_message_history = chunk["supervisor"]["messages"]

Update from node supervisor:


Name: transfer_to_parse_headlines_agent

Successfully transferred to parse_headlines_agent


Update from node parse_headlines_agent:


Name: parse_headlines_agent

{"headlines": [{"title": "Traders Brief - HLIB Retail Research –25 July", "author": "HLInvest", "date": "25 July 2025", "url": "https://klse.i3investor.com/web/blog/detail/hleresearch/2025-07-25-story-h499657040-Traders_Brief_HLIB_Retail_Research_ndash_25_July"}]}


Update from node supervisor:


Name: supervisor

The news headlines for 25 July 2025 is "Traders Brief - HLIB Retail Research –25 July" by HLInvest.


