In [1]:
# Install dependencies
import subprocess
import sys

# Install requirements.txt into the interpreter that backs this kernel.
# uv is tried first; pip is the fallback when uv is missing or its install fails.
try:
    # --python pins uv to this kernel's interpreter. Without it, uv installs into
    # whichever virtual environment it discovers, which may not be the kernel's.
    subprocess.run(
        ["uv", "pip", "install", "--python", sys.executable, "-r", "requirements.txt"],
        capture_output=True,
        text=True,
        check=True,
    )
except (subprocess.CalledProcessError, FileNotFoundError) as uv_error:
    # FileNotFoundError: the uv executable was not found.
    # CalledProcessError: uv ran but exited non-zero; its stderr was captured above,
    # so surface it here instead of discarding it before falling back.
    uv_stderr = getattr(uv_error, "stderr", None)
    if uv_stderr:
        print(f"uv install failed, falling back to pip:\n{uv_stderr.strip()}")
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"],
        check=True,
    )

# Reached only when one of the installers succeeded (check=True raises otherwise).
print("Dependencies installed successfully")

Dependencies installed successfully


# Real-Time Research Agent Demo

This notebook demonstrates a LangChain v1 agent that runs on a vLLM-served model (Hermes-2-Pro-Mistral-7B on RunPod) and uses Firecrawl MCP tools for live web research.

In [2]:
# Environment variables
import os
from dotenv import load_dotenv

# Read the .env file so its variables are available through the environment
load_dotenv()

# RunPod pod running vLLM with the Hermes-2-Pro-Mistral-7B model.
# Pod console: https://www.runpod.io/console/pods
RUNPOD_ENDPOINT_URL = os.environ.get("RUNPOD_ENDPOINT_URL", "YOUR_RUNPOD_ENDPOINT_HERE")
RUNPOD_API_KEY = os.environ.get("RUNPOD_API_KEY", "YOUR_RUNPOD_API_KEY_HERE")

# Firecrawl API key: https://www.firecrawl.dev/
FIRECRAWL_API_KEY = os.environ.get("FIRECRAWL_API_KEY", "YOUR_FIRECRAWL_API_KEY_HERE")

# Report only whether each value differs from its placeholder default;
# the values themselves are never printed.
print(f"Firecrawl API Key: {'Not set' if FIRECRAWL_API_KEY == 'YOUR_FIRECRAWL_API_KEY_HERE' else 'Set'}")
print(f"RunPod Endpoint: {'Not set' if RUNPOD_ENDPOINT_URL == 'YOUR_RUNPOD_ENDPOINT_HERE' else 'Set'}")

Firecrawl API Key: Set
RunPod Endpoint: Set


In [3]:
# Import dependencies
from langchain_openai import ChatOpenAI
from langchain.agents import create_agent
from langchain.tools import tool
import json
import requests

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Connect to Firecrawl MCP Server using LangChain MCP Adapters
from langchain_mcp_adapters.client import MultiServerMCPClient

# Configure Firecrawl MCP server
mcp_client = MultiServerMCPClient({
    "firecrawl": {
        "transport": "stdio",
        "command": "npx",
        "args": ["-y", "firecrawl-mcp"]
    }
})

# Get tools from MCP server
# Note: Jupyter notebooks have a running event loop, so we use await directly
firecrawl_tools = await mcp_client.get_tools()

print(f"Connected to Firecrawl MCP server")
print(f"Available tools: {[tool.name for tool in firecrawl_tools]}")

Connected to Firecrawl MCP server
Available tools: ['firecrawl_scrape', 'firecrawl_map', 'firecrawl_search', 'firecrawl_crawl', 'firecrawl_check_crawl_status', 'firecrawl_extract']


In [5]:

from langchain_openai import ChatOpenAI
from pydantic import SecretStr

# Chat model client pointed at the RunPod endpoint configured earlier
llm = ChatOpenAI(
    model="NousResearch/Hermes-2-Pro-Mistral-7B",
    base_url=RUNPOD_ENDPOINT_URL,
    api_key=SecretStr(RUNPOD_API_KEY),
    temperature=0.7,
    max_completion_tokens=1024,
)

# NOTE(review): this message is printed before the test call below, so on its
# own it does not confirm that the endpoint is reachable.
print("Connected to RunPod pod")

# Smoke test: send one short prompt. Any failure is reported rather than raised
# so the notebook keeps running.
try:
    response = llm.invoke("Say 'Hello from vLLM!' in one sentence.")
    print(f"Test response: {response.content}")
except Exception as err:
    print(f"Connection test failed: {err}")
    print("The model may still be loading. Wait a few minutes and try again.")

Connected to RunPod pod
Test response: Hello from vLLM! I'm ready to assist you with your tasks and provide you with valuable information.


In [12]:
system_prompt = """
You are a research assistant. Use firecrawl_search to find information and firecrawl_scrape for details. Be concise.
## CRITICAL TOOL USAGE RULES for firecrawl_search:
- NEVER use the 'sources' parameter - it will cause an error
- ONLY use 'query' (required) and 'limit' (optional) parameters
- The sources parameter is broken and must be avoided completely
- Correct example: {"query": "machine learning engineer skills 2024", "limit": 5}
- Wrong example: {"query": "...", "sources": ["web"]} ← THIS WILL FAIL
"""

# Create agent - pass only essential tools
agent = create_agent(
    model=llm,
    tools=firecrawl_tools,
    system_prompt=system_prompt
)

print("Agent created successfully!")
print(f"Tools available: {[tool.name for tool in firecrawl_tools]}")

# Quick test (MCP tools require async)
test_response = await agent.ainvoke({
    "messages": [{"role": "user", "content": "What can you help me research?"}]
})

print("\nAgent test response:")
print(test_response["messages"][-1].content)

Agent created successfully!
Tools available: ['firecrawl_scrape', 'firecrawl_map', 'firecrawl_search', 'firecrawl_crawl', 'firecrawl_check_crawl_status', 'firecrawl_extract']

Agent test response:
I can assist you in researching various topics related to the web. Please provide a specific query or topic you would like to search for.


In [13]:
response = await agent.ainvoke({
    "messages": [
        {"role": "user", "content": "Give me a quick snapshot of notable AI governance headlines from the past week in India. Highlight why they matter."}
    ]
})

print("Agent Response:")
print(response["messages"][-1].content)

Agent Response:
Here's a snapshot of notable AI governance headlines from the past week in India, along with their significance:

1. **"India's AI Governance Guidelines: Their pro-innovation approach ...**" [Livemint]
   - The article discusses India's AI guidelines that balance technological advancement with societal safeguards, grounded in pro-innovation principles.

2. **"3 in 4 developers in India are learning AI skills on their own: report" [CxOtoday]
   - IBM's findings reveal that many organizations are bypassing AI governance for rapid AI adoption. The article also mentions recent tech layoffs impacting thousands of professionals.

3. **"How is AI going to be regulated in India? | Explained - The Hindu" [The Hindu]
   - The article explains the government's India AI Governance Guidelines, the necessity of guidelines, and the key thrust behind them.

4. **"Governance Guidelines for AI - Shankar IAS Parliament" [Shankar IAS Parliament]
   - The article reports on the Ministry of 