## Expected Outputs

- **Cells 1-3**: Setup confirmation messages
- **Cell 4**: Basic research answer with sources
- **Cell 5**: Academic paper results + comprehensive analysis  
- **Cell 6**: Multi-source research with web + academic sources
- **Cell 7**: Caching demonstration with similarity scores
- **Cell 8**: Task breakdown visualization showing MCP workflow
- **Cell 9**: Document processing analysis results
- **Cell 10**: Summary statistics and feature checklist

This notebook demonstrates all core assignment requirements plus bonus features like source citations, reasoning steps, and multi-source orchestration.

### Setup

In [21]:
import sys
import os
from pathlib import Path
import json
from datetime import datetime

# Locate the project root. When the kernel was started inside notebooks/,
# the root is one directory up; otherwise the current directory is used.
working_dir = Path.cwd()
project_root = working_dir.parent if working_dir.name == "notebooks" else working_dir
print(f"📁 Detected project root: {project_root}")

# Put the project root and its src/ directory at the front of sys.path.
# Each entry is inserted at index 0 in turn, so src/ ends up ahead of the root.
# NOTE(review): with both entries on sys.path the same package can be loaded
# under two names (`models.x` and `src.models.x`); the recorded logs show both
# logger prefixes — confirm this double import is intended.
src_path = project_root / "src"
for import_dir in (project_root, src_path):
    if str(import_dir) not in sys.path:
        sys.path.insert(0, str(import_dir))

print("🔧 Added to Python path:")
print(f"   • {project_root}")
print(f"   • {src_path}")

# Import core components. First try the src-relative form (needs src/ on
# sys.path); if that fails, fall back to the `src.`-prefixed form (needs the
# project root on sys.path).
try:
    from models.model_builder import ModelBuilder
    from orchestration.mcp_simulator import MCPSimulator
    from orchestration.search_coordinator import SearchCoordinator
    from tools.web_search import WebSearchTool
    from tools.arxiv_search import ArxivSearchTool
    from tools.vector_db import VectorDBTool
    from utils.document_processor import DocumentProcessor

    print("✅ All imports successful using src-relative imports!")
except ImportError as e:
    print(f"❌ Import error with src-relative imports: {e}")
    # Fallback to absolute imports if running from project root
    try:
        from src.models.model_builder import ModelBuilder
        from src.orchestration.mcp_simulator import MCPSimulator
        from src.orchestration.search_coordinator import SearchCoordinator
        from src.tools.web_search import WebSearchTool
        from src.tools.arxiv_search import ArxivSearchTool
        from src.tools.vector_db import VectorDBTool
        from src.utils.document_processor import DocumentProcessor

        print("✅ All imports successful using absolute src imports!")
    except ImportError as e2:
        # Both strategies failed: explain the expected layout, then re-raise
        # so the notebook stops here instead of failing later with NameError.
        print(f"❌ Import error with absolute imports: {e2}")
        print(
            "🔍 Please ensure you're running from the project root or notebooks directory"
        )
        print(f"   Current working directory: {Path.cwd()}")
        print("   Project structure expected:")
        print("     project_root/")
        print("       ├── src/")
        print("       │   ├── models/")
        print("       │   ├── orchestration/")
        print("       │   ├── tools/")
        print("       │   └── utils/")
        print("       └── notebooks/")
        raise

# Report the actual working directory; the project root was printed above.
print(f"📁 Working directory: {Path.cwd()}")

📁 Detected project root: /home/vlofgren/Documents/Projects/research-assistant-mcp
🔧 Added to Python path:
   • /home/vlofgren/Documents/Projects/research-assistant-mcp
   • /home/vlofgren/Documents/Projects/research-assistant-mcp/src
✅ All imports successful using src-relative imports!
📁 Working directory: /home/vlofgren/Documents/Projects/research-assistant-mcp


### Setup 2: Environment Setup

In [22]:
from dotenv import load_dotenv

# Load environment variables via python-dotenv.
load_dotenv()

# Read both API keys up front; an unset variable yields None.
openai_key = os.environ.get("OPENAI_API_KEY")
tavily_key = os.environ.get("TAVILY_API_KEY")

# Decide the status labels first, then print the report.
if openai_key:
    openai_status = "✅ Set"
else:
    openai_status = "❌ Missing"

if tavily_key:
    tavily_status = "✅ Set"
else:
    tavily_status = "❌ Missing (web search will use fallback)"

print("🔑 API Key Status:")
print("   OpenAI: " + openai_status)
print("   Tavily: " + tavily_status)

# Only the OpenAI key triggers an extra hint.
if not openai_key:
    print("\n⚠️  To get full functionality, set OPENAI_API_KEY in your .env file")

🔑 API Key Status:
   OpenAI: ✅ Set
   Tavily: ✅ Set


### Initialize Research Assistant Components

In [23]:
def create_demo_research_assistant():
    """Build every component used by the demo and return them in one dict.

    A progress line is printed as each stage finishes. The Tavily and OpenAI
    keys are read from the notebook-level ``tavily_key`` and ``openai_key``
    variables, so the environment cell must have run first.

    Returns:
        dict: the components keyed by "model", "mcp_simulator", "web_search",
        "arxiv_search", "document_processor", "vector_db" and
        "search_coordinator".
    """
    print("🔧 Initializing Research Assistant Components...")

    # Stage 1: configure the language model one builder call at a time.
    # The second prompt line keeps its original leading whitespace.
    system_prompt = (
        "You are an expert research assistant using MCP workflow.\n"
        "            Provide clear, comprehensive responses with proper citations and structured analysis."
    )
    builder = ModelBuilder()
    builder = builder.with_provider("openai")
    builder = builder.with_model("gpt-4o-mini")
    builder = builder.with_temperature(0.7)
    builder = builder.with_max_tokens(1000)
    builder = builder.with_system_prompt(system_prompt)
    llm = builder.build()
    print("   ✅ Model Builder configured")

    # Stage 2: the standalone tools (web search, ArXiv, document processing).
    web_tool = WebSearchTool(api_key=tavily_key)
    arxiv_search_tool = ArxivSearchTool()
    doc_processor = DocumentProcessor()
    print("   ✅ Search tools initialized")

    # Stage 3: vector store used for caching; the path is relative to the
    # kernel's working directory.
    cache_db = VectorDBTool(persist_directory="data/vector_db")
    print("   ✅ Vector database connected")

    # Stage 4: the orchestrator, wired to the model and the vector store.
    orchestrator = MCPSimulator(llm, vector_db=cache_db)
    print("   ✅ MCP Simulator ready")

    # Stage 5: a search coordinator constructed with both API keys.
    coordinator = SearchCoordinator(
        web_api_key=tavily_key,
        openai_api_key=openai_key,
    )
    print("   ✅ Search Coordinator initialized")

    components = {
        "model": llm,
        "mcp_simulator": orchestrator,
        "web_search": web_tool,
        "arxiv_search": arxiv_search_tool,
        "document_processor": doc_processor,
        "vector_db": cache_db,
        "search_coordinator": coordinator,
    }
    return components


# Build the shared `assistant` dict that the demo cells below index into.
assistant = create_demo_research_assistant()
print("\n🚀 Research Assistant is ready for demo!")

🔧 Initializing Research Assistant Components...


INFO:models.model_builder:Model built successfully with provider: openai
INFO:tools.web_search:Tavily client initialized successfully


   ✅ Model Builder configured


INFO:src.models.model_builder:Model built successfully with provider: openai
INFO:tools.web_search:Insight extraction model loaded.
INFO:src.models.model_builder:Model built successfully with provider: openai
INFO:tools.arxiv_search:ArXiv insight extraction model loaded.


INFO:tools.vector_db:VectorDBTool initialised (provider=openai, collection='research_assistant', dir='data/vector_db')
INFO:src.tools.web_search:Tavily client initialized successfully


   ✅ Search tools initialized
   ✅ Vector database connected


INFO:src.models.model_builder:Model built successfully with provider: openai
INFO:src.tools.web_search:Insight extraction model loaded.
INFO:src.models.model_builder:Model built successfully with provider: openai
INFO:src.tools.arxiv_search:ArXiv insight extraction model loaded.
INFO:src.models.model_builder:Model built successfully with provider: openai
INFO:src.tools.intelligent_search_planner:Model builder initialized for search planning
INFO:orchestration.mcp_simulator:Search coordinator initialized successfully
INFO:src.tools.web_search:Tavily client initialized successfully
INFO:src.models.model_builder:Model built successfully with provider: openai
INFO:src.tools.web_search:Insight extraction model loaded.
INFO:src.models.model_builder:Model built successfully with provider: openai
INFO:src.tools.arxiv_search:ArXiv insight extraction model loaded.
INFO:src.models.model_builder:Model built successfully with provider: openai
INFO:src.tools.intelligent_search_planner:Model builder 

   ✅ MCP Simulator ready
   ✅ Search Coordinator initialized

🚀 Research Assistant is ready for demo!


### Demo 1 - Simple Research Question

In [24]:
question1 = "What are the latest developments in quantum computing for 2024?"

# Banner: the question followed by a 60-character rule.
print("🔬 Research Question: " + question1)
print(60 * "=")

# Create a session for the question, then run the research on the same
# simulator instance.
mcp = assistant["mcp_simulator"]
session_id = mcp.create_session(question1)
result1 = mcp.run_research(question1)

print("\n📋 Final Answer:")
print(result1.get("answer", "No answer generated"))

# Statistics: missing keys are treated as empty lists.
print("\n📊 Research Statistics:")
print(f"   • Session ID: {session_id}")
sources_found = len(result1.get("sources", []))
print(f"   • Sources found: {sources_found}")
reasoning_step_count = len(result1.get("reasoning_steps", []))
print(f"   • Reasoning steps: {reasoning_step_count}")

INFO:orchestration.mcp_simulator:Created research session: session_20250730_084113_4050
INFO:orchestration.mcp_simulator:Created research session: session_20250730_084113_4050


🔬 Research Question: What are the latest developments in quantum computing for 2024?


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:tools.task_planner:Planning tasks for question: What are the latest developments in quantum computing for 2024?
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:tools.task_planner:4 tasks planned
INFO:orchestration.mcp_simulator:Planned 4 tasks for question using TaskPlannerTool
INFO:orchestration.mcp_simulator:Executing task task_1: Search for recent articles, papers, and news about quantum computing developments in 2024.
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:orchestration.search_coordinator:Intelligent analysis: Domain=technology, ArXiv suitable=True, Confidence=0.9
INFO:src.tools.web_search:Search completed: 15 results found
INFO:orchestration.search_coordinator:Performing ArXiv search - validated by intelligent analysis
INFO:src.tools.arxiv_search:ArXiv search completed: 10 results found
IN


📋 Final Answer:
# Comprehensive Report on the Latest Developments in Quantum Computing for 2024

## Introduction
Quantum computing is rapidly evolving, with significant advancements occurring in hardware capabilities, error correction methods, and diverse applications across various fields. This report compiles the latest developments in quantum computing for 2024, focusing on breakthroughs in technology, research trends, and emerging applications.

## 1. Hardware Progress and Error Correction

### Key Innovations
1. **Google's Willow Quantum Chip**: Announced in December 2024, this chip demonstrates exponential improvements in error correction as qubit counts increase, marking a significant step forward in quantum technology scalability and reliability [A Quantum Leap Forward](https://cacm.acm.org/news/a-quantum-leap-forward/) - Web.
   
2. **IBM's Heron Quantum Processor**: Featuring 156 qubits, the Heron processor has achieved a remarkable 99.9% fidelity for two-qubit gates through

### Demo 2 - Academic Research with ArXiv

In [25]:
question2 = "What are the most significant machine learning breakthroughs in natural language processing from recent academic papers?"

print(f"🎓 Academic Research Question: {question2}")
print("=" * 80)

# First, query ArXiv directly to show the tool on its own.
print("🔍 Searching ArXiv for recent papers...")
arxiv_results = assistant["arxiv_search"].search_recent_papers(
    "machine learning natural language processing breakthrough",
    days_back=60,
)

# Preview limits for the listing below.
MAX_PAPERS_SHOWN = 3
MAX_TITLE_CHARS = 80
MAX_AUTHORS_SHOWN = 2

print(f"\n📚 Found {len(arxiv_results)} recent papers:")
for i, paper in enumerate(arxiv_results[:MAX_PAPERS_SHOWN], 1):
    # Append an ellipsis only when something was actually cut off, so a short
    # title is not shown as if it had been truncated.
    title_ellipsis = "..." if len(paper.title) > MAX_TITLE_CHARS else ""
    authors_ellipsis = "..." if len(paper.authors) > MAX_AUTHORS_SHOWN else ""

    print(f"   {i}. {paper.title[:MAX_TITLE_CHARS]}{title_ellipsis}")
    print(
        f"      Authors: {', '.join(paper.authors[:MAX_AUTHORS_SHOWN])}{authors_ellipsis}"
    )
    print(f"      Published: {paper.published_date}")
    print()

# Now run the full research workflow on the same question.
print("🧠 Running comprehensive research with MCP workflow...")
result2 = assistant["mcp_simulator"].run_research(question2)

print("\n📋 Comprehensive Research Result:")
print(result2.get("answer", "No answer generated"))

🎓 Academic Research Question: What are the most significant machine learning breakthroughs in natural language processing from recent academic papers?
🔍 Searching ArXiv for recent papers...


INFO:tools.arxiv_search:ArXiv search completed: 10 results found
INFO:tools.arxiv_search:Downloading PDF for: MetaCLIP 2: A Worldwide Scaling Recipe
INFO:tools.arxiv_search:Successfully extracted 61158 characters from PDF
INFO:tools.arxiv_search:Downloading PDF for: StepAL: Step-aware Active Learning for Cataract Surgical Videos
INFO:tools.arxiv_search:Successfully extracted 28269 characters from PDF
INFO:tools.arxiv_search:Downloading PDF for: X-Omni: Reinforcement Learning Makes Discrete Autoregressive Image   Generative Models Great Again
INFO:tools.arxiv_search:Successfully extracted 62746 characters from PDF
INFO:tools.arxiv_search:Downloading PDF for: MetaLab: Few-Shot Game Changer for Image Recognition
INFO:tools.arxiv_search:Successfully extracted 48402 characters from PDF
INFO:tools.arxiv_search:Downloading PDF for: Pitfalls when tackling the exponential concentration of parameterized   quantum models
INFO:tools.arxiv_search:Successfully extracted 98459 characters from PDF
INF


📚 Found 10 recent papers:
   1. MetaCLIP 2: A Worldwide Scaling Recipe...
      Authors: Yung-Sung Chuang, Yang Li...
      Published: 2025-07-29T17:59:58Z

   2. StepAL: Step-aware Active Learning for Cataract Surgical Videos...
      Authors: Nisarg A. Shah, Bardia Safaei...
      Published: 2025-07-29T17:59:14Z

   3. X-Omni: Reinforcement Learning Makes Discrete Autoregressive Image   Generative ...
      Authors: Zigang Geng, Yibing Wang...
      Published: 2025-07-29T17:59:04Z

🧠 Running comprehensive research with MCP workflow...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:tools.task_planner:Planning tasks for question: What are the most significant machine learning breakthroughs in natural language processing from recent academic papers?
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:tools.task_planner:4 tasks planned
INFO:orchestration.mcp_simulator:Planned 4 tasks for question using TaskPlannerTool
INFO:orchestration.mcp_simulator:Executing task task_1: Identify recent academic papers on machine learning breakthroughs in natural language processing (NLP).
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:orchestration.search_coordinator:Intelligent analysis: Domain=technology, ArXiv suitable=True, Confidence=0.9
INFO:src.tools.web_search:Search completed: 15 results found
INFO:orchestration.search_coordinator:Performing ArXiv search - validated by intelligent analysis
I


📋 Comprehensive Research Result:
# Comprehensive Report on Recent Breakthroughs in Machine Learning for Natural Language Processing (NLP)

## Introduction

Natural Language Processing (NLP) has seen remarkable advancements in recent years, driven largely by breakthroughs in machine learning techniques. This report synthesizes findings from various academic papers to highlight the most significant developments in the field, focusing on the integration of deep learning, the transition from rule-based systems to more sophisticated models, and the application of machine learning across diverse domains.

## Key Breakthroughs in NLP

### 1. Evolution from Rule-Based to Deep Learning Approaches

A significant trend in NLP is the transition from traditional rule-based systems to deep learning methods. This evolution has allowed for more nuanced language understanding and generation capabilities. Recent studies highlight the effectiveness of deep learning models, such as Long Short-Term Memory

### Demo 3 - Complex Multi-Source Research

In [26]:
question3 = "How effective are global health programs for malaria prevention in sub-Saharan Africa, and what do recent studies show about cost-effectiveness?"

print("🌍 Complex Research Question: " + question3)
print(90 * "=")

# Part 1: drive the search coordinator directly.
print("🔄 Using Search Coordinator for multi-source research...")
coordinator = assistant["search_coordinator"]

# Ask the coordinator for its search plans (no explicit focus).
search_plans = coordinator.plan_searches(question3, focus="")
print("\n📋 Search Plan Created:")
print(f"   • Search strategies: {len(search_plans)}")

# Only the first plan is executed, as an example; with no plans there is
# nothing to run.
if search_plans:
    search_results = coordinator.execute_search_plan(search_plans[0])
else:
    search_results = None

if not search_results:
    print("   • No search results generated")
else:
    web_hit_count = len(search_results.web_results)
    print(f"   • Web results: {web_hit_count}")
    academic_hit_count = len(search_results.arxiv_results)
    print(f"   • Academic results: {academic_hit_count}")

# Part 2: run the full MCP research workflow on the same question.
result3 = assistant["mcp_simulator"].run_research(question3)

print("\n📋 Multi-Source Research Result:")
print(result3.get("answer", "No answer generated"))

# Part 3: list up to five of the sources, if any were returned.
sources_used = result3.get("sources")
if sources_used:
    print(f"\n📖 Sources Used ({len(sources_used)}):")
    for rank, entry in enumerate(sources_used[:5], start=1):
        kind = entry.get("type", "unknown")
        short_title = entry.get("title", "Untitled")[:60]
        print(f"   {rank}. [{kind.upper()}] {short_title}...")

🌍 Complex Research Question: How effective are global health programs for malaria prevention in sub-Saharan Africa, and what do recent studies show about cost-effectiveness?
🔄 Using Search Coordinator for multi-source research...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:orchestration.search_coordinator:Created 2 intelligent search plans for domain: SearchDomain.MEDICINE



📋 Search Plan Created:
   • Search strategies: 2


INFO:src.tools.web_search:Search completed: 10 results found
INFO:orchestration.search_coordinator:Search executed: 10 web + 0 arxiv results
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://ap

   • Web results: 10
   • Academic results: 0


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:tools.task_planner:Planning tasks for question: How effective are global health programs for malaria prevention in sub-Saharan Africa, and what do recent studies show about cost-effectiveness?
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:tools.task_planner:7 tasks planned
INFO:orchestration.mcp_simulator:Planned 7 tasks for question using TaskPlannerTool
INFO:orchestration.mcp_simulator:Executing task task_1: Search for recent studies and reviews on the effectiveness of global health programs for malaria prevention in sub-Saharan Africa.
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:orchestration.search_coordinator:Intelligent analysis: Domain=medicine, ArXiv suitable=False, Confidence=0.9
INFO:src.tools.web_search:Search completed: 15 results found
INFO:orchestration.search_coordinator:Skipping Ar


📋 Multi-Source Research Result:
# Comprehensive Report on the Effectiveness and Cost-Effectiveness of Global Health Programs for Malaria Prevention in Sub-Saharan Africa

## Research Question
**How effective are global health programs for malaria prevention in sub-Saharan Africa, and what do recent studies show about cost-effectiveness?**

## Introduction
Malaria remains a significant public health challenge in sub-Saharan Africa, with complex dynamics involving vector control, treatment, and prevention strategies. This report synthesizes recent research findings on the effectiveness and cost-effectiveness of global health programs aimed at malaria prevention in the region.

## Effectiveness of Malaria Prevention Programs

### Progress in Malaria Control
From 2000 to 2022, substantial progress was made in reducing malaria incidence by 40% and mortality rates by 60% in the WHO African Region, indicating the effectiveness of ongoing malaria control efforts ([Current Status of Malaria Co

### Demo 4 - Vector Database Caching

In [27]:
print("💾 Testing Vector Database Caching...")

cache_store = assistant["vector_db"]

# The question used for the cache round trip.
cache_question = "What is the current state of renewable energy adoption globally?"
print("🔍 First-time question: " + cache_question)

# Look for up to three similar entries already in the store.
cached_results = cache_store.similarity_search(cache_question, k=3)
print(f"📊 Cache check: {len(cached_results)} similar entries found")

if cached_results:
    for rank, hit in enumerate(cached_results, start=1):
        # A missing distance defaults to 1, which displays as 0.0% similarity.
        similarity = 100 * (1 - hit.get("distance", 1))
        hit_metadata = hit.get("metadata", {})
        hit_title = hit_metadata.get("title", "Cached Result")
        print(f"   {rank}. Similarity: {similarity:.1f}% - {hit_title[:50]}...")

# Run the research itself.
print("\n🔬 Running fresh research...")
result4 = assistant["mcp_simulator"].run_research(cache_question)

# Store the question, with the answer in its metadata, for demonstration.
qa_metadata = {
    "title": f"Q&A: {cache_question[:50]}...",
    "question": cache_question,
    "answer": result4.get("answer", ""),
    "timestamp": datetime.now().isoformat(),
    "sources_count": len(result4.get("sources", [])),
}
cache_store.add_texts(texts=[cache_question], metadatas=[qa_metadata])

print("✅ Result cached for future queries")
answer_preview = result4.get("answer", "No answer generated")[:200]
print(f"📋 Answer: {answer_preview}...")

💾 Testing Vector Database Caching...
🔍 First-time question: What is the current state of renewable energy adoption globally?


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:orchestration.mcp_simulator:Created research session: session_20250730_085326_7116


📊 Cache check: 0 similar entries found

🔬 Running fresh research...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:tools.task_planner:Planning tasks for question: What is the current state of renewable energy adoption globally?
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:tools.task_planner:5 tasks planned
INFO:orchestration.mcp_simulator:Planned 5 tasks for question using TaskPlannerTool
INFO:orchestration.mcp_simulator:Executing task task_1: Gather recent reports and articles on global renewable energy adoption to understand the current state.
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:orchestration.search_coordinator:Intelligent analysis: Domain=science, ArXiv suitable=False, Confidence=0.9
INFO:src.tools.web_search:Search completed: 15 results found
INFO:orchestration.search_coordinator:Skipping ArXiv search - not suitable for domain: science
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/c

✅ Result cached for future queries
📋 Answer: # Current State of Renewable Energy Adoption Globally

## Introduction
The global landscape of renewable energy adoption is rapidly evolving, with countries setting ambitious targets and implementing ...


### Demo 5 - Task Breakdown Visualization

In [28]:
breakdown_question = "Compare the environmental impact of electric vehicles versus traditional gasoline cars"

print("🧩 Demonstrating Task Breakdown for: " + breakdown_question)
print(80 * "=")

mcp = assistant["mcp_simulator"]

# Open a session and fetch its object so the planned tasks can be attached.
session_id = mcp.create_session(breakdown_question)
session = mcp.sessions[session_id]

# Ask the simulator for a task plan and store it on the session.
tasks = mcp.high_level_plan(breakdown_question)
session.tasks = tasks

print(f"📋 MCP Simulator broke down the question into {len(tasks)} tasks:")
print()

# Pass 1: show the plan before anything runs.
for step_no, task in enumerate(tasks, start=1):
    kind_label = task.task_type.value.upper()
    print(f"Task {step_no}: {kind_label}")
    print(f"   Description: {task.description}")
    print(f"   Status: {task.status.value}")
    print()

print("⚡ Executing tasks step by step:")
print()

# Pass 2: run the tasks one at a time, in plan order.
for step_no, task in enumerate(tasks, start=1):
    kind_label = task.task_type.value.upper()
    print(f"🔄 Executing Task {step_no}: {kind_label}")

    task_result = mcp.plan_task(task, session)

    print(f"   ✅ Completed: {task.description}")
    # A summary line is shown only when the result is a dict that has one.
    if isinstance(task_result, dict):
        if "summary" in task_result:
            summary_preview = task_result["summary"][:100]
            print(f"   📝 Summary: {summary_preview}...")
    print()

print("🎯 All tasks completed! Final answer available in session.")

INFO:orchestration.mcp_simulator:Created research session: session_20250730_085517_4884
INFO:tools.task_planner:Planning tasks for question: Compare the environmental impact of electric vehicles versus traditional gasoline cars


🧩 Demonstrating Task Breakdown for: Compare the environmental impact of electric vehicles versus traditional gasoline cars


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:tools.task_planner:10 tasks planned
INFO:orchestration.mcp_simulator:Planned 10 tasks for question using TaskPlannerTool
INFO:orchestration.mcp_simulator:Executing task task_1: Gather general information on the environmental impact of electric vehicles (EVs).


📋 MCP Simulator broke down the question into 10 tasks:

Task 1: SEARCH
   Description: Gather general information on the environmental impact of electric vehicles (EVs).
   Status: pending

Task 2: SEARCH
   Description: Gather general information on the environmental impact of traditional gasoline cars.
   Status: pending

Task 3: EXTRACT
   Description: Extract data on greenhouse gas emissions from manufacturing, operation, and disposal of EVs.
   Status: pending

Task 4: EXTRACT
   Description: Extract data on greenhouse gas emissions from manufacturing, operation, and disposal of gasoline cars.
   Status: pending

Task 5: EXTRACT
   Description: Extract data on resource consumption (e.g., minerals, water) for EVs and gasoline cars.
   Status: pending

Task 6: EXTRACT
   Description: Extract data on other environmental impacts such as air pollution, noise, and land use for both vehicle types.
   Status: pending

Task 7: SUMMARIZE
   Description: Summarize the environmental impacts o

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:orchestration.search_coordinator:Intelligent analysis: Domain=science, ArXiv suitable=False, Confidence=0.9
INFO:src.tools.web_search:Search completed: 15 results found
INFO:orchestration.search_coordinator:Skipping ArXiv search - not suitable for domain: science
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INF

   ✅ Completed: Gather general information on the environmental impact of electric vehicles (EVs).

🔄 Executing Task 2: SEARCH


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:orchestration.search_coordinator:Intelligent analysis: Domain=science, ArXiv suitable=False, Confidence=0.9
INFO:src.tools.web_search:Search completed: 15 results found
INFO:orchestration.search_coordinator:Skipping ArXiv search - not suitable for domain: science
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INF

   ✅ Completed: Gather general information on the environmental impact of traditional gasoline cars.

🔄 Executing Task 3: EXTRACT
   ✅ Completed: Extract data on greenhouse gas emissions from manufacturing, operation, and disposal of EVs.

🔄 Executing Task 4: EXTRACT
   ✅ Completed: Extract data on greenhouse gas emissions from manufacturing, operation, and disposal of gasoline cars.

🔄 Executing Task 5: EXTRACT
   ✅ Completed: Extract data on resource consumption (e.g., minerals, water) for EVs and gasoline cars.

🔄 Executing Task 6: EXTRACT
   ✅ Completed: Extract data on other environmental impacts such as air pollution, noise, and land use for both vehicle types.

🔄 Executing Task 7: SUMMARIZE
   ✅ Completed: Summarize the environmental impacts of electric vehicles based on collected data.

🔄 Executing Task 8: SUMMARIZE
   ✅ Completed: Summarize the environmental impacts of gasoline cars based on collected data.

🔄 Executing Task 9: SYNTHESIZE


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:orchestration.mcp_simulator:Task task_9 completed successfully
INFO:orchestration.mcp_simulator:Executing task task_10: Prepare a comprehensive report comparing the environmental impacts of electric vehicles and gasoline cars.


   ✅ Completed: Compare the summarized environmental impacts of EVs and gasoline cars to identify key differences and trade-offs.

🔄 Executing Task 10: REPORT


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:orchestration.mcp_simulator:Task task_10 completed successfully


   ✅ Completed: Prepare a comprehensive report comparing the environmental impacts of electric vehicles and gasoline cars.

🎯 All tasks completed! Final answer available in session.


### System Performance and Wrap-up

In [29]:
print("📊 Research Assistant Demo Summary")
print("=" * 50)

# Vector database stats are best-effort: a failure here must not abort the
# summary. Catch Exception (not a bare except, which would also swallow
# KeyboardInterrupt/SystemExit) and report why the count is unavailable.
try:
    collection = assistant["vector_db"].collection
    total_docs = collection.count()
    print(f"💾 Vector Database: {total_docs} documents cached")
except Exception as stats_error:
    print("💾 Vector Database: Ready for use")
    print(
        f"   (document count unavailable: {type(stats_error).__name__}: {stats_error})"
    )

# Session history kept by the simulator.
simulator = assistant["mcp_simulator"]
total_sessions = len(simulator.sessions)
print(f"📝 Research Sessions: {total_sessions} sessions created")

if simulator.sessions:
    # The last key is treated as the latest session.
    latest_session_id = list(simulator.sessions.keys())[-1]
    latest_session = simulator.sessions[latest_session_id]
    # Count completed tasks without building a throwaway list.
    completed_count = sum(
        1 for t in latest_session.tasks if t.status.value == "completed"
    )
    print(f"🔬 Latest Session: {latest_session.original_question[:50]}...")
    print(f"   • Tasks completed: {completed_count}")
    print(f"   • Sources gathered: {len(latest_session.sources)}")
    print(f"   • Reasoning steps: {len(latest_session.reasoning_steps)}")

print("\n🎯 Key Features Demonstrated:")
features = [
    "✅ Multi-step task decomposition (MCP workflow)",
    "✅ Web search integration (Tavily API)",
    "✅ Academic paper search (ArXiv API)",
    "✅ Vector database caching",
    "✅ Document processing and analysis",
    "✅ Multi-source information synthesis",
    "✅ Source citation and traceability",
    "✅ Intelligent search planning",
]

for feature in features:
    print(f"   {feature}")

print("\n🚀 Demo completed successfully!")
print(
    "💡 Try running: `assistant['mcp_simulator'].run_research('your question here')`"
)
print("🌐 Or launch the Gradio interface with: `python src/main.py`")

📊 Research Assistant Demo Summary
💾 Vector Database: Ready for use
📝 Research Sessions: 5 sessions created
🔬 Latest Session: Compare the environmental impact of electric vehic...
   • Tasks completed: 10
   • Sources gathered: 30
   • Reasoning steps: 0

🎯 Key Features Demonstrated:
   ✅ Multi-step task decomposition (MCP workflow)
   ✅ Web search integration (Tavily API)
   ✅ Academic paper search (ArXiv API)
   ✅ Vector database caching
   ✅ Document processing and analysis
   ✅ Multi-source information synthesis
   ✅ Source citation and traceability
   ✅ Intelligent search planning

🚀 Demo completed successfully!
💡 Try running: `assistant['mcp_simulator'].run_research('your question here')`
🌐 Or launch the Gradio interface with: `python src/main.py`
