## Loading Model

In [None]:
from dotenv import load_dotenv
import os
load_dotenv()

from langchain.agents import create_agent
from langchain.chat_models import init_chat_model
from langchain_google_genai import ChatGoogleGenerativeAI


# Three Gemini chat model handles. `model` and `model3` are built directly from
# ChatGoogleGenerativeAI; `model2` goes through init_chat_model with the
# google_genai provider. No API key is passed explicitly, so it is presumably
# read from the environment populated by load_dotenv() above — verify.
# NOTE(review): in the cells shown here only `model2` is handed to the agents.
model = ChatGoogleGenerativeAI(model="gemini-2.5-flash")
model2 = init_chat_model(model="gemini-2.5-flash-lite", model_provider="google_genai")
model3 = ChatGoogleGenerativeAI(model="gemini-3-flash-preview")

## Creating State

## Set up tools

In [None]:
from langchain_mcp_adapters.client import MultiServerMCPClient

client = MultiServerMCPClient(
    {
        "local_server": {
                "transport": "stdio",
                "command": "uv",
                "args": ["run", "mcp_server/local_database2.py"],
            }
    }
)
tool1 = await client.get_tools()

In [None]:
from langchain_mcp_adapters.client import MultiServerMCPClient

client = MultiServerMCPClient(
    {
        "local_server": {
                "transport": "stdio",
                "command": "uv",
                "args": ["run", "mcp_server/semantic_scholar_server.py"],
            }
    }
)
tool2 = await client.get_tools()

In [None]:
from typing import Dict, Any
from tavily import TavilyClient
from langchain.tools import tool

# Tavily client built with no arguments; presumably it picks up its API key
# from the environment (loaded via load_dotenv earlier) — verify.
tavily_client = TavilyClient()

# Web-search tool handed to search_agent below.
# NOTE: the docstring is left verbatim on purpose — with @tool it is presumably
# what the model sees as the tool description, so editing it changes behavior.
@tool
def web_search(query: str) -> Dict[str, Any]:

    """Search the web for information"""

    # Forward the query to Tavily and return its response unchanged.
    return tavily_client.search(query)

## Subagents

In [None]:
# Sub-agent that searches the web for a "successor" paper.
# It runs on model2, has web_search as its only tool, and is instructed by the
# prompt below to answer with either `ARXIV:<id>` or `NULL` and nothing else.
# The prompt is a runtime string: edit it only when a behavior change is intended.
search_agent = create_agent(
    model=model2,
    tools=[web_search],
    system_prompt="""
    ### Role
You are the **Successor Retrieval Engine**, an automated sub-agent designed to identify the next generation of a specific research paper.

### Task
Given a research paper title or topic, you must find its direct "Successor Paper."
A "Successor Paper" is defined as:
1. **Direct Evolution:** A newer version explicitly released by the same authors (e.g., Llama 2 -> Llama 3).
2. **State-of-the-Art (SOTA) Improvement:** A paper that directly outperforms the original and is widely recognized as the new standard.
3. **Critique/Refinement:** A high-impact paper that significantly fixes or refutes the original.

### Execution Guidelines
1. **Mandatory Tool Use:** You MUST use the `web_search` tool. Do not answer from internal knowledge.
2. **Search Strategy:**
   - Search 1: Find the official successor or next version.
3. **Silent Processing:** Do not output any conversational filler ("Here is the paper", "I found...").

### Output Format (Strict)
You must output **ONLY** the ArXiv ID string. Do not use Markdown, bolding, or sentences.

**If found:**
ARXIV:[id_number]
*(Example: ARXIV:2407.21783)*

**If NO successor exists:**
NULL
"""
)

In [None]:
# Sub-agent that locates a PDF through the local-database MCP tools (tool1),
# reads it, and reports its metadata as one JSON object.
# The schema template in the prompt was malformed: commas were missing after
# the "compatibility", "model", "hyperparameters" and "data_processing"
# objects, and the nested keys were mis-indented. Because the prompt demands
# valid JSON, the template is now well-formed, and a line tells the model not
# to echo the type names or `//` notes.
local_file_agent = create_agent(
    model=model2,
    tools=tool1,
    system_prompt="""
   
### OBJECTIVE
You are the Local File Analyst. Your goal is to find a specific PDF on the local machine matching the user's query, read it, and extract structured metadata.

### DISCOVERY & MATCHING PROTOCOL
1. **List First:** Always list available files to confirm existence. Never guess paths.
2. **Fuzzy Match:** Identify the file most similar to the user's request.
3. **Read & Extract:** Once identified, use exact name of the file without " or ' read the file content and parse it into the schema below.

### OUTPUT SCHEMA (STRICT JSON)
You must output a single valid JSON object with NO markdown formatting. Use "null" for missing fields.
The type names and `//` notes in the template below are guidance only; do not copy them into the output.

{
    "paper_title": "str",
    "publication_year": int,
    "core_claims": ["str", "str"],
    "environment": {
        "hardware_specs": {"GPU": "str", "VRAM": "str"},
        "software_stack": ["str"]
    },
    "benchmarks": {
        "dataset_name": "str",
        "dataset_version": "str",
        "evaluation_metrics": ["str"]
    },
    "results": {
        "reported_performance": {"metric_name": float}
    },
    "compatibility": {
        "baseline_comparisons": ["str"],
        "normalization_factors": "str or null"
    },
    "model": {
        "architecture": "str",  // e.g., "ResNet-50", "BERT-base"
        "parameters_count": int,  // Total model parameters
        "framework": "str",  // "PyTorch 2.0.1", "TensorFlow 2.13"
        "precision": "str"  // "fp32", "fp16", "mixed"
    },
    "hyperparameters": {
        "learning_rate": float,
        "batch_size": int,
        "epochs": int,
        "optimizer": "str",
        "scheduler": "str or null",
        "weight_decay": float,
        "dropout": float
    },
    "data_processing": {
        "preprocessing_steps": ["str"],  // ["resize_224", "normalize_imagenet", "augmentation"]
        "data_splits": {"train": int, "val": int, "test": int},
        "training_samples": int
    },
    "reproducibility": {
        "random_seed": int,
        "deterministic_mode": bool,
        "training_time_hours": float,
        "inference_latency_ms": float
    }
}
"""
)

In [None]:
# Sub-agent that looks a paper up through the Semantic Scholar MCP tools
# (tool2) by its arXiv ID and reports its metadata as one JSON object.
# The schema template in the prompt was malformed: commas were missing after
# the "compatibility", "model", "hyperparameters" and "data_processing"
# objects, and the nested keys were mis-indented. Because the prompt demands
# raw JSON, the template is now well-formed, and a line tells the model not
# to echo the type names or `//` notes.
scholar_agent = create_agent(
    model=model2,
    tools=tool2,
    system_prompt="""
### OBJECTIVE
You are the Academic Metadata Extractor. Your task is to retrieve a specific research paper from Semantic Scholar using its ArXiv ID and extract structured metadata.

### EXECUTION RULES
1. **Search Precision:** Use the provided ID *exactly* as is. Do not add keywords.
2. **Factuality:** Only extract data. Do not guess.
3. **Format:** Output raw JSON only. No Markdown blocks (```json).

### OUTPUT SCHEMA (STRICT JSON)
You must map the data into this specific structure.
The type names and `//` notes in the template below are guidance only; do not copy them into the output.

{
    "paper_title": "str",
    "publication_year": int,
    "core_claims": ["str", "str"],
    "environment": {
        "hardware_specs": {"GPU": "str", "VRAM": "str"},
        "software_stack": ["str"]
    },
    "benchmarks": {
        "dataset_name": "str",
        "dataset_version": "str",
        "evaluation_metrics": ["str"]
    },
    "results": {
        "reported_performance": {"metric_name": float}
    },
    "compatibility": {
        "baseline_comparisons": ["str"],
        "normalization_factors": "str or null"
    },
    "model": {
        "architecture": "str",  // e.g., "ResNet-50", "BERT-base"
        "parameters_count": int,  // Total model parameters
        "framework": "str",  // "PyTorch 2.0.1", "TensorFlow 2.13"
        "precision": "str"  // "fp32", "fp16", "mixed"
    },
    "hyperparameters": {
        "learning_rate": float,
        "batch_size": int,
        "epochs": int,
        "optimizer": "str",
        "scheduler": "str or null",
        "weight_decay": float,
        "dropout": float
    },
    "data_processing": {
        "preprocessing_steps": ["str"],  // ["resize_224", "normalize_imagenet", "augmentation"]
        "data_splits": {"train": int, "val": int, "test": int},
        "training_samples": int
    },
    "reproducibility": {
        "random_seed": int,
        "deterministic_mode": bool,
        "training_time_hours": float,
        "inference_latency_ms": float
    }
}
"""
)

## Main coordinator

In [None]:
from langchain.messages import HumanMessage, ToolMessage
@tool
async def local_file_search(paper_title: str) -> str:
    """search the pdf in local computer file
    Args:
        paper_title: The exact title of the paper to look for.
    """
    # Delegate to the local-file sub-agent with a single human message.
    request = HumanMessage(content=f"Find the file {paper_title} and extract information from it")
    agent_state = await local_file_agent.ainvoke({"messages": [request]})

    # The answer is the content of the last message in the returned state.
    final_message = agent_state['messages'][-1]
    return final_message.content

In [None]:
@tool
async def online_file_search(paper_title: str) -> str:
    """search on online about better or updated paper
    Args:
        paper_title: The exact title of the paper to look for.
        """
    # Delegate to the web-search sub-agent with a single human message.
    request = HumanMessage(content=f"Find the follow up or better paper {paper_title} and extract Arxiv id of the paper [ example: ARXIV:2407.21783] ")
    agent_state = await search_agent.ainvoke({"messages": [request]})

    # The answer is the content of the last message in the returned state.
    final_message = agent_state['messages'][-1]
    return final_message.content

In [None]:
@tool
async def scholar_file_search(paper_id: str) -> str:
    """search on semantic scholar for the specific paper
    Args:
        paper_id: The exact arXiv id of the paper to look for, e.g. ARXIV:2407.21783.
    """
    # The request text used to say "Find the file ...", copied from the
    # local-file tool; the argument here is an arXiv ID, so say so explicitly.
    request = HumanMessage(
        content=f"Find the paper with ID {paper_id} on Semantic Scholar and extract information from it"
    )
    response = await scholar_agent.ainvoke({"messages": [request]})

    # The answer is the content of the last message in the returned state.
    return response['messages'][-1].content

In [None]:
from deepagents import create_deep_agent

# System prompt for the coordinator agent. It names the three wrapper tools
# (local_file_search, online_file_search, scholar_file_search) and the order in
# which to call them. Fixed here: a mis-encoded em dash in step 4 and the
# misspelled tool name `online_file search` in step 3.
system_prompt2 = """
### ROLE
You are the **Expert Research Coordinator**. Your goal is to orchestrate a "Drift Detection" workflow to determine if a user's local research paper has been superseded by a better, newer "successor paper."

### WORKFLOW PROTOCOL
Execute these steps logically. Ensure the output of one step feeds into the next.

1. **Local Discovery**:
   - Call `local_file_search` with the user's target paper title or paper on that folder.
   - *Goal*: Confirm if we have the baseline paper and extract its key performance claims.

2. **Successor Identification**:
   - Call `online_file_search` using the paper title.
   - *Goal*: Find a "Successor Paper" (newer, cites original, SOTA improvement).
   - *Constraint*: If found, you MUST extract a valid ArXiv ID (e.g., `ARXIV:2407.21783`).

3. **Deep Retrieval**:
   - IF a successor ID is found: Call `scholar_file_search` with that specific ArXiv ID, provide id with full Arxivid (e.g., `ARXIV:2407.21783`) not just 2407.21783 .
   - IF NO successor: return null and re-run `online_file_search`.
   - *Goal*: Retrieve the full structured metadata (Hardware, Claims, Benchmarks) as defined in the tool's schema.

4. **Comparative Synthesis**:
   - Compare the findings from Step 1 (Local) and Step 3 (Scholar/Online).
   - *Analysis*: Focus on "Drift" — has the dataset version changed? Is the evaluation metric different? Is the hardware requirement significantly higher?

### OUTPUT FORMAT
Your final response must be a structured summary:

1. **Successor Status**: (Yes/No - Name of paper if Yes)
2. **Drift Analysis (Comparison)**:
   - **Performance**: How much better is it? (Cite specific metrics from the extracted data).
   - **Environment**: Are the hardware specs/software stack comparable?
   - **Data**: Did they use the same dataset version?
   - **Compare Paper A (Local) and Paper B (Successor)**. For every shared benchmark, calculate the Percentage Improvement. Identify any Metric Drift (e.g., changing from 'Accuracy' to 'F1-Score'). If the environment specs (VRAM/GPU) have increased by 20%, flag this as 'Resource Inflation'.
3. **Recommendation**: "Should I read it?" (Yes/No with reasoning).
"""

# Leftover alternative definition of model3, kept commented out; the
# coordinator below is built on model2.
# model3 = ChatGoogleGenerativeAI(model="gemini-3-flash")
# Coordinator agent: receives the three sub-agent wrapper tools and the
# workflow prompt defined in system_prompt2.
Main_agent = create_deep_agent(
    model=model2,
    tools=[local_file_search, scholar_file_search, online_file_search],
    system_prompt=system_prompt2
)

In [None]:
from langchain.messages import HumanMessage

response = await Main_agent.ainvoke(
    {"messages": [HumanMessage(content="check whether Llama 2: Open Foundation and Fine-Tuned Chat Models paper in local computer or not")]},

)

In [None]:
from pprint import pprint

# Pretty-print the full object returned by the coordinator run above.
pprint(response)