In [9]:
import os
from typing import Any, Dict, List, Optional
from langchain.tools import BaseTool
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.document_loaders import WikipediaLoader
from langchain_community.document_loaders import ArxivLoader
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage
import fitz  # PyMuPDF
import pandas as pd
from PIL import Image
import io
import base64
import magic
import openpyxl

from langchain_core.tools import tool

@tool
def calculator(expression: str) -> str:
    """Evaluates a math expression like '2 + 2 * 3'."""
    try:
        return str(eval(expression, {"__builtins__": {}}))
    except Exception as e:
        return f"Error: {str(e)}"

@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return maximum 2 results.
    
    Args:
        query: The search query."""
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
            for doc in search_docs
        ])
    return {"wiki_results": formatted_search_docs}

@tool
def web_search(query: str) -> dict:
    """Search Tavily and return up to 3 results formatted for the agent."""
    tavily = TavilySearchResults(max_results=3)
    search_docs = tavily.invoke(query)           # list[dict]

    formatted = "\n\n---\n\n".join(
        f'<Document source="{doc.get("url", "")}"/>\n{doc.get("content", "")}\n</Document>'
        for doc in search_docs
    )

    return {"web_results": formatted}

@tool
def arvix_search(query: str) -> str:
    """Search Arxiv for a query and return maximum 3 result.
    
    Args:
        query: The search query."""
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
            for doc in search_docs
        ])
    return {"arvix_results": formatted_search_docs}

@tool
def execute_python(code: str) -> str:
    """Compiles and executes a Python snippet (sandboxed)."""
    try:
        local_vars = {}
        exec(code, {}, local_vars)
        return str(local_vars.get("result", "[Executed]"))
    except Exception as e:
        return f"Error: {str(e)}"


@tool
def read_pdf(filename: str) -> str:
    """
    Extracts and returns text from the first few pages of a PDF file.
    """
    if not os.path.exists(filename):
        return f"Error: File '{filename}' not found."
        
    doc = None
    try:
        doc = fitz.open(filename)
        text = ""
        for page in doc[:3]:  # Limit to first 3 pages for speed
            text += page.get_text()
        return text[:1000]  # Limit output to 1000 characters
    except Exception as e:
        return f"Error reading PDF: {str(e)}"
    finally:
        if doc:
            doc.close()
    

@tool
def read_spreadsheet(file_name: str) -> str:
    """
    Reads spreadsheet data (CSV, XLSX, XLS, TSV) and returns the first few rows as a formatted string.
    """
    try:
        ext = os.path.splitext(file_name)[-1].lower()
        
        if ext == ".csv":
            df = pd.read_csv(file_name)
        elif ext == ".tsv":
            df = pd.read_csv(file_name, sep="\t")
        elif ext in [".xlsx", ".xls"]:
            df = pd.read_excel(file_name, engine="openpyxl" if ext == ".xlsx" else "xlrd")
        else:
            return f"Unsupported file extension: {ext}"

        return df.head().to_string(index=False)
    
    except Exception as e:
        return f"Error reading spreadsheet: {str(e)}"
    
@tool
def recognize_image(image_description: str) -> str:
    """
    Uses a ChatGPT model to recognize objects in an image based on a textual description.
    Returns the model's response.
    """
    # Initialize ChatGPT model
    chat_model = ChatOpenAI(model="gpt-4")

    try:
        prompt = f"Describe the objects or scene in the image based on the following description: '{image_description}'."
        response = chat_model.invoke(prompt)
        return response.content
    except Exception as e:
        return f"Error processing image description: {str(e)}"


In [14]:
import os
import wikipedia
import logging

from typing import TypedDict, Annotated, List
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_core.messages import SystemMessage, HumanMessage, AnyMessage
from langchain_community.vectorstores import SupabaseVectorStore
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain.chains import RetrievalQA
from langchain.retrievers import MultiVectorRetriever
from langchain.tools.retriever import create_retriever_tool
from langgraph.graph import START, StateGraph, MessagesState
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode, tools_condition
from supabase.client import Client, create_client
from langsmith import Client as LangSmithClient
from langchain.callbacks.tracers import LangChainTracer
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.tracers.run_collector import RunCollectorCallbackHandler


# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Load environment variables
load_dotenv()
logger.info("Environment variables loaded")

# Initialize LangSmith
langsmith_client = LangSmithClient()
run_collector = RunCollectorCallbackHandler()
tracer = LangChainTracer(
    project_name="gaia-agent",
    client=langsmith_client
)
callback_manager = CallbackManager([tracer, run_collector])

# System prompt guiding tool use and step-by-step reasoning
system_prompt = SystemMessage(content="""
You are a helpful assistant tasked with answering questions using a set of tools. 
Now, I will ask you a question. Report your thoughts, and finish your answer with the following template: 
FINAL ANSWER: [YOUR FINAL ANSWER]. 
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
Your answer should only start with "FINAL ANSWER: ", then follows with the answer. 
""")

tools = [calculator, 
        read_pdf, 
        read_spreadsheet, 
        recognize_image, 
        execute_python, 
        web_search,
        wiki_search,
        arvix_search]

# Initialize LLM
llm = ChatOpenAI(
    model="gpt-4o-mini", 
    temperature=0,
    callbacks=[tracer]
)
llm_with_tools = llm.bind_tools(tools)

class AgentState(TypedDict):
    messages: Annotated[list[AnyMessage], add_messages]

def assistant(state: AgentState):
    return {"messages": [llm_with_tools.invoke(state["messages"])]}

# 6. Build graph
builder = StateGraph(AgentState)
builder.add_node("assistant", assistant)
builder.add_node("tools", ToolNode(tools))

builder.add_edge(START, "assistant")
builder.add_conditional_edges("assistant", tools_condition)
builder.add_edge("tools", "assistant")

# Compile the graph agent
agent = builder.compile()

question = """What is the first name of the only Malko Competition recipient 
from the 20th Century (after 1977) whose nationality on record is a country that 
no longer exists?"""
    
response = agent.invoke({
    "messages": [
        {"type": "user", "content": question}
    ]
})

print(response['messages'][-1].content)

2025-06-14 20:06:05,205 - __main__ - INFO - Environment variables loaded
2025-06-14 20:06:06,092 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-14 20:06:14,090 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-14 20:06:17,090 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-14 20:06:19,886 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-14 20:06:22,996 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-14 20:06:27,047 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


The only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists is **Claus Peter Flor**. He won the competition in **1980** and his nationality is recorded as **Czechoslovakia**, which dissolved in 1993.
