How to extend a single-document agent to handle multiple documents, with increasing degrees of complexity.

In [1]:
import os
from dotenv import load_dotenv

In [2]:
# Load variables from a local .env file into the process environment.
load_dotenv()

# Fail early with a clear message when the key is missing: assigning None into
# os.environ would raise an opaque "TypeError: str expected, not NoneType".
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your .env file or environment."
    )
os.environ["OPENAI_API_KEY"] = openai_api_key

In [3]:
import nest_asyncio
# Patch asyncio so an already-running event loop can be re-entered.
# NOTE(review): presumably required because the notebook kernel runs its own
# loop while the summary query engine below is created with use_async=True.
nest_asyncio.apply()

In [4]:
#download papers

# (source URL, local file name) for every paper used in the first experiment.
_paper_sources = (
    ("https://openreview.net/pdf?id=VtmBAGCN7o", "metagpt.pdf"),
    ("https://openreview.net/pdf?id=6PmJoRfdaK", "longlora.pdf"),
    ("https://openreview.net/pdf?id=hSyW5go0v8", "selfrag.pdf"),
)

# Parallel lists consumed by the download and indexing cells.
urls = [source_url for source_url, _ in _paper_sources]
papers = [file_name for _, file_name in _paper_sources]

In [5]:
pip install requests

Note: you may need to restart the kernel to use updated packages.


In [6]:
import requests
# Download each paper into the working directory.
# zip() silently drops unmatched entries, so surface a length mismatch.
if len(urls) != len(papers):
    print(
        f"⚠️ {len(urls)} URLs but {len(papers)} file names; "
        "unmatched entries are skipped."
    )

for url, name in zip(urls, papers):
    try:
        # The timeout keeps a stalled connection from hanging the cell.
        response = requests.get(
            url, headers={"User-Agent": "Mozilla/5.0"}, timeout=30
        )
    except requests.RequestException as exc:
        # Report the network error and carry on with the remaining papers.
        print(f"❌ Failed: {url} — {exc}")
        continue

    if response.status_code == 200:
        with open(name, "wb") as f:
            f.write(response.content)
        print(f"✅ Saved: {name}")
    else:
        print(f"❌ Failed: {url} — Status code: {response.status_code}")

✅ Saved: metagpt.pdf
✅ Saved: longlora.pdf
✅ Saved: selfrag.pdf


In [7]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex,SummaryIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.tools import FunctionTool, QueryEngineTool
from llama_index.core.vector_stores import MetadataFilters, FilterCondition
from typing import List, Optional

In [8]:
def get_doc_tools(
    file_path: str,
    name: str,
) -> "tuple[FunctionTool, QueryEngineTool]":
    """Build a vector-query tool and a summary tool for one document.

    Args:
        file_path: Path of the document to load and index.
        name: Short identifier of the document. It is embedded in the tool
            names and descriptions so an agent can tell documents apart.

    Returns:
        A ``(vector_query_tool, summary_tool)`` pair.
    """
    # Load the document and split it into chunks shared by both indexes.
    documents = SimpleDirectoryReader(input_files=[file_path]).load_data()
    splitter = SentenceSplitter(chunk_size=1024)
    nodes = splitter.get_nodes_from_documents(documents)
    vector_index = VectorStoreIndex(nodes)

    def vector_query(
        query: str,
        page_numbers: Optional[list[str]] = None,
    ):
        """Answer a specific question over this paper via vector search.

        Always leave page_numbers as None unless there is a specific page you
        want to search for.

        Args:
            query (str): the string query to be embedded
            page_numbers (Optional[list[str]]): filter by set of pages. Leave
                as None to perform a vector search over all pages. Otherwise,
                filter by the set of specified pages.

        Returns:
            The response object produced by the query engine.
        """
        page_numbers = page_numbers or []
        # One filter per requested page, OR-ed together; an empty list means
        # no page restriction.
        metadata_dicts = [
            {"key": "page_label", "value": p} for p in page_numbers
        ]

        query_engine = vector_index.as_query_engine(
            similarity_top_k=2,
            filters=MetadataFilters.from_dicts(
                metadata_dicts,
                condition=FilterCondition.OR,
            ),
        )
        return query_engine.query(query)

    # The description names the actual paper; a hard-coded paper title here
    # would steer the agent to the wrong tool for every other document.
    vector_query_tool = FunctionTool.from_defaults(
        name=f"Vector_tool_{name}",
        fn=vector_query,
        description=(
            f"Use to answer specific questions over the {name} paper. "
            "Always leave page_numbers as None unless there is a specific "
            "page you want to search for."
        ),
    )

    summary_index = SummaryIndex(nodes)
    summary_query_engine = summary_index.as_query_engine(
        response_mode="tree_summarize",
        use_async=True,
    )

    summary_tool = QueryEngineTool.from_defaults(
        name=f"Summary_tool_{name}",
        query_engine=summary_query_engine,
        description=(
            f"Use only if you want to get a holistic summary of {name}. "
            f"Do not use if you have specific questions over {name}."
        ),
    )

    return vector_query_tool, summary_tool

In [9]:
# from pathlib import Path
# print(Path(papers))

In [10]:
#convert each paper into  tool
from pathlib import Path

# Map every paper file name to its [vector tool, summary tool] pair.
paper_to_tools_dict = {}
for paper in papers:
    print(f"Getting tools for paper: {paper}")
    # The file stem (name without ".pdf") is used as the tool-name suffix.
    vector_tool, summary_tool = get_doc_tools(
        paper,
        Path(paper).stem,
    )
    paper_to_tools_dict[paper] = [vector_tool, summary_tool]


Getting tools for paper: metagpt.pdf
Getting tools for paper: longlora.pdf
Getting tools for paper: selfrag.pdf


In [14]:
# Flatten the per-paper tool pairs into one list, keeping the paper order.
initial_tools = []
for paper in papers:
    initial_tools.extend(paper_to_tools_dict[paper])


In [15]:
initial_tools

[<llama_index.core.tools.function_tool.FunctionTool at 0x313a3b1c0>,
 <llama_index.core.tools.query_engine.QueryEngineTool at 0x3144dcdf0>,
 <llama_index.core.tools.function_tool.FunctionTool at 0x314610a30>,
 <llama_index.core.tools.query_engine.QueryEngineTool at 0x3150d9540>,
 <llama_index.core.tools.function_tool.FunctionTool at 0x314593640>,
 <llama_index.core.tools.query_engine.QueryEngineTool at 0x314655330>]

Here we have 3 papers, each with two tools:

1. MetaGPT: a vector tool and a summary tool
2. LongLoRA: a vector tool and a summary tool
3. Self-RAG: a vector tool and a summary tool

In [16]:
from llama_index.llms.openai import OpenAI

# LLM handed to the agent workers created below (passed as `llm=llm`).
llm = OpenAI(model = "gpt-3.5-turbo")

Create agent worker

In [17]:
from llama_index.core.agent import FunctionCallingAgentWorker
from llama_index.core.agent import AgentRunner

# The worker is given every tool in `initial_tools` directly; the runner wraps
# the worker and exposes `query`.
agent_worker = FunctionCallingAgentWorker.from_tools(
    tools=initial_tools,
    llm=llm,
    verbose=True,
)
agent = AgentRunner(agent_worker=agent_worker)

In [18]:
response = agent.query(
    "Tell me about the evaluation dataset used in LongLoRa, "
    "and then tell me about the evaluation results"
)

Added user message to memory: Tell me about the evaluation dataset used in LongLoRa, and then tell me about the evaluation results
=== Calling Function ===
Calling function: Summary_tool_longlora with args: {"input": "evaluation dataset used in LongLoRa"}
=== Function Output ===
The evaluation dataset used in LongLoRa is the PG19 dataset.
=== Calling Function ===
Calling function: Summary_tool_longlora with args: {"input": "evaluation results of LongLoRa"}
=== Function Output ===
The evaluation results of LongLoRA demonstrate comparable or even better performance than other Llama2-based long-context models on benchmarks like LongBench and LEval. Additionally, LongLoRA requires significantly lower computational overhead and fewer training hours compared to other fine-tuning methods like LoRA.
=== LLM Response ===
The evaluation dataset used in LongLoRa is the PG19 dataset. The evaluation results of LongLoRa demonstrate comparable or even better performance than other Llama2-based long-c

In [19]:
response = agent.query(
    "Give me a summary of both Self-rag and longlora"
)

print(str(response))

Added user message to memory: Give me a summary of both Self-rag and longlora
=== Calling Function ===
Calling function: Summary_tool_selfrag with args: {"input": "Self-rag"}
=== Function Output ===
SELF-RAG is a framework designed to improve the quality and factuality of large language models by incorporating on-demand retrieval and self-reflection mechanisms. It utilizes special tokens known as reflection tokens to enable a language model to retrieve, generate, and evaluate text passages and its own outputs. This approach has shown superior performance compared to existing models across various tasks, highlighting its effectiveness in enhancing model accuracy and reliability.
=== Calling Function ===
Calling function: Summary_tool_longlora with args: {"input": "longlora"}
=== Function Output ===
LongLoRA is an efficient fine-tuning approach that extends the context length of large language models by using shifted sparse attention (S2-Attn) during training to approximate the standard 

In [20]:
response = agent.query(
    "How to evaluate RAG, LoRa and MetaGPT"
)

print(str(response))

Added user message to memory: How to evaluate RAG, LoRa and MetaGPT
=== Calling Function ===
Calling function: Summary_tool_selfrag with args: {"input": "How to evaluate RAG"}
=== Function Output ===
To evaluate RAG, one can consider metrics such as accuracy, factuality, fluency, citation precision, and recall. These metrics help assess the model's performance in generating responses that are accurate, supported by evidence, fluent, and properly cited. Additionally, conducting ablation studies to analyze the impact of key components like retrieval, self-reflection, and different inference-time customization settings on the model's overall performance can provide insights into the importance of each component in improving the model's output quality. Another aspect to consider is the relevance, supportiveness, and usefulness of the generated content, ensuring that the output is fully supported by evidence, relevant to the initial instruction and context, and provides a helpful and inform

In [21]:
# Source URLs of the papers; urls[i] is saved under papers[i], so the two
# lists must stay the same length and in the same order.
urls = [
    "https://openreview.net/pdf?id=VtmBAGCN7o",
    "https://openreview.net/pdf?id=6PmJoRfdaK",
    "https://openreview.net/pdf?id=LzPWWPAdY4",
    "https://openreview.net/pdf?id=VTF8yNQM66",
    "https://openreview.net/pdf?id=hSyW5go0v8",
    "https://openreview.net/pdf?id=9WD9KwssyT",
    "https://openreview.net/pdf?id=yV6fD7LYkF",
    "https://openreview.net/pdf?id=hnrB5YHoYu",
    "https://openreview.net/pdf?id=WbWtOYIzIK",
    "https://openreview.net/pdf?id=c5pwL0Soay",
    "https://openreview.net/pdf?id=TpD2aG1h0D",
]

# "selfrag.pdf" was missing at index 4. Without it every later paper was
# saved under the wrong file name and the last URL was silently dropped
# by zip().
papers = [
    "metagpt.pdf",
    "longlora.pdf",
    "loftq.pdf",
    "swebench.pdf",
    "selfrag.pdf",
    "zipformer.pdf",
    "values.pdf",
    "finetune_fair_diffusion.pdf",
    "knowledge_card.pdf",
    "metra.pdf",
    "vr_mcl.pdf",
]

In [23]:
import requests
# Download each paper into the working directory.
# zip() silently drops unmatched entries, so surface a length mismatch.
if len(urls) != len(papers):
    print(
        f"⚠️ {len(urls)} URLs but {len(papers)} file names; "
        "unmatched entries are skipped."
    )

for url, name in zip(urls, papers):
    try:
        # The timeout keeps a stalled connection from hanging the cell.
        response = requests.get(
            url, headers={"User-Agent": "Mozilla/5.0"}, timeout=30
        )
    except requests.RequestException as exc:
        # Report the network error and carry on with the remaining papers.
        print(f"❌ Failed: {url} — {exc}")
        continue

    if response.status_code == 200:
        with open(name, "wb") as f:
            f.write(response.content)
        print(f"✅ Saved: {name}")
    else:
        print(f"❌ Failed: {url} — Status code: {response.status_code}")

KeyboardInterrupt: 

In [None]:
from pathlib import Path

# Rebuild the paper -> [vector tool, summary tool] mapping for the larger set.
paper_to_tools_dict = {}
for paper in papers:
    print(f"Getting tools for paper: {paper}")
    # The file stem (name without ".pdf") is used as the tool-name suffix.
    vector_tool, summary_tool = get_doc_tools(
        paper,
        Path(paper).stem,
    )
    paper_to_tools_dict[paper] = [vector_tool, summary_tool]

1. Index the tools. We use LlamaIndex because it has extensive indexing capabilities over general text documents.

However, these tools are Python objects, so we need to serialize them into a string representation and convert them back again.

This is solved through the object index abstraction in LlamaIndex.


In [None]:
# Flatten the per-paper tool pairs into one list, keeping the paper order.
all_tools = []
for paper in papers:
    all_tools.extend(paper_to_tools_dict[paper])

In [None]:
type(all_tools)

In [None]:
# define an "object" index and retriever over these tools

from llama_index.core import VectorStoreIndex
from llama_index.core.objects import ObjectIndex


# Index the tool objects themselves, backed by a VectorStoreIndex, so that a
# retriever can later be created over the tools.
obj_index = ObjectIndex.from_objects(
    all_tools,
    index_cls=VectorStoreIndex
)

In [None]:
# Retriever over the tool index; similarity_top_k=3 asks for the three best
# matching tools per query.
obj_retriever = obj_index.as_retriever(similarity_top_k=3)

In [None]:
type(obj_index)

Here we need a more advanced agent and tool architecture.

The issue is this: say we try to index all 11 papers, which now means 22 tools (two per paper), or say we try to index 100 papers, 1000 papers or more.

Even though LLM context windows are getting longer, stuffing too many tool selections into the LLM prompt leads to the following issues:

1. The tools may not all fit in the prompt, especially when the number of documents is large and you are modelling each document as a separate tool or a set of tools.

2. Cost and latency will spike because you are increasing the number of tokens in the prompt, and the LLM can also get confused: it may fail to pick the right tool

when the number of choices is too large.

Solution: when a user asks a query, we perform retrieval augmentation, not at the level of text but at the level of tools.

1. We first retrieve a small set of relevant tools, and then feed those tools to the agent's reasoning prompt instead of all the tools. This retrieval process

is similar to the retrieval process used in RAG. At its simplest it can just be a top-k vector search, but you can add any advanced retrieval techniques

you want in order to filter down to the relevant set of results.

Let's see how to perform RAG on tools, as discussed above.

In [None]:
len(initial_tools)

In [None]:
tools = obj_retriever.retrieve(
    "Tell me about the eval dataset used in MetaGPT and SWE-Bench"
)

In [None]:
tools[0].metadata

In [None]:
from llama_index.core.agent import FunctionCallingAgentWorker
from llama_index.core.agent import AgentRunner

# No fixed tool list is passed here: the worker is given `obj_retriever` as
# its tool_retriever instead. The system prompt text is part of what the LLM
# sees, so the "Your are" typo is corrected to "You are"; the layout of the
# string is otherwise unchanged.
agent_worker = FunctionCallingAgentWorker.from_tools(
    tool_retriever = obj_retriever,
    llm=llm,
    system_prompt="""\
        You are an agent designed to answer queries over a set of given papers.
        Please always use the tools provided to answer a question. Do not rely on prior knowledge.\
        """,
    verbose = True
)

agent = AgentRunner(agent_worker)

In [None]:
response = agent.query(
    "Tell me about the evaluation dataset used "
    "in MetaGPT and compare it against SWE-Bench"
)

In [None]:
response = agent.query(
    "Compare and contrast the LoRa papers(LongLoRa, LoftQ). "
    "Analyze the approach in each paper first."
)