# LlamaIndex Multi-Document Agent

In [1]:
import nest_asyncio

# Presumably needed because the notebook kernel already runs an event loop and
# the summary query engine defined below is created with use_async=True;
# nest_asyncio.apply() is expected to allow that nested use — TODO confirm.
nest_asyncio.apply()

In [3]:
from typing import List, Optional, Tuple

from llama_index.core.indices import SummaryIndex, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.readers import SimpleDirectoryReader
from llama_index.core.tools import FunctionTool
from llama_index.core.tools import QueryEngineTool
from llama_index.core.vector_stores import MetadataFilters, FilterCondition


def get_doc_tools(
    file_path: str,
    name: str,
) -> "Tuple[FunctionTool, QueryEngineTool]":
    """Build a vector-search tool and a summary tool for a single document.

    The document is loaded with ``SimpleDirectoryReader``, split into nodes with
    ``SentenceSplitter(chunk_size=1024)``, and the same nodes back both a
    ``VectorStoreIndex`` and a ``SummaryIndex``.

    Args:
        file_path: Path of the document to load.
        name: Suffix used in the tool names (``vector_tool_{name}`` and
            ``summary_tool_{name}``) and in the summary tool's description.

    Returns:
        A ``(vector_query_tool, summary_tool)`` pair. (The return annotation is
        quoted so it is not evaluated when the function is defined.)
    """

    # Load the document and split it into nodes shared by both indices.
    documents = SimpleDirectoryReader(input_files=[file_path]).load_data()
    splitter = SentenceSplitter(chunk_size=1024)
    nodes = splitter.get_nodes_from_documents(documents)
    vector_index = VectorStoreIndex(nodes)

    # NOTE(review): no explicit description is passed to FunctionTool below, so
    # this function's signature and docstring presumably become the tool
    # description shown to the LLM. Treat both as runtime text and do not edit
    # them casually.
    def vector_query(query: str, page_numbers: Optional[List[str]] = None) -> str:
        """Use to answer questions over a given paper.

        Useful if you have specific questions over the paper.
        Always leave page_numbers as None UNLESS there is a specific page you want to search for.

        Args:
            query (str): the string query to be embedded.
            page_numbers (Optional[List[str]]): Filter by set of pages. Leave as NONE
                if we want to perform a vector search
                over all pages. Otherwise, filter by the set of specified pages.

        """

        # None (or an empty list) means "no page restriction": the filter list
        # built below is then empty.
        page_numbers = page_numbers or []
        metadata_dicts = [{"key": "page_label", "value": p} for p in page_numbers]

        # OR-combine the page filters so a node on any requested page matches.
        query_engine = vector_index.as_query_engine(
            similarity_top_k=2,
            filters=MetadataFilters.from_dicts(
                metadata_dicts, condition=FilterCondition.OR
            ),
        )
        response = query_engine.query(query)
        # The query engine's response object is returned as-is (not cast to str).
        return response

    vector_query_tool = FunctionTool.from_defaults(
        name=f"vector_tool_{name}", fn=vector_query
    )

    # Summary tool: tree-summarize over all nodes of the document.
    summary_index = SummaryIndex(nodes)
    summary_query_engine = summary_index.as_query_engine(
        response_mode="tree_summarize",
        use_async=True,
    )
    summary_tool = QueryEngineTool.from_defaults(
        name=f"summary_tool_{name}",
        query_engine=summary_query_engine,
        description=(f"Useful for summarization questions related to {name}"),
    )

    return vector_query_tool, summary_tool

In [4]:
import os

from llama_index.core.settings import Settings
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
from llama_index.llms.azure_openai import AzureOpenAI

# Azure credentials come from the environment; either value is None when unset.
AZURE_API_ENDPOINT = os.getenv("AZURE_API_ENDPOINT")
AZURE_API_KEY = os.getenv("AZURE_API_KEY")

# Connection arguments shared by the chat model and the embedding model.
azure_connection = {
    "azure_endpoint": AZURE_API_ENDPOINT,
    "api_key": AZURE_API_KEY,
    "api_version": "2024-02-01",
}

llm = AzureOpenAI(
    model="gpt-4-32k",
    deployment_name="gpt-4-32k",
    temperature=0,
    **azure_connection,
)
embed_model = AzureOpenAIEmbedding(
    model="text-embedding-3-small",
    deployment_name="text-embedding-3-small",
    **azure_connection,
)

# Register both models on the shared Settings object.
Settings.llm = llm
Settings.embed_model = embed_model

In [7]:
import subprocess
from pathlib import Path

urls = [
    "https://openreview.net/pdf?id=VtmBAGCN7o",
    "https://openreview.net/pdf?id=6PmJoRfdaK",
    "https://openreview.net/pdf?id=hSyW5go0v8",
]

papers = [
    "metagpt.pdf",
    "longlora.pdf",
    "selfrag.pdf",
]

# zip() would silently drop entries if the two lists drifted apart.
assert len(urls) == len(papers), "urls and papers must have the same length"

# Download papers.
# An argument list (no shell) keeps the `?` and `=` in the URLs from being
# interpreted by a shell, and check=True raises CalledProcessError when wget
# fails instead of carrying on with a missing or empty PDF.
for url, paper in zip(urls, papers):
    subprocess.run(["wget", url, "-O", paper], check=True)

# Map each PDF file name to its [vector_tool, summary_tool] pair; tools are
# named after the file stem (e.g. "metagpt.pdf" -> "metagpt").
paper_to_tools_dict = {}
for paper in papers:
    print(f"Getting tools for paper: {paper}")
    vector_tool, summary_tool = get_doc_tools(paper, Path(paper).stem)
    paper_to_tools_dict[paper] = [vector_tool, summary_tool]

--2024-06-22 10:18:50--  https://openreview.net/pdf?id=VtmBAGCN7o
Loaded CA certificate '/etc/ssl/certs/ca-certificates.crt'
Resolving openreview.net (openreview.net)... 35.184.86.251
Connecting to openreview.net (openreview.net)|35.184.86.251|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 16911937 (16M) [application/pdf]
Saving to: ‘metagpt.pdf’

     0K .......... .......... .......... .......... ..........  0%  904K 18s
    50K .......... .......... .......... .......... ..........  0% 1.08M 16s
   100K .......... .......... .......... .......... ..........  0% 4.66M 12s
   150K .......... .......... .......... .......... ..........  1% 1.40M 12s
   200K .......... .......... .......... .......... ..........  1% 3.94M 10s
   250K .......... .......... .......... .......... ..........  1% 1.38M 10s
   300K .......... .......... .......... .......... ..........  2% 1.33M 11s
   350K .......... .......... .......... .......... ..........  2% 2.01M 10s
   400K

Getting tools for paper: metagpt.pdf
Getting tools for paper: longlora.pdf
Getting tools for paper: selfrag.pdf


In [8]:
# Flatten the per-paper [vector_tool, summary_tool] pairs into a single list,
# keeping the order of `papers`.
initial_tools = []
for paper in papers:
    initial_tools.extend(paper_to_tools_dict[paper])

In [9]:
# Sanity check: every paper contributes two tools (vector + summary).
len(initial_tools)

6

In [10]:
from llama_index.core.agent import FunctionCallingAgentWorker, AgentRunner

# Give the agent every tool in `initial_tools` up front. verbose=True makes the
# worker print each function call and its output (see the traces under the
# query cells that follow).
agent_worker = FunctionCallingAgentWorker.from_tools(
    initial_tools,
    llm=llm,
    verbose=True,
)
agent = AgentRunner(agent_worker)

In [12]:
# Two-part question about one paper. In the recorded trace the agent issues two
# separate vector_tool_longlora calls, one per part. The response is not
# printed here; the verbose trace shows it.
response = agent.query(
    "Tell me about the evaluation dataset used in LongLoRA, "
    "and then tell me about the evaluation results"
)

Added user message to memory: Tell me about the evaluation dataset used in LongLoRA, and then tell me about the evaluation results
=== Calling Function ===
Calling function: vector_tool_longlora with args: {"query": "evaluation dataset used in LongLoRA"}
=== Function Output ===
The evaluation dataset used in LongLoRA is the proof-pile test set.
=== Calling Function ===
Calling function: vector_tool_longlora with args: {"query": "evaluation results of LongLoRA"}
=== Function Output ===
LongLoRA has demonstrated strong empirical results on various tasks on Llama2 models from 7B/13B to 70B. It extends Llama2 7B from 4k context to 100k, or Llama2 70B to 32k on a single 8 ×A100 machine. It also closes the accuracy gap between conventional LoRA and full fine-tuning, while still maintaining up to 1.8 ×lower memory cost than full fine-tuning. Furthermore, LongLoRA improves the training speed of LoRA by up to 1.8 ×with S2-Attn.
=== LLM Response ===
LongLoRA was evaluated using the proof-pile te

In [11]:
# Cross-paper question. In the recorded trace the agent calls one summary tool
# per paper (summary_tool_selfrag, then summary_tool_longlora).
response = agent.query("Give me a summary of both Self-RAG and LongLoRA")
print(str(response))

Added user message to memory: Give me a summary of both Self-RAG and LongLoRA
=== Calling Function ===
Calling function: summary_tool_selfrag with args: {"input": "selfrag"}
=== Function Output ===
Self-Reflective Retrieval-Augmented Generation (SELF-RAG) is a framework designed to enhance the quality and factuality of large language models (LLMs) through retrieval and self-reflection. It trains a single arbitrary LLM to adaptively retrieve passages on-demand, generate, and reflect on retrieved passages and its own generations using special tokens, called reflection tokens. This makes the LLM controllable during the inference phase, enabling it to tailor its behavior to diverse task requirements. Experiments have shown that SELF-RAG significantly outperforms state-of-the-art LLMs and retrieval-augmented models on a diverse set of tasks.
=== Calling Function ===
Calling function: summary_tool_longlora with args: {"input": "longlora"}
=== Function Output ===
LongLoRA is an efficient fine

## Set up an agent over 11 papers

In [13]:
import subprocess

urls = [
    "https://openreview.net/pdf?id=VtmBAGCN7o",
    "https://openreview.net/pdf?id=6PmJoRfdaK",
    "https://openreview.net/pdf?id=LzPWWPAdY4",
    "https://openreview.net/pdf?id=VTF8yNQM66",
    "https://openreview.net/pdf?id=hSyW5go0v8",
    "https://openreview.net/pdf?id=9WD9KwssyT",
    "https://openreview.net/pdf?id=yV6fD7LYkF",
    "https://openreview.net/pdf?id=hnrB5YHoYu",
    "https://openreview.net/pdf?id=WbWtOYIzIK",
    "https://openreview.net/pdf?id=c5pwL0Soay",
    "https://openreview.net/pdf?id=TpD2aG1h0D",
]

papers = [
    "metagpt.pdf",
    "longlora.pdf",
    "loftq.pdf",
    "swebench.pdf",
    "selfrag.pdf",
    "zipformer.pdf",
    "values.pdf",
    "finetune_fair_diffusion.pdf",
    "knowledge_card.pdf",
    "metra.pdf",
    "vr_mcl.pdf",
]

# zip() would silently drop entries if the two lists drifted apart.
assert len(urls) == len(papers), "urls and papers must have the same length"

# An argument list (no shell) keeps the `?` and `=` in the URLs from being
# interpreted by a shell, and check=True raises CalledProcessError when wget
# fails instead of carrying on with a missing or empty PDF.
for url, paper in zip(urls, papers):
    subprocess.run(["wget", url, "-O", paper], check=True)

--2024-06-22 14:48:34--  https://openreview.net/pdf?id=VtmBAGCN7o
Loaded CA certificate '/etc/ssl/certs/ca-certificates.crt'
Resolving openreview.net (openreview.net)... 35.184.86.251
Connecting to openreview.net (openreview.net)|35.184.86.251|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 16911937 (16M) [application/pdf]
Saving to: ‘metagpt.pdf’

     0K .......... .......... .......... .......... ..........  0%  969K 17s
    50K .......... .......... .......... .......... ..........  0% 1.02M 16s
   100K .......... .......... .......... .......... ..........  0% 1022K 16s
   150K .......... .......... .......... .......... ..........  1% 12.9M 12s
   200K .......... .......... .......... .......... ..........  1% 36.5M 10s
   250K .......... .......... .......... .......... ..........  1% 27.1M 8s
   300K .......... .......... .......... .......... ..........  2% 1.18M 9s
   350K .......... .......... .......... .......... ..........  2% 9.36M 8s
   400K ..

In [14]:
from pathlib import Path

# Rebuild the mapping for the current `papers` list; this rebinds the
# `paper_to_tools_dict` name used earlier in the notebook. Each value is
# [vector_tool, summary_tool], with tools named after the file stem
# (e.g. "metagpt.pdf" -> "metagpt").
paper_to_tools_dict = {}
for paper in papers:
    print(f"Getting tools for paper: {paper}")
    vector_tool, summary_tool = get_doc_tools(paper, Path(paper).stem)
    paper_to_tools_dict[paper] = [vector_tool, summary_tool]

Getting tools for paper: metagpt.pdf
Getting tools for paper: longlora.pdf
Getting tools for paper: loftq.pdf
Getting tools for paper: swebench.pdf
Getting tools for paper: selfrag.pdf
Getting tools for paper: zipformer.pdf
Getting tools for paper: values.pdf
Getting tools for paper: finetune_fair_diffusion.pdf
Getting tools for paper: knowledge_card.pdf
Getting tools for paper: metra.pdf
Getting tools for paper: vr_mcl.pdf


In [15]:
# Flatten the per-paper [vector_tool, summary_tool] pairs into a single list,
# keeping the order of `papers`.
all_tools = []
for paper in papers:
    all_tools.extend(paper_to_tools_dict[paper])

In [16]:
# define an "object" index and retriever over these tools
# Indexing the tool objects themselves lets a query pull back a relevant subset
# of tools (see the retriever cells below) instead of passing all of
# `all_tools` to the agent.
from llama_index.core import VectorStoreIndex
from llama_index.core.objects import ObjectIndex

obj_index = ObjectIndex.from_objects(
    all_tools,
    index_cls=VectorStoreIndex,
)

In [17]:
# Retriever over the tool index, limited to the top 3 matches per query.
obj_retriever = obj_index.as_retriever(similarity_top_k=3)

In [18]:
# Try the retriever directly on a sample question to see which tools it picks.
tools = obj_retriever.retrieve(
    "Tell me about the eval dataset used in MetaGPT and SWE-Bench"
)

In [25]:
# Print each retrieved tool's metadata (name, description, schema).
for tool in tools:
    print(tool.metadata)


ToolMetadata(description='Useful for summarization questions related to metagpt', name='summary_tool_metagpt', fn_schema=<class 'llama_index.core.tools.types.DefaultToolFnSchema'>, return_direct=False)
ToolMetadata(description='Useful for summarization questions related to swebench', name='summary_tool_swebench', fn_schema=<class 'llama_index.core.tools.types.DefaultToolFnSchema'>, return_direct=False)
ToolMetadata(description='Useful for summarization questions related to metra', name='summary_tool_metra', fn_schema=<class 'llama_index.core.tools.types.DefaultToolFnSchema'>, return_direct=False)


In [22]:
from llama_index.core.agent import FunctionCallingAgentWorker
from llama_index.core.agent import AgentRunner

# This agent receives a tool retriever rather than a fixed tool list, and
# rebinds the `agent_worker` / `agent` names created earlier in the notebook.
# NOTE(review): the system prompt literal begins with a space (the opening
# quotes are followed by a space and a line continuation) and ends with a
# newline. It is left untouched because it is runtime text sent to the LLM.
agent_worker = FunctionCallingAgentWorker.from_tools(
    tool_retriever=obj_retriever,
    llm=llm,
    system_prompt=""" \
You are an agent designed to answer queries over a set of given papers.
Please always use the tools provided to answer a question. Do not rely on prior knowledge.\

""",
    verbose=True,
)
agent = AgentRunner(agent_worker)

In [26]:
# Question spanning two papers. In the recorded trace only
# vector_tool_swebench was called, and the final answer opens with an apology
# about an error retrieving information.
response = agent.query(
    "Tell me about the evaluation dataset used "
    "in MetaGPT and compare it against SWE-Bench"
)
print(str(response))

Added user message to memory: Tell me about the evaluation dataset used in MetaGPT and compare it against SWE-Bench
=== Calling Function ===
Calling function: vector_tool_swebench with args: {"query": "evaluation dataset"}
=== Function Output ===
The evaluation dataset consists of 2,294 task instances. The performance of various models on this dataset is measured in terms of how many patches were generated, applied successfully, and required a post-generation fix to apply successfully for each model and retrieval setting combination. Some of the models evaluated include ChatGPT-3.5, Claude 2, GPT-4, and SWE-Llama 13b and 7b. The evaluation dataset is characterized by a variety of tags associated with the tasks, reflecting a diversity of task types beyond just fixing bugs. The GPT-4 BM25 27k and "Oracle" settings were run on a 25% subset of the evaluation dataset.
=== LLM Response ===
I apologize for the confusion, but it seems there was an error in retrieving the information about the 

In [27]:
# Comparison question. In the recorded trace the agent calls
# summary_tool_longlora and summary_tool_loftq with the input "approach".
# The response is not printed here; the verbose trace shows it.
response = agent.query(
    "Compare and contrast the LoRA papers (LongLoRA, LoftQ). "
    "Analyze the approach in each paper first. "
)

Added user message to memory: Compare and contrast the LoRA papers (LongLoRA, LoftQ). Analyze the approach in each paper first. 
=== Calling Function ===
Calling function: summary_tool_longlora with args: {"input": "approach"}
=== Function Output ===
The approach discussed in the context is LongLoRA, an efficient fine-tuning method that extends the context sizes of pre-trained large language models (LLMs) with limited computation cost. It introduces Shifted Sparse Attention (S2-Attn) during fine-tuning, which effectively enables context extension and leads to computation saving. It also revisits the parameter-efficient fine-tuning regime for context expansion, combining an improved version of Low-Rank Adaptation (LoRA) with S2-Attn. This approach demonstrates strong empirical results on various tasks and is compatible with most existing techniques.
=== Calling Function ===
Calling function: summary_tool_loftq with args: {"input": "approach"}
=== Function Output ===
The approach discuss