In [18]:

from llama_index.core import (
    VectorStoreIndex,
    SimpleKeywordTableIndex,
    SimpleDirectoryReader,
)
from llama_index.core import SummaryIndex
from llama_index.core.schema import IndexNode
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.callbacks import CallbackManager
import nest_asyncio
nest_asyncio.apply()
from llama_parse import LlamaParse
from llama_index.core import SimpleDirectoryReader
import os 
from pathlib import Path
from llama_index.embeddings.llamafile import LlamafileEmbedding
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.tools import FunctionTool
from llama_index.llms.ollama import Ollama
from llama_index.core.agent import ReActAgent
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import load_index_from_storage, StorageContext

In [19]:
# Global LlamaIndex defaults: a local Ollama chat model and a small
# HuggingFace BGE embedding model.
default_llm = Ollama(model="llama3.1:latest", request_timeout=120.0)
Settings.llm = default_llm

default_embedder = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
Settings.embed_model = default_embedder

In [3]:
# LlamaParse client that converts each PDF into markdown documents.
# The API key is read from the environment so that no credential is stored in
# the notebook; export LLAMA_CLOUD_API_KEY before running this cell (a missing
# variable fails immediately with a KeyError instead of a late auth error).
parser = LlamaParse(
    api_key=os.environ["LLAMA_CLOUD_API_KEY"],
    result_type="markdown",
    verbose=True,
)

# A paper's title is its file name without the trailing ".pdf".
# `Path.stem` removes only the final suffix, so a name that contains ".pdf"
# somewhere in the middle is not truncated. The list is sorted because
# os.listdir returns entries in arbitrary order.
paper_titles = sorted(
    Path(file_name).stem
    for file_name in os.listdir('./data/pdf')
    if file_name.endswith('.pdf')
)

# Maps paper title -> whatever parser.load_data returns for that PDF.
# The name `city_docs` is kept because later cells read it.
city_docs = {}
for paper_title in paper_titles:
    city_docs[paper_title] = parser.load_data(f"./data/pdf/{paper_title}.pdf")

Started parsing the file under job_id fb751ad6-26f6-4db9-b14b-956e5921366c
......Started parsing the file under job_id 88c5b2e8-e7dd-4139-8ea9-3ab69f2631d6
..Started parsing the file under job_id 033ea0f3-2d41-4d33-bccf-c36aa806f74e
.Started parsing the file under job_id 237f0354-faaa-495d-85f3-3f8c6cc37449
.Started parsing the file under job_id 0ee474aa-50cf-41bb-9de7-855431f4e7b3
.Started parsing the file under job_id 5c482d0a-9c2d-465e-9c53-e74e1f23ecc5
...Started parsing the file under job_id 946f8c7c-1eb2-417b-be7b-ee9830404351
..Started parsing the file under job_id b004e957-dcbf-4524-93e2-e848d7d7ebf7
Started parsing the file under job_id 6ddfe698-4dc3-4316-8fa3-183d40cf5288
..Started parsing the file under job_id 5f9fb45c-4d95-49a7-9b55-e739eae9b598
....

In [28]:
# One vector index per paper, keyed by paper title.
# Embedding a whole paper is expensive, so an index that was already persisted
# under ./storage/<title> is loaded instead of being rebuilt. Delete that
# directory to force a fresh build after the source PDF changes.
paper_index = {}
for paper_title in paper_titles:
    persist_dir = f"./storage/{paper_title}"

    if os.path.exists(persist_dir):
        # reuse the index persisted by an earlier run
        paper_index[paper_title] = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=persist_dir)
        )
    else:
        # build index
        paper_index[paper_title] = VectorStoreIndex.from_documents(city_docs[paper_title])

        # persist index
        paper_index[paper_title].storage_context.persist(persist_dir=persist_dir)

In [21]:
node_parser = SentenceSplitter()

# Per-paper ReAct agents and plain vector query engines, keyed by paper title.
agents = {}
query_engines = {}

# Chunks of every paper, collected for the single flat baseline index.
all_nodes = []

# The agent LLM does not depend on the paper, so one client is created here
# and shared by all agents instead of being re-created on every iteration.
function_llm = Ollama(model="llama3:latest", request_timeout=120.0)

for paper_title in paper_titles:
    nodes = node_parser.get_nodes_from_documents(city_docs[paper_title])
    all_nodes.extend(nodes)

    # Vector index: build and persist on first run, load from disk afterwards.
    persist_dir = f"./data/{paper_title}"
    if not os.path.exists(persist_dir):
        vector_index = VectorStoreIndex(nodes)
        vector_index.storage_context.persist(persist_dir=persist_dir)
    else:
        vector_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=persist_dir),
        )

    # Summary index is rebuilt in memory every run (it is never persisted).
    summary_index = SummaryIndex(nodes)

    # define query engines
    vector_query_engine = vector_index.as_query_engine(llm=Settings.llm)
    summary_query_engine = summary_index.as_query_engine(llm=Settings.llm)

    # define tools: targeted lookup (vector) and whole-paper summary
    query_engine_tools = [
        QueryEngineTool(
            query_engine=vector_query_engine,
            metadata=ToolMetadata(
                name="vector_tool",
                description=(
                    f"Useful for answering questions about the academic paper titled '{paper_title}'. "
                    "This tool can provide information on various aspects of the paper, including but not limited to:"
                    "\n- The main research question or hypothesis"
                    "\n- Methodology and experimental design"
                    "\n- Key findings and results"
                    "\n- Theoretical framework and background"
                    "\n- Implications and conclusions"
                    "\n- Related work and literature review"
                    "\n- Limitations and future research directions"
                    "\nUse a specific question about the paper as input to this tool."
                ),
            ),
        ),
        QueryEngineTool(
            query_engine=summary_query_engine,
            metadata=ToolMetadata(
                name="summary_tool",
                description=(
                    "Useful for any requests that require a holistic summary"
                    f" of EVERYTHING about {paper_title}. For questions about"
                    " more specific sections, please use the vector_tool."
                ),
            ),
        ),
    ]

    # build agent
    # The per-paper instructions are passed as `context`, which the ReAct
    # prompt formatter injects into the system header. They were previously
    # passed as `system_prompt`, which is not a named parameter of
    # ReActAgent.from_tools and appears to be silently absorbed by **kwargs.
    # NOTE(review): confirm against the installed llama-index version.
    agent = ReActAgent.from_tools(
        query_engine_tools,
        max_iterations=100,
        llm=function_llm,
        verbose=True,
        context=f"""\
You are a specialized agent designed to answer queries about {paper_title}.
You must ALWAYS use at least one of the tools provided when answering a question; do NOT rely on prior knowledge.\
""",
    )

    agents[paper_title] = agent
    query_engines[paper_title] = vector_index.as_query_engine(
        similarity_top_k=2
    )


In [22]:
def _safe_tool_name(title: str) -> str:
    """Turn a free-form paper title into an identifier-like tool name.

    Every character that is not an ASCII letter or digit is treated as a
    separator, and the remaining words are joined with single underscores
    behind a ``tool_`` prefix, e.g. ``"ReDel: A Toolkit"`` becomes
    ``"tool_ReDel_A_Toolkit"``.

    Raw titles contain spaces and punctuation. The ReAct trace recorded in
    this notebook shows the LLM cutting such names short (``tool_ReDel``,
    ``tool_A``) and receiving "No such tool named ..." errors.
    """
    words = "".join(
        ch if (ch.isascii() and ch.isalnum()) else " " for ch in title
    ).split()
    return "tool_" + "_".join(words)


# define tool for each document agent
all_tools = []
used_tool_names = set()
for paper_title in paper_titles:
    paper_summary = (
        f"This content contains paper articles about {paper_title}. Use"
        f" this tool if you want to answer any questions about {paper_title}.\n"
    )

    tool_name = _safe_tool_name(paper_title)
    if tool_name in used_tool_names:
        # Two titles that differ only in punctuation would sanitize to the
        # same name; append the position to keep tool names distinct.
        tool_name = f"{tool_name}_{len(all_tools)}"
    used_tool_names.add(tool_name)

    doc_tool = QueryEngineTool(
        query_engine=agents[paper_title],
        metadata=ToolMetadata(
            name=tool_name,
            description=paper_summary,
        ),
    )
    all_tools.append(doc_tool)

In [23]:
# define an "object" index and retriever over these tools
from llama_index.core import VectorStoreIndex
from llama_index.core.objects import ObjectIndex

# Index the per-paper tools themselves so that the top-level agent retrieves
# only the few most relevant tools for each question.
obj_index = ObjectIndex.from_objects(
    all_tools,
    index_cls=VectorStoreIndex,
)

# Top-level agent: up to 3 retrieved tools per step. No `llm` is passed here.
# The instructions are passed as `context`, which the ReAct prompt formatter
# injects into the system header. They were previously passed as
# `system_prompt`, which is not a named parameter of ReActAgent.from_tools and
# appears to be silently absorbed by **kwargs.
# NOTE(review): confirm against the installed llama-index version.
top_agent = ReActAgent.from_tools(
    max_iterations=100,
    tool_retriever=obj_index.as_retriever(similarity_top_k=3),
    context=""" \
You are an agent designed to answer queries about a set of given paper.
Please always use the tools provided to answer a question. Do not rely on prior knowledge.\

""",
    verbose=True,
)


In [24]:
# Send one paper-level question through the multi-document top agent.
response = top_agent.query(
    "Tell me about ReDel: A Toolkit for LLM-Powered Recursive Multi-Agent Systems"
)

> Running step fef80232-14d6-49a2-b4d6-1f0c056bc84d. Step input: Tell me about ReDel: A Toolkit for LLM-Powered Recursive Multi-Agent Systems
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: tool_ReDel
Action Input: {'input': 'ReDel: A Toolkit for LLM-Powered Recursive Multi-Agent Systems', 'num_beams': 5}
[0m[1;3;34mObservation: Error: No such tool named `tool_ReDel`.
[0m> Running step d16350ac-b131-4792-b4a1-19c7d65ee9cf. Step input: None
[1;3;38;5;200mThought: The user pointed out that there is no such tool as `tool_ReDel`. I need to use a different tool.
Action: tool_A
Action Input: {'input': 'ReDel: A Toolkit for LLM-Powered Recursive Multi-Agent Systems', 'num_beams': 5}
[0m[1;3;34mObservation: Error: No such tool named `tool_A`.
[0m> Running step 1f2c156b-b5c5-4b5f-bd51-91afe1659555. Step input: None
[1;3;38;5;200mThought: The user pointed out that there is no such tool as `tool_A`. I need 

In [25]:

# Baseline for comparison: a single flat vector index over the chunks of all
# papers, queried with the 4 most similar chunks.
base_index = VectorStoreIndex(nodes=all_nodes)
base_query_engine = base_index.as_query_engine(
    similarity_top_k=4,
)

In [26]:
# Ask the baseline engine the same question that was sent to the top agent.
response = base_query_engine.query(
    "Tell me about ReDel: A Toolkit for LLM-Powered Recursive Multi-Agent Systems"
)

In [27]:
# Display the `response` attribute of the baseline result as the cell output.
response.response

"ReDel is a toolkit for working with recursive multi-agent systems that supports custom tool-use, delegation schemes, event-based logging, and interactive replay in an easy-to-use web interface. It allows developers to quickly build, iterate on, and run experiments involving dynamic multi-agent systems. The toolkit is fully open-source and free to use under the MIT license.\n\nReDel provides a modular interface to create tools for agents to use, an event framework to instrument experiments for later analysis, and a web interface to interact with and explore developer-defined systems. It also offers best-in-class support for system visualization and modern LLMs with tool usage.\n\nThe toolkit has been used to demonstrate recursive multi-agent systems' performance on three diverse benchmarks: FanOutQA, TravelPlanner, and WebArena. The results show that ReDel's recursive delegation system significantly outperforms its corresponding single-agent baseline across all benchmarks, even surpass

In [3]:
from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage,
)
from llama_index.core.tools import QueryEngineTool, ToolMetadata

# NOTE: this re-assigns the global Settings for every cell that runs afterwards.
Settings.llm = Ollama(model="llama3:latest", request_timeout=120.0)
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# First try to load both persisted indexes; if either load fails, both are
# rebuilt from the PDFs below.
try:
    storage_context = StorageContext.from_defaults(
        persist_dir="./storage/lyft"
    )
    lyft_index = load_index_from_storage(storage_context)

    storage_context = StorageContext.from_defaults(
        persist_dir="./storage/uber"
    )
    uber_index = load_index_from_storage(storage_context)

    index_loaded = True
except Exception as load_error:
    # `except Exception` rather than a bare `except`, so that an interrupt
    # (KeyboardInterrupt / SystemExit) is not mistaken for a missing index.
    # The reason is printed so a real failure is not hidden by the rebuild.
    print(f"Could not load persisted indexes ({load_error!r}); rebuilding.")
    index_loaded = False

if not index_loaded:
    # load data
    lyft_docs = SimpleDirectoryReader(
        input_files=["./data/10k/lyft_2021.pdf"]
    ).load_data()
    uber_docs = SimpleDirectoryReader(
        input_files=["./data/10k/uber_2021.pdf"]
    ).load_data()

    # build index
    lyft_index = VectorStoreIndex.from_documents(lyft_docs)
    uber_index = VectorStoreIndex.from_documents(uber_docs)

    # persist index
    lyft_index.storage_context.persist(persist_dir="./storage/lyft")
    uber_index.storage_context.persist(persist_dir="./storage/uber")

invalid pdf header: b'\xfd7zXZ'
incorrect startxref pointer(1)
invalid pdf header: b'\xfd7zXZ'
incorrect startxref pointer(1)


In [20]:
# One query engine per 10-K filing, each retrieving the 3 most similar chunks.
lyft_engine = lyft_index.as_query_engine(similarity_top_k=3)
uber_engine = uber_index.as_query_engine(similarity_top_k=3)

# Tool metadata is spelled out first so the tool list below stays compact.
lyft_metadata = ToolMetadata(
    name="lyft_10k",
    description=(
        "Provides information about Lyft financials for year 2021. "
        "Use a detailed plain text question as input to the tool."
    ),
)
uber_metadata = ToolMetadata(
    name="uber_10k",
    description=(
        "Provides information about Uber financials for year 2021. "
        "Use a detailed plain text question as input to the tool."
    ),
)

# Tools handed to the agent: Lyft first, then Uber.
query_engine_tools = [
    QueryEngineTool(query_engine=lyft_engine, metadata=lyft_metadata),
    QueryEngineTool(query_engine=uber_engine, metadata=uber_metadata),
]

In [21]:

from llama_index.core.agent import ReActAgent

# ReAct agent over the Lyft/Uber 10-K tools, driven by the global LLM.
# No extra `context` string is supplied.
# NOTE: this rebinds the name `agent` that the per-paper loop also assigns.
agent = ReActAgent.from_tools(
    tools=query_engine_tools,
    llm=Settings.llm,
    verbose=True,
)

In [22]:

# Single-turn chat with the 10-K agent; print() renders the reply via str().
response = agent.chat(
    "What was Lyft's revenue growth in 2021?"
)
print(response)

> Running step e1b5553b-b1df-4aeb-a358-d8bba1264489. Step input: What was Lyft's revenue growth in 2021?
[1;3;38;5;200mThought: The current language of the user is English. I need to use a tool to help me answer the question.
Action: lyft_10k
Action Input: {'input': "Lyft's revenue growth in 2021"}
[0m[1;3;34mObservation: Lyft's revenue reached an all-time high in the three months ended December 31, 2021, increasing compared to the previous quarter. This was driven by an increase in ride frequency and a shift toward higher revenue rides, as well as revenues from licensing and data access agreements.
[0m> Running step 116365d0-e809-4e65-aee9-4b2b14b9b925. Step input: None
[1;3;38;5;200mThought: I can answer without using any more tools. I'll use the user's language to answer
Answer: Lyft experienced significant revenue growth in 2021, specifically in the last quarter of the year, due to increased ride frequency and a shift towards higher-revenue rides, as well as revenues from lice