In [2]:
# Fetch the OpenAI API key through the `helper` module and keep it in a
# notebook-level constant.
# NOTE(review): presumably the helper also makes the key available to the
# OpenAI clients configured later (e.g. via the environment) — confirm.
from helper import get_openai_api_key
OPENAI_API_KEY = get_openai_api_key()


In [3]:
# Apply nest_asyncio before any query engine is built.
# NOTE(review): presumably needed so async calls (see use_async=True on the
# summary query engine) can run inside the notebook's own event loop — confirm.
import nest_asyncio
nest_asyncio.apply()

In [4]:
# Download step, left disabled: uncomment to fetch the paper and save it as
# metagpt.pdf, the file the "Load Data" cell reads.
#!wget "https://openreview.net/pdf?id=VtmBAGCN7o" -O metagpt.pdf

--2024-05-11 04:14:16--  https://openreview.net/pdf?id=VtmBAGCN7o
Resolving openreview.net (openreview.net)... 35.184.86.251
Connecting to openreview.net (openreview.net)|35.184.86.251|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 16911937 (16M) [application/pdf]
Saving to: ‘metagpt.pdf’


2024-05-11 04:14:18 (22.4 MB/s) - ‘metagpt.pdf’ saved [16911937/16911937]



## Load Data 

In [5]:
from llama_index.core import SimpleDirectoryReader

# Read metagpt.pdf into `documents` for the splitting/indexing steps below.
documents = SimpleDirectoryReader(
    input_files=["metagpt.pdf"],
).load_data()

## Split documents into nodes, then define LLM and embedding model

In [6]:
from llama_index.core.node_parser import SentenceSplitter

# Split the loaded documents into nodes using a chunk_size of 1024.
splitter = SentenceSplitter(chunk_size=1024)
nodes = splitter.get_nodes_from_documents(documents)

In [7]:
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# Register the default LLM and embedding model on the global Settings object.
# The key loaded into OPENAI_API_KEY at the top of the notebook is passed
# explicitly so these clients do not silently depend on an OPENAI_API_KEY
# environment variable having been exported elsewhere.
# NOTE(review): per the llama_index docs, api_key=None falls back to the
# environment, so this stays compatible if the helper returns None — confirm.
Settings.llm = OpenAI(model="gpt-3.5-turbo", api_key=OPENAI_API_KEY)
Settings.embed_model = OpenAIEmbedding(
    model="text-embedding-ada-002",
    api_key=OPENAI_API_KEY,
)

## Define Summary and vector index on the same data

In [8]:
from llama_index.core import SummaryIndex, VectorStoreIndex

# Build both index types from the same list of nodes.
summary_index = SummaryIndex(nodes=nodes)
vector_index = VectorStoreIndex(nodes=nodes)


## Define Query engines and set metadata

In [9]:
# Summary engine: "tree_summarize" response mode with async enabled.
summary_query_engine = summary_index.as_query_engine(
    response_mode="tree_summarize", use_async=True
)

# Vector engine: created with the default query-engine settings.
vector_query_engine = vector_index.as_query_engine()


In [10]:
from llama_index.core.tools import QueryEngineTool

# Wrap each query engine as a tool, pairing it with a natural-language
# description of the kind of question it should handle.
summary_tool = QueryEngineTool.from_defaults(
    query_engine=summary_query_engine,
    description="Useful for summarization questions related to MetaGPT",
)
vector_tool = QueryEngineTool.from_defaults(
    query_engine=vector_query_engine,
    description="Useful for retrieving specific context from the MetaGPT paper.",
)

## Define Router query engine

In [12]:
from llama_index.core.query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector

# Router over the summary and vector tools, using an LLM single-selector;
# verbose=True so the selected engine is printed with each query.
query_engine = RouterQueryEngine(
    selector=LLMSingleSelector.from_defaults(),
    query_engine_tools=[summary_tool, vector_tool],
    verbose=True,
)

In [13]:
# Ask a summarization-style question through the router.
response = query_engine.query(
    "What is the summary of the document?"
)
print(response)

[1;3;38;5;200mSelecting query engine 0: Useful for summarization questions related to MetaGPT.
[0mThe document introduces MetaGPT, a meta-programming framework for multi-agent collaboration based on Large Language Models (LLMs). It emphasizes role specialization, workflow management, and efficient sharing mechanisms to enhance problem-solving capabilities in software development projects. MetaGPT incorporates Standardized Operating Procedures (SOPs) to streamline workflows, improve code generation quality, and achieve state-of-the-art performance in evaluations. It models a group of agents as a simulated software company, assigning specialized roles and utilizing structured communication interfaces. The document also discusses the performance evaluation of GPT models, ethical concerns, and the benefits of MetaGPT in enabling natural language programming and ensuring transparency, accountability, privacy, and data security.


In [17]:
# Show how many source nodes are attached to the most recent response.
print(len(response.source_nodes))

34


In [18]:
# Ask a question that targets a specific detail of the paper.
response = query_engine.query("how do agents share information with other agents?")
print(response)

[1;3;38;5;200mSelecting query engine 1: This choice is more relevant as it specifically mentions retrieving specific context from the MetaGPT paper, which would likely include information on how agents share information..
[0mAgents share information with other agents by utilizing a shared message pool where they can publish structured messages. This shared message pool allows all agents to exchange messages directly, enabling them to both publish their own messages and access messages from other agents transparently. Agents can retrieve required information directly from this shared pool, eliminating the need to inquire about other agents and wait for their responses, thus enhancing communication efficiency.


## Everything together

In [19]:
# Build a router query engine for metagpt.pdf through the `utils` helper.
# This rebinds `query_engine`, replacing the router constructed earlier in
# the notebook.
# NOTE(review): presumably the helper bundles the load/split/index/router
# steps shown above — confirm against utils.
from utils import get_router_query_engine
query_engine = get_router_query_engine("metagpt.pdf")


In [20]:
# Query the helper-built router about the ablation study.
response = query_engine.query(
    "Tell me about the ablation study results?"
)
print(response)

[1;3;38;5;200mSelecting query engine 1: The ablation study results are specific context from the MetaGPT paper, so choice 2 is most relevant..
[0mThe ablation study results provide insights into the impact of different components or features of a system by systematically removing them and observing the effects on the overall performance. This method helps in understanding the contributions of individual elements towards the system's functionality or effectiveness.
