In [None]:
# ! pip install -qU llama-index-llms-gemini llama-index llama-index-embeddings-gemini

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m21.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.4/40.4 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m55.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m264.5/264.5 kB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.3/302.3 kB[0m [31m17.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m35.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Resolve the Gemini API key.
# Inside Colab the key is read from the notebook's secret store; anywhere else
# (`google.colab` is not importable) fall back to the GOOGLE_API_KEY
# environment variable so the notebook is not tied to Colab.
try:
    from google.colab import userdata
except ImportError:
    import os

    GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
    if not GOOGLE_API_KEY:
        # Fail here with a clear message instead of later inside the Gemini client.
        raise RuntimeError(
            "GOOGLE_API_KEY is not set: add it to Colab secrets or export it "
            "as an environment variable before running this notebook."
        )
else:
    GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

In [3]:
from llama_index.llms.gemini import Gemini

# Gemini chat model; the same `llm` object is passed to the query engines and the agent.
llm = Gemini(api_key=GOOGLE_API_KEY, model="models/gemini-1.5-flash")

In [4]:
from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage,
)
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.agent import ReActAgent
import os

In [5]:
# Folder (relative to the working directory) that holds the 10-K PDF files.
source_fd = "10k_data"
# Bare file names only (no directory prefix); they double as dictionary keys in later cells.
input_docs = os.listdir(source_fd)
# Trailing expression so the notebook displays the discovered file names.
input_docs

['apple_2021.pdf',
 'google_2021.pdf',
 'jpmc_2021.pdf',
 'meta_2021.pdf',
 'costco_2021.pdf',
 'amazon_2021.pdf',
 'doordash_2021.pdf']

In [6]:
# Map each file name to the list of documents parsed from that single file.
doc_map = {
    filename: SimpleDirectoryReader(
        input_files=[f"{source_fd}/{filename}"]
    ).load_data()
    for filename in input_docs
}

In [7]:
from llama_index.embeddings.gemini import GeminiEmbedding
from llama_index.core import Settings

# Gemini embedding model, authenticated with the same key as the chat model.
gemini_embedding_model = GeminiEmbedding(
    model_name="models/embedding-001",
    api_key=GOOGLE_API_KEY,
)
# Register it on the global llama-index Settings object.
Settings.embed_model = gemini_embedding_model

In [8]:
# Build one vector index per document, reusing a persisted copy when one can be loaded.
index_storage_folders = "10k_data_indexes"
index_map = {}
for doc in doc_map:
    # Each document gets its own sub-folder, named after the file.
    persist_dir = f"./{index_storage_folders}/{doc}"
    try:
        # Fast path: reload an index that an earlier run wrote to disk.
        storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
        vector_index = load_index_from_storage(storage_context)
    except Exception:
        # Best-effort cache: any failure to load (presumably because nothing has
        # been persisted yet) falls back to re-embedding the document.
        # `Exception` instead of a bare `except:` so that KeyboardInterrupt and
        # SystemExit still stop the cell rather than starting a costly rebuild.
        vector_index = VectorStoreIndex.from_documents(doc_map[doc], show_progress=True)
        # Only a freshly built index needs writing; a loaded one is already on disk.
        vector_index.storage_context.persist(persist_dir=persist_dir)
    index_map[doc] = vector_index

Parsing nodes:   0%|          | 0/82 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/105 [00:00<?, ?it/s]

Parsing nodes:   0%|          | 0/134 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/154 [00:00<?, ?it/s]

Parsing nodes:   0%|          | 0/393 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/620 [00:00<?, ?it/s]

Parsing nodes:   0%|          | 0/193 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/225 [00:00<?, ?it/s]

Parsing nodes:   0%|          | 0/81 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/81 [00:00<?, ?it/s]

Parsing nodes:   0%|          | 0/194 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/234 [00:00<?, ?it/s]

Parsing nodes:   0%|          | 0/215 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/255 [00:00<?, ?it/s]

In [10]:
# One query engine per document index, keyed by the same file name.
# Each engine retrieves the 3 most similar chunks and answers with the shared `llm`.
engines = {
    doc_name: doc_index.as_query_engine(similarity_top_k=3, llm=llm)
    for doc_name, doc_index in index_map.items()
}

In [11]:
from llama_index.core.llms import ChatMessage

In [16]:
# Sanity check: show the company prefix (text before the first underscore) of every engine key.
for engine_key in engines:
    company_prefix = engine_key.partition("_")[0]
    print(company_prefix)

apple
google
jpmc
meta
costco
amazon
doordash


In [21]:
# Wrap every query engine as an agent tool.
# The tool name is the full file name; the description mentions only the
# company prefix (text before the first underscore).
tools = [
    QueryEngineTool(
        query_engine=query_engine,
        metadata=ToolMetadata(
            name=doc_name,
            description=(
                f"2021 10k for {doc_name.split('_')[0]}. "
                "Use a detailed plain text question as input to the llm."
            ),
        ),
    )
    for doc_name, query_engine in engines.items()
]

In [23]:
# ReAct agent that can call any of the per-document query-engine tools.
agent = ReActAgent.from_tools(
    tools,
    llm=llm,
    verbose=True,  # print each Thought / Action / Observation step
    # The step cap on `from_tools` is `max_iterations`. The previous `max_turns`
    # keyword is not a recognised option and was accepted without effect, so
    # the limit was never actually applied by this argument.
    max_iterations=10,
)

In [35]:
# Question for the agent; it names two companies, each of which has its own tool.
prompt = "Compare and contrast the 10k information for Apple and JPMC."

In [36]:
# Run the agent on the prompt; with verbose=True its intermediate steps are printed as it works.
response = agent.chat(prompt)
# print() applies str() to the response object, so only the final answer text is shown.
print(response)

> Running step e3731dcb-9958-4860-8db9-5f7cc979f47a. Step input: Compare and contrast the 10k information for Apple and JPMC.
[1;3;38;5;200mThought: I need to use the apple_2021.pdf and jpmc_2021.pdf tools to extract relevant financial information for comparison.
Action: apple_2021.pdf
Action Input: {'input': "What was Apple's total revenue, net income, and total assets in 2021?"}
[0m[1;3;34mObservation: Apple's total net sales (revenue) in 2021 were $365,817 million.  Their net income was $94,680 million.  The provided text does not contain information on total assets.

[0m> Running step 5fb80c3d-0afd-4300-90ad-38b02eb33af7. Step input: None
[1;3;38;5;200mThought: I need to use the jpmc_2021.pdf tool to extract relevant financial information for JPMC.
Action: jpmc_2021.pdf
Action Input: {'input': "What were JPMC's total revenue, net income, and total assets in 2021?"}
[0m[1;3;34mObservation: In 2021, JPMC's total net revenue was $121,649 million, its net income was $48,334 mill