In [None]:
!pip install -q llama-index pypdf
!pip install -q llama-index-embeddings-gemini
!pip install -q llama-index-llms-gemini

In [None]:
from pathlib import Path
import os
from google.colab import userdata
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.core.storage import StorageContext
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.gemini import GeminiEmbedding
from llama_index.llms.gemini import Gemini
from llama_index.core import load_index_from_storage

In [None]:
# Gemini API key, read from the Colab `userdata` store under the name
# 'geminiapikey' so the key itself never appears in the notebook source.
API_KEY = userdata.get('geminiapikey')

In [None]:
# Despite the name, this is a list of individual PDF file paths (not a
# directory); it is handed to SimpleDirectoryReader via `input_files`.
pdf_directory = [
    '/content/part-1.pdf',
    '/content/part-2.pdf',
]

In [None]:
# Directory where the built index is persisted and later reloaded from.
persist_dir = "./storage"

In [None]:
# Chunk size shared by the SentenceSplitter and Settings.chunk_size below
# (presumably measured in tokens -- confirm against the LlamaIndex docs).
chunk_size = 1024

In [None]:
# Ensure the persistence directory exists up front. parents=True keeps this
# working if persist_dir is later changed to a nested path.
Path(persist_dir).mkdir(parents=True, exist_ok=True)

In [None]:
# Use Gemini embeddings as the global default embedding model.
# NOTE(review): an index persisted with one embedding model presumably has to
# be rebuilt if this model is changed -- confirm before switching models.
Settings.embed_model = GeminiEmbedding(api_key=API_KEY, model_name="models/embedding-001")

In [None]:
# Register the Gemini chat model and the chunking policy as global defaults.
# `model` is the current keyword argument; `model_name` is a deprecated alias
# that triggers a DeprecationWarning in recent llama-index-llms-gemini releases.
Settings.llm = Gemini(api_key=API_KEY, model="models/gemini-2.5-flash")
Settings.text_splitter = SentenceSplitter(chunk_size=chunk_size)
Settings.chunk_size = chunk_size

In [None]:
def load_or_create_index():
    """Return the vector index, building and persisting it when none is stored.

    If ``persist_dir`` is missing or empty, the PDFs listed in
    ``pdf_directory`` are read, indexed with ``VectorStoreIndex`` and the
    result is persisted to ``persist_dir``. Otherwise the index is loaded
    from the files already present in ``persist_dir``.

    Returns:
        The newly built or reloaded index.
    """
    storage_path = Path(persist_dir)
    # A missing directory is treated like an empty one, so this function does
    # not depend on another cell having created the directory beforehand.
    has_persisted_index = storage_path.is_dir() and any(storage_path.iterdir())

    if has_persisted_index:
        print("Loading existing index...")
        storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
        return load_index_from_storage(storage_context)

    print("Creating new index...")
    # Create the target directory before the expensive read/index step so a
    # bad path fails fast.
    storage_path.mkdir(parents=True, exist_ok=True)

    # Load PDF documents
    documents = SimpleDirectoryReader(input_files=pdf_directory).load_data()

    # Create and persist index
    index = VectorStoreIndex.from_documents(documents, show_progress=True)
    index.storage_context.persist(persist_dir=persist_dir)
    return index

In [None]:
# Notebook-level index used by query_pdfs(); built from the PDFs or reloaded
# from persist_dir depending on what load_or_create_index() finds.
index = load_or_create_index()

In [None]:
def query_pdfs(question, similarity_top_k=3, response_mode="compact"):
    """Answer a question from the indexed PDF knowledge base.

    Args:
        question: Natural-language question forwarded to the query engine.
        similarity_top_k: Retrieval ``similarity_top_k`` setting passed to
            the query engine. Defaults to 3, the previously hard-coded value.
        response_mode: Response synthesis mode passed to the query engine.
            Defaults to "compact", the previously hard-coded value.

    Returns:
        The response object produced by ``query_engine.query``.
    """
    # Reads the notebook-level `index`. The engine is built per call so the
    # retrieval settings can differ between questions.
    query_engine = index.as_query_engine(
        similarity_top_k=similarity_top_k,
        response_mode=response_mode,
        verbose=True,
    )
    return query_engine.query(question)

In [None]:
# Example query against the indexed PDFs. print() shows the response through
# its str() form rather than the object's repr.
response = query_pdfs("What challenges do the heroes face on their journey to recover the Crystal of Lumina?")
print(response)