<a href="https://colab.research.google.com/github/vinaykrshnn-git2026/advanced-rag/blob/main/10_rag_refactored_colab_version.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Advanced RAG refactored (Search and Augment)

Refer to `08_multimodal_pdf.ipynb` for the embedding and collection-creation code.


In [None]:
!pip install -q -r requirement_rag_refactored.txt

## Initialise ColPali and Qdrant

In [None]:
from qdrant_client import QdrantClient
from qdrant_client.http.exceptions import UnexpectedResponse
from colpali_engine.models import ColPali, ColPaliProcessor
from google.colab import userdata
import torch

#####################################################################
#   Initializing the Qdrant Cloud client
#####################################################################


from qdrant_client import QdrantClient

# Replace this with the URL of your own Qdrant Cloud cluster. The API key is
# not hard-coded: it is read from the Colab secret named 'QDRANT_API_KEY'.
QDRANT_URL = "https://f7369634-b961-4d15-ba60-8b230e810658.us-east4-0.gcp.cloud.qdrant.io"

try:
    # Initialize the Cloud Client
    qdrant_client = QdrantClient(
        url=QDRANT_URL,
        api_key=userdata.get('QDRANT_API_KEY'),
    )
    # Building the client object is not, by itself, evidence that the cluster
    # is reachable or that the key is accepted, so make one cheap request
    # before claiming the connection works.
    qdrant_client.get_collections()
except Exception as e:
    print(f"Cloud connection failed: {e}")
    # Stop here with the real cause; continuing would only fail later in the
    # notebook with a less helpful error (e.g. `qdrant_client` being undefined).
    raise
else:
    print("Connected to Qdrant Cloud!")



#####################################################################
#   Initializing the ColPali model and processor
#####################################################################


# Initialize ColPali model and processor
model_name = "vidore/colpali-v1.2"  # Hugging Face repo id of the ColPali checkpoint

# Use the first CUDA GPU when one is present; otherwise fall back to CPU so the
# cell still runs on a CPU-only runtime instead of failing on "cuda:0".
# (Set this to "mps" manually when running on Apple Silicon.)
colpali_device = "cuda:0" if torch.cuda.is_available() else "cpu"
if colpali_device == "cpu":
    print("No CUDA device detected; loading ColPali on CPU (query encoding will be slow).")

colpali_model = ColPali.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map=colpali_device,
)
# NOTE(review): the processor is loaded from a different repo id than
# `model_name`. Presumably it has to match the processor used when the
# collection was indexed -- confirm against 08_multimodal_pdf.ipynb.
colpali_processor = ColPaliProcessor.from_pretrained(
    "vidore/colpaligemma-3b-pt-448-base"
)


## Retrieval and augmentation using ColPali and GPT-4o

In [None]:

##### Full pipeline search and augment to get result


import torch
from qdrant_client import models
from openai import OpenAI
from google.colab import userdata

def run_visual_rag_full(
    query_text,
    colpali_model,
    colpali_processor,
    qdrant_client,
    *,
    collection_name="identity_documents",
    openai_model="gpt-4o",
    max_tokens=500,
):
    """Answer a question using the best-matching stored page image.

    Pipeline:
    1. Embed the query with ColPali (multi-vector output).
    2. Retrieve the single best-matching point from Qdrant.
    3. Send the question together with the image stored in that point's
       payload to an OpenAI chat model and return its answer.

    Args:
        query_text: The natural-language question.
        colpali_model: Loaded ColPali model; called with the processed query
            batch and read for its ``device`` attribute.
        colpali_processor: Processor providing ``process_queries``.
        qdrant_client: Client providing ``query_points``.
        collection_name: Qdrant collection to search.
        openai_model: Name of the OpenAI chat model that receives the image.
        max_tokens: Upper bound on the length of the generated answer.

    Returns:
        ``{"answer": <str>, "metadata": {"doc": ..., "page": ...}}`` on
        success. ``None`` (after printing the reason) when the search raises,
        when nothing matches, or when the matched point carries no
        ``base64_image`` payload entry.

    Raises:
        Errors from reading the ``OPENAI_API_KEY`` secret or from the OpenAI
        request are not caught here and propagate to the caller.
    """

    # --- STEP 1: ENCODE THE TEXT QUERY ---
    with torch.no_grad():
        # Process the query through ColPali
        batch_query = colpali_processor.process_queries([query_text]).to(
            colpali_model.device
        )
        # Exactly one query was submitted, so keep the first batch entry and
        # convert it to a plain list of lists of floats for Qdrant.
        query_embeddings = colpali_model(**batch_query)[0].cpu().float().tolist()

    # --- STEP 2: SEARCH QDRANT CLOUD ---
    try:
        search_result = qdrant_client.query_points(
            collection_name=collection_name,
            query=query_embeddings,
            limit=1,
            with_payload=True,  # Crucial: the page image lives in the payload
        )
    except Exception as e:
        print(f"Search failed: {e}")
        return None

    if not search_result.points:
        print("No matches found.")
        return None

    # Extract top match and payload (tolerate a point stored without payload)
    top_hit = search_result.points[0]
    payload = top_hit.payload or {}
    metadata = {
        "doc": payload.get('doc'),
        "page": payload.get('page')
    }

    print(f"Match found! Source: {metadata['doc']}, Page: {metadata['page']}")

    # Without an image there is nothing to show the vision model; bail out
    # rather than sending a data URL that contains the text "None".
    base64_image = payload.get('base64_image')
    if not base64_image:
        print("Matched point has no 'base64_image' in its payload; cannot query the vision model.")
        return None

    # --- STEP 3: AUGMENT WITH GPT-4o VISION ---
    # Initializing OpenAI client using the key stored in Colab secrets
    openai_client = OpenAI(api_key=userdata.get('OPENAI_API_KEY'))

    response = openai_client.chat.completions.create(
        model=openai_model,
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant that answers questions based on the provided image context."
            },
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": f"Question: {query_text}"},
                    {
                        "type": "image_url",
                        "image_url": {
                            # Declared as PNG, per the original note about the upsert format
                            "url": f"data:image/png;base64,{base64_image}"
                        },
                    },
                ],
            }
        ],
        max_tokens=max_tokens,
    )

    return {
        "answer": response.choices[0].message.content,
        "metadata": metadata
    }

# --- EXECUTION ---
# Run the whole retrieve-then-answer pipeline once for a sample question,
# using the model, processor and client created in the earlier cells.
query = "How old is Kaira"
result = run_visual_rag_full(
    query_text=query,
    colpali_model=colpali_model,
    colpali_processor=colpali_processor,
    qdrant_client=qdrant_client,
)

# A falsy result means the pipeline produced no answer; in that case it has
# already printed the reason, so there is nothing more to show here.
if result:
    print("\n--- GPT-4o Response ---")
    print(result['answer'])
