# Multimodal RAG — Describe-then-Embed

Flow:
- **Image**: Gemini describe → Jina embed description → ChromaDB
- **Document** (PDF, Word, Markdown, plain text): extract text → chunk → Jina embed → ChromaDB
- **Query**: Jina embed query → ChromaDB search → Gemini generate answer

## 1. Setup & Config

In [None]:
import os
from pathlib import Path
from IPython.display import display, Image, Markdown

from app.config import settings
from app.engine import (
    describe_image,
    extract_file_text,
    chunk_text,
    ingest_texts,
    query_rag,
    get_chroma_collection,
)

# Make sure the data directory exists before any file is read or written.
os.makedirs(settings.data_dir, exist_ok=True)

# Echo the active configuration in one go so a wrong environment is easy to spot.
config_summary = [
    f"Data dir: {settings.data_dir}",
    f"Chroma dir: {settings.chroma_dir}",
    f"Collection: {settings.chroma_collection}",
    f"Chunk size: {settings.chunk_size}, overlap: {settings.chunk_overlap}",
    "Setup done!",
]
print("\n".join(config_summary))

## 2. Ingest Image

Image → Gemini describe → text description → Jina embed → ChromaDB

In [None]:
# Path of the image to ingest — point this at your own file.
IMAGE_PATH = "data/images/your-image.png"

# Prompt handed to describe_image(). The adjacent literals below are joined by
# Python into one single-line string; the split is for readability only.
DESCRIBE_PROMPT = (
    "Describe this image for use in a RAG (Retrieval-Augmented Generation) system. "
    "Focus on the key information, concepts, and facts presented. "
    "Avoid describing visual styling like colors, fonts, or layout. "
    "Be concise and informative."
)

In [None]:
# Step 1: get a text description of the image from Gemini.
# `description` is reused by the ingest cell that follows.
print("Describing image with Gemini...")
description = describe_image(IMAGE_PATH, prompt=DESCRIBE_PROMPT)
print(f"\nDescription:\n{description}")

# Step 2: render the source image (400 px wide) so the description can be checked by eye.
image_preview = Image(filename=IMAGE_PATH, width=400)
display(image_preview)

In [None]:
# Step 3: embed the description and store it in ChromaDB.
# The metadata records where the text came from and marks it as an image.
image_metadata = {
    "source": IMAGE_PATH,
    "original_filename": Path(IMAGE_PATH).name,
    "type": "image",
}

# One text in, one metadata dict in; the first returned ID is reported below.
ids = ingest_texts(texts=[description], metadatas=[image_metadata])
print(f"Ingested! Node ID: {ids[0]}")

## 3. Ingest Document

Supported formats: **PDF**, **Word (.docx)**, **Markdown (.md)**, **Plain text (.txt)**

Document → extract text → chunk → Jina embed → ChromaDB

In [None]:
# Path of the document to ingest — change it to your own file.
# Formats listed as supported by this notebook: .pdf, .docx, .md, .txt
DOC_PATH = "data/docs/your-file.pdf"

In [None]:
# Step 1: pull the raw text out of the document.
# `doc_text` is reused by the chunking cell that follows.
print(f"Extracting text from {DOC_PATH}...")
doc_text = extract_file_text(DOC_PATH)

# Report the size and show the first 500 characters as a sanity check.
char_count = len(doc_text)
preview = doc_text[:500]
print(f"Extracted {char_count} characters")
print(f"\nPreview (500 chars):\n{preview}")

In [None]:
# Step 2: split the extracted text into chunks.
# `chunks` is reused by the ingest cell that follows.
chunks = chunk_text(doc_text)
print(f"Total chunks: {len(chunks)}")

# Preview at most the first three chunks, 200 characters each.
for i, chunk in enumerate(chunks[:3]):
    print(f"\n--- Chunk {i} ({len(chunk)} chars) ---")
    # Add the ellipsis only when the preview really cuts the chunk short;
    # otherwise a short chunk would look truncated when it is shown in full.
    print(chunk[:200] + ("..." if len(chunk) > 200 else ""))

In [None]:
# Step 3: embed every chunk and store it in ChromaDB.
source_path = Path(DOC_PATH)
doc_name = source_path.name
# File extension without the dot, lower-cased (e.g. "pdf").
doc_type = source_path.suffix.lstrip(".").lower()

# Fields shared by all chunks of this document; each chunk adds its own index.
base_metadata = {
    "source": DOC_PATH,
    "original_filename": doc_name,
    "type": doc_type,
}
metadatas = [
    {**base_metadata, "chunk_index": position}
    for position, _ in enumerate(chunks)
]

print(f"Ingesting {len(chunks)} chunks...")
ids = ingest_texts(texts=chunks, metadatas=metadatas)
print(f"Done! Ingested {len(ids)} chunks")

# Show only the first three returned IDs as confirmation.
for position, node_id in enumerate(ids[:3]):
    print(f"  Chunk {position}: {node_id}")

## 4. Query

Query → Jina embed → ChromaDB search → Gemini generate answer

In [None]:
# The question to ask and how many results to retrieve (passed to query_rag as top_k).
QUESTION = "your question here"
TOP_K = 2

# Answer-generation template with {context} and {question} placeholders.
# Newlines are written out explicitly so the exact layout is visible.
PROMPT = (
    "Based on the following context, answer the question.\n"
    "\n"
    "Context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
    "\n"
    "Answer:"
)

In [None]:
# Run the query with the question, result count and prompt template defined above.
result = query_rag(QUESTION, top_k=TOP_K, prompt=PROMPT)

display(Markdown(f"### Answer\n\n{result['answer']}"))

# List each returned source with its similarity, origin and a short text preview.
sources = result["sources"]
print(f"\n--- Sources ({len(sources)}) ---")
for hit in sources:
    hit_meta = hit["metadata"]
    # The stored score is treated as a cosine distance, so 1 - distance gives similarity.
    similarity = 1 - hit["score"]
    filename = hit_meta.get("original_filename", "N/A")
    kind = hit_meta.get("type", "?")
    print(f"  [similarity: {similarity:.4f}] {filename} ({kind})")
    print(f"           {hit['text_preview'][:100]}...")

## 5. Utils

List the stored documents, delete documents by ID, or reset the whole collection.

In [None]:
# List every record currently stored in the ChromaDB collection.
collection = get_chroma_collection()
data = collection.get()

print(f"Total documents: {len(data['ids'])}")
for i, (doc_id, meta) in enumerate(zip(data["ids"], data["metadatas"])):
    # A record stored without metadata may come back as None rather than a dict
    # (NOTE(review): confirm against the installed Chroma version); fall back to
    # an empty dict so the listing shows placeholders instead of crashing.
    meta = meta or {}
    # Only the first 8 characters of the ID are shown to keep lines short.
    print(f"  {i}. [{doc_id[:8]}...] {meta.get('original_filename', 'N/A')} ({meta.get('type', '?')})")

In [None]:
# Delete documents by ID (left commented out on purpose: uncomment the lines below and fill in DELETE_IDS to run)
# DELETE_IDS = ["id-to-delete"]
# collection = get_chroma_collection()
# collection.delete(ids=DELETE_IDS)
# print(f"Deleted {len(DELETE_IDS)} documents")

In [None]:
# Reset: drop the configured collection, which deletes all data stored in it.
# WARNING: destructive — this cell runs as soon as it is executed, with no confirmation.
import chromadb

# Open the persistent store at settings.chroma_dir and delete the collection by name.
chromadb.PersistentClient(path=settings.chroma_dir).delete_collection(
    settings.chroma_collection
)
print("Collection deleted. Run setup cell again to recreate.")