# Multimodal Image Search Demo

This notebook demonstrates:
1. Basic search using CLIP (text-to-image, image-to-text, and image-to-image)
2. Hybrid search using CLIP + BLIP-2 for better accuracy

## Setup

In [None]:
from src.dataset import Flickr30KDataset
from src.encoder import CLIPEncoder
from src.index import FAISSIndex
from src.search import SearchEngine
from src.reranker import BLIP2Reranker
from src.hybrid_search import HybridSearchEngine
import matplotlib.pyplot as plt

## Load Components

In [None]:
# Locations of the on-disk artifacts this notebook reads. Edit them here if
# your data lives somewhere else.
IMAGE_DIR = 'data/images'
RESULTS_CSV = 'data/results.csv'
IMAGE_INDEX_PATH = 'data/image_index.faiss'
TEXT_INDEX_PATH = 'data/text_index.faiss'

# Dataset wrapper built from the image directory and the results CSV.
dataset = Flickr30KDataset(IMAGE_DIR, RESULTS_CSV)

# Encoder constructed with its default arguments.
encoder = CLIPEncoder()

# Two separate FAISS indices, each loaded from its own file.
image_index = FAISSIndex()
image_index.load(IMAGE_INDEX_PATH)

text_index = FAISSIndex()
text_index.load(TEXT_INDEX_PATH)

# The CLIP-only engine ties the encoder, both indices and the dataset together.
engine = SearchEngine(encoder, image_index, text_index, dataset)
print("Ready!")

## Text-to-Image Search

In [None]:
# Rank images against a free-text query with the CLIP-only engine.
query = "a dog playing in the park"
results = engine.text_to_image(query, k=5)

# Each hit unpacks into an (image name, score) pair; list them with 1-based ranks.
print(f"Query: '{query}'\n")
for i, hit in enumerate(results, start=1):
    img_name, score = hit
    print(f"{i}. {img_name} (score: {score:.4f})")

In [None]:
# Visualize results: one panel per hit, titled with its score.
# The grid is sized from the actual number of hits instead of a hard-coded 5,
# so changing `k` in the search cell (or getting fewer hits back) neither
# raises IndexError nor leaves empty framed panels behind.
n_hits = len(results)
if n_hits == 0:
    # plt.subplots rejects a zero-column grid, so report instead of plotting.
    print("No results to display.")
else:
    # squeeze=False keeps `axes` two-dimensional even for a single hit.
    # 3 inches per panel reproduces the original 15x3 figure for five hits.
    fig, axes = plt.subplots(1, n_hits, figsize=(3 * n_hits, 3), squeeze=False)
    for ax, (img_name, score) in zip(axes[0], results):
        ax.imshow(dataset.get_image(img_name))
        ax.axis('off')
        ax.set_title(f'{score:.3f}')
    plt.tight_layout()
    plt.show()

## Image-to-Text Search

In [None]:
# Take the top-ranked image of the text-to-image search as the query image.
top_hit = results[0]
test_image = top_hit[0]
captions = engine.image_to_text(f'data/images/{test_image}', k=5)

# Each match unpacks into a (caption, score) pair.
print(f"Image: {test_image}\n")
for i, match in enumerate(captions, start=1):
    caption, score = match
    print(f"{i}. {caption}")
    print(f"   Score: {score:.4f}\n")

## Image-to-Image Search

In [None]:
# Look up the nearest neighbours of the same query image.
# NOTE(review): k=6 is one more than the other searches use, presumably
# because the query image itself is returned as a hit; confirm.
similar = engine.image_to_image(f'data/images/{test_image}', k=6)

print(f"Query image: {test_image}\n")
for i, neighbour in enumerate(similar, start=1):
    img_name, score = neighbour
    print(f"{i}. {img_name} (score: {score:.4f})")

In [None]:
# Visualize similar images: one panel per neighbour, titled with its score.
# The grid is sized from the actual number of neighbours instead of a
# hard-coded 6, so a different `k` (or fewer hits) neither raises IndexError
# nor leaves empty framed panels behind.
n_similar = len(similar)
if n_similar == 0:
    # plt.subplots rejects a zero-column grid, so report instead of plotting.
    print("No similar images to display.")
else:
    # squeeze=False keeps `axes` two-dimensional even for a single neighbour.
    # 3 inches per panel reproduces the original 18x3 figure for six hits.
    fig, axes = plt.subplots(1, n_similar, figsize=(3 * n_similar, 3), squeeze=False)
    for ax, (img_name, score) in zip(axes[0], similar):
        ax.imshow(dataset.get_image(img_name))
        ax.axis('off')
        ax.set_title(f'{score:.3f}')
    plt.tight_layout()
    plt.show()

## Hybrid Search (CLIP + BLIP-2)

Hybrid search uses two stages:
1. CLIP retrieves the top 50 candidates (fast)
2. BLIP-2 re-ranks them to find the best 5 (accurate)

In [None]:
# Load BLIP-2 re-ranker. The message is printed before construction so the
# reader sees progress while the re-ranker is being built.
print("Loading BLIP-2...")
reranker = BLIP2Reranker()

# Create hybrid search engine. It reuses the encoder, image index and dataset
# from the "Load Components" cell and adds the re-ranker; the text index is
# not passed to it.
hybrid_engine = HybridSearchEngine(encoder, image_index, dataset, reranker)
print("Ready")

In [None]:
# Compare CLIP-only vs Hybrid search on the same query.
def _print_ranked(hits):
    """Print one numbered line per (image name, score) hit, starting at 1."""
    for i, (img_name, score) in enumerate(hits, 1):
        print(f"{i}. {img_name} (score: {score:.4f})")


query = "children playing soccer"

# Single stage: CLIP returns the final top 5 directly.
print("=== CLIP Only (single stage) ===")
clip_results = engine.text_to_image(query, k=5)
_print_ranked(clip_results)

# Two stages: k1 candidates are retrieved, then narrowed down to k2 results.
print("\n=== Hybrid (CLIP + BLIP-2) ===")
hybrid_results = hybrid_engine.search(query, k1=50, k2=5)
_print_ranked(hybrid_results)

In [None]:
# Visualize comparison: CLIP-only hits on the top row, hybrid hits below.
# The grid is as wide as the longer of the two result lists instead of a
# hard-coded 5, so lists of different or unexpected length neither raise
# IndexError nor leave empty framed panels behind.
comparison_rows = (
    ('CLIP', clip_results),      # top row
    ('Hybrid', hybrid_results),  # bottom row
)
n_cols = max(len(clip_results), len(hybrid_results))

if n_cols == 0:
    # plt.subplots rejects a zero-column grid, so report instead of plotting.
    print("No results to display.")
else:
    # squeeze=False keeps `axes` two-dimensional even when n_cols == 1.
    # 3 inches per column reproduces the original 15x6 figure for five hits.
    fig, axes = plt.subplots(2, n_cols, figsize=(3 * n_cols, 6), squeeze=False)

    # Hide every panel up front so slots without a hit stay blank.
    for ax in axes.flat:
        ax.axis('off')

    for row_axes, (label, row_results) in zip(axes, comparison_rows):
        for ax, (img_name, score) in zip(row_axes, row_results):
            ax.imshow(dataset.get_image(img_name))
            ax.set_title(f'{label}: {score:.3f}')

    plt.suptitle(f"Query: '{query}'")
    plt.tight_layout()
    plt.show()

## Try Your Own Query

In [None]:
# Change this to test different queries
my_query = "a black dog running"

# Stored under its own name: the image-to-text cell reads `results[0][0]`
# from the text-to-image search, so overwriting `results` here would silently
# change that cell's query image when it is re-run.
my_results = hybrid_engine.search(my_query, k1=100, k2=5)

# One panel per hit, sized from the actual number of hits so that changing
# `k2` (or getting fewer hits back) does not raise IndexError or leave empty
# framed panels behind.
n_hits = len(my_results)
if n_hits == 0:
    # plt.subplots rejects a zero-column grid, so report instead of plotting.
    print("No results to display.")
else:
    # squeeze=False keeps `axes` two-dimensional even for a single hit.
    fig, axes = plt.subplots(1, n_hits, figsize=(3 * n_hits, 3), squeeze=False)
    for ax, (img_name, score) in zip(axes[0], my_results):
        ax.imshow(dataset.get_image(img_name))
        ax.axis('off')
        ax.set_title(f'{score:.3f}')
    plt.suptitle(f"Hybrid Search: '{my_query}'")
    plt.tight_layout()
    plt.show()