**This is a VLM-based visual search demo that works on a dataset consisting mainly of animals, plus a few everyday objects such as cars.**

In [None]:
#pip install transformers pillow faiss-cpu numpy torch gradio torchvision --quiet

import os
import numpy as np
from PIL import Image, ImageFile
from transformers import CLIPProcessor, CLIPModel
import faiss
import torch
from tqdm import tqdm
import gradio as gr
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms

ImageFile.LOAD_TRUNCATED_IMAGES = True

class VisualSearchEngine:
    """CLIP-backed similarity search over a folder of images.

    Images are embedded with the ``openai/clip-vit-base-patch32`` model,
    L2-normalised and stored in a FAISS inner-product index. Queries may be
    free text, a path to an image file, a PIL image or a NumPy image array;
    they are embedded with the same model and matched against the index.
    """

    def __init__(self):
        """Load the CLIP model and processor onto the selected device.

        Raises:
            RuntimeError: If the model or processor cannot be loaded.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f" Initializing on {self.device.upper()} device")

        try:
            # Device placement is done by the explicit ``.to()`` call only.
            # Passing ``device_map="auto"`` as well would require the
            # ``accelerate`` package and conflicts with moving the model
            # manually afterwards.
            self.model = CLIPModel.from_pretrained(
                "openai/clip-vit-base-patch32"
            ).to(self.device).eval()
            self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
            print("Model loaded successfully!")
        except Exception as e:
            # Chain the cause so the original traceback is not lost.
            raise RuntimeError(f"Model loading failed: {e}") from e

        # Paths of indexed images; position i corresponds to index row i.
        self.image_paths = []
        # FAISS index, created by build_index().
        self.index = None

    def prepare_dataset(self, num_images=500):
        """Download CIFAR-10 and save the first ``num_images`` images as JPEGs.

        Images are resized to 128x128 and written to ``cifar10_images/``.
        If anything fails (download, decoding, disk write), a few solid-colour
        fallback images are generated instead.

        Args:
            num_images: Maximum number of training images to export.

        Returns:
            The name of the folder containing the prepared images:
            ``"cifar10_images"`` on success, ``"fallback_images"`` otherwise.
        """
        print("Downloading CIFAR-10 dataset...")
        try:
            os.makedirs("cifar10_images", exist_ok=True)

            transform = transforms.Compose([
                transforms.Resize((128, 128)),
                transforms.ToTensor()
            ])

            cifar10 = datasets.CIFAR10(
                root='./data',
                train=True,
                download=True,
                transform=transform
            )

            # Never ask for more samples than the dataset holds.
            count = min(num_images, len(cifar10))
            to_pil = transforms.ToPILImage()  # hoisted: loop-invariant

            for idx in tqdm(range(count), desc=" Saving images"):
                image, _ = cifar10[idx]
                to_pil(image).save(f"cifar10_images/{idx:04d}.jpg", quality=95)

            print(f"CIFAR-10 subset prepared ({count} images)")
            return "cifar10_images"

        except Exception as e:
            # Deliberate best-effort: the demo must still start offline.
            print(f"Using fallback images: {e}")
            self.create_fallback_images()
            return "fallback_images"

    def create_fallback_images(self):
        """Write three solid-colour 128x128 JPEGs into ``fallback_images/``."""
        os.makedirs("fallback_images", exist_ok=True)

        colors = ['red', 'blue', 'green']
        for i, color in enumerate(colors, start=1):
            img = Image.new('RGB', (128, 128), color)
            img.save(f'fallback_images/example{i}.jpg')

    def build_index(self, image_folder, max_images=500):
        """Embed the images in ``image_folder`` and build the FAISS index.

        Files that cannot be opened or embedded are reported and skipped.
        On success ``self.index`` and ``self.image_paths`` are replaced
        together, so they always describe the same set of images.

        Args:
            image_folder: Directory scanned (non-recursively) for
                ``.jpg``/``.jpeg``/``.png`` files.
            max_images: Upper bound on the number of files indexed, taken
                in sorted path order. ``None`` indexes every file.

        Raises:
            RuntimeError: If the folder holds no image files, or if no image
                could be embedded.
        """
        print("Building search index...")

        # Sort first, then truncate: the selected subset is deterministic
        # instead of depending on the order os.listdir() happens to return.
        candidates = sorted(
            os.path.join(image_folder, f)
            for f in os.listdir(image_folder)
            if f.lower().endswith(('.jpg', '.jpeg', '.png'))
        )[:max_images]

        if not candidates:
            raise RuntimeError("No valid images found in directory")

        embeddings = []
        valid_paths = []

        for path in tqdm(candidates, desc="Processing images"):
            try:
                # The context manager is on the opened file itself so that
                # its handle is closed; _embed_image does the RGB conversion.
                with Image.open(path) as img:
                    embedding = self._embed_image(img)
            except Exception as e:
                print(f"Skipped {os.path.basename(path)}: {str(e)}")
                continue
            embeddings.append(embedding)
            valid_paths.append(path)

        if not embeddings:
            raise RuntimeError("No valid embeddings generated")

        # One row per successfully embedded image.
        embeddings = np.vstack(embeddings).astype('float32')
        # Unit-length vectors make the inner product a cosine similarity.
        faiss.normalize_L2(embeddings)
        index = faiss.IndexFlatIP(embeddings.shape[1])
        index.add(embeddings)

        self.index = index
        self.image_paths = valid_paths
        print(f"Search index ready with {len(self.image_paths)} images")

    def search(self, query, top_k=5):
        """Return the paths of the indexed images most similar to ``query``.

        Args:
            query: Text, a path to an image file, a PIL image or a NumPy
                image array.
            top_k: Maximum number of results.

        Returns:
            A list of up to ``top_k`` image paths, best match first. An empty
            list is returned when the index is not built, the query is empty
            or unsupported, or any error occurs (errors are printed, not
            raised, because this is called directly from the UI).
        """
        try:
            if self.index is None or not self.image_paths:
                print("Search error: index has not been built")
                return []

            # An empty UI field arrives as None (image) or "" (text).
            if query is None or (isinstance(query, str) and not query.strip()):
                return []

            embedding = self._get_query_embedding(query)
            if embedding is None:
                return []

            # Never request more neighbours than there are indexed images.
            k = min(int(top_k), len(self.image_paths))
            if k <= 0:
                return []

            embedding = embedding.astype('float32')
            faiss.normalize_L2(embedding)
            _, indices = self.index.search(embedding, k)

            return self._paths_for(indices[0])

        except Exception as e:
            print(f"Search error: {e}")
            return []

    def _paths_for(self, indices):
        """Map FAISS result ids to image paths, dropping invalid ids.

        FAISS pads missing results with ``-1``; without the lower bound a
        ``-1`` would silently select the last image via negative indexing.
        """
        total = len(self.image_paths)
        return [self.image_paths[int(i)] for i in indices if 0 <= i < total]

    def _get_query_embedding(self, query):
        """Embed a query of any supported type.

        A string naming an existing file is treated as an image path; any
        other string is embedded as text. NumPy arrays are converted to PIL
        images first.

        Returns:
            The embedding as a NumPy array, or ``None`` if the query type is
            unsupported or embedding fails (the error is printed).
        """
        try:
            if isinstance(query, str):
                # NOTE(review): text queries that happen to be valid file
                # paths are opened as images; confirm this is intended when
                # the UI is exposed publicly.
                if os.path.isfile(query):
                    with Image.open(query) as img:
                        return self._embed_image(img)
                return self._embed_text(query)

            if isinstance(query, np.ndarray):
                query = Image.fromarray(query)

            if isinstance(query, Image.Image):
                return self._embed_image(query)

            raise ValueError("Unsupported query type")

        except Exception as e:
            print(f"Embedding error: {e}")
            return None

    def _embed_image(self, image):
        """Return the CLIP image embedding of a PIL image as a NumPy array."""
        inputs = self.processor(
            images=image.convert("RGB"),
            return_tensors="pt"
        ).to(self.device)
        with torch.no_grad():
            return self.model.get_image_features(**inputs).cpu().numpy()

    def _embed_text(self, text):
        """Return the CLIP text embedding of ``text`` as a NumPy array."""
        inputs = self.processor(
            text=text,
            return_tensors="pt",
            padding=True,
            truncation=True
        ).to(self.device)
        with torch.no_grad():
            return self.model.get_text_features(**inputs).cpu().numpy()

def create_gui(engine):
    """Build the Gradio interface with a text-search and an image-search tab.

    Args:
        engine: Object exposing ``search(query)`` that returns a list of
            image paths. If it also has an ``image_paths`` attribute, the
            first few indexed images are offered as clickable examples.

    Returns:
        The constructed ``gr.Blocks`` interface (not launched).
    """
    # Pick example images that actually exist. The fallback file is only
    # created when the dataset download fails, so it must not be assumed.
    indexed = list(getattr(engine, "image_paths", None) or [])
    image_examples = [p for p in indexed[:3] if os.path.isfile(p)]
    if not image_examples and os.path.isfile("fallback_images/example1.jpg"):
        image_examples = ["fallback_images/example1.jpg"]

    with gr.Blocks(title="Visual Search Engine", theme="soft") as interface:
        gr.Markdown("#  Visual Search Engine")

        with gr.Tab("Text Search"):
            gr.Markdown("###  Search using text description")
            with gr.Row():
                text_input = gr.Textbox(label="Search Query",
                                       placeholder="Enter text (e.g., 'red car', 'animal')")
                text_search = gr.Button("Search", variant="primary")
            gr.Examples(["red truck", "bird flying", "green frog"],
                       inputs=text_input)
            text_output = gr.Gallery(label="Results", columns=5)

        with gr.Tab("Image Search"):
            gr.Markdown("### Search using image")
            with gr.Row():
                image_input = gr.Image(label="Upload Image", type="pil")
                image_search = gr.Button("Search", variant="primary")
            # Skip the examples widget entirely when there is nothing to show.
            if image_examples:
                gr.Examples(image_examples, inputs=image_input)
            image_output = gr.Gallery(label="Results", columns=5)

        # Both buttons route through the same engine.search entry point.
        text_search.click(
            lambda q: engine.search(q),
            inputs=text_input,
            outputs=text_output
        )
        image_search.click(
            lambda img: engine.search(img),
            inputs=image_input,
            outputs=image_output
        )

    return interface

def main():
    """Create the engine, prepare and index the images, then launch the UI."""
    search_engine = VisualSearchEngine()
    # prepare_dataset() returns the folder that build_index() should scan.
    search_engine.build_index(search_engine.prepare_dataset())
    app = create_gui(search_engine)
    app.launch(share=True)

# Entry point: announce start-up and run the demo only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    print("Starting Visual Search Engine...")
    main()

Starting Visual Search Engine...
 Initializing on CPU device


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Model loaded successfully!
Downloading CIFAR-10 dataset...


 Saving images: 100%|██████████| 500/500 [00:00<00:00, 546.85it/s]


CIFAR-10 subset prepared (500 images)
Building search index...


Processing images: 100%|██████████| 500/500 [02:39<00:00,  3.13it/s]


Search index ready with 500 images
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://a1d13082d72f5d292d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
