# **Etapa 1 - Buscador de Imágenes por Similitud**
-----

In [None]:
import os
import faiss
import numpy as np
import gradio as gr
from tqdm.notebook import tqdm
from collections import Counter
from sklearn.metrics import ndcg_score
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input

# Root folder of the dog-image training set.
dataset_dir = "../dog-images/train"

# Collect every image path under dataset_dir, recursively. os.walk yields
# entries in arbitrary filesystem order, so the paths are sorted to keep the
# position of each image in the index reproducible across runs.
image_files = sorted(
    os.path.join(root, fname)
    for root, dirs, files in os.walk(dataset_dir)
    for fname in files
    if fname.lower().endswith(('.png', '.jpg', '.jpeg'))
)
if not image_files:
    # Fail early with a clear message instead of an IndexError further down
    # when reading embeddings.shape[1] from an empty array.
    raise FileNotFoundError(f"No se encontraron imágenes en {dataset_dir!r}")

# Pre-trained ResNet50 without the classification head; pooling='avg' makes the
# model return one flat feature vector per image.
model = ResNet50(weights='imagenet', include_top=False, pooling='avg')

# Extract embeddings in batches. Keras documents predict() as intended for
# batch processing rather than being called once per sample inside a loop.
BATCH_SIZE = 32
feature_batches = []
for start in tqdm(range(0, len(image_files), BATCH_SIZE), desc="Extrayendo embeddings"):
    batch_paths = image_files[start:start + BATCH_SIZE]
    # Every image is loaded at 224x224, so the arrays stack into one batch.
    x = np.stack([
        image.img_to_array(image.load_img(p, target_size=(224, 224)))
        for p in batch_paths
    ])
    x = preprocess_input(x)
    feature_batches.append(model.predict(x, verbose=0))

# FAISS expects a float32 matrix with one row per image.
embeddings = np.concatenate(feature_batches).astype('float32')

# Build a flat L2 FAISS index over the embeddings.
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)

print(f"Indexadas {len(embeddings)} imágenes en la base de datos vectorial.")

2025-06-22 11:15:22.036843: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2025-06-22 11:15:22.036869: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2025-06-22 11:15:22.036873: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2025-06-22 11:15:22.036888: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-06-22 11:15:22.036900: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2025-06-22 11:15:24.614964: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


Indexadas 7946 imágenes en la base de datos vectorial.


In [None]:
def extraer_raza(path):
    """Return the breed label encoded in an image path.

    The label is the name of the directory that directly contains the file,
    e.g. ``.../train/<breed>/image.jpg`` yields ``<breed>``.
    """
    carpeta, _archivo = os.path.split(path)
    return os.path.basename(carpeta)

def buscar_similares(img):
    """Find the 10 indexed images closest to ``img`` and predict its breed.

    Args:
        img: PIL image delivered by the Gradio image input, or ``None`` when
            the form is submitted without an image.

    Returns:
        A tuple ``(images, message)``: a list holding the query image followed
        by the retrieved neighbours, and a text line with the breed chosen by
        majority vote among those neighbours. With no input image the list is
        empty and the message says so.
    """
    # Nothing uploaded: answer with an empty gallery instead of crashing.
    if img is None:
        return [], "No se recibió ninguna imagen."

    # The index was built from images loaded at 224x224, so the query has to go
    # through the same resize; embedding it at its original resolution would
    # produce features that are not comparable with the indexed ones.
    # NOTE(review): PIL's default resize filter may differ from the
    # interpolation load_img uses — confirm if exact parity matters.
    entrada = img.convert("RGB").resize((224, 224))
    x = image.img_to_array(entrada)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)

    # Embed the query with the same model used for the index.
    features = model.predict(x, verbose=0)
    query_vec = features.astype('float32')

    # Retrieve the 10 nearest neighbours and map their positions back to paths.
    D, I = index.search(query_vec, 10)
    similares = [image_files[i] for i in I[0]]
    razas = [extraer_raza(p) for p in similares]

    # Majority vote over the neighbours' breeds.
    raza_predicha = Counter(razas).most_common(1)[0][0]

    # Show the query as uploaded, followed by the neighbours at 224x224.
    imgs_resultado = [img] + [image.load_img(p, target_size=(224, 224)) for p in similares]
    return imgs_resultado, f"Raza predicha: {raza_predicha}"

# Gradio UI: one image input; the outputs are a gallery and a text box.
entrada_imagen = gr.Image(type="pil", label="Subí una imagen de perro")
salida_galeria = gr.Gallery(label="Imagen de consulta y 10 más similares")
salida_texto = gr.Textbox(label="Raza predicha")

demo = gr.Interface(
    fn=buscar_similares,
    inputs=entrada_imagen,
    outputs=[salida_galeria, salida_texto],
    title="Buscador de Perros Similares",
    description="Subí una imagen de perro y encuentra las 10 más parecidas en el dataset. Se predice la raza por voto mayoritario.",
)
demo.launch()

* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.




In [None]:
# Gather the paths of all test images (png/jpg/jpeg) under test_dir, recursively.
test_dir = "../dog-images/test"
test_files = [
    os.path.join(carpeta, nombre)
    for carpeta, _subcarpetas, nombres in os.walk(test_dir)
    for nombre in nombres
    if nombre.lower().endswith(('.png', '.jpg', '.jpeg'))
]

def ndcg_para_imagen(img_path, k=10):
    """Compute NDCG@k for one test image queried against the FAISS index.

    The image is loaded at 224x224, embedded with ``model`` and its ``k``
    nearest neighbours are retrieved. A neighbour counts as relevant (1) when
    its breed matches the breed of the query image, otherwise 0.

    Note: the ideal ranking is derived from the retrieved neighbours only, so
    the score reflects how well relevant hits are ordered inside the top-k,
    not how many relevant images exist in the whole index.

    Args:
        img_path: Path to the query image; the name of its parent folder is
            taken as the ground-truth breed.
        k: Number of neighbours to retrieve. Defaults to 10.

    Returns:
        The NDCG score as a float.
    """
    # Embed the test image.
    img = image.load_img(img_path, target_size=(224, 224))
    x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))
    query_vec = model.predict(x, verbose=0).astype('float32')

    # Retrieve the k nearest neighbours.
    D, I = index.search(query_vec, k)
    # FAISS fills missing results with index -1 when fewer than k vectors are
    # stored; drop them so image_files[-1] is never picked up by accident.
    validos = I[0] >= 0
    similares = [image_files[i] for i in I[0][validos]]
    distancias = D[0][validos]

    # Relevance: 1 when the neighbour's breed matches the query's, else 0.
    raza_gt = extraer_raza(img_path)
    relevancias = [1 if extraer_raza(p) == raza_gt else 0 for p in similares]

    # Recent scikit-learn versions reject a single-document ranking; with
    # fewer than two results the score is simply the lone relevance (or 0).
    if len(relevancias) < 2:
        return float(sum(relevancias))

    # Only the ordering of the scores matters, so the negated distance ranks
    # the neighbours exactly like 1 - D / max(D) would, without dividing by
    # zero when every distance is 0.
    return ndcg_score([relevancias], [-distancias])

# Mean NDCG@10 over every image in the test set.
ndcgs = list(map(ndcg_para_imagen, test_files))
print(f"NDCG@10 promedio en test: {np.mean(ndcgs):.4f}")

Created dataset file at: .gradio/flagged/dataset1.csv
NDCG@10 promedio en test: 0.9481
