In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive. The later
# cells read and write their artifacts under /content/drive/MyDrive/saved,
# so this cell has to run first.
from google.colab import drive
drive.mount('/content/drive')


In [None]:
import numpy as np
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader, Subset
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
import joblib
import os
from PIL import Image
from loguru import logger

# Set device: run on the GPU when one is available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define paths: every artifact produced by this cell is written to this folder.
save_path = "/content/drive/MyDrive/saved"
os.makedirs(save_path, exist_ok=True)

# Load CIFAR-10 (training split; downloaded into ./data when not already there).
dataset = CIFAR10(root='./data', train=True, download=True)

# Read each (image, label) pair exactly once. Indexing the dataset decodes the
# image, so pulling images and labels in two separate passes would decode
# everything twice. len(dataset) replaces a hard-coded sample count.
images, labels = [], []
for sample_idx in range(len(dataset)):
    sample_img, sample_lbl = dataset[sample_idx]
    images.append(sample_img)
    labels.append(sample_lbl)
labels = np.array(labels)

# Positional 70/30 split: the first 70% of samples are used for training and
# the remainder for testing. No shuffling is applied here.
split = int(0.7 * len(images))
x_train, y_train = images[:split], labels[:split]
x_test, y_test = images[split:], labels[split:]

# Preprocessing: resize to 224x224, convert to a tensor and normalize each
# channel with the mean/std below before the image is fed to the network.
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

# Feature extraction
def extract_features(image_list, batch_size=64):
    """Embed images with a pretrained ResNet-50 whose final layer is removed.

    Each image is passed through the module-level ``preprocess`` pipeline and
    the network is run on the module-level ``device`` in mini-batches, which
    avoids one forward pass (and one host/device transfer) per image.

    Args:
        image_list: Iterable of images accepted by ``preprocess``.
        batch_size: Number of images per forward pass. Must be >= 1.

    Returns:
        A 2-D NumPy array with one row of features per input image, in input
        order. For empty input the array has shape ``(0, feature_dim)``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    backbone = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
    # Width of the input to the classification layer that is dropped below;
    # used to give an empty result the same number of columns as a real one.
    feature_dim = backbone.fc.in_features
    # Keep every child module except the last one (the classification layer).
    model = torch.nn.Sequential(*list(backbone.children())[:-1])
    model.eval().to(device)

    # Materialize so that plain iterables (not only lists) can be sliced.
    image_list = list(image_list)

    features = []
    with torch.no_grad():
        for start in range(0, len(image_list), batch_size):
            chunk = image_list[start:start + batch_size]
            batch = torch.stack([preprocess(img) for img in chunk]).to(device)
            # flatten(1) keeps the batch axis even when the chunk holds a
            # single image, which a bare squeeze() would collapse.
            feats = model(batch).flatten(1).cpu().numpy()
            features.append(feats)

    if not features:
        return np.empty((0, feature_dim), dtype=np.float32)
    return np.concatenate(features, axis=0)

# Embed both splits with the pretrained backbone (the expensive step).
train_features = extract_features(x_train)
test_features = extract_features(x_test)

# Persist the features and labels before training, so that a failure in the
# classifier step does not throw away the expensive extraction above.
np.save(f'{save_path}/train_features.npy', train_features)
np.save(f'{save_path}/train_labels.npy', y_train)
np.save(f'{save_path}/test_features.npy', test_features)
np.save(f'{save_path}/test_labels.npy', y_test)

# Raw images are stored as arrays as well, so the retrieval cell can display
# them without re-downloading the dataset.
train_images = np.stack([np.array(img) for img in x_train])
test_images = np.stack([np.array(img) for img in x_test])
np.save(f'{save_path}/train_images.npy', train_images)
np.save(f'{save_path}/test_images.npy', test_images)

# Scaling: statistics are fitted on the training features only and then
# applied unchanged to the test features.
scaler = StandardScaler()
train_scaled = scaler.fit_transform(train_features)
test_scaled = scaler.transform(test_features)

# Classifier. random_state pins weight initialisation and batch shuffling so
# repeated runs give the same model. max_iter=5 stops training after five
# iterations, so the solver will usually stop before converging.
clf = MLPClassifier(hidden_layer_sizes=(512, 256), max_iter=5, verbose=True,
                    random_state=42)
clf.fit(train_scaled, y_train)

# Save the fitted model and scaler.
joblib.dump(clf, f'{save_path}/mlp_cifar10.pkl')
joblib.dump(scaler, f'{save_path}/scaler.pkl')


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
import joblib
import random
from collections import Counter

# Human-readable name for each integer class label (position == label).
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

# Folder on Drive holding the artifacts written by the training cell.
base_path = "/content/drive/MyDrive/saved"

# Fitted classifier and the scaler that prepares its inputs.
clf, scaler = (
    joblib.load(f"{base_path}/mlp_cifar10.pkl"),
    joblib.load(f"{base_path}/scaler.pkl"),
)

# Training split: features, labels and the raw images.
train_features, train_labels, train_images = (
    np.load(f"{base_path}/train_features.npy"),
    np.load(f"{base_path}/train_labels.npy"),
    np.load(f"{base_path}/train_images.npy"),
)

# Test split, stored in the same layout.
test_features, test_labels, test_images = (
    np.load(f"{base_path}/test_features.npy"),
    np.load(f"{base_path}/test_labels.npy"),
    np.load(f"{base_path}/test_images.npy"),
)

# Number of nearest neighbours retrieved per query.
top_k = 10

# Count of queries whose retrieved majority label equals the true label.
correct = 0

# Evaluate on up to 1000 test queries; the cap keeps sampling from raising
# ValueError when fewer test rows are available.
num_queries = min(1000, len(test_features))

# A dedicated seeded generator makes the query set identical on every run
# without touching the global random state.
samples = random.Random(42).sample(range(len(test_features)), num_queries)

# Training-set indices for each predicted class, filled on first use so the
# label scan is not repeated for every query.
class_index_cache = {}

for idx in samples:
    query_img = test_images[idx]
    query_feat = test_features[idx]
    query_label = test_labels[idx]

    # The classifier was trained on scaled features, so scale before predicting.
    query_feat_scaled = scaler.transform([query_feat])
    pred_class = clf.predict(query_feat_scaled)[0]

    class_indices = class_index_cache.get(pred_class)
    if class_indices is None:
        class_indices = np.where(train_labels == pred_class)[0]
        class_index_cache[pred_class] = class_indices

    # Candidates are restricted to training samples of the predicted class.
    class_feats = train_features[class_indices]
    class_labels = train_labels[class_indices]

    # Rank the candidates by cosine similarity on the unscaled features and
    # keep the top_k most similar, best first.
    sims = cosine_similarity([query_feat], class_feats)[0]
    top_indices = np.argsort(sims)[-top_k:][::-1]
    top_labels = class_labels[top_indices]

    # Majority vote over the retrieved labels. Every candidate carries
    # pred_class, so this vote always equals the classifier's prediction.
    predicted_label = Counter(top_labels).most_common(1)[0][0]

    if predicted_label == query_label:
        correct += 1

    # NOTE(review): this gates plotting on the test-set index, not on how many
    # figures were already drawn, so a random subset of queries is shown.
    # Confirm whether a cap on the number of figures was intended.
    if idx < 1000:
        # Map positions within the class subset back to the full training
        # arrays, so only the displayed images are read.
        retrieved_indices = class_indices[top_indices]

        fig, axes = plt.subplots(1, top_k + 1, figsize=(15, 3), squeeze=False)
        axes = axes[0]
        for ax in axes:
            ax.axis('off')

        axes[0].imshow(query_img)
        axes[0].set_title(f"Query\nActual: {class_names[query_label]}")

        for j, train_idx in enumerate(retrieved_indices):
            lbl = train_labels[train_idx]
            axes[j + 1].imshow(train_images[train_idx])
            axes[j + 1].set_title(f"Top-{j+1}\nLabel: {class_names[lbl]}")

        fig.suptitle(f"Predicted Class: {class_names[pred_class]} | Retrieved Label: {class_names[predicted_label]}", fontsize=14)
        fig.tight_layout()
        plt.show()
        # Release the figure explicitly; many are created inside this loop.
        plt.close(fig)


# Both figures below are the same quantity: the fraction of evaluated queries
# whose retrieved majority label matched the true label. The denominator is
# the number of queries actually evaluated rather than a hard-coded 1000.
num_evaluated = len(samples)
precision = correct / num_evaluated if num_evaluated else 0.0
recall = correct / num_evaluated if num_evaluated else 0.0

# Guard the harmonic mean: with zero correct queries the denominator is 0.
metric_sum = precision + recall
f1_score = 2 * (precision * recall) / metric_sum if metric_sum else 0.0

print(f"Precision@{top_k} over {num_evaluated} samples: {precision:.4f}")
print(f" Recall@{top_k} over {num_evaluated} samples: {recall:.4f}")
