In [7]:
import faiss
import numpy as np

In [8]:
index = faiss.read_index('faissImageVector1M.index')

In [9]:
vectors = []
index.ntotal

998934

In [10]:
for i in range(index.ntotal):
    # print(type((index.reconstruct(i))))
    # print(len((index.reconstruct(i))))
    vectors.append(index.reconstruct(i))

In [11]:
len(vectors)
vectors = np.array(vectors)
print(vectors.shape)
print(type(vectors[0][0]))

(998934, 768)
<class 'numpy.float32'>


In [6]:
import time

In [7]:
start_time = time.time()
index_flatl2 = faiss.IndexFlatL2(vectors.shape[1])
index_flatl2.add(vectors)
print("Time taken to create IndexFlatL2: %.2f seconds" % (time.time() - start_time))

Time taken to create IndexFlatL2: 1.46 seconds


In [8]:
start_time = time.time()
# Now we want to convert this to an IndexIVFFlat index
# First, we need to train a quantizer
nlist = 1000 # number of Voronoi cells (i.e., clusters)
kmeans = faiss.Kmeans(vectors.shape[1], nlist)
kmeans.train(vectors)

# Use the centroids of the clusters as the quantizer
quantizer = faiss.IndexFlatL2(vectors.shape[1])
quantizer.add(kmeans.centroids)

# Create an IndexIVFFlat index with the trained quantizer
index_ivfflat = faiss.IndexIVFFlat(quantizer, vectors.shape[1], nlist)

# Train the index
# assert not index_ivfflat.is_trained
index_ivfflat.train(vectors)
# assert index_ivfflat.is_trained

# Add vectors to the index
index_ivfflat.add(vectors)

# Print the time taken to create the IndexIVFFlat index
print("Time taken to create IndexIVFFlat: %.2f seconds" % (time.time() - start_time))

Time taken to create IndexIVFFlat: 19.24 seconds


In [9]:
# Assuming 'index' is your IndexIVFFlat index
# faiss.write_index(index_ivfflat, 'Image1000IVF.index')

In [4]:
import torch
from transformers import AutoModel, AutoProcessor
from torchvision import transforms
from PIL import Image
import numpy as np
Imagemodel = AutoModel.from_pretrained("google/siglip-base-patch16-224")
processor = AutoProcessor.from_pretrained("google/siglip-base-patch16-224", low_cpu_mem_usage=True, do_rescale=False)
device_type = "cpu"
device = torch.device(device_type)
Imagemodel.to(device)
path = "static/images/3637013_c675de7705.jpg"
newImage = Image.open(path)
transform = transforms.Compose([
            transforms.Resize((300, 300)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
inputs = transform(newImage)
inputs = (inputs - inputs.min()) / (inputs.max() - inputs.min())
inputs = processor(images=inputs, return_tensors="pt").to(device)
features = Imagemodel.get_image_features(**inputs)
features = np.array(features.tolist())

In [6]:
features
del Imagemodel
del processor

In [18]:
# Assuming 'vectors' is your list of vectors and 'query' is your query vector
# vectors = np.array(vectors)
# query = np.array(query)

# Calculate the start time
import time
start_time = time.time()

# Calculate the distance between the query and each vector
distances = np.linalg.norm(vectors - features, axis=1)

# Get the indices of the K smallest distances
K = 100000  # Change this to the number of similar images you want to retrieve
indices = np.argpartition(distances, K)[:K]

# Calculate the end time
end_time = time.time()

# Print the time taken
print("Time taken: %.4f seconds" % (end_time - start_time))

Time taken: 48.6658 seconds
