In [None]:
from time import perf_counter

import glassppy as glass
from ann_dataset import dataset_dict

In [None]:
# Number of nearest neighbours requested per query throughout the notebook.
topk = 10

# Load the 'sift-128-euclidean' entry of the dataset registry and pull out the
# database vectors, the query vectors, and the ground truth for k = topk.
dataset = dataset_dict['sift-128-euclidean']()
X_train = dataset.get_database()
X_test = dataset.get_queries()
Y = dataset.get_groundtruth(topk)

# Unpack the two shapes under distinct names so the query dimension cannot
# silently overwrite the database dimension, then verify that they agree:
# the index below is built with dim=d and later searched with X_test.
n, d = X_train.shape
nq, d_query = X_test.shape
assert d_query == d, f"dimension mismatch: database d = {d}, queries d = {d_query}"

metric = dataset.metric
print(f"n = {n}, d = {d}, nq = {nq}, metric = {metric}")
# NOTE(review): the factor 4 assumes 4-byte (e.g. float32) elements — confirm dtype.
print(f"dataset size = {n * d * 4 / 1024 / 1024:.2f}MB")


In [None]:
# Describe an HNSW index over d-dimensional vectors using the dataset's metric,
# then build it from the database vectors; the build result `g` is what the
# searcher cell consumes.
# NOTE(review): R and L are glass build parameters whose exact meaning is not
# visible in this notebook — confirm against the glass documentation.
index = glass.Index(
    index_type="HNSW",
    dim=d,
    metric=metric,
    R=32,
    L=100,
)
g = index.build(X_train)


In [None]:
# Create a searcher from the built graph and the database vectors, then set
# its `ef` value to 36.
# NOTE(review): the semantics of `level=2` and of `ef` are not visible in this
# notebook — confirm against the glass documentation.
s = glass.Searcher(
    graph=g,
    data=X_train,
    metric=metric,
    level=2,
)
s.set_ef(36)


In [None]:
from os import cpu_count
from time import time
# os.cpu_count() returns None when the CPU count cannot be determined; fall
# back to a single thread so batch_search always receives an integer.
num_threads = cpu_count() or 1

# Run every query once, reshape the result to one row of `topk` entries per
# query, and let the dataset object score it for k = topk.
pred = s.batch_search(query=X_test, k=topk, num_threads=num_threads).reshape(-1, topk)
recall = dataset.evaluate(pred, topk)
print(f"Recall = {recall * 100:.2f}%")


In [None]:
# Throughput benchmark: one untimed warmup pass, then ten timed passes over
# the full query set, printing queries-per-second for each pass.
s.batch_search(query=X_test, k=topk, num_threads=num_threads)  # warmup
for _ in range(10):  # `_` instead of `iter`, which would shadow the builtin
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short intervals; time() can be coarse or jump if the system
    # clock is adjusted, which skews QPS or yields a zero interval.
    t1 = perf_counter()
    pred = s.batch_search(query=X_test, k=topk, num_threads=num_threads)
    t2 = perf_counter()
    print(f"QPS = {nq / (t2 - t1):.2f}")


In [None]:
# Invoke the searcher's optimize() step between the two QPS benchmark cells.
# NOTE(review): what optimize() actually tunes is not visible in this
# notebook — confirm against the glass documentation.
s.optimize()


In [None]:
# Throughput benchmark, repeated after s.optimize(): one untimed warmup pass,
# then ten timed passes over the full query set.
s.batch_search(query=X_test, k=topk, num_threads=num_threads)  # warmup
for _ in range(10):  # `_` instead of `iter`, which would shadow the builtin
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short intervals; time() can be coarse or jump if the system
    # clock is adjusted.
    t1 = perf_counter()
    pred = s.batch_search(query=X_test, k=topk, num_threads=num_threads)
    t2 = perf_counter()
    # Reshape outside the timed region so only the search itself is measured,
    # matching the pre-optimize benchmark; `pred` still ends up with one row
    # of `topk` entries per query.
    pred = pred.reshape(-1, topk)
    # Same two-decimal formatting as the pre-optimize benchmark so the two
    # sets of numbers can be compared directly.
    print(f"QPS = {nq / (t2 - t1):.2f}")
