In [None]:
import os

import matplotlib.pyplot as plt

# Output directory for the figures saved by the plotting cells below.
# The path is relative, so it resolves against the kernel's working directory.
PLOT_DIR = "../experiments/plots"

In [None]:
# Search-time measurements per method, one value per index size.
# Every series is aligned position-by-position with "num_entities".
search_data = dict(
    # Number of indexed entities for each measurement point.
    num_entities=[
        10_000, 25_000, 50_000, 75_000, 100_000, 150_000, 200_000,
        250_000, 375_000, 500_000, 625_000, 750_000, 875_000, 1_000_000,
    ],
    # Plotted under the label 'Sequential Search'.
    Exact=[
        2.798, 6.482, 13.683, 20.145, 25.788, 38.183, 52.098,
        64.081, 97.705, 129.498, 169.355, 202.125, 239.98, 281.573,
    ],
    MultiVecHNSW=[
        0.572, 0.625, 0.759, 0.750, 0.774, 0.815, 0.824,
        0.962, 1.015, 1.143, 1.148, 1.158, 1.147, 1.187,
    ],
    HNSWRerank=[
        1.561, 2.186, 2.664, 3.267, 3.513, 4.516, 4.994,
        5.294, 5.960, 7.061, 7.508, 8.540, 9.272, 8.939,
    ],
)

In [None]:
# Plot search time against index size for all three methods on log-log axes,
# then save the figure under PLOT_DIR and display it.
# Uses an explicit Figure/Axes pair instead of the implicit pyplot "current
# figure" so the cell does not depend on whatever figure happens to be active.
fig, ax = plt.subplots(figsize=(10, 6))

ax.plot(search_data["num_entities"], search_data["Exact"], marker='o', linestyle='-', label='Sequential Search', color='red')
ax.plot(search_data["num_entities"], search_data["HNSWRerank"], marker='^', linestyle='-', label='HNSWRerank', color='blue')
ax.plot(search_data["num_entities"], search_data["MultiVecHNSW"], marker='s', linestyle='-', label='MultiVecHNSW', color='green')

# Both axes are logarithmic.
ax.set_xscale('log')
ax.set_yscale('log')

ax.set_xlabel('Number of Indexed Entities')
ax.set_ylabel('Search Time (ms)')
ax.set_title('Similarity Search Performance at Recall 0.95')
ax.legend()

# info box: experiment parameters, positioned in axes-fraction coordinates
info_text = ("Dimensions: [384, 768]\nMetrics: [cos,cos]\nWeights: [0.5,0.5]\nRecall: 0.95\nk=50")
ax.annotate(info_text, xy=(0.05, 0.75), xycoords='axes fraction',
            bbox=dict(boxstyle='round,pad=0.3', edgecolor='black', facecolor='lightgray'))

# Draw grid lines for both major and minor ticks.
ax.grid(True, which="both", linestyle="--", linewidth=0.5)

# save plot
# savefig does not create missing directories, so make sure the target exists;
# otherwise a fresh checkout fails with FileNotFoundError.
os.makedirs(PLOT_DIR, exist_ok=True)
fig.savefig(f"{PLOT_DIR}/similarity_search_performance.png", dpi=300, bbox_inches='tight')

plt.show()

In [None]:
# Index construction-time measurements per method, one value per index size.
# Every series is aligned position-by-position with "num_entities".
construction_data = dict(
    # Number of indexed entities for each measurement point.
    num_entities=[
        10_000, 25_000, 50_000, 75_000, 100_000, 150_000, 200_000,
        250_000, 375_000, 500_000, 625_000, 750_000, 875_000, 1_000_000,
    ],
    MultiVecHNSW=[
        13.57, 41.440, 96.85, 144.69, 200.67, 314.693376, 430.1843251,
        550.52, 870.79, 1170.12, 1447.72, 1783.51, 2128.36, 2575.93,
    ],
    HNSWRerank=[
        15.96, 53.86, 125.21, 202.61, 276.81, 438.93, 607.42,
        777.03, 1222.65, 1683.63, 2166.75, 2708.79, 3164.55, 3838.43,
    ],
)

In [None]:
# Plot index construction time against index size for the two HNSW-based
# methods on log-log axes, then save the figure under PLOT_DIR and display it.
# Uses an explicit Figure/Axes pair instead of the implicit pyplot "current
# figure" so the cell does not depend on whatever figure happens to be active.
fig, ax = plt.subplots(figsize=(10, 6))

ax.plot(construction_data["num_entities"], construction_data["HNSWRerank"], marker='^', linestyle='-', label='HNSWRerank', color='blue')
ax.plot(construction_data["num_entities"], construction_data["MultiVecHNSW"], marker='s', linestyle='-', label='MultiVecHNSW', color='green')

# Both axes are logarithmic.
ax.set_xscale('log')
ax.set_yscale('log')

ax.set_xlabel('Number of Indexed Entities')
ax.set_ylabel('Construction time (s)')
ax.set_title('Index Construction Time for Different Dataset Sizes')
ax.legend()

# info box: experiment parameters, positioned in axes-fraction coordinates
info_text = ("Dimensions: [384, 768]\nMetrics: [cos,cos]\nWeights: [0.5,0.5]")
ax.annotate(info_text, xy=(0.05, 0.75), xycoords='axes fraction',
            bbox=dict(boxstyle='round,pad=0.3', edgecolor='black', facecolor='lightgray'))

# Draw grid lines for both major and minor ticks.
ax.grid(True, which="both", linestyle="--", linewidth=0.5)

# save plot
# savefig does not create missing directories, so make sure the target exists;
# otherwise a fresh checkout fails with FileNotFoundError.
os.makedirs(PLOT_DIR, exist_ok=True)
fig.savefig(f"{PLOT_DIR}/index_construction.png", dpi=300, bbox_inches='tight')

plt.show()