# Getting Started 

In [1]:
import numpy as np
# Sizes of the synthetic data set used throughout the notebook.
d = 64          # dimensionality of every vector
nb = 100000     # number of database vectors
nq = 10000      # number of query vectors


def _make_vectors(count, dim):
    """Return `count` random float32 vectors of length `dim`.

    Values are drawn from the global NumPy RNG (uniform in [0, 1)), then the
    first coordinate of row i is shifted by i / 1000 so that rows with nearby
    positions also end up nearby in that coordinate.
    """
    vectors = np.random.random((count, dim)).astype('float32')
    vectors[:, 0] += np.arange(count) / 1000.
    return vectors


np.random.seed(1234)         # fixed seed: reruns generate the same data
xb = _make_vectors(nb, d)    # database vectors (must be drawn before the queries)
xq = _make_vectors(nq, d)    # query vectors

In [2]:
import faiss                   # make faiss available
index = faiss.IndexFlatL2(d)   # build a flat L2-distance index over d-dimensional vectors
print(index.is_trained)        # prints True here: the index is usable without a train() call
index.add(xb)                  # add the nb database vectors to the index
print(index.ntotal)            # number of vectors now stored; prints 100000 (== nb)

True
100000


In [3]:
# Search the flat index for the k nearest neighbors of each query.

k = 4                          # we want to see 4 nearest neighbors
D, I = index.search(xb[:5], k) # sanity check: query with the first 5 database vectors themselves
print(I)                       # neighbor ids; the first column is each query's own row (0..4)
print(D)                       # matching distances; the first column is 0 (distance to itself)
D, I = index.search(xq, k)     # actual search over all queries (overwrites D and I)
print(I[:5])                   # neighbors of the 5 first queries
print(I[-5:])                  # neighbors of the 5 last queries

[[  0 393 363  78]
 [  1 555 277 364]
 [  2 304 101  13]
 [  3 173  18 182]
 [  4 288 370 531]]
[[0.        7.1751733 7.2076297 7.251163 ]
 [0.        6.3235645 6.684581  6.7999454]
 [0.        5.7964087 6.3917365 7.2815123]
 [0.        7.2779055 7.527987  7.6628466]
 [0.        6.7638035 7.2951207 7.368815 ]]
[[ 381  207  210  477]
 [ 526  911  142   72]
 [ 838  527 1290  425]
 [ 196  184  164  359]
 [ 526  377  120  425]]
[[ 9900 10500  9309  9831]
 [11055 10895 10812 11321]
 [11353 11103 10164  9787]
 [10571 10664 10632  9638]
 [ 9628  9554 10036  9582]]


# Random 10 Vectors

In [1]:
import os
import numpy as np
import faiss

# Initialize the FAISS index.
# Create an index based on L2 (Euclidean) distance.
index = faiss.IndexFlatL2(5)  # vector dimension is 5


In [2]:
# Generate 10 random 5-dimensional example vectors and a string ID for each.
example_vectors = np.random.randn(10, 5).astype('float32')  # FAISS requires float32
example_ids = [f"vec_{i}" for i in range(len(example_vectors))]

# Insert the vectors.
# The index is emptied first so that re-running this cell does not append a
# second copy of the data: the ID mapping below only covers positions
# 0..len(example_vectors)-1, so extra rows would have no ID to look up.
index.reset()
index.add(example_vectors)
# The message reads: "<ntotal> vectors were added to the FAISS index."
print(f"FAISS 인덱스에 {index.ntotal}개의 벡터가 추가되었습니다.")

# Map each row position in the index to its string ID.
# NOTE: despite the name, the keys are index positions and the values are IDs;
# the name is kept because later cells refer to it.
id_to_index = dict(enumerate(example_ids))

# Save the index to disk.
faiss.write_index(index, "faiss_index.bin")

FAISS 인덱스에 10개의 벡터가 추가되었습니다.


In [3]:
# Load the saved index back from disk.
index = faiss.read_index("faiss_index.bin")

# Build the query.
test_vec = np.random.randn(1, 5).astype('float32')  # the vector to search for
k = 5  # retrieve the 5 most similar vectors

# Run the search.
distances, indices = index.search(test_vec, k)

# Print the search results.
def show_results(test_vec, distances, indices, id_map=None):
    """Print the ranked neighbors of the first query in a search result.

    Args:
        test_vec: The query vector(s). Not used by this function; the
            parameter is kept so existing calls keep working.
        distances: 2-D sequence of distances as returned by ``index.search``.
            Only row 0 is read.
        indices: 2-D sequence of neighbor positions, parallel to ``distances``.
            Only row 0 is read.
        id_map: Optional mapping from index position to the ID to display.
            When omitted, the notebook-level ``id_to_index`` mapping is used.

    Raises:
        KeyError: If a non-negative position is missing from the mapping.
    """
    mapping = id_to_index if id_map is None else id_map
    for rank, (dist, idx) in enumerate(zip(distances[0], indices[0])):
        position = int(idx)  # plain int, so lookups work for NumPy and Python ints alike
        if position < 0:
            # FAISS fills the label with -1 when fewer than k neighbors were
            # found; there is no vector to report for such a slot.
            continue
        vector_id = mapping[position]
        print(f"Rank {rank + 1}: ID = {vector_id}, Distance = {dist}")

# Print a header, then the ranked neighbors found for test_vec.
print("\nTop 5 Nearest Vectors:")
show_results(test_vec, distances, indices)


Top 5 Nearest Vectors:
Rank 1: ID = vec_7, Distance = 2.33762526512146
Rank 2: ID = vec_6, Distance = 2.458592176437378
Rank 3: ID = vec_8, Distance = 2.7448859214782715
Rank 4: ID = vec_9, Distance = 2.774972438812256
Rank 5: ID = vec_0, Distance = 2.900031089782715


# Faster Search

In [4]:
# IVF index: a flat "quantizer" index is wrapped by IndexIVFFlat, which
# (unlike the flat index) has to be trained before vectors can be added.
nlist, k = 100, 4                  # nlist is passed to the IVF index; k neighbors per query
quantizer = faiss.IndexFlatL2(d)   # the other index, used by the IVF index
index = faiss.IndexIVFFlat(quantizer, d, nlist)

assert not index.is_trained        # a fresh IVF index starts untrained
index.train(xb)
assert index.is_trained

index.add(xb)                      # add may be a bit slower as well

# Search twice: first with nprobe left at its current (default) value, then
# with nprobe raised to 10, printing the neighbors of the 5 last queries each
# time so the two settings can be compared.
for probes in (index.nprobe, 10):
    index.nprobe = probes
    D, I = index.search(xq, k)
    print(I[-5:])

[[ 9900  9309  9810 10048]
 [11055 10895 10812 11321]
 [11353 10164  9787 10719]
 [10571 10664 10632 10203]
 [ 9628  9554  9582 10304]]
[[ 9900 10500  9309  9831]
 [11055 10895 10812 11321]
 [11353 11103 10164  9787]
 [10571 10664 10632  9638]
 [ 9628  9554 10036  9582]]


# Lower Memory

In [5]:
# IVF index with product quantization, for a smaller memory footprint.
k = 4
nlist = 100
m = 8        # number of subquantizers
nbits = 8    # each sub-vector is encoded as 8 bits

quantizer = faiss.IndexFlatL2(d)   # this remains the same
index = faiss.IndexIVFPQ(quantizer, d, nlist, m, nbits)
index.train(xb)
index.add(xb)

# Sanity check: query with the first 5 database vectors and print the
# neighbor ids followed by the distances.
D, I = index.search(xb[:5], k)
for result in (I, D):
    print(result)

index.nprobe = 10                  # make comparable with experiment above
D, I = index.search(xq, k)         # search with the real queries
print(I[-5:])                      # neighbors of the 5 last queries

[[   0   78  424  753]
 [   1 1063  555  617]
 [   2  304  134  179]
 [   3   64  527 1057]
 [   4  288  531  827]]
[[1.5518408 6.273082  6.416301  6.5346537]
 [1.4250197 5.6698246 6.129957  6.5875382]
 [1.7222786 5.677806  6.116646  6.123891 ]
 [1.8276348 6.7033954 6.9841585 7.0637865]
 [1.5124168 5.6538734 6.3072824 6.457743 ]]
[[ 9853  9966 10914 10437]
 [10765 10403  9014 10240]
 [11291 10600 11383 10494]
 [10005 10664 10122 10125]
 [ 9905  9229 10304 10370]]
