In [2]:
import faiss
import numpy as np
import pickle
import os

# Project whose FAISS artifacts are loaded, and the directory that holds them.
project = "openstack"
output_path = f"../Data_Extraction/git_base/datasets/{project}/"

# Read the two FAISS indexes from disk (message index first, then file index).
msg_index, file_index = (
    faiss.read_index(os.path.join(output_path, index_name))
    for index_name in ("msg_faiss.index", "file_faiss.index")
)

# Load the pickled mapping bundle that lives in the same directory.
# NOTE(review): pickle.load can execute arbitrary code -- only open trusted files.
with open(os.path.join(output_path, "faiss_mappings.pkl"), "rb") as mapping_file:
    mappings = pickle.load(mapping_file)

# Unpack the four bundle entries into individual notebook-level names.
msg_map, file_map, msg_keys, file_keys = (
    mappings[entry] for entry in ("msg_map", "file_map", "msg_keys", "file_keys")
)

# Reconstruct every stored vector of each index (rows 0 .. ntotal-1) into a
# float32 matrix with one row per vector. Only the first five rows are shown
# below; the full matrices are kept under the same names as before.
msg_vectors = msg_index.reconstruct_n(0, msg_index.ntotal)
file_vectors = file_index.reconstruct_n(0, file_index.ntotal)

# Preview the first five rows of each matrix.
print("First 5 message vectors:\n", msg_vectors[:5])
print("First 5 file vectors:\n", file_vectors[:5])

# Preview the first five entries of each key list.
# NOTE(review): presumably key i belongs to vector row i -- confirm against the
# code that built the indexes.
print("First 5 message keys:", msg_keys[:5])
print("First 5 file keys:", file_keys[:5])

First 5 message vectors:
 [[-0.1878775   0.09335123 -0.14806958 ... -0.07652314  0.03929425
  -0.02381434]
 [-0.05505275  0.01169699 -0.1782115  ...  0.15295771  0.07378335
   0.10323487]
 [-0.13016282  0.19422832 -0.12284414 ... -0.06242881  0.20154738
  -0.08208967]
 [-0.17750208 -0.00132745 -0.06495102 ... -0.03381703  0.09146328
   0.0186007 ]
 [-0.1751883  -0.03381472 -0.18982865 ...  0.10819813  0.13160154
   0.08182798]]
First 5 file vectors:
 [[ 0.05229216  0.00773004 -0.16374594 ... -0.08864175  0.02697686
   0.02410326]
 [-0.02425106 -0.01306191  0.04515519 ...  0.01513861  0.08771811
  -0.05708595]
 [-0.14431135  0.00811005 -0.00506699 ... -0.03577298  0.11180034
   0.00344987]
 [-0.00932041 -0.09639138 -0.06233175 ...  0.07630786  0.13215972
   0.02574849]
 [-0.19238627  0.04958484  0.1134759  ...  0.06895549 -0.00513332
  -0.00970071]]
First 5 message keys: ['b110264f5d0faf1f1769b1800eb11b058f2db23c', '785435a18f65fe85561d3ad39a2dfaf19b44bb41', '346671c9b46d6f53847a8780c79