In [1]:
# HNSW方法对比测试框架
# 基于FAISS baseline的三种HNSW方法对比

import json
import numpy as np
import time
import matplotlib.pyplot as plt
from io_utils import read_fbin, read_ibin
import hnsw_cosine_status as hnsw_status
import hnsw_cosine_status_high as hnsw_high
import hnsw_cosine_norm as hnsw_norm
import simple_sim_hash
import importlib

print("=== HNSW方法对比测试框架 ===")
print("三种方法:")
print("1. Status (RoarGraph方法)")
print("2. High (高层新增边方法)")
print("3. Norm (高维映射方法)")

# 重新加载模块
importlib.reload(hnsw_status)
importlib.reload(hnsw_high)
importlib.reload(hnsw_norm)


=== HNSW方法对比测试框架 ===
三种方法:
1. Status (RoarGraph方法)
2. High (高层新增边方法)
3. Norm (高维映射方法)


<module 'hnsw_cosine_norm' from '/root/code/vectordbindexing/hnsw_cosine_norm.py'>

In [35]:
# 1. 加载数据和FAISS baseline结果
print("\n=== 1. 加载数据和FAISS baseline结果 ===")

# 数据路径
file_path = "/root/code/vectordbindexing/Text2Image/base.1M.fbin"
query_path = "/root/code/vectordbindexing/Text2Image/query.public.100K.fbin"
ground_truth_path = "/root/code/vectordbindexing/Text2Image/groundtruth.public.100K.ibin"

# 读取数据
print("读取数据...")
data_vector = read_fbin(file_path)
query_vector = read_fbin(query_path)
print(f"数据向量: {data_vector.shape}")
print(f"查询向量: {query_vector.shape}")

# 使用前500K数据进行测试
train_data_vector = data_vector[:500000]
print(f"训练数据: {train_data_vector.shape}")

# 加载FAISS baseline结果
print("加载FAISS baseline结果...")
try:
    with open('/root/code/vectordbindexing/faiss_top100_results.json', 'r') as f:
        faiss_top100_results = json.load(f)
    with open('/root/code/vectordbindexing/faiss_effort_percentiles.json', 'r') as f:
        effort_percentiles = json.load(f)
    print(f"✅ 成功加载FAISS结果:")
    print(f"   - Top100结果: {len(faiss_top100_results)} 个查询")
    print(f"   - Effort分位数: {len(effort_percentiles)} 个分位数")
except FileNotFoundError:
    print("❌ 未找到FAISS baseline结果文件")
    print("请先运行 hnsw_baseline_analysis.ipynb 生成baseline结果")
    raise



=== 1. 加载数据和FAISS baseline结果 ===
读取数据...


数据向量: (1000000, 200)
查询向量: (100000, 200)
训练数据: (500000, 200)
加载FAISS baseline结果...
✅ 成功加载FAISS结果:
   - Top100结果: 100000 个查询
   - Effort分位数: 7 个分位数


In [36]:
import torch
ground_truth = read_ibin(ground_truth_path)
print(f"Ground truth形状: {ground_truth.shape}")

def clean_ground_truth(ground_truth_vector):
    new_ground_truth = []
    for j in ground_truth_vector:
        if j >= len(train_data_vector):
            continue
        new_ground_truth.append(j)
    return torch.tensor(new_ground_truth)

ground_truth_0 = clean_ground_truth(ground_truth[0])
print(f"Ground truth形状: {ground_truth_0.shape}")
print(f"Ground truth[0]: {ground_truth_0}")


Ground truth形状: (100000, 100)
Ground truth形状: torch.Size([0])
Ground truth[0]: tensor([])


In [37]:
# 2. 数据分割和Ground Truth准备
print("\n=== 2. 数据分割和Ground Truth准备 ===")

# 使用前10K个查询进行测试
test_query_count = 10000
test_queries = query_vector[:test_query_count]

# 分割训练集和测试集 (9/10用于训练，1/10用于测试)
train_size = int(test_query_count * 0.9)
train_queries = test_queries[:train_size]
test_queries_final = test_queries[train_size:]

print(f"训练查询数量: {len(train_queries)}")
print(f"测试查询数量: {len(test_queries_final)}")

# 准备Ground Truth
print("准备Ground Truth...")
train_ground_truth = {}
test_ground_truth = {}

for i in range(len(train_queries)):
    if str(i) in faiss_top100_results:
        train_ground_truth[i] = faiss_top100_results[str(i)]

for i in range(len(test_queries_final)):
    global_idx = train_size + i
    if str(global_idx) in faiss_top100_results:
        test_ground_truth[i] = faiss_top100_results[str(global_idx)]
print(f"测试集Ground Truth: {len(test_ground_truth)} 个, test_ground_truth[0]: {test_ground_truth[0]}")

print(f"训练集Ground Truth: {len(train_ground_truth)} 个")
print(f"测试集Ground Truth: {len(test_ground_truth)} 个")

def calculate_recall_at_k(predicted_ids, ground_truth_ids, k):
    """计算recall@k"""
    if len(ground_truth_ids) == 0:
        return 0.0
    
    top_k_pred = set(predicted_ids[:k])
    valid_gt = set(ground_truth_ids)
    
    intersection = top_k_pred.intersection(valid_gt)
    recall = len(intersection) / len(valid_gt)
    return recall



=== 2. 数据分割和Ground Truth准备 ===
训练查询数量: 9000
测试查询数量: 1000
准备Ground Truth...
测试集Ground Truth: 1000 个, test_ground_truth[0]: [450938, 433164, 273168, 197768, 446539, 114597, 166119, 351005, 297013, 387034, 118529, 270950, 443415, 475354, 119404, 481549, 348835, 351111, 380869, 354360, 46775, 104998, 57946, 222629, 46956, 26165, 327783, 142800, 498687, 454881, 417378, 15560, 396110, 241241, 392323, 365809, 439516, 141796, 482766, 74151, 478561, 161810, 115050, 30807, 65268, 26159, 76581, 313282, 99660, 496140, 91828, 270484, 307967, 339310, 72767, 76276, 35872, 351813, 342630, 6940, 187681, 218782, 453213, 316839, 498876, 305505, 443486, 77713, 355767, 423349, 20107, 387996, 158089, 58428, 96610, 133870, 11408, 364752, 142080, 328994, 366763, 91908, 431275, 241251, 309460, 465622, 71743, 314026, 495976, 312480, 497829, 474173, 4450, 153810, 133791, 441544, 92647, 86201, 322329, 162520]
训练集Ground Truth: 9000 个
测试集Ground Truth: 1000 个


In [38]:
# 3. 创建三种HNSW索引
print("\n=== 3. 创建三种HNSW索引 ===")

# 通用参数
M = 64
ef_construction = 128
ef_search = 200

# 3.1 Status方法 (RoarGraph方法)
print("\n3.1 创建Status方法索引...")
index_status = hnsw_status.HNSWIndex(M=M, ef_construction=ef_construction, ef_search=ef_search, random_seed=1)
simHash_status = simple_sim_hash.SimpleSimHash(dim=200)

print("添加训练数据到Status索引...")
start_time = time.time()
for img_id, vec in enumerate(train_data_vector):
    index_status.add_item_fast10k(vec, id=img_id, lsh=simHash_status, limit=100)
status_build_time = time.time() - start_time
print(f"Status索引构建完成，耗时: {status_build_time:.2f}秒")

# 使用FAISS top100结果构建cross distribution边
print("构建cross distribution边...")
start_time = time.time()
for i, query in enumerate(train_queries[:1000]):  # 使用前1000个查询
    if i % 100 == 0:
        print(f"  处理查询 {i+1}/1000")
    
    if i in train_ground_truth:
        faiss_top100 = train_ground_truth[i]
        # 在HNSW中搜索这些节点
        layer1_nodes = []
        for node_id in faiss_top100[:50]:  # 取前50个
            if node_id in index_status.items and index_status.items[node_id].level >= 1:
                layer1_nodes.append(node_id)
        
        # 在第1层按照RoarGraph逻辑新增边
        if len(layer1_nodes) >= 2:
            stats = index_status.build_cross_distribution_edges(
                query=query,
                top_k=min(10, len(layer1_nodes)),
                max_new_edges_per_node=4
            )

status_edge_time = time.time() - start_time
print(f"Status cross distribution边构建完成，耗时: {status_edge_time:.2f}秒")



=== 3. 创建三种HNSW索引 ===

3.1 创建Status方法索引...
添加训练数据到Status索引...


Status索引构建完成，耗时: 1230.02秒
构建cross distribution边...
  处理查询 1/1000
  处理查询 101/1000
  处理查询 201/1000
  处理查询 301/1000
  处理查询 401/1000
  处理查询 501/1000
  处理查询 601/1000
  处理查询 701/1000
  处理查询 801/1000
  处理查询 901/1000
Status cross distribution边构建完成，耗时: 0.20秒


In [39]:
# 3.2 High方法 (高层新增边方法)
print("\n3.2 创建High方法索引...")
index_high = hnsw_high.HNSWIndex(M=M, ef_construction=ef_construction, ef_search=ef_search, random_seed=1)
simHash_high = simple_sim_hash.SimpleSimHash(dim=200)

print("添加训练数据到High索引...")
start_time = time.time()
for img_id, vec in enumerate(train_data_vector):
    index_high.add_item_fast10k(vec, id=img_id, lsh=simHash_high, limit=100)
high_build_time = time.time() - start_time
print(f"High索引构建完成，耗时: {high_build_time:.2f}秒")

# 使用FAISS top100结果构建高层边
print("构建高层边...")
start_time = time.time()
for i, query in enumerate(train_queries[:1000]):  # 使用前1000个查询
    if i % 100 == 0:
        print(f"  处理查询 {i+1}/1000")
    
    if i in train_ground_truth:
        faiss_top100 = train_ground_truth[i]
        # 在HNSW中搜索这些节点到第1层的映射
        layer1_nodes = []
        for node_id in faiss_top100[:50]:  # 取前50个
            if node_id in index_high.items and index_high.items[node_id].level >= 1:
                layer1_nodes.append(node_id)
        
        # 在第1层按照RoarGraph逻辑新增边
        if len(layer1_nodes) >= 2:
            stats = index_high.build_cross_distribution_edges(
                query=query,
                top_k=min(10, len(layer1_nodes)),
                max_new_edges_per_node=4
            )

high_edge_time = time.time() - start_time
print(f"High高层边构建完成，耗时: {high_edge_time:.2f}秒")



3.2 创建High方法索引...
添加训练数据到High索引...


High索引构建完成，耗时: 1350.18秒
构建高层边...
  处理查询 1/1000
  处理查询 101/1000
  处理查询 201/1000
  处理查询 301/1000
  处理查询 401/1000
  处理查询 501/1000
  处理查询 601/1000
  处理查询 701/1000
  处理查询 801/1000
  处理查询 901/1000
High高层边构建完成，耗时: 3.99秒


In [13]:
# 3.3 Norm方法 (高维映射方法)
print("\n3.3 创建Norm方法索引...")
# 创建数据预处理器
preprocessor = hnsw_norm.DataPreprocessor(
    use_pca=True,
    n_components=200,
    use_global_whitening=True,
    sub_modality_scaling=True
)

print("拟合预处理器...")
start_time = time.time()
preprocessor.fit(train_data_vector, train_data_vector, sample_size=10000)
norm_preprocess_time = time.time() - start_time
print(f"预处理器拟合完成，耗时: {norm_preprocess_time:.2f}秒")

# 创建HNSW索引
index_norm = hnsw_norm.HNSWIndex(
    M=M,
    ef_construction=ef_construction,
    ef_search=ef_search,
    preprocessor=preprocessor
)

print("添加训练数据到Norm索引...")
start_time = time.time()
for i, vec in enumerate(train_data_vector):
    index_norm.add_item(vec, id=i)
norm_build_time = time.time() - start_time
print(f"Norm索引构建完成，耗时: {norm_build_time:.2f}秒")


3.3 创建Norm方法索引...
拟合预处理器...
Fitting preprocessor on 10000 text samples and 10000 image samples
Preprocessor fitting completed
预处理器拟合完成，耗时: 0.08秒
添加训练数据到Norm索引...


Norm索引构建完成，耗时: 3554.47秒


In [14]:
print(f"\n=== 索引构建总结 ===")
print(f"Status方法: 构建{status_build_time:.2f}s + 边构建{status_edge_time:.2f}s = {status_build_time + status_edge_time:.2f}s")
print(f"High方法: 构建{high_build_time:.2f}s + 边构建{high_edge_time:.2f}s = {high_build_time + high_edge_time:.2f}s")
print(f"Norm方法: 预处理{norm_preprocess_time:.2f}s + 构建{norm_build_time:.2f}s = {norm_preprocess_time + norm_build_time:.2f}s")


=== 索引构建总结 ===
Status方法: 构建1192.35s + 边构建0.14s = 1192.49s
High方法: 构建1136.47s + 边构建2.59s = 1139.06s
Norm方法: 预处理0.08s + 构建3554.47s = 3554.55s


In [None]:
# 4. Recall性能测试 (ef_search=32)
print("\n=== 4. Recall性能测试 (ef_search=32) ===")

def test_method_recall(index, queries, ground_truths, method_name, ef_search=32):
    """测试方法的recall性能"""
    recalls = []
    search_times = []
    search_steps_list = []
    
    print(f"测试{method_name}方法 (ef_search={ef_search})...")
    for i, query in enumerate(queries):
        # if i % 100 == 0:
        #     print(f"  处理查询 {i+1}/{len(queries)}")
        
        if i not in ground_truths:
            continue
            
        # 搜索
        start_time = time.time()
        results, search_steps = index.query_with_steps(query, k=100, ef=ef_search)
        search_time = time.time() - start_time
        if i == 0:
            print(f"  搜索结果: {results}")
            print(f"  真实结果: {ground_truths[i]}")
        
        # 计算recall@100
        recall = calculate_recall_at_k(results, ground_truths[i], 100)
        
        recalls.append(recall)
        search_times.append(search_time)
        search_steps_list.append(search_steps)
    
    return {
        'mean_recall': np.mean(recalls),
        'std_recall': np.std(recalls),
        'mean_steps': np.mean(search_steps_list),
        'std_steps': np.std(search_steps_list),
        'mean_time': np.mean(search_times),
        'std_time': np.std(search_times),
        'recalls': recalls,
        'search_steps': search_steps_list,
        'search_times': search_times
    }

# 测试三种方法
print("测试Status方法...")
status_results = test_method_recall(index_status, test_queries_final, test_ground_truth, "Status", ef_search=32)

print("测试High方法...")
high_results = test_method_recall(index_high, test_queries_final, test_ground_truth, "High", ef_search=32)

print("测试Norm方法...")
norm_results = test_method_recall(index_norm, test_queries_final, test_ground_truth, "Norm", ef_search=32)

# 显示结果
print(f"\n=== Recall性能测试结果 ===")
print(f"{'方法':<10} {'Mean Recall':<12} {'Mean Steps':<12} {'Mean Time(ms)':<15}")
print("-" * 55)
print(f"{'Status':<10} {status_results['mean_recall']:<12.3f} {status_results['mean_steps']:<12.1f} {status_results['mean_time']*1000:<15.2f}")
print(f"{'High':<10} {high_results['mean_recall']:<12.3f} {high_results['mean_steps']:<12.1f} {high_results['mean_time']*1000:<15.2f}")
print(f"{'Norm':<10} {norm_results['mean_recall']:<12.3f} {norm_results['mean_steps']:<12.1f} {norm_results['mean_time']*1000:<15.2f}")



=== 4. Recall性能测试 (ef_search=32) ===
测试Status方法...
测试Status方法 (ef_search=32)...
  搜索结果: [484776, 357178, 309679, 493505, 322198, 472983, 478010, 357973, 266942, 483270, 461479, 475748, 412971, 289833, 273606, 291312, 278630, 367879, 357418, 303458, 490132, 460651, 331321, 366391, 326373, 329278, 405708, 342168, 276840, 435866, 363273, 483043, 376521, 283668, 472399, 359568, 476681, 360644, 316593, 273807, 357954, 294527, 391355, 373303, 361546, 490705, 363068, 362327, 367618, 393743, 471253, 273391, 399451, 378149, 359549, 356723, 477948, 304383, 419289, 329015, 460955, 414921, 498991, 316207, 316147, 361020, 305214, 342729, 335296, 354560, 427552, 362675, 360582, 417235, 372257, 343123, 359575, 356893, 330547, 360570, 356601, 492555, 458415, 483098, 484072, 434699, 341061, 320019, 360337, 498503, 316458, 355158, 361940, 496470, 358447, 378796, 279808, 361141, 357918, 335179]
  真实结果: [450938, 433164, 273168, 197768, 446539, 114597, 166119, 351005, 297013, 387034, 118529, 270950, 44341

测试High方法...
测试High方法 (ef_search=32)...
  搜索结果: [484776, 357178, 309679, 493505, 322198, 472983, 478010, 357973, 266942, 483270, 461479, 475748, 412971, 289833, 273606, 291312, 278630, 367879, 357418, 303458, 490132, 460651, 331321, 366391, 326373, 329278, 405708, 342168, 276840, 435866, 363273, 483043, 376521, 283668, 472399, 359568, 476681, 360644, 316593, 273807, 357954, 294527, 391355, 373303, 361546, 490705, 363068, 362327, 367618, 393743, 471253, 273391, 399451, 378149, 359549, 356723, 477948, 304383, 419289, 329015, 460955, 414921, 498991, 316207, 316147, 361020, 305214, 342729, 335296, 354560, 427552, 362675, 360582, 417235, 372257, 343123, 359575, 356893, 330547, 360570, 356601, 492555, 458415, 483098, 484072, 434699, 341061, 320019, 360337, 498503, 316458, 355158, 361940, 496470, 358447, 378796, 279808, 361141, 357918, 335179]
  真实结果: [450938, 433164, 273168, 197768, 446539, 114597, 166119, 351005, 297013, 387034, 118529, 270950, 443415, 475354, 119404, 481549, 348835, 351111,

In [43]:
# 5. Effort分位数测试 (Recall90下的搜索步长)
print("\n=== 5. Effort分位数测试 (Recall90下的搜索步长) ===")

def find_ef_for_recall90(index, query, ground_truth, k=100):
    """找到达到recall90所需的最小ef_search值"""
    for ef in [16, 32, 64, 128, 256, 512]:
        results, steps = index.query_with_steps(query, k=k, ef=ef)
        recall = calculate_recall_at_k(results, ground_truth, k)
        if recall >= 0.90:
            return ef, recall, steps
    return 512, 0.0, 0

# 测试effort分位数对应的query
effort_results = {}
print("测试effort分位数对应的查询...")

for percentile, info in effort_percentiles.items():
    query_id = info['query_id']
    
    # 检查query_id是否在测试范围内
    if query_id < len(test_queries_final):
        query = test_queries_final[query_id]
        
        if query_id in test_ground_truth:
            gt = test_ground_truth[query_id]
            
            print(f"测试P{percentile} (query_id={query_id})...")
            
            # 测试三种方法
            ef_status, recall_status, steps_status = find_ef_for_recall90(index_status, query, gt)
            ef_high, recall_high, steps_high = find_ef_for_recall90(index_high, query, gt)
            ef_norm, recall_norm, steps_norm = find_ef_for_recall90(index_norm, query, gt)
            
            effort_results[percentile] = {
                'query_id': query_id,
                'status': {'ef': ef_status, 'recall': recall_status, 'steps': steps_status},
                'high': {'ef': ef_high, 'recall': recall_high, 'steps': steps_high},
                'norm': {'ef': ef_norm, 'recall': recall_norm, 'steps': steps_norm}
            }
            
            print(f"  Status: ef={ef_status}, recall={recall_status:.3f}, steps={steps_status}")
            print(f"  High: ef={ef_high}, recall={recall_high:.3f}, steps={steps_high}")
            print(f"  Norm: ef={ef_norm}, recall={recall_norm:.3f}, steps={steps_norm}")

# 显示结果汇总
print(f"\n=== Recall90下的Effort分位数步长汇总 ===")
print(f"{'Percentile':<12} {'Status Steps':<15} {'High Steps':<15} {'Norm Steps':<15}")
print("-" * 60)
for percentile, results in effort_results.items():
    print(f"P{percentile:<10} {results['status']['steps']:<15.1f} {results['high']['steps']:<15.1f} {results['norm']['steps']:<15.1f}")



=== 5. Effort分位数测试 (Recall90下的搜索步长) ===
测试effort分位数对应的查询...

=== Recall90下的Effort分位数步长汇总 ===
Percentile   Status Steps    High Steps      Norm Steps     
------------------------------------------------------------


In [None]:
# 6. 结果可视化
print("\n=== 6. 结果可视化 ===")

plt.figure(figsize=(18, 12))

# 子图1: Recall对比
plt.subplot(2, 3, 1)
methods = ['Status', 'High', 'Norm']
recalls = [status_results['mean_recall'], high_results['mean_recall'], norm_results['mean_recall']]
recall_stds = [status_results['std_recall'], high_results['std_recall'], norm_results['std_recall']]

plt.bar(methods, recalls, yerr=recall_stds, capsize=5, alpha=0.7)
plt.ylabel('Mean Recall@100')
plt.title('Recall性能对比 (ef_search=32)')
plt.grid(True, alpha=0.3)

# 子图2: 搜索步数对比
plt.subplot(2, 3, 2)
steps = [status_results['mean_steps'], high_results['mean_steps'], norm_results['mean_steps']]
steps_stds = [status_results['std_steps'], high_results['std_steps'], norm_results['std_steps']]

plt.bar(methods, steps, yerr=steps_stds, capsize=5, alpha=0.7, color='orange')
plt.ylabel('Mean Search Steps')
plt.title('搜索步数对比 (ef_search=32)')
plt.grid(True, alpha=0.3)

# 子图3: 搜索时间对比
plt.subplot(2, 3, 3)
times = [status_results['mean_time']*1000, high_results['mean_time']*1000, norm_results['mean_time']*1000]
times_stds = [status_results['std_time']*1000, high_results['std_time']*1000, norm_results['std_time']*1000]

plt.bar(methods, times, yerr=times_stds, capsize=5, alpha=0.7, color='green')
plt.ylabel('Mean Search Time (ms)')
plt.title('搜索时间对比 (ef_search=32)')
plt.grid(True, alpha=0.3)

# 子图4: Recall90下的搜索步数对比
plt.subplot(2, 3, 4)
percentiles = list(effort_results.keys())
status_steps = [effort_results[p]['status']['steps'] for p in percentiles]
high_steps = [effort_results[p]['high']['steps'] for p in percentiles]
norm_steps = [effort_results[p]['norm']['steps'] for p in percentiles]

x = np.arange(len(percentiles))
width = 0.25

plt.bar(x - width, status_steps, width, label='Status', alpha=0.7)
plt.bar(x, high_steps, width, label='High', alpha=0.7)
plt.bar(x + width, norm_steps, width, label='Norm', alpha=0.7)

plt.xlabel('Effort Percentile')
plt.ylabel('Search Steps (Recall90)')
plt.title('Recall90下的搜索步数对比')
plt.xticks(x, [f'P{p}' for p in percentiles])
plt.legend()
plt.grid(True, alpha=0.3)

# 子图5: Recall分布对比
plt.subplot(2, 3, 5)
plt.hist(status_results['recalls'], bins=20, alpha=0.5, label='Status', density=True)
plt.hist(high_results['recalls'], bins=20, alpha=0.5, label='High', density=True)
plt.hist(norm_results['recalls'], bins=20, alpha=0.5, label='Norm', density=True)
plt.xlabel('Recall@100')
plt.ylabel('Density')
plt.title('Recall分布对比')
plt.legend()
plt.grid(True, alpha=0.3)

# 子图6: 搜索步数分布对比
plt.subplot(2, 3, 6)
plt.hist(status_results['search_steps'], bins=20, alpha=0.5, label='Status', density=True)
plt.hist(high_results['search_steps'], bins=20, alpha=0.5, label='High', density=True)
plt.hist(norm_results['search_steps'], bins=20, alpha=0.5, label='Norm', density=True)
plt.xlabel('Search Steps')
plt.ylabel('Density')
plt.title('搜索步数分布对比')
plt.legend()
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()


In [None]:
# 7. 保存最终结果
print("\n=== 7. 保存最终结果 ===")

# 准备保存的数据
final_results = {
    'experiment_info': {
        'description': 'HNSW方法对比测试',
        'dataset': 'Text2Image 500K vectors',
        'test_queries': len(test_queries_final),
        'train_queries': len(train_queries),
        'parameters': {
            'M': M,
            'ef_construction': ef_construction,
            'ef_search': ef_search
        }
    },
    'build_times': {
        'status': status_build_time + status_edge_time,
        'high': high_build_time + high_edge_time,
        'norm': norm_preprocess_time + norm_build_time
    },
    'recall_results': {
        'status': {
            'mean_recall': float(status_results['mean_recall']),
            'std_recall': float(status_results['std_recall']),
            'mean_steps': float(status_results['mean_steps']),
            'std_steps': float(status_results['std_steps']),
            'mean_time_ms': float(status_results['mean_time'] * 1000),
            'std_time_ms': float(status_results['std_time'] * 1000)
        },
        'high': {
            'mean_recall': float(high_results['mean_recall']),
            'std_recall': float(high_results['std_recall']),
            'mean_steps': float(high_results['mean_steps']),
            'std_steps': float(high_results['std_steps']),
            'mean_time_ms': float(high_results['mean_time'] * 1000),
            'std_time_ms': float(high_results['std_time'] * 1000)
        },
        'norm': {
            'mean_recall': float(norm_results['mean_recall']),
            'std_recall': float(norm_results['std_recall']),
            'mean_steps': float(norm_results['mean_steps']),
            'std_steps': float(norm_results['std_steps']),
            'mean_time_ms': float(norm_results['mean_time'] * 1000),
            'std_time_ms': float(norm_results['std_time'] * 1000)
        }
    },
    'effort_percentiles': effort_results
}

# 保存到JSON文件
results_file = '/root/code/vectordbindexing/hnsw_methods_comparison_results.json'
with open(results_file, 'w') as f:
    json.dump(final_results, f, indent=2)

print(f"结果已保存到: {results_file}")

# 总结报告
print(f"\n=== HNSW方法对比测试总结 ===")
print(f"1. 实验设置:")
print(f"   - 数据集: {final_results['experiment_info']['dataset']}")
print(f"   - 训练查询: {final_results['experiment_info']['train_queries']} 个")
print(f"   - 测试查询: {final_results['experiment_info']['test_queries']} 个")
print(f"   - 参数: M={M}, ef_construction={ef_construction}, ef_search={ef_search}")

print(f"\n2. 构建时间对比:")
print(f"   - Status方法: {final_results['build_times']['status']:.2f}秒")
print(f"   - High方法: {final_results['build_times']['high']:.2f}秒")
print(f"   - Norm方法: {final_results['build_times']['norm']:.2f}秒")

print(f"\n3. Recall性能对比 (ef_search=32):")
print(f"   - Status方法: {final_results['recall_results']['status']['mean_recall']:.3f} ± {final_results['recall_results']['status']['std_recall']:.3f}")
print(f"   - High方法: {final_results['recall_results']['high']['mean_recall']:.3f} ± {final_results['recall_results']['high']['std_recall']:.3f}")
print(f"   - Norm方法: {final_results['recall_results']['norm']['mean_recall']:.3f} ± {final_results['recall_results']['norm']['std_recall']:.3f}")

print(f"\n4. 搜索步数对比 (ef_search=32):")
print(f"   - Status方法: {final_results['recall_results']['status']['mean_steps']:.1f} ± {final_results['recall_results']['status']['std_steps']:.1f}")
print(f"   - High方法: {final_results['recall_results']['high']['mean_steps']:.1f} ± {final_results['recall_results']['high']['std_steps']:.1f}")
print(f"   - Norm方法: {final_results['recall_results']['norm']['mean_steps']:.1f} ± {final_results['recall_results']['norm']['std_steps']:.1f}")

print(f"\n5. 搜索时间对比 (ef_search=32):")
print(f"   - Status方法: {final_results['recall_results']['status']['mean_time_ms']:.2f} ± {final_results['recall_results']['status']['std_time_ms']:.2f}ms")
print(f"   - High方法: {final_results['recall_results']['high']['mean_time_ms']:.2f} ± {final_results['recall_results']['high']['std_time_ms']:.2f}ms")
print(f"   - Norm方法: {final_results['recall_results']['norm']['mean_time_ms']:.2f} ± {final_results['recall_results']['norm']['std_time_ms']:.2f}ms")

print(f"\n6. 关键发现:")
best_recall_method = max(['status', 'high', 'norm'], key=lambda x: final_results['recall_results'][x]['mean_recall'])
fastest_method = min(['status', 'high', 'norm'], key=lambda x: final_results['recall_results'][x]['mean_time_ms'])
most_efficient_method = min(['status', 'high', 'norm'], key=lambda x: final_results['recall_results'][x]['mean_steps'])

print(f"   - 最高Recall: {best_recall_method.title()}方法")
print(f"   - 最快搜索: {fastest_method.title()}方法")
print(f"   - 最少步数: {most_efficient_method.title()}方法")

print(f"\n✅ HNSW方法对比测试完成！")


In [None]:
# === 三种HNSW方法对比测试框架 ===
print("=== 三种HNSW方法对比测试框架 ===")

from io_utils import read_fbin, read_ibin
import faiss
import numpy as np
import time
import json
from collections import defaultdict
import matplotlib.pyplot as plt

# 导入三种HNSW方法
import hnsw_cosine_status as hnsw_status
import hnsw_cosine_status_high as hnsw_high
import simple_sim_hash

print(faiss.__version__)

# 数据路径
file_path = "/root/code/vectordbindexing/Text2Image/base.1M.fbin"
query_path = "/root/code/vectordbindexing/Text2Image/query.public.100K.fbin"

# 读取数据集
print("\n=== 读取数据集 ===")
print("读取图像向量...")
data_vector = read_fbin(file_path)
print(f"图像向量: {data_vector.shape}, dtype: {data_vector.dtype}")

print("读取查询向量...")
query_vector = read_fbin(query_path)
print(f"查询向量: {query_vector.shape}, dtype: {query_vector.dtype}")

# 使用前100K数据进行测试
train_data_vector = data_vector[:100000]
print(f"使用训练数据: {train_data_vector.shape}")

# 数据分割：9/10训练，1/10测试
total_queries = len(query_vector)
train_query_size = int(total_queries * 0.9)
test_query_size = total_queries - train_query_size

train_queries = query_vector[:train_query_size]
test_queries = query_vector[train_query_size:]

print(f"\n数据分割:")
print(f"  总查询数: {total_queries}")
print(f"  训练查询数: {train_query_size}")
print(f"  测试查询数: {test_query_size}")


In [None]:
# === 步骤1: 创建FAISS HNSW索引作为Baseline ===
print("\n=== 步骤1: 创建FAISS HNSW索引作为Baseline ===")

# 创建FAISS HNSW索引
dimension = 200
faiss_index = faiss.IndexHNSWFlat(dimension, 32)  # M=32

# 添加数据到FAISS索引
print("添加数据到FAISS索引...")
start_time = time.time()
faiss_index.add(train_data_vector.astype('float32'))
build_time = time.time() - start_time
print(f"FAISS索引构建完成，耗时: {build_time:.2f}秒")
print(f"索引大小: {faiss_index.ntotal} 个向量")

# 设置搜索参数（使用较宽的beam width获得更准确的结果）
faiss_index.hnsw.efSearch = 512  # 使用较大的efSearch
print(f"FAISS搜索参数: efSearch={faiss_index.hnsw.efSearch}")

# 对训练查询进行FAISS搜索，生成ground truth
print("\n对训练查询进行FAISS搜索...")
faiss_ground_truth = {}
faiss_search_efforts = []

for i, query in enumerate(train_queries):
    start_time = time.time()
    # 搜索top100
    distances, indices = faiss_index.search(query.reshape(1, -1).astype('float32'), 100)
    search_time = time.time() - start_time
    
    faiss_ground_truth[i] = indices[0].tolist()
    faiss_search_efforts.append(search_time * 1000)  # 转换为毫秒
    
    if (i + 1) % 1000 == 0:
        print(f"  完成 {i+1}/{train_query_size} 个查询")

print(f"\nFAISS搜索统计:")
print(f"平均搜索时间: {np.mean(faiss_search_efforts):.2f}ms")
print(f"搜索时间标准差: {np.std(faiss_search_efforts):.2f}ms")

# 计算Effort的P10-P99分位数，并记录对应的query id
print("\n=== 计算Effort分位数 ===")
percentiles = [10, 25, 50, 75, 90, 95, 99]
effort_percentiles = {}
query_ids_by_percentile = {}

for p in percentiles:
    percentile_value = np.percentile(faiss_search_efforts, p)
    effort_percentiles[p] = percentile_value
    
    # 找到effort接近该分位数的query id
    effort_array = np.array(faiss_search_efforts)
    closest_idx = np.argmin(np.abs(effort_array - percentile_value))
    query_ids_by_percentile[p] = closest_idx
    
    print(f"P{p}: {percentile_value:.2f}ms, Query ID: {closest_idx}")

# 保存结果到文件
print("\n=== 保存Ground Truth和Effort数据 ===")
ground_truth_file = "/root/code/vectordbindexing/faiss_ground_truth.json"
with open(ground_truth_file, 'w') as f:
    json.dump(faiss_ground_truth, f, indent=2)
print(f"Ground Truth已保存到: {ground_truth_file}")

effort_data = {
    'effort_percentiles': effort_percentiles,
    'query_ids_by_percentile': query_ids_by_percentile,
    'all_efforts': faiss_search_efforts,
    'statistics': {
        'mean': np.mean(faiss_search_efforts),
        'std': np.std(faiss_search_efforts),
        'min': np.min(faiss_search_efforts),
        'max': np.max(faiss_search_efforts)
    }
}

effort_file = "/root/code/vectordbindexing/faiss_effort_analysis.json"
with open(effort_file, 'w') as f:
    json.dump(effort_data, f, indent=2)
print(f"Effort分析已保存到: {effort_file}")


In [None]:
# === 步骤2: 创建三种HNSW索引 ===
print("\n=== 步骤2: 创建三种HNSW索引 ===")

# 通用参数
M = 64
ef_construction = 128
ef_search = 32  # 测试时使用ef_search=32
random_seed = 1

# 创建LSH对象
simHash = simple_sim_hash.SimpleSimHash(dim=200)

# 1. Status方法 (RoarGraph)
print("\n--- 创建Status方法 (RoarGraph) ---")
start_time = time.time()
status_index = hnsw_status.HNSWIndex(M=M, ef_construction=ef_construction, ef_search=ef_search, random_seed=random_seed)

# 添加训练数据
for img_id, vec in enumerate(train_data_vector):
    status_index.add_item_fast10k(vec, lsh=simHash, limit=100)

# 添加训练查询
for qid, vec in enumerate(train_queries):
    status_index.add_item_fast10k(vec, lsh=simHash, limit=100)

status_build_time = time.time() - start_time
print(f"Status索引构建完成，耗时: {status_build_time:.2f}秒")
print(f"Status索引大小: {len(status_index.items)} 个向量")

# 2. High方法 (高层新增边) - 需要修改逻辑
print("\n--- 创建High方法 (高层新增边) ---")
start_time = time.time()
high_index = hnsw_high.HNSWIndex(M=M, ef_construction=ef_construction, ef_search=ef_search, random_seed=random_seed)

# 添加训练数据
for img_id, vec in enumerate(train_data_vector):
    high_index.add_item_fast10k(vec, lsh=simHash, limit=100)

# 添加训练查询
for qid, vec in enumerate(train_queries):
    high_index.add_item_fast10k(vec, lsh=simHash, limit=100)

high_build_time = time.time() - start_time
print(f"High索引构建完成，耗时: {high_build_time:.2f}秒")
print(f"High索引大小: {len(high_index.items)} 个向量")

# 3. Norm方法 (高维映射) - 暂时使用相同的索引，后续可以扩展
print("\n--- 创建Norm方法 (高维映射) ---")
# 注意：这里暂时使用相同的实现，实际应用中需要实现高维映射逻辑
norm_index = hnsw_status.HNSWIndex(M=M, ef_construction=ef_construction, ef_search=ef_search, random_seed=random_seed)

# 添加训练数据
for img_id, vec in enumerate(train_data_vector):
    norm_index.add_item_fast10k(vec, lsh=simHash, limit=100)

# 添加训练查询
for qid, vec in enumerate(train_queries):
    norm_index.add_item_fast10k(vec, lsh=simHash, limit=100)

norm_build_time = time.time() - start_time
print(f"Norm索引构建完成，耗时: {norm_build_time:.2f}秒")
print(f"Norm索引大小: {len(norm_index.items)} 个向量")

print(f"\n=== 索引构建统计 ===")
print(f"Status方法: {status_build_time:.2f}秒")
print(f"High方法: {high_build_time:.2f}秒")
print(f"Norm方法: {norm_build_time:.2f}秒")


In [None]:
# === 步骤3: 测试三种方法的Recall性能 ===
print("\n=== 步骤3: 测试三种方法的Recall性能 ===")

def calculate_recall_at_k(predicted_ids, ground_truth_ids, k):
    """
    计算recall@k
    
    Args:
        predicted_ids: 预测的top-k结果
        ground_truth_ids: ground truth结果
        k: top-k值
    
    Returns:
        recall@k值
    """
    # 取前k个预测结果
    top_k_pred = set(predicted_ids[:k])
    
    # ground truth已经是正确的索引
    valid_gt = set(ground_truth_ids)
    
    if len(valid_gt) == 0:
        return 0.0
    
    # 计算交集
    intersection = top_k_pred.intersection(valid_gt)
    
    # recall@k = |intersection| / |ground_truth|
    recall = len(intersection) / len(valid_gt)
    return recall

# 测试三种方法的recall性能
methods = {
    'Status (RoarGraph)': status_index,
    'High (高层新增边)': high_index,
    'Norm (高维映射)': norm_index
}

recall_results = {}
search_time_results = {}

# 使用测试集 (后1/10的查询) 进行测试
test_sample_size = min(1000, len(test_queries))  # 使用1000个测试查询
test_sample_queries = test_queries[:test_sample_size]

print(f"使用 {test_sample_size} 个测试查询进行recall测试...")

for method_name, index in methods.items():
    print(f"\n--- 测试 {method_name} ---")
    
    recalls = []
    search_times = []
    
    for i, query in enumerate(test_sample_queries):
        # 获取对应的ground truth (需要调整索引)
        gt_query_id = train_query_size + i
        ground_truth_ids = faiss_ground_truth.get(str(gt_query_id % train_query_size), [])
        
        # 搜索
        start_time = time.time()
        results = index.query(query, k=100, ef=ef_search)
        search_time = time.time() - start_time
        
        # 计算recall@100
        recall = calculate_recall_at_k(results, ground_truth_ids, 100)
        
        recalls.append(recall)
        search_times.append(search_time * 1000)  # 转换为毫秒
        
        if (i + 1) % 100 == 0:
            print(f"  完成 {i+1}/{test_sample_size} 个查询")
    
    # 统计结果
    recall_results[method_name] = {
        'mean': np.mean(recalls),
        'std': np.std(recalls),
        'min': np.min(recalls),
        'max': np.max(recalls),
        'p50': np.percentile(recalls, 50),
        'p95': np.percentile(recalls, 95)
    }
    
    search_time_results[method_name] = {
        'mean': np.mean(search_times),
        'std': np.std(search_times),
        'min': np.min(search_times),
        'max': np.max(search_times)
    }
    
    print(f"  平均recall@100: {recall_results[method_name]['mean']:.3f}")
    print(f"  平均搜索时间: {search_time_results[method_name]['mean']:.2f}ms")

# 显示recall结果汇总
print(f"\n=== Recall@100结果汇总 ===")
print(f"{'方法':<20} {'平均Recall':<12} {'标准差':<10} {'P50':<8} {'P95':<8}")
print("-" * 70)
for method_name, results in recall_results.items():
    print(f"{method_name:<20} {results['mean']:<12.3f} {results['std']:<10.3f} {results['p50']:<8.3f} {results['p95']:<8.3f}")

# 显示搜索时间汇总
print(f"\n=== 搜索时间结果汇总 (ms) ===")
print(f"{'方法':<20} {'平均时间':<12} {'标准差':<10} {'最小值':<10} {'最大值':<10}")
print("-" * 80)
for method_name, results in search_time_results.items():
    print(f"{method_name:<20} {results['mean']:<12.2f} {results['std']:<10.2f} {results['min']:<10.2f} {results['max']:<10.2f}")


In [None]:
# === 步骤4: 测试不同Recall目标下的搜索步长 ===
print("\n=== 步骤4: 测试不同Recall目标下的搜索步长 ===")

def find_ef_for_recall_target(index, queries, ground_truths, target_recall, k=100, ef_range=(32, 512)):
    """
    找到达到目标recall所需的最小ef_search值
    
    Args:
        index: HNSW索引
        queries: 查询向量列表
        ground_truths: 对应的ground truth列表
        target_recall: 目标recall值 (如0.90, 0.95)
        k: top-k值
        ef_range: ef_search搜索范围 (min, max)
    
    Returns:
        (optimal_ef, achieved_recall, search_steps): 最优ef值、达到的recall、搜索步数
    """
    ef_min, ef_max = ef_range
    
    # 测试不同的ef值
    test_efs = [ef_min, ef_min + (ef_max - ef_min) // 4, ef_min + (ef_max - ef_min) // 2, 
                ef_min + 3 * (ef_max - ef_min) // 4, ef_max]
    
    best_ef = ef_max
    best_recall = 0.0
    
    for ef in test_efs:
        recalls = []
        search_steps = []
        
        for query, gt in zip(queries, ground_truths):
            results, steps = index.query_with_steps(query, k=k, ef=ef)
            recall = calculate_recall_at_k(results, gt, k)
            recalls.append(recall)
            search_steps.append(steps)
        
        mean_recall = np.mean(recalls)
        mean_steps = np.mean(search_steps)
        
        if mean_recall >= target_recall and ef < best_ef:
            best_ef = ef
            best_recall = mean_recall
    
    # 计算最优ef对应的搜索步数
    final_recalls = []
    final_steps = []
    for query, gt in zip(queries, ground_truths):
        results, steps = index.query_with_steps(query, k=k, ef=best_ef)
        recall = calculate_recall_at_k(results, gt, k)
        final_recalls.append(recall)
        final_steps.append(steps)
    
    return best_ef, np.mean(final_recalls), np.mean(final_steps)

# 测试不同recall目标（使用P10-P99的查询）
recall_targets = [0.70, 0.80, 0.85, 0.90, 0.95]
recall_step_results = {}

print("测试不同recall目标下的搜索步长...")

for method_name, index in methods.items():
    print(f"\n--- 测试 {method_name} 的recall步长关系 ---")
    
    method_results = {}
    
    for target_recall in recall_targets:
        print(f"  测试目标recall={target_recall*100:.0f}%...")
        
        # 使用P10-P99的查询进行测试
        test_queries_for_recall = []
        test_ground_truths_for_recall = []
        
        for p in percentiles:
            query_id = query_ids_by_percentile[p]
            if query_id < len(train_queries):
                test_queries_for_recall.append(train_queries[query_id])
                ground_truth_ids = faiss_ground_truth.get(str(query_id), [])
                test_ground_truths_for_recall.append(ground_truth_ids)
        
        if len(test_queries_for_recall) > 0:
            optimal_ef, achieved_recall, search_steps = find_ef_for_recall_target(
                index, test_queries_for_recall, test_ground_truths_for_recall, target_recall, k=100
            )
            
            method_results[target_recall] = {
                'ef_search': optimal_ef,
                'achieved_recall': achieved_recall,
                'search_steps': search_steps
            }
            
            print(f"    目标recall: {target_recall*100:.0f}%")
            print(f"    最优ef_search: {optimal_ef}")
            print(f"    达到的recall: {achieved_recall:.3f}")
            print(f"    平均搜索步数: {search_steps:.1f}")
    
    recall_step_results[method_name] = method_results

# 显示结果汇总
print(f"\n=== 不同Recall目标下的搜索步长汇总 ===")
for method_name, method_results in recall_step_results.items():
    print(f"\n--- {method_name} ---")
    print(f"{'目标Recall':<12} {'最优ef':<10} {'达到Recall':<12} {'搜索步数':<10}")
    print("-" * 50)
    for target, results in method_results.items():
        print(f"{target*100:>8.0f}%{'':<4} {results['ef_search']:<10} {results['achieved_recall']:<12.3f} {results['search_steps']:<10.1f}")


In [None]:
# === 步骤5: 可视化结果和总结报告 ===
print("\n=== 步骤5: 可视化结果和总结报告 ===")

# 绘制对比图表
plt.figure(figsize=(20, 12))

# 子图1: Recall对比
plt.subplot(2, 3, 1)
method_names = list(recall_results.keys())
mean_recalls = [recall_results[method]['mean'] for method in method_names]
std_recalls = [recall_results[method]['std'] for method in method_names]

bars = plt.bar(method_names, mean_recalls, yerr=std_recalls, capsize=5, 
               color=['skyblue', 'lightgreen', 'lightcoral'])
plt.xlabel('方法')
plt.ylabel('平均Recall@100')
plt.title('三种方法的Recall性能对比')
plt.xticks(rotation=45)
plt.grid(True, alpha=0.3)

# 添加数值标签
for bar, mean_val in zip(bars, mean_recalls):
    plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.001, 
             f'{mean_val:.3f}', ha='center', va='bottom')

# 子图2: 搜索时间对比
plt.subplot(2, 3, 2)
mean_times = [search_time_results[method]['mean'] for method in method_names]
std_times = [search_time_results[method]['std'] for method in method_names]

bars = plt.bar(method_names, mean_times, yerr=std_times, capsize=5,
               color=['orange', 'lightblue', 'lightpink'])
plt.xlabel('方法')
plt.ylabel('平均搜索时间 (ms)')
plt.title('三种方法的搜索时间对比')
plt.xticks(rotation=45)
plt.grid(True, alpha=0.3)

# 添加数值标签
for bar, mean_val in zip(bars, mean_times):
    plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1, 
             f'{mean_val:.1f}', ha='center', va='bottom')

# 子图3: Recall vs 搜索步数关系
plt.subplot(2, 3, 3)
colors = ['blue', 'green', 'red']
markers = ['o', 's', '^']

for i, (method_name, method_results) in enumerate(recall_step_results.items()):
    if method_results:
        targets = list(method_results.keys())
        steps = [method_results[target]['search_steps'] for target in targets]
        recalls = [method_results[target]['achieved_recall'] for target in targets]
        
        plt.plot(steps, recalls, marker=markers[i], color=colors[i], 
                linewidth=2, markersize=8, label=method_name)

plt.xlabel('搜索步数')
plt.ylabel('Recall@100')
plt.title('Recall vs 搜索步数关系')
plt.legend()
plt.grid(True, alpha=0.3)

# 子图4: FAISS Baseline搜索时间分布
plt.subplot(2, 3, 4)
plt.hist(faiss_search_efforts, bins=50, alpha=0.7, edgecolor='black', color='lightgray')
plt.xlabel('搜索时间 (ms)')
plt.ylabel('频次')
plt.title('FAISS Baseline搜索时间分布')
plt.grid(True, alpha=0.3)

# 添加分位数线
for p, value in effort_percentiles.items():
    plt.axvline(value, color='red', linestyle='--', alpha=0.7, linewidth=1)

# 子图5: 不同ef_search下的Recall对比
plt.subplot(2, 3, 5)
# 这里可以添加不同ef_search值的对比
ef_values = [32, 64, 128, 256]
recall_by_ef = {}

for method_name, index in methods.items():
    recall_by_ef[method_name] = []
    for ef in ef_values:
        recalls = []
        for i, query in enumerate(test_sample_queries[:100]):  # 使用100个查询快速测试
            gt_query_id = train_query_size + i
            ground_truth_ids = faiss_ground_truth.get(str(gt_query_id % train_query_size), [])
            results = index.query(query, k=100, ef=ef)
            recall = calculate_recall_at_k(results, ground_truth_ids, 100)
            recalls.append(recall)
        recall_by_ef[method_name].append(np.mean(recalls))

for i, (method_name, recalls) in enumerate(recall_by_ef.items()):
    plt.plot(ef_values, recalls, marker=markers[i], color=colors[i], 
             linewidth=2, markersize=8, label=method_name)

plt.xlabel('ef_search')
plt.ylabel('平均Recall@100')
plt.title('不同ef_search下的Recall对比')
plt.legend()
plt.grid(True, alpha=0.3)

# 子图6: 索引构建时间对比
plt.subplot(2, 3, 6)
build_times = [status_build_time, high_build_time, norm_build_time]
bars = plt.bar(method_names, build_times, color=['gold', 'silver', 'bronze'])
plt.xlabel('方法')
plt.ylabel('构建时间 (秒)')
plt.title('索引构建时间对比')
plt.xticks(rotation=45)
plt.grid(True, alpha=0.3)

# 添加数值标签
for bar, time_val in zip(bars, build_times):
    plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 1, 
             f'{time_val:.1f}s', ha='center', va='bottom')

plt.tight_layout()
plt.show()

# 总结报告
print("\n=== 三种HNSW方法对比总结报告 ===")
print(f"\n1. 索引构建性能:")
for i, method_name in enumerate(method_names):
    print(f"   {method_name}: {build_times[i]:.2f}秒")

print(f"\n2. Recall@100性能 (ef_search=32):")
for method_name, results in recall_results.items():
    print(f"   {method_name}: {results['mean']:.3f} ± {results['std']:.3f}")

print(f"\n3. 搜索时间性能 (ef_search=32):")
for method_name, results in search_time_results.items():
    print(f"   {method_name}: {results['mean']:.2f}ms ± {results['std']:.2f}ms")

print(f"\n4. FAISS Baseline参考:")
print(f"   平均搜索时间: {np.mean(faiss_search_efforts):.2f}ms")
print(f"   搜索时间范围: {np.min(faiss_search_efforts):.2f}ms - {np.max(faiss_search_efforts):.2f}ms")

print(f"\n5. 性能建议:")
best_recall_method = max(recall_results.keys(), key=lambda x: recall_results[x]['mean'])
fastest_method = min(search_time_results.keys(), key=lambda x: search_time_results[x]['mean'])
print(f"   最佳Recall: {best_recall_method} ({recall_results[best_recall_method]['mean']:.3f})")
print(f"   最快搜索: {fastest_method} ({search_time_results[fastest_method]['mean']:.2f}ms)")

print(f"\n6. 下一步优化方向:")
print(f"   - 可以调整ef_search参数平衡recall和速度")
print(f"   - 可以优化索引构建参数提高质量")
print(f"   - 可以实现真正的高维映射Norm方法")
print(f"   - 可以优化高层新增边的逻辑")

In [None]:
- 仅 layer-1 的聚类；可能需要更高层的处理 (threshold)
- query辅助的问题 (从静态开始)
- 分析 映射-recall 问题 (第一阶段/第二阶段)

什么时候选择 多少个 入口节点？
- Hardness定义

In [None]:
- RoarGraph 源码校验(用同样的数据集 1M的 + 100K query)