# RAG 实验设计
本实验旨在评估不同检索方法（BM25、M3E、多路召回、融合召回、重排序）在RAG（Retrieval-Augmented Generation）系统中的表现。
我们将比较 Recall@10, MRR 和 NDCG@5 三项指标。

In [1]:
# 安装必要库（如未安装）
# !pip install rank_bm25 transformers faiss-cpu datasets sklearn
import numpy as np
from sklearn.metrics import ndcg_score
from typing import List
import random

# 示例数据生成
queries = [f"query_{i}" for i in range(100)]
corpus = [f"doc_{j}" for j in range(1000)]
ground_truth = {q: random.sample(corpus, 1) for q in queries}

## 定义评估函数

In [2]:
def evaluate_ranking(predictions: List[List[str]], ground_truth: dict, k=10):
    recall_scores, mrr_scores, ndcg_scores = [], [], []
    for i, q in enumerate(queries):
        preds = predictions[i][:k]
        gt = ground_truth[q][0]

        recall = 1 if gt in preds else 0
        recall_scores.append(recall)

        if gt in preds:
            rank = preds.index(gt) + 1
            mrr_scores.append(1 / rank)
        else:
            mrr_scores.append(0)

        relevance = [1 if doc == gt else 0 for doc in preds]
        ndcg_scores.append(ndcg_score([relevance], [list(range(len(relevance), 0, -1))]))

    return np.mean(recall_scores), np.mean(mrr_scores), np.mean(ndcg_scores)

## 模拟不同召回方法

In [3]:
# 用于模拟不同方法的召回（简单随机模拟）
def simulate_method(hit_ratio=0.7):
    predictions = []
    for q in queries:
        gt = ground_truth[q][0]
        docs = [gt] if random.random() < hit_ratio else []
        docs += random.sample([d for d in corpus if d != gt], 20)
        random.shuffle(docs)
        predictions.append(docs)
    return predictions

In [4]:
methods = {
    "BM25": 0.68,
    "M3E": 0.72,
    "多路召回": 0.84,
    "融合召回": 0.89,
    "重排序": 0.93
}

results = {}
for method, ratio in methods.items():
    preds = simulate_method(ratio)
    recall, mrr, ndcg = evaluate_ranking(preds, ground_truth)
    results[method] = (round(recall, 2), round(mrr, 2), round(ndcg, 2))

for method, metrics in results.items():
    print(f"{method}: Recall@10={metrics[0]}, MRR={metrics[1]}, NDCG@5={metrics[2]}")

BM25: Recall@10=0.36, MRR=0.11, NDCG@5=0.16
M3E: Recall@10=0.32, MRR=0.09, NDCG@5=0.15
多路召回: Recall@10=0.44, MRR=0.12, NDCG@5=0.19
融合召回: Recall@10=0.45, MRR=0.14, NDCG@5=0.21
重排序: Recall@10=0.41, MRR=0.1, NDCG@5=0.17
