### 后检索优化
    1.重排序-RRF
        重定义class RRFMultiQueryRetriever(MultiQueryRetriever)
            多路召回结果不去重
            根据多路召回的结果进行RRF排序
    2.重排序-CrossEncoderReranker
        model = HuggingFaceCrossEncoder(model_name=model_path, model_kwargs={'device': 'cpu'})
        compressor = CrossEncoderReranker(model=model, top_n=3)
        compression_retriever = ContextualCompressionRetriever(
            base_compressor=compressor,
            base_retriever=retriever  # retriever = 混合检索 或 multi-query
        )
    3.重排序-LongContextReorder
        将重要文本放到长文本的开头或结尾，通常会获得最佳性能
    4.压缩过滤-LLMChainExtractor
      让 LLM 在文档内部做 “抽取式过滤”，把无关内容裁掉，只保留与查询有关的部分
        内容过滤
        内容压缩
        去除无关部分
    5.压缩过滤-LLMChainFilter
      将文档与查询一起输入给 LLM，由 LLM 判断该文档是否与查询足够相关。如果不相关直接丢弃，不进入后续处理链
    6.压缩过滤-EmbeddingsFilter
        EmbeddingsFilter 通过计算查询与文档的 embedding 相似度，根据阈值判断是否保留文档
        与LLMChainFilter类似，但LLMChainFilter = 让 LLM 做语义判断的“智能过滤器”（Keep/Drop），而EmbeddingsFilter只比较向量相似度，不调用 LLM
    7.冗余过滤-EmbeddingsRedundantFilter
        对于相似度超过阈值的文档对，只保留其中一个
        redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings_model, similarity_threshold=0.95)

In [None]:
# 1.重排序-RRF
import logging
from typing import List
from uuid import uuid4

from langchain_chroma import Chroma
from langchain_classic.retrievers import MultiQueryRetriever
from langchain_community.chat_models.tongyi import ChatTongyi
from langchain_community.embeddings.dashscope import DashScopeEmbeddings
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document

# Show the queries generated by the multi-query retriever (they are logged at INFO).
logging.basicConfig()
# Legacy logger name used when the retriever lives in the `langchain` package.
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)
# NOTE: this file imports MultiQueryRetriever from `langchain_classic`, so its
# module logger is presumably named after that package; without this line the
# INFO messages would stay hidden.
logging.getLogger("langchain_classic.retrievers.multi_query").setLevel(logging.INFO)

# 格式化输出内容
def pretty_print_docs(docs):
    """Print every document's text under a numbered heading.

    Consecutive documents are separated by a line of 100 dashes. Each item in
    *docs* must expose a string ``page_content`` attribute.
    """
    divider = "\n" + "-" * 100 + "\n"
    sections = []
    for number, doc in enumerate(docs, start=1):
        sections.append(f"Document {number}:\n\n" + doc.page_content)
    print(divider.join(sections))

# 1. Model initialization
# Chat model (Tongyi "qwen-max"); passed to the multi-query retriever below.
llm = ChatTongyi(model="qwen-max")
# Embedding model (DashScope "text-embedding-v1") used to build the vector store.
embeddings_model = DashScopeEmbeddings(model="text-embedding-v1")

# 2. Prepare the data
texts = [
    "人工智能在医疗诊断中的应用。",
    "人工智能如何提升供应链效率。",
    "NBA季后赛最新赛况分析。",
    "传统法式烘焙的五大技巧。",
    "红楼梦人物关系图谱分析。",
    "人工智能在金融风险管理中的应用。",
    "人工智能如何影响未来就业市场。",
    "人工智能在制造业的应用。",
    "今天天气怎么样",
    "人工智能伦理：公平性与透明度。",
]

# Give every text a unique UUID string; the same id is also stored in the
# document metadata (next to a 1-based "source" label) so it can be read back
# from retrieved documents.
ids = [str(uuid4()) for _ in texts]
metadatas = [
    {"source": f"doc_{position}", "id": doc_id}
    for position, doc_id in enumerate(ids, start=1)
]

# 3. Vector store
# Embed the texts and store them, together with their ids and metadata,
# in the Chroma collection named "rrf".
vectorstore = Chroma.from_texts(
    texts=texts,
    embedding=embeddings_model,
    ids=ids,
    metadatas=metadatas,
    collection_name="rrf"
)
# Base retriever (created with default arguments) wrapped by the multi-query retriever below.
retriever = vectorstore.as_retriever()

# 4. Multi-query retriever rebuilt around Reciprocal Rank Fusion (RRF).
# Overrides MultiQueryRetriever.retrieve_documents so that each query's result
# list (and therefore each document's rank within it) is kept intact instead of
# being merged into one flat list; the lists are then fused with RRF.
class RRFMultiQueryRetriever(MultiQueryRetriever):
    """MultiQueryRetriever whose per-query results are merged with RRF.

    Only the synchronous ``retrieve_documents`` hook is overridden here; the
    asynchronous path keeps the base-class behaviour.
    """

    def retrieve_documents(
            self, queries: List[str], run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Run every query against the base retriever and fuse the rankings.

        Args:
            queries: Query strings to run.
            run_manager: Callback manager; a child manager is handed to every
                retriever call.

        Returns:
            The retrieved documents, one entry per distinct document, ordered
            by descending RRF score.
        """
        # One result list per query (not a flat list): RRF needs each
        # document's rank inside its own list.
        ranked_lists = [
            self.retriever.invoke(
                query, config={"callbacks": run_manager.get_child()}
            )
            for query in queries
        ]
        return self.rrf_documents(ranked_lists)

    @staticmethod
    def _doc_key(doc: Document):
        """Return a hashable identity used to recognise the same document twice.

        Preference order: ``metadata["id"]``, then the document's own ``id``
        attribute, then its text. Without the fallbacks every document lacking
        a metadata id would share the key ``None`` and be merged into one entry.
        """
        metadata_id = doc.metadata.get("id")
        if metadata_id is not None:
            return metadata_id
        doc_id = getattr(doc, "id", None)
        if doc_id is not None:
            # Tuples keep fallback keys from colliding with metadata ids.
            return ("doc_id", doc_id)
        return ("page_content", doc.page_content)

    def rrf_documents(self, documents: list[list[Document]], k: int = 60) -> List[Document]:
        """Fuse several ranked lists into one with Reciprocal Rank Fusion.

        Every occurrence of a document at 1-based position ``rank`` in a list
        contributes ``1 / (k + rank)`` to that document's score.

        Args:
            documents: One ranked document list per query.
            k: Smoothing constant added to the rank in the denominator.

        Returns:
            Distinct documents sorted by descending accumulated score. Ties
            keep first-seen order because the sort is stable.
        """
        # key -> {"rrf_score": accumulated score, "doc": first Document seen}
        fused = {}
        for docs in documents:
            for rank, doc in enumerate(docs, 1):
                entry = fused.setdefault(
                    self._doc_key(doc), {'rrf_score': 0.0, 'doc': doc}
                )
                entry['rrf_score'] += 1 / (k + rank)

        ranked_entries = sorted(
            fused.values(),
            key=lambda entry: entry['rrf_score'],
            reverse=True,  # highest score first
        )
        return [entry['doc'] for entry in ranked_entries]


# 5. Retrieval
# Build the RRF multi-query retriever from the base retriever and the chat model.
rrf_retriever = RRFMultiQueryRetriever.from_llm(
    retriever=retriever,
    llm=llm,
    include_original=True  # whether to include the original query
)

# Run one query through the retriever and print the returned documents.
rrf_docs = rrf_retriever.invoke("人工智能的应用")

pretty_print_docs(rrf_docs)


In [None]:
# 2.CrossEncoderReranker+ContextualCompressionRetriever
import os

from langchain_chroma import Chroma
from langchain_classic.retrievers import ContextualCompressionRetriever
from langchain_classic.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from langchain_community.embeddings import DashScopeEmbeddings

"""
1. CrossEncoderReranker 初始化
    compressor = CrossEncoderReranker(model=model, top_n=3)
    作用：创建一个基于交叉编码器(Cross-Encoder)的重排序器
    参数解析：
        model: 使用的重排序模型实例(这里是HuggingFaceCrossEncoder)
        top_n: 指定保留前多少个重排序后的结果(这里设为3)

    工作原理：
        接收初始检索结果(通常来自向量检索)
        对每个文档与查询(query)的相关性进行精细评分
        根据评分重新排序文档
        只保留top_n个最相关的文档

2. ContextualCompressionRetriever 创建
    compression_retriever = ContextualCompressionRetriever(
        base_compressor=compressor,
        base_retriever=retriever
    )
    作用：将基础检索器和重排序器组合成一个压缩检索器
    参数解析：
        base_compressor: 上面创建的CrossEncoderReranker实例
        base_retriever: 基础检索器(这里是Chroma向量检索器)

工作流程：
    首先使用base_retriever获取初步检索结果
    然后使用base_compressor对这些结果进行重排序和过滤

3. 检索过程
    compressed_docs = compression_retriever.invoke("人工智能的应用")
    执行流程：
        向量检索器(retriever)首先找到与"人工智能的应用"相关的文档(基于向量相似度)
        重排序器(compressor)对这些文档进行更精细的相关性评估：
        计算查询与每个文档的交叉注意力
        生成更准确的相关性分数
        根据重排序分数，只保留前3个最相关的文档
"""


# 格式化输出内容
def pretty_print_docs(docs):
    """Print the documents as numbered sections split by 100-dash rules.

    Each item in *docs* must expose a string ``page_content`` attribute.
    """

    def render(index, doc):
        # Heading, a blank line, then the document text.
        return f"Document {index + 1}:\n\n" + doc.page_content

    rule = "-" * 100
    rendered = [render(index, doc) for index, doc in enumerate(docs)]
    print(f"\n{rule}\n".join(rendered))


# 1. Data preparation
texts = [
    "人工智能在医疗诊断中的应用。",
    "人工智能如何提升供应链效率。",
    "NBA季后赛最新赛况分析。",
    "传统法式烘焙的五大技巧。",
    "红楼梦人物关系图谱分析。",
    "人工智能在金融风险管理中的应用。",
    "人工智能如何影响未来就业市场。",
    "人工智能在制造业的应用。",
    "今天天气怎么样",
    "人工智能伦理：公平性与透明度。"
]

# 2. Embed the texts and store them
embeddings_model = DashScopeEmbeddings(model="text-embedding-v1")
# NOTE: use a collection dedicated to this cell. Chroma's in-memory client is
# shared inside one Python process, so reusing the RRF cell's "rrf" collection
# would add these same texts a second time and make retrieval and reranking
# return duplicated documents.
vectorstore = Chroma.from_texts(
    texts=texts,
    embedding=embeddings_model,
    collection_name="cross_encoder_rerank"
)
# Base retriever whose results are reranked further below.
retriever = vectorstore.as_retriever()

# 3. Reranking model
# Load the cross-encoder from a local directory (MODEL_DIR/model_name) and run it on CPU.
MODEL_DIR = "/mnt/c/大模型/智泊大模型全栈教程总结/02-教材整理 L2/代码/Langchain/data/BAAI"
model_name = "bge-reranker-large"
model_path = os.path.join(MODEL_DIR, model_name)
model = HuggingFaceCrossEncoder(model_name=model_path, model_kwargs={'device': 'cpu'})

# 4. Set up the reranker
# top_n=3: keep the three best-scored documents after reranking.
compressor = CrossEncoderReranker(model=model, top_n=3)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=retriever  # could also be a hybrid-search or multi-query retriever
)

# 5. Retrieve, rerank and print the result
compressed_docs = compression_retriever.invoke("人工智能的应用")
pretty_print_docs(compressed_docs)


In [None]:
# 3.LongContextReorder
from langchain_community.document_transformers import LongContextReorder

"""
根据论文（Lost in the Middle: How Language Models Use Long Contexts）当关键数据位于输入上下文的开头或结尾时，
通常会获得最佳性能。为了减轻 “lost in the middle”的影响，可以在检索后重新排序文档，使最相关的文档置于极值
（例如，上下文的第一和最后一部分），将最不相关的文档置于中间。
"""
# Author's walkthrough of the reordering (not verifiable from this file alone):
#   input, by relevance:  5,4,3,2,1
#   reversed:             1,2,3,4,5
#   even index -> insert at the front; odd index -> append at the end

# Sample "documents", listed from most relevant (5) to least relevant (1).
# NOTE(review): these are plain strings, while transform_documents presumably
# expects Document objects - confirm this keeps working with the installed version.
documents = [
    "相关性:5",
    "相关性:4",
    "相关性:3",
    "相关性:2",
    "相关性:1",
]

reordering = LongContextReorder()
reordered_docs = reordering.transform_documents(documents)

# Print the reordered sequence.
print(reordered_docs)


In [None]:
# 4.LLMChainExtractor
import os

from langchain_chroma import Chroma
from langchain_classic.retrievers import ContextualCompressionRetriever
from langchain_classic.retrievers.document_compressors import LLMChainExtractor
from langchain_community.chat_models.tongyi import ChatTongyi
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import DashScopeEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

"""
文档分块检索后，与查询最相关的信息可能隐藏在一个包含大量不相关文本的文档中，输入给LLM，可能会导致更昂贵的LLM调用和较差的响应（噪声）。
"""


def pretty_print_docs(docs):
    """Print each document's text under a numbered heading.

    A line of 100 dashes is printed between consecutive documents. Each item
    in *docs* must expose a string ``page_content`` attribute.
    """
    separator = "\n" + "-" * 100 + "\n"
    pieces = []
    for position, document in enumerate(docs, start=1):
        pieces.append(f"Document {position}:\n" + document.page_content)
    print(separator.join(pieces))


# 1. Data preparation: load the source text file as UTF-8.
RESOURCE_DIR = "/mnt/c/大模型/智泊大模型全栈教程总结/02-教材整理 L2/代码/Langchain/6.langchain高级RAG/data/resources"
TXT_DOCUMENT_PATH = os.path.join(RESOURCE_DIR, "deepseek百度百科.txt")
loader = TextLoader(TXT_DOCUMENT_PATH, encoding='utf-8')
documents = loader.load()

# 2. Split into chunks, embed and index
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=100)
chunks = text_splitter.split_documents(documents)
embeddings_model = DashScopeEmbeddings(model="text-embedding-v1")
# NOTE: index into a collection dedicated to this cell. Without an explicit
# collection_name every cell writes to the same default in-memory collection,
# so the chunks pile up across cells and retrieval returns duplicates.
retriever = Chroma.from_documents(
    chunks, embeddings_model, collection_name="llm_chain_extractor"
).as_retriever()

# 3. Baseline retrieval (before compression)
docs = retriever.invoke("deepseek的发展历程")

print("\n1. 压缩前" + '-' * 100)
pretty_print_docs(docs)

# 4. Compression / filtering
# Wrap the base retriever so every retrieved chunk passes through the
# LLM-backed extractor before being returned.
llm = ChatTongyi(model="qwen-max")
compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=retriever
)
compressed_docs = compression_retriever.invoke("deepseek的发展历程")

print("\n2. 压缩后" + '-' * 100)
pretty_print_docs(compressed_docs)


In [None]:
# 5.压缩过滤-LLMChainFilter
import os

from langchain_chroma import Chroma
from langchain_classic.retrievers import ContextualCompressionRetriever
from langchain_classic.retrievers.document_compressors import LLMChainFilter
from langchain_community.chat_models.tongyi import ChatTongyi
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import DashScopeEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter


def pretty_print_docs(docs):
    """Print numbered document sections with a 100-dash rule between them.

    Each item in *docs* must expose a string ``page_content`` attribute.
    """

    def section(index, doc):
        # Heading line followed directly by the document text.
        return f"Document {index + 1}:\n" + doc.page_content

    rule = "-" * 100
    print(f"\n{rule}\n".join([section(i, d) for i, d in enumerate(docs)]))


# 1. Data preparation: load the source text file as UTF-8.
RESOURCE_DIR = "/mnt/c/大模型/智泊大模型全栈教程总结/02-教材整理 L2/代码/Langchain/6.langchain高级RAG/data/resources"
TXT_DOCUMENT_PATH = os.path.join(RESOURCE_DIR, "deepseek百度百科.txt")
loader = TextLoader(TXT_DOCUMENT_PATH, encoding='utf-8')
documents = loader.load()

# 2. Split into chunks, embed and index
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=100)
chunks = text_splitter.split_documents(documents)
embeddings_model = DashScopeEmbeddings(model="text-embedding-v1")
# NOTE: index into a collection dedicated to this cell. Without an explicit
# collection_name every cell writes to the same default in-memory collection,
# so chunks indexed by other cells (or earlier runs) come back as duplicates.
retriever = Chroma.from_documents(
    chunks, embeddings_model, collection_name="llm_chain_filter"
).as_retriever()

# 3. Baseline retrieval (plain vector search, before filtering)
docs = retriever.invoke("deepseek的发展历程")
print("\n1.向量召回" + '-' * 100)
pretty_print_docs(docs)

# 4. 过滤器
'''
Given the following question and context, return YES if the context is relevant to the question and NO if it isn't.
> Question: {question}
> Context:
>>>
{context}
>>>
> Relevant (YES / NO):

给定以下问题和上下文，如果上下文与问题相关，则返回YES，否则返回NO。
问题：{question}
上下文：
>>>
{context}
>>>
相关性（是/否）：
'''
# Build the LLM-backed filter and wrap the base retriever with it.
llm = ChatTongyi(model="qwen-max")
_filter = LLMChainFilter.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=_filter,
    base_retriever=retriever
)

# 5. Retrieve with filtering
compressed_docs = compression_retriever.invoke("deepseek的发展历程")

print("\n2.LLMChainFilter过滤后" + '-' * 100)
pretty_print_docs(compressed_docs)


In [None]:
# 6.压缩过滤-EmbeddingsFilter
import os

from langchain_chroma import Chroma
from langchain_classic.retrievers import ContextualCompressionRetriever
from langchain_classic.retrievers.document_compressors import EmbeddingsFilter
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import DashScopeEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter


def pretty_print_docs(docs):
    """Print each document's text below a "Document N:" heading.

    Sections are separated by a line of 100 dashes. Each item in *docs* must
    expose a string ``page_content`` attribute.
    """
    bodies = [doc.page_content for doc in docs]
    sections = [
        f"Document {number}:\n" + body
        for number, body in enumerate(bodies, start=1)
    ]
    divider = "\n{}\n".format("-" * 100)
    print(divider.join(sections))


# 1. Data preparation: load the source text file as UTF-8.
RESOURCE_DIR = "/mnt/c/大模型/智泊大模型全栈教程总结/02-教材整理 L2/代码/Langchain/6.langchain高级RAG/data/resources"
TXT_DOCUMENT_PATH = os.path.join(RESOURCE_DIR, "deepseek百度百科.txt")
loader = TextLoader(TXT_DOCUMENT_PATH, encoding='utf-8')
documents = loader.load()

# 2. Split into chunks, embed and index
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=100)
chunks = text_splitter.split_documents(documents)
embeddings_model = DashScopeEmbeddings(model="text-embedding-v1")
# NOTE: index into a collection dedicated to this cell. Without an explicit
# collection_name every cell writes to the same default in-memory collection,
# so chunks indexed by other cells (or earlier runs) come back as duplicates.
retriever = Chroma.from_documents(
    chunks, embeddings_model, collection_name="embeddings_filter"
).as_retriever()

# 3. Baseline retrieval (plain vector search, before filtering)
docs = retriever.invoke("deepseek的发展历程")
print("\n1.向量召回" + '-' * 100)
pretty_print_docs(docs)

# 4. Filter
# Score each retrieved chunk against the query by embedding similarity; chunks
# above the 0.66 threshold are kept, the rest are dropped.
embeddings_filter = EmbeddingsFilter(embeddings=embeddings_model, similarity_threshold=0.66)

# 5. Retrieve with filtering
compression_retriever = ContextualCompressionRetriever(
    base_compressor=embeddings_filter,
    base_retriever=retriever
)

compressed_docs = compression_retriever.invoke("deepseek的发展历程")

print("\n2.过滤后" + '-' * 100)
pretty_print_docs(compressed_docs)



In [None]:
# 7.冗余过滤
import os

from langchain_chroma import Chroma
from langchain_classic.retrievers import ContextualCompressionRetriever
from langchain_classic.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline
from langchain_community.document_loaders import TextLoader
from langchain_community.document_transformers import EmbeddingsRedundantFilter
from langchain_community.embeddings import DashScopeEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

def pretty_print_docs(docs):
    """Print all documents as numbered sections joined by 100-dash rules.

    Each item in *docs* must expose a string ``page_content`` attribute.
    """
    output_parts = []
    for index, entry in enumerate(docs):
        heading = f"Document {index + 1}:\n"
        output_parts.append(heading + entry.page_content)
    dashes = "-" * 100
    print(f"\n{dashes}\n".join(output_parts))


# 1. Data preparation: load the source text file as UTF-8.
RESOURCE_DIR = "/mnt/c/大模型/智泊大模型全栈教程总结/02-教材整理 L2/代码/Langchain/6.langchain高级RAG/data/resources"
TXT_DOCUMENT_PATH = os.path.join(RESOURCE_DIR, "deepseek百度百科.txt")
loader = TextLoader(TXT_DOCUMENT_PATH, encoding='utf-8')
documents = loader.load()

# 2. Split into chunks, embed and index
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=100)
chunks = text_splitter.split_documents(documents)
embeddings_model = DashScopeEmbeddings(model="text-embedding-v1")
# NOTE: index into a collection dedicated to this cell. Without an explicit
# collection_name every cell writes to the same default in-memory collection,
# so chunks indexed by other cells (or earlier runs) come back as duplicates.
retriever = Chroma.from_documents(
    chunks, embeddings_model, collection_name="redundant_filter"
).as_retriever()

# 3. Baseline retrieval (plain vector search, before filtering)
docs = retriever.invoke("deepseek的发展历程")
print("\n1.向量召回" + '-' * 100)
pretty_print_docs(docs)

# 4. Filters
# Redundancy filter: documents whose pairwise embedding similarity exceeds the
# threshold (0.95 here) count as redundant, and only one of each such pair is kept.
# Author's note: it can also be used on its own via
# redundant_filter.transform_documents(documents).
redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings_model, similarity_threshold=0.95)
# Relevance filter: keeps documents by their similarity to the query (threshold 0.66).
relevant_filter = EmbeddingsFilter(embeddings=embeddings_model, similarity_threshold=0.66)

# Pipeline order: first redundant_filter removes near-duplicate documents,
# then relevant_filter removes documents unrelated to the query.
pipeline_compressor = DocumentCompressorPipeline(
    transformers=[redundant_filter, relevant_filter]
)

# 5. Retrieve with filtering
# The base (Chroma) retriever produces the initial results, which are then
# passed through the compressor pipeline before being returned.
compression_retriever = ContextualCompressionRetriever(
    base_compressor=pipeline_compressor, base_retriever=retriever
)
compressed_docs = compression_retriever.invoke("deepseek的发展历程")

print("\n2.过滤后" + '-' * 100)
pretty_print_docs(compressed_docs)


