<h1> Contextual Compression Retriever

In [4]:
# Highly relevant information might be buried within documents containing large amounts of irrelevant text.
# Passing such entire documents to an application can lead to costly LLM calls and lower-quality responses.
# The ContextualCompressionRetriever is designed to address this issue by compressing retrieved documents using the context of the given query, 
# ensuring only relevant information is returned.

In [None]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_teddynote.document_compressors import LLMChainExtractor
from langchain.retrievers import ContextualCompressionRetriever
from langchain_openai import ChatOpenAI

In [None]:

# Helper used by the cells in this notebook to display retrieved documents.
# It is defined here, before its first use, so the notebook runs on a fresh kernel.
def pretty_print_docs(docs):
    """Print the page content of each document, numbered from 1.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute
            (the retrievers in this notebook return such documents).
    """
    separator = f"\n{'-' * 100}\n"
    print(
        separator.join(
            f"Document {i}:\n\n{doc.page_content}"
            for i, doc in enumerate(docs, start=1)
        )
    )


# Load the file using TextLoader
loader = TextLoader("./appendix-keywords.txt")

# Split the text using CharacterTextSplitter
text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=0)
texts = loader.load_and_split(text_splitter)

# Create a FAISS vector store and convert it to a retriever
retriever = FAISS.from_documents(texts, OpenAIEmbeddings()).as_retriever()

# Execute a query and retrieve relevant documents
docs = retriever.invoke("Tell me about monetary policy.")

# Show the uncompressed retrieval results
pretty_print_docs(docs)

<h1> Contextual Compression


In [None]:

# Initialize the language model used by the extractor
llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")  # OpenAI model

# Create a document compressor backed by the LLM
compressor = LLMChainExtractor.from_llm(llm)

# Wrap the base retriever so retrieved documents pass through the compressor
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=retriever,
)

# Before: results straight from the base retriever
pretty_print_docs(retriever.invoke("Tell me about monetary policy."))

print("============== LLMChainExtractor 적용 후 ==================")

# After: the same query through the compression retriever
compressed_docs = compression_retriever.invoke("Tell me about monetary policy.")

# Display the compressed results so the before/after comparison is visible
pretty_print_docs(compressed_docs)


<h1> EmbeddingsFilter

In [None]:
from langchain.retrievers.document_compressors import EmbeddingsFilter
from langchain_openai import OpenAIEmbeddings

# Embedding model shared by the filter below
embeddings = OpenAIEmbeddings()

# Embedding-based compressor configured with a similarity threshold of 0.86
embeddings_filter = EmbeddingsFilter(
    embeddings=embeddings,
    similarity_threshold=0.86,
)

# Rebuild the compression retriever with the embeddings filter as its compressor
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=embeddings_filter,
)

# Run the query through the filtered retriever and show what remains
compressed_docs = compression_retriever.invoke("Tell me about monetary policy.")
pretty_print_docs(compressed_docs)

<h1> Document Compressor Pipeline

In [None]:
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain_community.document_transformers import EmbeddingsRedundantFilter
from langchain_text_splitters import CharacterTextSplitter


# Stage 1: text splitter (300-character chunks, no overlap)
splitter = CharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=0,
)

# Stage 2: redundancy filter (filter out redundant text)
redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)

# Stage 3: relevance filter with a similarity threshold of 0.86
relevant_filter = EmbeddingsFilter(
    embeddings=embeddings,
    similarity_threshold=0.86,
)

# Combine the stages, plus an LLM-based extractor as the final stage,
# into a single DocumentCompressorPipeline
pipeline_compressor = DocumentCompressorPipeline(
    transformers=[
        splitter,
        redundant_filter,
        relevant_filter,
        LLMChainExtractor.from_llm(llm),
    ],
)