In [9]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
# from langchain_openai import OpenAIEmbeddings
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import CharacterTextSplitter

documents = TextLoader("state_of_the_union.txt").load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
# retriever = FAISS.from_documents(texts, OpenAIEmbeddings()).as_retriever()
EMBEDDING_MODEL = "nomic-embed-text"
local_embeddings = OllamaEmbeddings(model=EMBEDDING_MODEL)
db = FAISS.from_documents(texts, local_embeddings,normalize_L2=True)
retriever = db.as_retriever(search_type="similarity_score_threshold", search_kwargs={"score_threshold": .1})
docs = retriever.invoke("What did the president say about Ketanji Brown Jackson")


In [10]:
# Helper function for printing docs


def pretty_print_docs(docs):
    print(
        f"\n{'-' * 100}\n".join(
            [f"Document {i+1}:\n\n" + d.page_content for i, d in enumerate(docs)]
        )
    )

In [11]:
pretty_print_docs(docs)

Document 1:

Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. 

Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. 

One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. 

And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.
----------------------------------------------------------------------------------------------------
Document 2:

And my report is this: the State of the Union is strong—because you, the American peo

In [12]:
# 添加一个上下文过滤/上下文压缩
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain_ollama import ChatOllama

model_name="deepseek-r1:8b"
llm = ChatOllama(model=model_name,
                            base_url="http://10.168.6.88:11434",temperature = 0,mirostat_tau=2.0,top_k=10,top_p=0.5
                            )
compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)

compressed_docs = compression_retriever.invoke(
    "What did the president say about Ketanji Jackson Brown"
)
pretty_print_docs(compressed_docs)

Document 1:

<think>
Okay, so I need to figure out what the president said about Ketanji Jackson Brown. Let me read through the context provided.

The context starts with the president talking about legislation he wants passed, like the Freedom to Vote Act and others. Then he honors Justice Stephen Breyer, mentioning his service and retirement. After that, the president talks about his responsibility in nominating someone for the Supreme Court and specifically mentions Judge Ketanji Brown Jackson, calling her one of the nation's top legal minds who will continue Justice Breyer’s legacy.

So, the relevant part is where he directly names Ketanji Brown Jackson and praises her as a top legal mind. That should be the extracted part.
</think>

The president mentioned Ketanji Brown Jackson by name, describing her as one of the nation's top legal minds who will continue Justice Breyer’s legacy.

Extracted relevant part:
>>>
And I did that 4 days ago, when I nominated Circuit Court of Appeals J

In [13]:
# 另一种更简单但更强大的过滤器
from langchain.retrievers.document_compressors import LLMChainFilter

_filter = LLMChainFilter.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=_filter, base_retriever=retriever
)

compressed_docs = compression_retriever.invoke(
    "What did the president say about Ketanji Jackson Brown"
)
pretty_print_docs(compressed_docs)

Document 1:

Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. 

Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. 

One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. 

And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.
