In [None]:
import os
from dotenv import load_dotenv, find_dotenv
# Locate a .env file with find_dotenv() and load it; the return value is discarded.
_ = load_dotenv(find_dotenv())
# Indexing os.environ (instead of .get) raises KeyError right here if the key is missing.
openai_api_key = os.environ["OPENAI_API_KEY"]

# Data Loaders

In [None]:
from langchain_openai import ChatOpenAI

# Chat model client; used further down via chatModel.invoke(messages).
chatModel = ChatOpenAI(model="gpt-4o-mini")

In [None]:
# Load Text File

from langchain_community.document_loaders import TextLoader

loader = TextLoader("./data/be-good.txt")

# NOTE: `loader` and `loaded_data` are rebound by each of the loader cells below.
loaded_data = loader.load()

In [None]:
# Load CSV File
from langchain_community.document_loaders import CSVLoader

loader = CSVLoader('./data/Street_Tree_List.csv')

# Overwrites the `loaded_data` produced by the text-file cell above.
loaded_data = loader.load()

In [None]:
# Load HTML
from langchain_community.document_loaders import UnstructuredHTMLLoader

loader = UnstructuredHTMLLoader('./data/100-startups.html')

# Overwrites the `loaded_data` produced by the CSV cell above.
loaded_data = loader.load()


In [6]:
# Load PDF
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader('./data/5pages.pdf')

# Unlike the other loader cells this calls load_and_split() rather than load().
# NOTE(review): presumably this also chunks the pages with a default splitter — confirm.
loaded_data = loader.load_and_split()


In [7]:
# Load Wikipedia page
from langchain_community.document_loaders import WikipediaLoader

name = "JFK"

# load_max_docs=1 limits the loader to a single document for the query.
loader = WikipediaLoader(query=name, load_max_docs=1)

# Here `loaded_data` is the page_content of the first result, not a list of
# documents as in the cells above; [0] assumes at least one result came back.
loaded_data = loader.load()[0].page_content

In [8]:
from langchain_core.prompts import ChatPromptTemplate

# Single human message with two placeholders: {question} and {context}.
chat_template = ChatPromptTemplate.from_messages(
    [
        ("human", "Answer this {question}, here is some extra {context}"),
    ]
)

# Fill both placeholders; `loaded_data` is the Wikipedia page text loaded in the previous cell.
messages = chat_template.format_messages(
    question="What was the full name of JFK",
    context=loaded_data
)

In [None]:
# Send the formatted messages to the chat model; the bare `response` on the
# last line displays the returned message object.
response = chatModel.invoke(messages)
response

Failed to batch ingest runs: LangSmithError('Failed to POST https://api.smith.langchain.com/runs/batch in LangSmith API. HTTPError(\'403 Client Error: Forbidden for url: https://api.smith.langchain.com/runs/batch\', \'{"error":"Forbidden"}\\n\')')


AIMessage(content='The full name of JFK is John Fitzgerald Kennedy.', response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 839, 'total_tokens': 849, 'prompt_tokens_details': {'cached_tokens': 0, 'audio_tokens': 0}, 'completion_tokens_details': {'reasoning_tokens': 0, 'audio_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_51db84afab', 'finish_reason': 'stop', 'logprobs': None}, id='run-efbf487c-bd03-4238-b848-961cc30cdd0c-0', usage_metadata={'input_tokens': 839, 'output_tokens': 10, 'total_tokens': 849})

Failed to batch ingest runs: LangSmithError('Failed to POST https://api.smith.langchain.com/runs/batch in LangSmith API. HTTPError(\'403 Client Error: Forbidden for url: https://api.smith.langchain.com/runs/batch\', \'{"error":"Forbidden"}\\n\')')
Failed to batch ingest runs: LangSmithError('Failed to POST https://api.smith.langchain.com/runs/batch in LangSmith API. HTTPError(\'403 Client Error: Forbidden for url: https://api.smith.langchain.com/runs/batch\', \'{"error":"Forbidden"}\\n\')')
Failed to batch ingest runs: LangSmithError('Failed to POST https://api.smith.langchain.com/runs/batch in LangSmith API. HTTPError(\'403 Client Error: Forbidden for url: https://api.smith.langchain.com/runs/batch\', \'{"error":"Forbidden"}\\n\')')
Failed to batch ingest runs: LangSmithError('Failed to POST https://api.smith.langchain.com/runs/batch in LangSmith API. HTTPError(\'403 Client Error: Forbidden for url: https://api.smith.langchain.com/runs/batch\', \'{"error":"Forbidden"}\\n\')')
Failed t

# Splitters

## Reminder: steps of the RAG process.
* When you load a document, you end up with strings. Sometimes the strings will be too large to fit into the context window. On those occasions we will use the RAG technique:
    * **Split the document into small chunks**.
    * Transform the text chunks into numeric vectors (embeddings).
    * Load the embeddings into a vector database (aka vector store).
    * Embed the question and retrieve the most relevant chunks to answer it.
    * Send the retrieved chunks to the LLM so it can formulate the response properly.

## Splitters: divide the loaded document in small chunks of text
* Also called "Document Transformers".
* See the documentation page [here](https://python.langchain.com/v0.1/docs/modules/data_connection/document_transformers/).
* See the list of built-in splitters [here](https://python.langchain.com/v0.1/docs/integrations/document_transformers/).

### Simple splitting by character: Character Splitter
* This splits based on characters (by default "\n\n") and measures chunk length by number of characters.
* The "Character Splitter" in the context of RAG (Retrieval-Augmented Generation) applications, specifically using LangChain's tools, is a method that divides text into smaller parts based on specific characters.
* By default, it uses double newline characters ("\n\n") to identify where one chunk of text ends and another begins.
* Each chunk is measured by its number of characters.
* This simple splitting method is useful in RAG applications to help manage and process large blocks of text by breaking them down into manageable, smaller pieces. This can enhance the efficiency and effectiveness of the text retrieval process, which is crucial in generating accurate and contextually relevant responses.

In [10]:
from langchain_community.document_loaders import TextLoader

loader = TextLoader("./data/be-good.txt")

# Reload the text file: `loaded_data` was last set to the Wikipedia page text above.
loaded_data = loader.load()

In [11]:
from langchain_text_splitters import CharacterTextSplitter

# Splitter configuration reused by the create_documents cells below.
text_splitter = CharacterTextSplitter(
    separator="\n\n",  # split on blank lines
    chunk_size=1000,  # upper bound per chunk, measured with length_function
    chunk_overlap=200,  # amount of text shared between neighbouring chunks
    length_function=len,  # chunk length = number of characters
    is_separator_regex=False,  # treat the separator as a literal string
)

In [12]:
# Split the text of the loaded file into Document chunks (no metadata attached).
texts = text_splitter.create_documents([loaded_data[0].page_content])

In [13]:
# Number of chunks produced by the splitter.
len(texts)

2

In [17]:
# Display the first chunk; uncomment the line below to inspect the second one instead.
texts[0]

#texts[1]

Document(page_content='Be good')

## Splitting with MetaData

In [20]:
# One metadata dict per INPUT TEXT (not per chunk): create_documents pairs
# metadatas[i] with texts[i], and every chunk cut from that text carries the
# same dict. Keep this list the same length as the list of texts passed below;
# extra entries have no matching text and are never used.
metadatas = [{"this is metadata for first chunk": 0}, {"second chunk": 1}]

# The same page content is passed twice so that each copy gets its own metadata.
documents = text_splitter.create_documents(
    [loaded_data[0].page_content, loaded_data[0].page_content],
    metadatas=metadatas,
)

The `CharacterTextSplitter` from LangChain's text splitting utilities helps break down text into manageable chunks along with the option to attach metadata to each chunk. Here’s how it works in simple terms:

1. **Initialization of the Splitter**: The splitter is configured with several parameters:
   - `separator`: This is the character or string at which the text will be split. In this example, it’s set to `"\n\n"`, which means the text will be divided at every occurrence of double newlines.
   - `chunk_size`: This determines the maximum size of each chunk in characters. Here, each chunk can be up to 1000 characters long.
   - `chunk_overlap`: This allows chunks to overlap by a specified number of characters, set here to 200. Overlapping helps ensure that no crucial information is cut off awkwardly at the end of a chunk.
   - `length_function`: This is typically a function to measure the length of the text; `len` simply counts the number of characters.
   - `is_separator_regex`: This indicates whether the separator is a regular expression or not. Here, it’s `False`, meaning the separator is treated as a literal string.

2. **Creating Chunks**: The `create_documents` method is used to split the text. It takes in the text (or texts) and optional metadata, and splits the text based on the defined parameters.

3. **Attaching Metadata**: Optionally, metadata can be attached to each chunk to provide additional context or identifiers. For example, metadata might tag each chunk with its sequence number or categorize it based on content. In the example, metadata like `{"chunk": 0}` could signify that it’s the first chunk.

#### Example Code Explanation:
- **First Splitter Use**: Initially, the splitter is used to split the content of `loaded_data[0].page_content` without metadata. This will just divide the text based on the provided settings.
- **Second Splitter Use**: The same text is then passed to `create_documents` twice, this time together with a `metadatas` list. Each metadata dict is paired with the input text at the same position in the list, and every chunk produced from that text carries that dict — so the list should contain exactly one entry per input text, not one per chunk.

#### Example Outputs:
- **Number of Chunks**: This would typically be printed using `len(texts)`, showing how many chunks the text was split into.
- **First Chunk**: By printing `texts[0]`, you would see the content of the first chunk.
- **First Chunk with Metadata**: Printing `documents[0]` after the second splitting operation would show the first chunk of text along with its corresponding metadata, illustrating how metadata is associated with text chunks.

This method is particularly useful in applications where chunks of text need to be processed independently, but still require some form of contextual or sequential linking.

In [21]:
# First chunk of the first input text, shown together with its metadata.
documents[0]

Document(metadata={'this is metadata for first chunk': 0}, page_content='Be good')

### Recursive Character Splitter
* This text splitter is the recommended one for generic text. 
* It is parameterized by a list of characters. It tries to split on them in order until the chunks are small enough. The default list is ["\n\n", "\n", " ", ""]. 
* This has the effect of trying to keep all paragraphs (and then sentences, and then words) together as long as possible, as those would generically seem to be the strongest semantically related pieces of text.

#### Simple Explanation:

* The "Recursive Character Splitter" is a method used to divide text into smaller, more manageable chunks, designed specifically to maintain the semantic integrity of the text.
* It operates by attempting to split the text using a list of characters in a specified order—beginning with the largest units like paragraphs, then moving to sentences, and finally to individual words if needed.
* The default sequence for splitting is ["\n\n", "\n", " ", ""], which means it first tries to split the text at double newline characters to separate paragraphs, then at single newlines for any remaining large blocks, followed by spaces to isolate sentences or phrases, and finally using an empty string if finer splitting is necessary.
* This method is particularly effective because it tries to keep text chunks as meaningful and complete as possible, ensuring that each chunk has a coherent piece of information.

#### Example of Use:

#### Original Text:
```
Hello, welcome to our store!

\n\nWe offer a variety of products. Our range includes electronics, clothing, and home appliances.
\nOur staff is available to help you during store hours: 9 AM to 9 PM every day.
```

#### Applying Recursive Character Splitter:
- First attempt with `"\n\n"`:
  1. **Chunk 1:** `Hello, welcome to our store!`
  2. **Chunk 2:** `We offer a variety of products. Our range includes electronics, clothing, and home appliances.\nOur staff is available to help you during store hours: 9 AM to 9 PM every day.`

- Second attempt with `"\n"` for remaining long chunk:
  1. **New Chunk 2:** `We offer a variety of products. Our range includes electronics, clothing, and home appliances.`
  2. **Chunk 3:** `Our staff is available to help you during store hours: 9 AM to 9 PM every day.`

- No further splits are necessary as all chunks are now of manageable size.

In this example, the text is initially split into two chunks using the double newline. Since one chunk is still quite long, it then uses a single newline to split it further. Each chunk retains coherent, complete information, reflecting the effective use of the recursive character splitter to preserve the semantic structure of the text.

In [22]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Very small chunk size (26) with an overlap of 4 so the effect of splitting
# is visible on the short sample strings defined below.
recursive_splitter = RecursiveCharacterTextSplitter(
    chunk_size=26,
    chunk_overlap=4
)

In [23]:
# 33 characters with no whitespace: longer than chunk_size=26, so it must be cut mid-string.
text1 = 'abcdefghijklmnopqrstuvwxyzabcdefg'

In [24]:
# Two paragraphs separated by a blank line; several lines end with a trailing
# space, which is part of the string and shows up in the splitter output.
text2 = """
Data that Speak
LLM Applications are revolutionizing industries such as 
banking, healthcare, insurance, education, legal, tourism, 
construction, logistics, marketing, sales, customer service, 
and even public administration.

The aim of our programs is for students to learn how to 
create LLM Applications in the context of a business, 
which presents a set of challenges that are important 
to consider in advance.
"""

In [25]:
# No separator occurs in text1, so the splitter falls back to cutting by length.
recursive_splitter.split_text(text1)

['abcdefghijklmnopqrstuvwxyz', 'wxyzabcdefg']

In [26]:
# text2 contains paragraph breaks, newlines and spaces, so splits land on those boundaries.
recursive_splitter.split_text(text2)

['Data that Speak',
 'LLM Applications are',
 'are revolutionizing',
 'industries such as',
 'banking, healthcare,',
 'insurance, education,',
 'legal, tourism,',
 'construction, logistics,',
 'marketing, sales,',
 'customer service,',
 'and even public',
 'administration.',
 'The aim of our programs',
 'is for students to learn',
 'how to',
 'create LLM Applications',
 'in the context of a',
 'a business,',
 'which presents a set of',
 'of challenges that are',
 'are important',
 'to consider in advance.']

In [27]:
# The third separator is a regular expression (a lookbehind for ". "), so it is
# written as a raw string and the splitter is told to interpret separators as
# regex. With the default is_separator_regex=False every separator is escaped
# and matched literally, so the sentence pattern could never match.
# The plain separators ("\n\n", "\n", " ") mean the same thing as regex patterns.
second_recursive_splitter = RecursiveCharacterTextSplitter(
    chunk_size=150,
    chunk_overlap=0,
    separators=["\n\n", "\n", r"(?<=\. )", " ", ""],
    is_separator_regex=True,
)

The separators `["\n\n", "\n", "(?<=\. )", " ", ""]` refer to a sequence of characters and patterns used to split text into smaller parts, with each separator serving a specific function based on the structure of text:

1. `"\n\n"` - This separator targets double newlines, which are often used to denote separate paragraphs or sections within a text. Splitting here is intended to separate distinct thematic or topical blocks of content.

2. `"\n"` - This targets single newline characters, which typically indicate a new line within a paragraph or a soft break in the content, such as between list items or sub-paragraphs.

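3. `"(?<=\. )"` - This is a regular expression that matches the position right after a period followed by a space, which typically marks the end of a sentence. The `(?<=...)` part is a "lookbehind" assertion: it requires the period and space to appear immediately before the split point without consuming them, so they stay attached to the preceding sentence. Note that the splitter only interprets separators as regular expressions when `is_separator_regex=True` is passed; otherwise this pattern is matched as a literal string and will not split on sentence ends.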

4. `" "` - This targets single space characters, which are commonly used to separate words. Splitting on spaces can break down text into individual words or phrases, especially useful when finer granularity is needed.

5. `""` - An empty string as a separator is used in text processing to split a text into its individual characters, essentially decomposing the text down to its most basic elements.

Each of these separators is used to progressively split the text into smaller and smaller chunks, starting from larger structural divisions like paragraphs and moving down to the level of individual characters, depending on the needs of the application.

In [28]:
# Same sample text, now with chunk_size=150 and no overlap.
second_recursive_splitter.split_text(text2)

['Data that Speak\nLLM Applications are revolutionizing industries such as \nbanking, healthcare, insurance, education, legal, tourism,',
 'construction, logistics, marketing, sales, customer service, \nand even public administration.',
 'The aim of our programs is for students to learn how to \ncreate LLM Applications in the context of a business,',
 'which presents a set of challenges that are important \nto consider in advance.']

# Embeddings: transform chunks of text in chunks of numbers
* Vector databases work very fast with numbers.
* See the documentation page [here](https://python.langchain.com/v0.1/docs/modules/data_connection/text_embedding/).
* See the list of embedding model providers [here](https://python.langchain.com/v0.1/docs/integrations/text_embedding/).
* **The base Embeddings class in LangChain provides two methods: one for embedding documents and one for embedding a query**.

In [29]:
from langchain_openai import OpenAIEmbeddings

# No model name is passed, so the library's default embedding model is used.
embeddings_model = OpenAIEmbeddings()

In [30]:
# Five short utterances used as sample inputs for the embedding model.
chunks_of_text = [
    "Hi there!",
    "Hello!",
    "What's your name?",
    "Bond, James Bond",
    "Hello Bond!",
]

In [31]:
# Embed all sample strings in one call; the result holds one vector per input string.
embeddings = embeddings_model.embed_documents(chunks_of_text)

In [None]:
len(embeddings)  # one vector per embedded text, so 5 for the 5 sample strings

5

In [37]:
# Number of components in a single embedding vector.
len(embeddings[0])

1536

In [35]:
# embed_query handles a single query string (embed_documents above takes a list).
embedded_query = embeddings_model.embed_query("What was the name mentioned in the conversation?")

In [36]:
# The query vector has the same length as each document vector.
len(embedded_query)

1536

# Vector Store
* Store embeddings in a very fast searchable database.

## Reminder: Steps of the RAG process.
* When you load a document, you end up with strings. Sometimes the strings will be too large to fit into the context window. On those occasions we will use the RAG technique:
    * Split the document into small chunks.
    * Transform the text chunks into numeric vectors (embeddings).
    * **Load the embeddings into a vector database (aka vector store)**.
    * Embed the question and retrieve the most relevant chunks to answer it.
    * Send the retrieved chunks to the LLM so it can formulate the response properly.

The `.similarity_search` method in the code is used to find the most relevant text chunks from a pre-processed document that are similar to a given query. Here's how it works step-by-step

1. **Document Processing and Embedding:**
   - The document `state_of_the_union.txt` is loaded using the `TextLoader` from the LangChain Community package.
   - The document is then split into smaller chunks of text using the `CharacterTextSplitter`, where each chunk is 1000 characters long without any overlap between the chunks.
   - Each chunk of text is then transformed into an embedding using `OpenAIEmbeddings`. Embeddings are high-dimensional vectors that represent the semantic content of the text.
   - These embeddings are stored in an instance of `Chroma`, which serves as a vector database optimized for efficient similarity searches.

2. **Using `.similarity_search`:**
   - When you invoke `vector_db.similarity_search(question)`, the method converts the query (`"What did the president say about the John Lewis Voting Rights Act?"`) into an embedding using the same method that was used for the chunks.
   - It then searches the vector database (`vector_db`) to find the chunks whose embeddings are most similar to the embedding of the query. This similarity is typically measured using metrics such as cosine similarity.
   - The search results are sorted by relevance, with the most relevant chunks (those that are semantically closest to the query) returned first.

3. **Output:**
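   - The result of the `.similarity_search` is stored in `response`, which is a list of the most relevant chunks as `Document` objects, ordered from most to least similar. The similarity scores themselves are not included; use `similarity_search_with_score` if you need them.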
   - The script prints the content of the most relevant chunk (the first result), which should ideally contain information about what the president said regarding the John Lewis Voting Rights Act.

This method is particularly useful in applications like question answering or document retrieval where you need to quickly find the most relevant parts of a large text based on a query.

In [None]:
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_chroma import Chroma

# Specify encoding to avoid UnicodeDecodeError
loaded_document = TextLoader('data/state_of_the_union.txt', encoding='utf-8').load()

# NOTE: rebinds `text_splitter` (configured with chunk_overlap=200 in the splitter section).
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)

# NOTE: rebinds `chunks_of_text`, which held plain strings in the embeddings section.
chunks_of_text = text_splitter.split_documents(loaded_document)

# Embed every chunk and index it in a Chroma vector store.
# NOTE(review): no collection name or persist directory is given; re-running this
# cell in the same kernel may add the same chunks again — confirm and reset if needed.
vector_db = Chroma.from_documents(chunks_of_text, OpenAIEmbeddings())

In [44]:
question = "What did the president say about the John Lewis Voting Rights Act?"

# Query the vector store directly; the result is a list of matching documents.
response = vector_db.similarity_search(question)

# Print only the text of the first (best-ranked) match.
print(response[0].page_content)

Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. 

Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. 

One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. 

And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.


# Retrievers

## Vector Stores vs. Retrievers

1. **Purpose and Functionality**:
   - **Vector Stores**: These are specialized databases designed to store information in the form of vectors (high-dimensional data points that represent text or other information). Vector stores are primarily used for quickly searching and retrieving similar vectors based on a query vector. They are focused on efficiently handling similarity comparisons between the stored vectors and any query vector.
   - **Retrievers**: Retrievers are more general tools that use various methods, including vector stores, to find and return relevant documents or information in response to a query. A retriever doesn't necessarily store the information itself but knows how to access and retrieve it when needed.

2. **Storage vs. Retrieval**:
   - **Vector Stores**: As the name implies, these are primarily concerned with storing data in a structured way that makes it fast and efficient to perform similarity searches.
   - **Retrievers**: While they may utilize storage systems like vector stores, retrievers are more focused on the act of fetching the right information in response to a user's query. Their main job is to provide the end-user with the most relevant information or documents based on the input they receive.

3. **Flexibility**:
   - **Vector Stores**: These are somewhat limited in their scope to handling tasks that involve similarity searches within the stored vectors. They are a specific tool for specific types of data retrieval tasks.
   - **Retrievers**: They can be designed to use different back-end systems (like vector stores or other databases) and can be part of larger systems that may involve more complex data processing or response generation.

In summary, vector stores in LangChain are about how information is stored and efficiently accessed based on similarity, while retrievers are about using various methods (including vector stores) to actively fetch and return the right information in response to diverse queries.

## Retriever: returns a response given a question

1. **Retriever: returns a response given a question** - A retriever is a tool that provides specific pieces of information or documents when you ask a question or make a query.

2. **A retriever is an interface that returns documents given an unstructured query. It is more general than a vector store.** - A retriever works by taking a question that doesn't have a fixed format (an unstructured query) and finding relevant documents based on that question. It's a broad tool, more versatile than a vector store, which is just one way to organize information to make retrieval efficient.

3. **A retriever does not need to be able to store documents, only to return (or retrieve) them.** - The main job of a retriever is to find and return documents when asked; it doesn't have to store these documents itself. It can find documents stored elsewhere.

4. **Vector stores can be used as the backbone of a retriever, but there are other types of retrievers as well.** - Although many retrievers use a system called a vector store to help find the right documents quickly (a vector store organizes information into a format that's easy to search through), there are other ways to build a retriever that don't rely on this method.

Overall, a retriever helps you find information you need from a large amount of data by searching through documents, and it can use different methods to do this efficiently.
* See the documentation page [here](https://python.langchain.com/v0.1/docs/modules/data_connection/retrievers/).
* See the list of third-party retrievers [here](https://python.langchain.com/v0.1/docs/integrations/retrievers/).

## Differences .similarity_search vs. .as_retriever()
Both methods involve finding the most relevant text based on a query, but they are structured differently and may offer different functionalities based on their implementation.

#### `.similarity_search`

This method directly performs a similarity search against a vector database, which in your first code snippet is managed by the `Chroma` class. The process includes:
- Embedding the input query using the same model that was used to embed the document chunks.
- Searching the vector database for the closest vectors to the query's embedding.
- Returning the most relevant chunks based on their semantic similarity to the query.

This method is straightforward and typically used when you need to quickly find and retrieve the text segments that best match the query.

#### `.as_retriever()`

This method involves a different approach:
1. **Retriever Setup**: In the second code snippet, `vector_db.as_retriever()` converts the vector database (managed by `Chroma` in this notebook) into a retriever object. This object abstracts the similarity search into a retriever that can be used in more complex retrieval-augmented generation (RAG) tasks.
2. **Invoke Method**: The `invoke()` function on the retriever is then used to perform the query. This method can be part of a larger system where the retriever is integrated with other components (like a language model) to generate answers or further process the retrieved documents.

#### Key Differences

- **Flexibility**: `.as_retriever()` provides a more flexible interface that can be integrated into larger, more complex systems, potentially combining retrieval with generation (like in RAG setups). This method is beneficial in applications where the retrieved content might be used as input for further processing or answer generation.
- **Backend Implementation**: While `.similarity_search` directly accesses the vector database, `.as_retriever()` encapsulates this access within a retriever object, which might have additional functionalities or optimizations for specific retrieval tasks.
- **Use Cases**: The direct `.similarity_search` might be faster and more straightforward for simple query-to-document retrieval tasks. In contrast, `.as_retriever()` could be used in scenarios requiring additional steps after retrieval, like feeding the retrieved information into a language model for generating coherent and context-aware responses.

Both methods are useful, but their appropriateness depends on the specific requirements of your application, such as whether you need straightforward retrieval or a more complex retrieval-augmented generation process.

In [45]:
# Wrap the vector store in a retriever object with default search settings.
retriever = vector_db.as_retriever()

In [46]:
# Simple use case without LCEL
# No search_kwargs were given, so the retriever returns its default number of results.
response = retriever.invoke("what did he say about ketanji brown jackson?")

In [47]:
# How many documents the retriever returned for the query.
len(response)

4

In [48]:
# Best-ranked document, including its metadata.
response[0]

Document(metadata={'source': 'data/state_of_the_union.txt'}, page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.')

In [49]:
# Full list of retrieved documents.
response

[Document(metadata={'source': 'data/state_of_the_union.txt'}, page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.'),
 Document(metadata={'source': 'data/state_of_the_union.txt'}, page_content='A former top litigator in private practice. A forme

# Top K
* Decide how many embeddings will be retrieved to build the answer to your question

In [50]:
question = "What did the president say about the John Lewis Voting Rights Act?"

# No `k` is passed here, so the vector store's default number of results applies.
response = vector_db.similarity_search(question)

# Print only the text of the first (best-ranked) match.
print(response[0].page_content)

Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. 

Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. 

One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. 

And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.


## Retriever: returns a response given a question
* A retriever is an interface that returns documents given an unstructured query. It is more general than a vector store.
* A retriever does not need to be able to store documents, only to return (or retrieve) them.
* Vector stores can be used as the backbone of a retriever, but there are other types of retrievers as well.
* See the documentation page [here](https://python.langchain.com/v0.1/docs/modules/data_connection/retrievers/).
* See the list of third-party retrievers [here](https://python.langchain.com/v0.1/docs/integrations/retrievers/).

## Specify Top K

Here's a simple explanation of what's happening:

- **`search_kwargs={"k": 1}`**: This parameter is being set when the retriever object is created from the vector database. Here, `k` stands for the number of results or documents to retrieve that are most similar to the query. By setting `k` to 1, the code specifies that the retriever should return only the single most relevant document or chunk of text in response to a query. 

So, when you use this parameter:
- **Purpose**: It tells the retriever to limit the number of retrieved documents to the top 1 most relevant result.
- **Usage**: This is particularly useful when you are only interested in the very best match for a query and not in other closely related matches.

In the example, when the retriever is invoked with the query about what was said regarding "ketanji brown jackson", it will only return the single most relevant piece of information or document from the database, rather than providing several possible answers.

This approach is useful for applications where clarity and precision are more valuable than breadth, such as when you need a concise answer to a specific question without additional context that might be slightly less relevant.

In [53]:
# k=1: ask the retriever for only the single best-matching chunk.
# NOTE: rebinds `retriever`; the LCEL chain defined later uses this k=1 version.
retriever = vector_db.as_retriever(search_kwargs={"k": 1})

response = retriever.invoke("what did he say about ketanji brown jackson?")

# Number of documents returned with k=1.
len(response)

1

In [54]:
# The single retrieved document.
response

[Document(metadata={'source': 'data/state_of_the_union.txt'}, page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.')]

# RAG with LCEL

## Simple use with LCEL and input and output formatters

Here’s what each part of the chain does, in simple terms:

1. **Retrieving Relevant Documents**: First, the system retrieves documents that are relevant to the question you ask. It uses a retriever which is designed to find the most relevant information based on the query.

2. **Formatting Documents**: Once the relevant documents are retrieved, the next step is to format them into a readable format. The function `format_docs` does this by taking the content of each document and joining them with two newline characters (i.e. a blank line between documents). This creates a clear and organized chunk of text that serves as the context for answering the question.

3. **Preparing the Prompt**: With the context formatted, the system uses a template to prepare the prompt for the AI model. The template structures the input by placing the formatted context first and then the question. This way, the AI model knows exactly what the background information is and what it needs to answer.

4. **Generating the Answer**: The structured prompt is then fed into an AI model, in this case, ChatOpenAI. The AI reads the combined context and question and generates an answer based on what it knows from the provided text.

5. **Extracting the Final Answer**: Finally, the response from the AI is parsed into a straightforward text format using `StrOutputParser`, making it easy to read and understand. This step ensures that what you get as the output is just the answer, cleaned up from any formatting or raw data that the AI might output.

In essence, this code automates the process of fetching relevant information, preparing it, and asking an AI to provide a clear answer based on that information. It's like setting up a mini question-answering system where the AI has all the necessary context to provide accurate responses.

In [55]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Prompt text with two placeholders: the retrieved context and the user's question.
template = """Answer the question based only on the following context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# Separate client from `chatModel` defined at the top, same model name.
model = ChatOpenAI(model="gpt-4o-mini")

def format_docs(docs):
    """Concatenate the text of the given documents into one context string.

    Each item must expose a ``page_content`` attribute. The contents are
    joined in their original order, separated by a blank line.
    """
    contents = (doc.page_content for doc in docs)
    return "\n\n".join(contents)

# The dict feeds the input string to both prompt variables: "context" runs the
# retriever and joins the hits with format_docs, "question" passes the input
# through unchanged. `retriever` is whatever was bound last (the k=1 retriever above).
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

# StrOutputParser is the last step, so `response` is the answer text.
response = chain.invoke("what did he say about ketanji brown jackson?")

In [56]:
# Answer text produced by the chain.
response

"He referred to Ketanji Brown Jackson as one of the nation's top legal minds and stated that she will continue Justice Breyer’s legacy of excellence."