# RAG for RFP Answer Generation using LangChain

## Notebook setup

In [None]:
import pandas as pd

In [None]:
%pip install -qU langchain langchain-openai langchain-cohere

In [None]:
import os

import dotenv

# Load environment variables through python-dotenv before checking for the key.
dotenv.load_dotenv()

# Stop immediately when the OpenAI API key is not set in the environment.
if "OPENAI_API_KEY" not in os.environ:
    raise Exception("OPENAI_API_KEY not found")

In [None]:
import textwrap
from IPython.display import HTML, display
from tabulate import tabulate


def _format_cell_text(text, width=50):
    """Private function to format a cell's text."""
    return "\n".join([textwrap.fill(line, width=width) for line in text.split("\n")])


def _format_dataframe_for_tabulate(df):
    """Return a copy of ``df`` whose object-dtype columns have wrapped text.

    The input DataFrame is not modified. Columns of any other dtype are
    copied through untouched.
    """
    formatted = df.copy()

    # Object dtype is used as the marker for "probably text" columns.
    text_columns = [
        name for name in formatted.columns if formatted[name].dtype == object
    ]
    for name in text_columns:
        formatted[name] = formatted[name].apply(_format_cell_text)

    return formatted


def _dataframe_to_html_table(df):
    """Render ``df`` as an HTML table string using ``tabulate``.

    Column labels become the table headers and each DataFrame row becomes
    one table row.
    """
    return tabulate(
        df.values.tolist(),
        headers=df.columns.tolist(),
        tablefmt="html",
    )


def display_nice(df, num_rows=None):
    """Display ``df`` in the notebook as an HTML table with wrapped text.

    Args:
        df: DataFrame to show.
        num_rows: When given, only the first ``num_rows`` rows are shown;
            ``None`` shows the whole DataFrame.
    """
    subset = df if num_rows is None else df.head(num_rows)
    wrapped = _format_dataframe_for_tabulate(subset)
    display(HTML(_dataframe_to_html_table(wrapped)))

In [None]:
def print_dict_keys(data, indent=0):
    """Print the keys of a (possibly nested) dict as an indented tree.

    Args:
        data: Dictionary whose keys are printed, one per line.
        indent: Number of leading spaces for this level; each nested
            dictionary is printed four spaces deeper.
    """
    padding = " " * indent
    for name, child in data.items():
        print(padding + str(name))
        if not isinstance(child, dict):
            continue
        # Nested dictionary: print its keys one level deeper.
        print_dict_keys(child, indent + 4)

## Data preparation

### Load Existing RFPs

In [None]:
# CSV files holding the existing RFP questions (client_1 ... client_5)
existing_rfp_paths = [
    "datasets/rag/rfp_existing_questions_client_1.csv",
    "datasets/rag/rfp_existing_questions_client_2.csv",
    "datasets/rag/rfp_existing_questions_client_3.csv",
    "datasets/rag/rfp_existing_questions_client_4.csv",
    "datasets/rag/rfp_existing_questions_client_5.csv",
]

# Read each file into its own DataFrame ...
rfp_frames = [pd.read_csv(path) for path in existing_rfp_paths]

# ... then stack them into one DataFrame with a fresh 0..n-1 index
existing_rfp_df = pd.concat(rfp_frames, ignore_index=True)

In [None]:
existing_rfp_df

## Convert Questions and Answers to Embeddings 

In [None]:
# Add unique identifier to each row in the rfp df.
# The identifier is the row's index label converted to a string; it is used
# later as the record id when the rows are added to the vector store.
existing_rfp_df["unique_id"] = existing_rfp_df.index.astype(str)
# Preview the first rows, now including the new column.
existing_rfp_df.head()

In [None]:
from langchain_openai import OpenAIEmbeddings

# Embedding client shared by get_embedding below.
embeddings_model = OpenAIEmbeddings(model="text-embedding-3-small")


def get_embedding(text):
    """Return the embedding that ``embeddings_model`` produces for ``text``."""
    embedding = embeddings_model.embed_query(text)
    return embedding

In [None]:
# Toggle: set to False to skip the embedding step below.
run = True
if run:
    # (source text column, destination embedding column), processed in order
    embedding_columns = (
        ("RFP_Question", "Question_Embeddings_LC"),
        ("RFP_Answer", "Answer_Embeddings_LC"),
    )
    for text_column, embedding_column in embedding_columns:
        # get_embedding is called once per row of the source column
        existing_rfp_df[embedding_column] = existing_rfp_df[text_column].apply(
            get_embedding
        )

In [None]:
existing_rfp_df.head()

## Store RFP Questions and Answers in the Vectorstore

In [None]:
import chromadb
from langchain.vectorstores.chroma import Chroma

# On-disk Chroma client; no path is given, so Chroma's default location is used.
persistent_client = chromadb.PersistentClient()

# The distance metric of the index is a collection-level setting. A
# "hnsw:space" entry stored in the metadata of individual documents does not
# configure it, so without the `metadata` argument below the collection would
# use Chroma's default metric and the `1 - score` value printed in the
# retrieval step would not be a cosine similarity.
# NOTE: get_or_create_collection returns an already persisted collection as it
# is; if "rfp_qa_collection" was created earlier with another metric, delete it
# first so it is rebuilt with cosine distance.
collection = persistent_client.get_or_create_collection(
    name="rfp_qa_collection",
    metadata={"hnsw:space": "cosine"},
)

In [None]:
# Columns copied verbatim from each DataFrame row into the record's metadata
metadata_columns = (
    "Model_Title",
    "RFP_Question_ID",
    "RFP_Question",
    "RFP_Answer",
    "Area",
    "Last_Accessed_At",
    "Requester",
    "Status",
)

# Parallel lists: position n in each list describes the same record
all_embeddings = []
all_metadatas = []
all_documents = []
all_ids = []

for _, rfp_row in existing_rfp_df.iterrows():
    # The question embedding is the vector that gets indexed
    all_embeddings.append(rfp_row["Question_Embeddings_LC"])

    row_metadata = {column: rfp_row[column] for column in metadata_columns}
    row_metadata["hnsw:space"] = "cosine"
    all_metadatas.append(row_metadata)

    # The question text is the stored document; the answer travels in metadata
    all_documents.append(rfp_row["RFP_Question"])
    all_ids.append(rfp_row["unique_id"])

# Send everything to the collection in one call
collection.add(
    embeddings=all_embeddings,
    metadatas=all_metadatas,
    documents=all_documents,
    ids=all_ids,
)

In [None]:
# Expose the existing Chroma collection through LangChain's vector store class.
langchain_chroma = Chroma(
    collection_name="rfp_qa_collection",
    client=persistent_client,
)

# Sanity check: report how many entries the wrapped collection holds.
document_count = langchain_chroma._collection.count()
print("There are", document_count, "documents in the collection")

## Retrieval

We test the retrieval step by submitting an RFP question and inspecting the top-k most similar existing questions, together with the similarity score of each retrieved question.

In [None]:
# Use the first existing RFP question as the test query.
query = existing_rfp_df["RFP_Question"][0]
query_embedding = get_embedding(query)

# Ask the vector store for the 10 nearest entries; each result is a
# (document, score) pair.
documents = langchain_chroma.similarity_search_by_vector_with_relevance_scores(
    query_embedding,
    k=10,
)

In [None]:
number_of_documents = 10

print(f"New RFP Question:\n{query}")
print()
print(f"Top {number_of_documents} most similar existing RFP questions:")
print()

# One "Question/Answer" snippet per retrieved document; the snippets are
# joined after the loop into the single context string used for generation.
context_parts = []

for i, (retrieved_document, score) in enumerate(documents[:number_of_documents]):
    # First element of the pair: the document (text + metadata);
    # second element: its score.
    page_content = retrieved_document.page_content
    metadata = retrieved_document.metadata

    # Fields stored alongside the question when it was added to the collection
    rfp = metadata["Model_Title"]
    question = metadata["RFP_Question"]
    answer = metadata["RFP_Answer"]
    metric = metadata["hnsw:space"]

    context_parts.append(f"Question: {question}\nAnswer: {answer}\n")

    # Human-readable summary of this match
    print(f"Document {i + 1}")
    print(f"Question: {question}")
    print(f"Answer: {answer}")
    print(f"Score: {1-score} ({metric})\n")

context = "".join(context_parts)

## Augmentation

In [None]:
from langchain.prompts import ChatPromptTemplate

# Prompt text sent to the chat model. It has two placeholders:
#   {context}  - the retrieved question/answer text
#   {question} - the RFP question to answer
# The model is instructed to answer only from the context and to reply
# 'I don't know' when the context is not sufficient.
template = """
Answer the question based only on the following context.
If you cannot answer the question with the context, please respond with 'I don't know':

### CONTEXT
{context}

### QUESTION
Question: {question}
"""

# Build the chat prompt template from the string above.
prompt = ChatPromptTemplate.from_template(template)

## Generation

In [None]:
from langchain_openai import ChatOpenAI

# Model identifiers. Note that `gpt4` holds the "gpt-4o" model name.
gpt4 = "gpt-4o"
gpt3 = "gpt-3.5-turbo-16k"  # defined as an alternative; not selected below
# Chat model instance; switch the `model` argument to change the LLM.
llm = ChatOpenAI(model=gpt4)

In [None]:
from langchain_core.runnables import RunnablePassthrough

from operator import itemgetter

# The chain is invoked with a dict: {"question": ..., "context": ...}.
# Each prompt variable must receive only its own entry of that dict.
# RunnablePassthrough() forwards the *entire* input, so using it for both
# entries would render the whole input dict into the prompt's {context} and
# {question} placeholders. itemgetter selects the single value instead; LCEL
# wraps plain callables inside the dict automatically.
rag_chain = (
    {"context": itemgetter("context"), "question": itemgetter("question")}
    | prompt
    | llm
)

In [None]:
# Re-use the first existing RFP question as the question to answer.
question = existing_rfp_df["RFP_Question"][0]

# Run the RAG chain on the question together with the context string built
# in the retrieval step.
chain_input = {"question": question, "context": context}
response = rag_chain.invoke(chain_input)

In [None]:
print(question)

In [None]:
print(context)

In [None]:
print(response.content)

In [None]:
response

We now inspect the `response_metadata` object to understand its contents and identify what could be useful to incorporate in our RAG evaluation dataset:

In [None]:
print(response.response_metadata)

In [None]:
print_dict_keys(response.response_metadata)

Extracting the LLM used:

In [None]:
print(f"Model: {response.response_metadata['model_name']}")

As we showed earlier, we can also extract some token usage statistics that can help us understand and optimize our interactions with the language model for cost-effectiveness and efficiency.

- **Prompt tokens**: tokens that form the input text sent to the language model. This includes all the text provided to the LLM to generate a response.
- **Completion tokens**: number of tokens in the generated text or output from the model.
- **Total tokens**: total number of tokens processed by the model. It is the sum of both `prompt_tokens` and `completion_tokens`. 

In [None]:
# Token counters reported with the response
token_usage = response.response_metadata["token_usage"]

print(f"Completion tokens: {token_usage['completion_tokens']}")
print(f"Prompt tokens: {token_usage['prompt_tokens']}")
print(f"Total tokens: {token_usage['total_tokens']}")

## Evaluation using RAGAS

Load the dataset prepared for RAGAS evaluation:

In [None]:
# Read the evaluation set of new RFP questions from disk
rag_evaluation_df = pd.read_csv("datasets/rag/rag_evaluation_dataset_03.csv")

# Row count of the evaluation set; used later to bound the processing loops
NUM_OF_NEW_RFP_QUESTIONS = rag_evaluation_df.shape[0]

print("Number of New RFP Questions:", NUM_OF_NEW_RFP_QUESTIONS)

# Preview the first rows
rag_evaluation_df.head()

After testing our retrieved context, we will now create a simple helper to format the context so it can be directly passed to the RAG chain. This will be useful for creating our RAGAS evaluation dataset.

In [None]:
def get_context(question_embeddings, number_of_documents=10):
    """Build the context string for a question from its nearest stored entries.

    Args:
        question_embeddings: Embedding vector of the question to look up.
        number_of_documents: How many nearest entries to retrieve (``k``).

    Returns:
        One string containing a "Question: ... / Answer: ..." entry per
        retrieved document, in the order returned by the vector store.
    """
    matches = langchain_chroma.similarity_search_by_vector_with_relevance_scores(
        question_embeddings, k=number_of_documents
    )

    entries = []
    for match in matches:
        # Each match is a pair; its first element carries the metadata.
        details = match[0].metadata

        # Fall back to a fixed marker when a metadata field is absent.
        past_question = details.get("RFP_Question", "No question found")
        past_answer = details.get("RFP_Answer", "No answer provided")

        entries.append(f"Question: {past_question}\nAnswer: {past_answer}\n")

    return "".join(entries)

Generate a response for each question that does not yet have an answer, using the context retrieved from the vector store based on question similarity.

In [None]:
import time
import numpy as np


# Number of questions to process by the RAG model
number_of_rows_to_process = NUM_OF_NEW_RFP_QUESTIONS

# Placeholder column that will hold one embedding list per processed row.
rag_evaluation_df["question_embeddings"] = ""

# Columns that receive text or lists below must be object-typed: a column that
# was read as entirely missing is float64, and writing a string or a list into
# it fails or triggers a dtype-upcast warning.
for column in ("question_embeddings", "answer", "contexts", "model"):
    if column in rag_evaluation_df.columns:
        rag_evaluation_df[column] = rag_evaluation_df[column].astype(object)

for i, (index, row) in enumerate(
    rag_evaluation_df.iloc[:number_of_rows_to_process].iterrows()
):
    print(f"Processing row {i}...")

    # Only rows without an answer are sent to the RAG model. Besides the
    # literal string "None", also accept a missing value: read_csv can parse
    # the text None as NA (it is among the default NA markers in recent pandas
    # versions), in which case the string comparison alone never matches.
    answer_missing = pd.isna(row["answer"]) or row["answer"] == "None"
    if not answer_missing:
        continue

    print(f"Answer is 'None' for question ID {index}. Invoking RAG model...")

    print(f"Computing embeddings for question ID {index}...")
    question_embeddings = get_embedding(row["question"])
    rag_evaluation_df.at[index, "question_embeddings"] = question_embeddings

    print(f"Extracting context for question ID {index}...")
    context = get_context(question_embeddings)

    # Stored as a one-element list of strings
    rag_evaluation_df.at[index, "contexts"] = [context]

    # perf_counter is a monotonic clock, so the measured duration cannot be
    # distorted by system clock adjustments.
    start_time = time.perf_counter()

    # Invoke the RAG model with the question from the current row
    response = rag_chain.invoke({"question": row["question"], "context": context})

    elapsed_seconds = time.perf_counter() - start_time

    # Response time in seconds, rounded to one decimal
    rag_evaluation_df.at[index, "response_time"] = round(elapsed_seconds, 1)

    # Store whatever response comes from the LLM
    rag_evaluation_df.at[index, "answer"] = response.content
    print(f"Question ID {index} answer updated with the response from the RAG model.")

    # Store some metadata such as model name and token statistics
    response_metadata = response.response_metadata
    token_usage = response_metadata["token_usage"]
    rag_evaluation_df.at[index, "model"] = response_metadata["model_name"]
    rag_evaluation_df.at[index, "completion_tokens"] = token_usage["completion_tokens"]
    rag_evaluation_df.at[index, "prompt_tokens"] = token_usage["prompt_tokens"]
    rag_evaluation_df.at[index, "total_tokens"] = token_usage["total_tokens"]

print("Processing complete.")

In [None]:
rag_evaluation_df

In [None]:
# RAGAS needs every 'contexts' cell to be a list. Cells that already hold a
# list are kept as they are; any other value is wrapped in a one-element list.


def _as_context_list(value):
    """Return ``value`` unchanged if it is a list, else wrap it in a list."""
    return value if isinstance(value, list) else [value]


rag_evaluation_df["contexts"] = rag_evaluation_df["contexts"].apply(_as_context_list)

In [None]:
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
    context_relevancy,
    answer_correctness,
    answer_similarity,
)

from ragas import evaluate


def evaluate_ragas_dataset(ragas_dataset):
    """Evaluate ``ragas_dataset`` with the seven RAGAS metrics listed below.

    Args:
        ragas_dataset: Dataset handed to ``ragas.evaluate`` as is.

    Returns:
        The result object returned by ``ragas.evaluate``.
    """
    selected_metrics = [
        context_precision,
        faithfulness,
        answer_relevancy,
        context_recall,
        context_relevancy,
        answer_correctness,
        answer_similarity,
    ]
    return evaluate(ragas_dataset, metrics=selected_metrics)

In [None]:
from datasets import Dataset

# Columns handed to RAGAS for each evaluated row
required_fields = ["question", "answer", "contexts", "ground_truth"]

# Metric name -> RAGAS metric object (imported above)
metrics_functions = {
    "context_precision": context_precision,
    "faithfulness": faithfulness,
    "answer_relevancy": answer_relevancy,
    "context_recall": context_recall,
    "context_relevancy": context_relevancy,
    "answer_correctness": answer_correctness,
    "answer_similarity": answer_similarity,
}

# Metric names in evaluation order (same order as the mapping above)
metrics = list(metrics_functions)

# Never process more rows than the evaluation set originally contained
number_of_rows_to_process = min(NUM_OF_NEW_RFP_QUESTIONS, len(rag_evaluation_df))

In [None]:
# Evaluate each row on its own with the configured RAGAS metrics and write the
# scores back into the DataFrame.

# Resolve the metric objects once; the selection is the same for every row.
selected_metrics = [
    metrics_functions[metric] for metric in metrics if metric in metrics_functions
]

for i, (index, row) in enumerate(
    rag_evaluation_df.iloc[:number_of_rows_to_process].iterrows()
):
    print(f"Processing RFP question {i+1}...")
    print(f"Question: {row['question']}")
    print(f"Answer: {row['answer']}")

    # Create a temporary single-row Dataset for the current row
    ragas_dataset = Dataset.from_pandas(
        rag_evaluation_df.iloc[i : i + 1][required_fields]
    )

    # Evaluate using RAGAS metrics
    evaluation_result = evaluate(ragas_dataset, selected_metrics)
    print("Evaluation completed.")

    # Store evaluation results back into the DataFrame.
    # `.at` addresses rows by index label, so the row's own label (`index`) is
    # used rather than the positional counter `i`; the two only coincide when
    # the DataFrame has a default 0..n-1 index.
    for metric in metrics:
        if metric in evaluation_result:
            rag_evaluation_df.at[index, metric] = evaluation_result[metric]
            print(f"{metric}: {evaluation_result[metric]}")

print("All RFP questions processed.")

In [None]:
rag_evaluation_df