# Initial Framework RAG Model Support

## Pre-requisites

In [1]:
%pip install -q qdrant-client

Note: you may need to restart the kernel to use updated packages.


In [2]:
# load openai api key
import os

from dotenv import load_dotenv
load_dotenv()

if not 'OPENAI_API_KEY' in os.environ:
    raise ValueError('OPENAI_API_KEY is not set')

## Dataset Loader

In [3]:
# load documents
import os
from csv import DictReader
from uuid import uuid4

import pandas as pd


column_map = {"RFP_Question": "question", "RFP_Answer": "ground_truth"}


def load_documents(prefix):
    documents = []
    root_dir = "datasets/rag/"
    for file in os.listdir(root_dir):
        if file.startswith(prefix) and file.endswith(".csv"):
            # use csv dict reader to load the csv file
            with open(os.path.join(root_dir, file)) as f:
                reader = DictReader(f)
                for row in reader:
                    # add a unique id to the row
                    row["id"] = str(uuid4())
                    documents.append(row)

    df = pd.DataFrame(documents)
    df = df[["id", "RFP_Question", "RFP_Answer"]]
    # df.rename(columns=column_map, inplace=True)

    return df

def load_dataset_split(limit=None):
    df = load_documents("rfp_existing_questions")

    if limit:
        df = df.head(limit)

    # split the dataset into a "train" - which gets inserted into the vector store
    # and a "test" - which is used to evaluate the search results
    train_df = df.sample(frac=0.8)
    test_df = df.drop(train_df.index)

    return train_df, test_df

## Embedding Model Selection

First let's setup our embedding model and run some tests to make sure its working well.

In [4]:
from openai import OpenAI

from validmind.models import FunctionModel

client = OpenAI()


def embed(input):
    """Returns a text embedding for the given text"""
    return (
        client.embeddings.create(
            input=input["RFP_Question"],
            model="text-embedding-3-small",
        )
        .data[0]
        .embedding
    )

vm_embedder = FunctionModel(input_id="embedding_model", predict_fn=embed)

Let's create our test dataset so we can run it through our different models.

In [5]:
import validmind as vm

train_df, test_df = load_dataset_split(20)

vm_test_ds = vm.init_dataset(
    test_df,
    text_column="RFP_Question", # some NLP which work with text data require a `text_column` to be specified
    target_column="RFP_Answer",
    __log=False,
)

vm_test_ds.df.head()

2024-05-07 15:11:44,183 - INFO(validmind.client): Pandas dataset detected. Initializing VM Dataset instance...


Unnamed: 0,id,RFP_Question,RFP_Answer
0,881f050a-874d-4863-a5ce-f802cf4469a4,Please share your experience with developing A...,Our company has 15 years of experience in deve...
10,7ae0aeca-4fe5-495b-b95a-2a24a8f828e0,What measures do you employ to ensure your LLM...,We prioritize transparency and explainability ...
13,1e1b0a2a-1d01-4a7b-af63-53a397c90263,Describe your strategy for integrating LLMs in...,Our approach involves conducting a thorough an...
16,4023a94a-41c6-4390-9adc-babd738aafcb,Can you discuss your governance framework for ...,We have established an AI Risk Council that pl...


In [6]:
vm_test_ds.assign_predictions(vm_embedder)

2024-05-07 15:11:44,194 - INFO(validmind.vm_models.dataset.utils): Running predict_proba()... This may take a while
2024-05-07 15:11:44,195 - INFO(validmind.vm_models.dataset.utils): Not running predict_proba() for unsupported models.
2024-05-07 15:11:44,195 - INFO(validmind.vm_models.dataset.utils): Running predict()... This may take a while
2024-05-07 15:11:44,937 - INFO(validmind.vm_models.dataset.utils): Done running predict()


In [7]:
vm_test_ds.df.head()

Unnamed: 0,id,RFP_Question,RFP_Answer,embedding_model_prediction
0,881f050a-874d-4863-a5ce-f802cf4469a4,Please share your experience with developing A...,Our company has 15 years of experience in deve...,"[0.006856707856059074, -0.04714655876159668, 0..."
10,7ae0aeca-4fe5-495b-b95a-2a24a8f828e0,What measures do you employ to ensure your LLM...,We prioritize transparency and explainability ...,"[0.010077687911689281, 0.02444615587592125, 0...."
13,1e1b0a2a-1d01-4a7b-af63-53a397c90263,Describe your strategy for integrating LLMs in...,Our approach involves conducting a thorough an...,"[0.010061484761536121, 0.022379260510206223, 0..."
16,4023a94a-41c6-4390-9adc-babd738aafcb,Can you discuss your governance framework for ...,We have established an AI Risk Council that pl...,"[0.014880189672112465, 0.03505474328994751, 0...."


Let's go ahead and run one of the ValidMind embeddings stability analysis tests to make sure our embeddings model is working properly.

In [8]:
from validmind.tests import run_test

result = run_test(
    "validmind.model_validation.embeddings.StabilityAnalysisRandomNoise",
    inputs={"model": vm_embedder, "dataset": vm_test_ds},
    params={"probability": 0.3},
)

VBox(children=(HTML(value='\n            <h1>Stability Analysis Random Noise ✅</h1>\n            <p>Evaluate r…

## Setup Vector Store

#### Insert embeddings and questions into Vector DB

> Note: We use the name `train_df` to refer to the dataset that is loaded into the vector store and used as context. This is not a great name but its consistent with data science terminology.

In [9]:
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, PointStruct, VectorParams

qdrant = QdrantClient(":memory:")
qdrant.recreate_collection(
    "rfp_rag_collection",
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
)
qdrant.upsert(
    "rfp_rag_collection",
    points=[
        PointStruct(
            id=row["id"],
            vector=embed(row),
            payload={"RFP_Question": row["RFP_Question"], "RFP_Answer": row["RFP_Answer"]},
        )
        for _, row in train_df.iterrows()
    ],
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

## Setup Retrieval Model

In [10]:
def retrieve(input):
    contexts = []

    for result in qdrant.search(
        "rfp_rag_collection",
        # notice the key we are using to get the embedding model's output
        query_vector=input["embedding_model"],
        limit=input.get(
            "limit", 10
        ),  # we could add a row to the dataset to specify a limit
    ):
        context = f"Q: {result.payload['RFP_Question']}\n"
        context += f"A: {result.payload['RFP_Answer']}\n"

        contexts.append(context)

    return contexts


vm_retriever = FunctionModel(input_id="retrieval_model", predict_fn=retrieve)

Now if we try to run `predict()` on this model directly or compute predictions for our test dataset using `assign_predictions()`, we will run into an error. This is because the retriever function expects that the input contains an embedding which has to be computed by the embedding model. So what we can do is create a PipelineModel that contains the embedding model and the retrieval model and run predictions on that. This is because the intermediate output of each "component" model in a PipelineModel is passed as part of the input to the next model (the retrieval model). The key for the model's output is the `input_id` you specify when creating the model.

To demonstate this, let's create a pipeline model that contains the embedding model and the retrieval model.

Here is how the pipeline model prediction will work:

embed_retrieve_pipeline.predict():
```
{"question": "What is the capital of France?"}
|
embedder.predict() -> [0.1, 0.2, 0.3, ...]
|
{"question": "What is the capital of France?", "embedding_model": [0.1, 0.2, 0.3, ...]}
|
retriever.predict() -> ["Capital of France is Paris.", "Paris is the capital of France."]
|
["Capital of France is Paris.", "Paris is the capital of France."]
```

In [11]:
from validmind.models import PipelineModel

embed_retrieve_pipeline = PipelineModel(vm_embedder | vm_retriever, input_id="embed_retrieve_pipeline")

In [12]:
embed_retrieve_pipeline.predict({"RFP_Question": "What is your experience with AI?"})

['Q: How do you maintain your AI applications with the newest AI technologies and advancements?\nA: We maintain a dedicated R&D team focused on integrating the latest AI advancements into our applications. This includes regular updates and feature enhancements based on cutting-edge technologies such as GPT (Generative Pre-trained Transformer) for natural language understanding, CNNs (Convolutional Neural Networks) for advanced image recognition tasks, and DQN (Deep Q-Networks) for decision-making processes in complex environments. Our commitment to these AI methodologies ensures that our applications remain innovative, with capabilities that adapt to evolving market demands and client needs. This approach has enabled us to enhance the predictive accuracy of our financial forecasting tools by 25% and improve the efficiency of our educational content personalization by 40%\n',
 'Q: What considerations do you take into account for user interface and user experience design in your AI appli

## Setup Generation Model

In [13]:
system_prompt = """
You are an expert RFP AI assistant.
You are tasked with answering new RFP questions based on existing RFP questions and answers.
You will be provided with the existing RFP questions and answer pairs that are the most relevant to the new RFP question.
After that you will be provided with a new RFP question.
You will generate an answer and respond only with the answer.
Ignore your pre-existing knowledge and answer the question based on the provided context.
""".strip()


def generate(input):
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": "\n\n".join(input["retrieval_model"])},
            {"role": "user", "content": input["RFP_Question"]},
        ],
    )

    return response.choices[0].message.content

vm_generator = FunctionModel(input_id="generation_model", predict_fn=generate)

## Setup RAG Model (Pipeline of "Component" Models)

Now that we have our individual models setup, let's create a `RAGModel` instance that will chain them together and give us a single model that can be evalated end-to-end.

In [14]:
from validmind.models import PipelineModel

vm_rag_model = PipelineModel(vm_embedder | vm_retriever | vm_generator, input_id="rag_model")

Let's run the test dataset through the entire pipeline. It will overwrite the current predictions that we generated from the individual models, but the key here is that calling `predict` on the `RAGModel` will run the entire pipeline and store the intermediate predictions in the dataframe.

In [15]:
vm_rag_model.predict({"RFP_Question": "What is your experience with AI?"})

'We maintain a dedicated R&D team focused on integrating the latest AI advancements into our applications. Our commitment to utilizing cutting-edge technologies such as GPT (Generative Pre-trained Transformer) for natural language understanding, CNNs (Convolutional Neural Networks) for advanced image recognition tasks, and DQN (Deep Q-Networks) for decision-making processes sets us apart in the AI landscape. Our track record includes enhancing the predictive accuracy of financial forecasting tools by 25% and improving the efficiency of educational content personalization by 40%, showcasing our expertise and success in implementing AI solutions.'

In [16]:
vm_test_ds.assign_predictions(vm_rag_model)

2024-05-07 15:11:52,715 - INFO(validmind.vm_models.dataset.utils): Running predict_proba()... This may take a while
2024-05-07 15:11:52,716 - INFO(validmind.vm_models.dataset.utils): Not running predict_proba() for unsupported models.
2024-05-07 15:11:52,716 - INFO(validmind.vm_models.dataset.utils): Running predict()... This may take a while
2024-05-07 15:12:16,003 - INFO(validmind.vm_models.dataset.utils): Done running predict()


In [17]:
vm_test_ds.df.head()

Unnamed: 0,id,RFP_Question,RFP_Answer,embedding_model_prediction,rag_model_prediction
0,881f050a-874d-4863-a5ce-f802cf4469a4,Please share your experience with developing A...,Our company has 15 years of experience in deve...,"[0.006856707856059074, -0.04714655876159668, 0...",We have a strong track record in developing AI...
10,7ae0aeca-4fe5-495b-b95a-2a24a8f828e0,What measures do you employ to ensure your LLM...,We prioritize transparency and explainability ...,"[0.010077687911689281, 0.02444615587592125, 0....",We prioritize transparency by incorporating ex...
13,1e1b0a2a-1d01-4a7b-af63-53a397c90263,Describe your strategy for integrating LLMs in...,Our approach involves conducting a thorough an...,"[0.010061484761536121, 0.022379260510206223, 0...",Our strategy for integrating Large Language Mo...
16,4023a94a-41c6-4390-9adc-babd738aafcb,Can you discuss your governance framework for ...,We have established an AI Risk Council that pl...,"[0.014880189672112465, 0.03505474328994751, 0....",Our governance framework for managing AI risks...


## Experiment with some RAGAS Metrics

Below I am just experimenting to see how the RAGAS metrics can work with the `RAGModel` instance. This is not a full implementation of the RAGAS metrics but just a poc. We'll want to make this work in a more general way so that the columns can be properly mapped from the user-provided `predict_col` or the default `predict_col` to the column names that RAGAS expects i.e. `question`, `contexts`, `answer`, `ground_truth`.

In [18]:
vm_ragas_ds = vm.init_dataset(result_df, __log=False)

NameError: name 'result_df' is not defined

In [None]:
import plotly.express as px

def plot_distribution(scores):
    # plot distribution of scores (0-1) from ragas metric
    # scores is a list of floats
    fig = px.histogram(x=scores, nbins=10)
    fig.show()

In [None]:
import warnings

warnings.filterwarnings("ignore")

In [None]:
result = run_test(
    "validmind.model_validation.ragas.AnswerSimilarity",
    inputs={"dataset": vm_ragas_ds},
    show=False,
)
plot_distribution(result.metric.summary.results[0].data)

In [None]:
result = run_test(
    "validmind.model_validation.ragas.ContextEntityRecall",
    inputs={"dataset": vm_ragas_ds},
    show=False,
)
plot_distribution(result.metric.summary.results[0].data)

In [None]:
result = run_test(
    "validmind.model_validation.ragas.ContextPrecision",
    inputs={"dataset": vm_ragas_ds},
    show=False,
)
plot_distribution(result.metric.summary.results[0].data)

In [None]:
result = run_test(
    "validmind.model_validation.ragas.ContextRelevancy",
    inputs={"dataset": vm_ragas_ds},
    show=False,
)
plot_distribution(result.metric.summary.results[0].data)