# Initial Framework RAG Model Support

## Pre-requisites

In [1]:
%pip install -q qdrant-client

Note: you may need to restart the kernel to use updated packages.


In [2]:
# load openai api key
import os

from dotenv import load_dotenv
load_dotenv()

# Fail fast with a clear message when the key is missing from the environment
# (after load_dotenv() above has had a chance to populate it).
if 'OPENAI_API_KEY' not in os.environ:
    raise ValueError('OPENAI_API_KEY is not set')

## Dataset Loader

In [3]:
# load documents
import os
from csv import DictReader
from uuid import uuid4

import pandas as pd


# Maps the raw CSV column names to the column names used in the rest of this notebook.
column_map = {"RFP_Question": "question", "RFP_Answer": "ground_truth"}


def load_documents(prefix, root_dir="datasets/rag/", encoding="utf-8"):
    """Load RFP question/answer rows from CSV files into a DataFrame.

    Every file in ``root_dir`` whose name starts with ``prefix`` and ends with
    ".csv" is read with ``csv.DictReader``; each row gets a fresh UUID4 string
    under ``"id"``.

    Args:
        prefix: File-name prefix selecting which CSV files to load.
        root_dir: Directory to scan (defaults to the original hard-coded path).
        encoding: Text encoding used to open the CSV files.

    Returns:
        DataFrame with columns ``id``, ``question`` and ``ground_truth``
        (the latter two renamed via ``column_map``).

    Raises:
        ValueError: If no rows were loaded (no matching files, or only empty ones).
        KeyError: If the loaded rows lack ``RFP_Question`` or ``RFP_Answer``.
    """
    documents = []
    # Sort so the row order does not depend on the filesystem's listing order.
    for file in sorted(os.listdir(root_dir)):
        if not (file.startswith(prefix) and file.endswith(".csv")):
            continue
        # newline="" is what the csv module expects so that line breaks inside
        # quoted fields are interpreted correctly.
        with open(os.path.join(root_dir, file), newline="", encoding=encoding) as f:
            for row in DictReader(f):
                # add a unique id to the row
                row["id"] = str(uuid4())
                documents.append(row)

    if not documents:
        # Without this guard the column selection below fails with an opaque KeyError.
        raise ValueError(
            f"No rows found in CSV files starting with '{prefix}' under '{root_dir}'"
        )

    df = pd.DataFrame(documents)
    # Return a new frame rather than renaming in place.
    return df[["id", "RFP_Question", "RFP_Answer"]].rename(columns=column_map)

def load_dataset_split(frac=0.8, random_state=42):
    """Load the existing RFP questions and split them into train/test frames.

    The "train" frame is what gets inserted into the vector store; the "test"
    frame is used to evaluate the search results.

    Args:
        frac: Fraction of rows sampled into the train frame.
        random_state: Seed passed to ``DataFrame.sample`` so the split is
            reproducible across kernel restarts; pass ``None`` for a fresh
            random split on every call.

    Returns:
        Tuple ``(train_df, test_df)``; ``test_df`` holds every row whose index
        was not sampled into ``train_df``.
    """
    df = load_documents("rfp_existing_questions")

    train_df = df.sample(frac=frac, random_state=random_state)
    test_df = df.drop(train_df.index)

    return train_df, test_df

## Embedding Model Selection

First, let's set up our embedding model and run some tests to make sure it's working well.

In [4]:
from openai import OpenAI

from validmind.models import EmbeddingModel

# Shared OpenAI client used by the embedding and generation functions below.
# No key is passed explicitly, so it presumably picks up OPENAI_API_KEY from the
# environment (checked in the first code cell) — confirm.
client = OpenAI()


def embed(question):
    """Embed ``question`` with the "text-embedding-3-small" model.

    Sends the text to the embeddings endpoint through the shared ``client``
    and returns the ``embedding`` of the first item in the response data.
    """
    response = client.embeddings.create(
        input=question,
        model="text-embedding-3-small",
    )
    first_item = response.data[0]
    return first_item.embedding


# Wrap `embed` as a ValidMind EmbeddingModel so it can be passed to ValidMind
# tests and to the RAG pipeline further down.
vm_embedder = EmbeddingModel(input_id="embedding_model", predict_fn=embed)

In [5]:
import validmind as vm

# Split the RFP data: train_df is inserted into the vector store later on,
# test_df is used for evaluation.
train_df, test_df = load_dataset_split()

# Wrap the test split as a ValidMind dataset with `question` as the text column.
# NOTE(review): `__log=False` presumably disables logging for this dataset — confirm.
vm_test_ds = vm.init_dataset(test_df, text_column="question", __log=False)

test_df.head()

2024-05-01 00:08:00,513 - INFO(validmind.client): Pandas dataset detected. Initializing VM Dataset instance...


Unnamed: 0,id,question,ground_truth
0,e1d0102e-241c-435a-b126-fcbffd84f357,Please share your experience with developing A...,Our company has 15 years of experience in deve...
1,26034283-0644-48b8-b788-3694f716c934,How do you maintain your AI applications with ...,We maintain a dedicated R&D team focused on in...
6,71601973-e95d-479e-908f-634706f5fb6f,How do you evaluate the success of your AI app...,Success measurement is tailored to each projec...
9,2bd586ef-b46b-469d-91f6-395381253f1d,How do your LLMs continuously learn and update...,We implement advanced continuous learning mech...
15,f77af7fa-5bf3-4ec7-87f5-d9fc8280e78f,How does your AI strategy align with the NIST ...,Our AI solution is meticulously designed to al...


In [6]:
# Store the embedder's predictions on the test split under the model's output
# column (shown as `embedding` in the displayed frame).
test_df[vm_embedder.output_column] = vm_embedder.predict(test_df)
test_df.head()

Unnamed: 0,id,question,ground_truth,embedding
0,e1d0102e-241c-435a-b126-fcbffd84f357,Please share your experience with developing A...,Our company has 15 years of experience in deve...,"[0.006856707856059074, -0.04714655876159668, 0..."
1,26034283-0644-48b8-b788-3694f716c934,How do you maintain your AI applications with ...,We maintain a dedicated R&D team focused on in...,"[0.011783392168581486, 0.010354681871831417, 0..."
6,71601973-e95d-479e-908f-634706f5fb6f,How do you evaluate the success of your AI app...,Success measurement is tailored to each projec...,"[0.014263585209846497, 0.022252684459090233, 0..."
9,2bd586ef-b46b-469d-91f6-395381253f1d,How do your LLMs continuously learn and update...,We implement advanced continuous learning mech...,"[-0.010829819366335869, 0.029368551447987556, ..."
15,f77af7fa-5bf3-4ec7-87f5-d9fc8280e78f,How does your AI strategy align with the NIST ...,Our AI solution is meticulously designed to al...,"[-0.0057174162939190865, 0.022510522976517677,..."


In [7]:
from validmind.tests import run_test

# Run ValidMind's StabilityAnalysisRandomNoise test with the embedding model and
# the test dataset as inputs.
# NOTE(review): `probability` presumably controls how much noise is injected
# into the text — confirm against the test's documentation.
result = run_test(
    "validmind.model_validation.embeddings.StabilityAnalysisRandomNoise",
    inputs={"model": vm_embedder, "dataset": vm_test_ds},
    params={"probability": 0.3},
)

VBox(children=(HTML(value='\n            <h1>Stability Analysis Random Noise ✅</h1>\n            <p>Evaluate r…

## Setup Vector Store

#### Generate embeddings for the questions

In [8]:
# Embed the train split; these vectors are what gets inserted into the vector
# store in the next step.
train_df[vm_embedder.output_column] = vm_embedder.predict(train_df)
train_df.head()

Unnamed: 0,id,question,ground_truth,embedding
63,d09906ca-5d8b-49f3-a6df-5ba1109df76d,How do you identify and assess AI risks in lin...,We conduct thorough assessments of AI systems ...,"[-0.018808960914611816, 0.042478520423173904, ..."
19,4d77c706-46b6-48c8-bf29-eb82b529d68e,How do you monitor and assess AI risk exposure...,We have developed a set of Key Performance Ind...,"[0.005055764224380255, 0.03331145644187927, 0...."
43,ed28ec11-d857-45a6-be3e-a5f5fcda5eb4,Explain how you manage and mitigate identified...,We implement and maintain robust risk manageme...,"[0.007622845005244017, 0.0593453086912632, 0.0..."
52,8cd9475f-b2e4-4446-b7e4-1d873e0b67e8,How do you measure the success and impact of y...,Success measurement is tailored to each projec...,"[0.008592551574110985, 0.01133714523166418, 0...."
82,e8dc7566-f59a-4367-b4bf-9d4a3c2fa86b,What is your strategy for integrating LLMs smo...,Our approach involves conducting a thorough an...,"[0.010894115082919598, 0.01403286773711443, 0...."


#### Insert embeddings and questions into Vector DB

In [9]:
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, PointStruct, VectorParams

# In-memory Qdrant instance: nothing is persisted between kernel sessions.
qdrant = QdrantClient(":memory:")
# NOTE(review): newer qdrant-client releases appear to deprecate
# `recreate_collection`; the install above is unpinned, so confirm against the
# installed version.
qdrant.recreate_collection(
    "rfp_rag_collection",
    vectors_config=VectorParams(
        # Derive the vector size from the stored embeddings instead of
        # hard-coding 1536, so the collection stays consistent if the embedding
        # model is changed.
        size=len(train_df[vm_embedder.output_column].iloc[0]),
        distance=Distance.COSINE,
    ),
)
# Insert one point per train row: the row's UUID is the point id, its embedding
# is the vector, and the question/answer pair is kept as payload for retrieval.
qdrant.upsert(
    "rfp_rag_collection",
    points=[
        PointStruct(
            id=row["id"],
            vector=row[vm_embedder.output_column],
            payload={"question": row["question"], "ground_truth": row["ground_truth"]},
        )
        for _, row in train_df.iterrows()
    ],
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

## Setup Retrieval Model

In [10]:
from validmind.models import RetrievalModel

def retrieve(embedding):
    """Fetch stored RFP question/answer pairs for a query embedding.

    Searches the "rfp_rag_collection" Qdrant collection with ``embedding`` as
    the query vector (at most 10 hits) and returns a list of strings, one per
    hit, each holding a "Q: ..." line followed by an "A: ..." line built from
    the hit's payload.
    """
    hits = qdrant.search(
        "rfp_rag_collection",
        query_vector=embedding,
        limit=10,
    )
    return [
        f"Q: {hit.payload['question']}\nA: {hit.payload['ground_truth']}\n"
        for hit in hits
    ]

# Wrap `retrieve` as a ValidMind RetrievalModel so it can be used on its own
# and as a component of the RAG pipeline.
vm_retriever = RetrievalModel(input_id="retrieval_model", predict_fn=retrieve)

In [11]:
# Store the retriever's predictions on the test split under the model's output
# column (shown as `contexts` in the displayed frame).
# NOTE(review): the retriever presumably reads the `embedding` column added
# earlier — confirm how RetrievalModel maps columns to `retrieve`'s parameter.
test_df[vm_retriever.output_column] = vm_retriever.predict(test_df)
test_df.head()

Unnamed: 0,id,question,ground_truth,embedding,contexts
0,e1d0102e-241c-435a-b126-fcbffd84f357,Please share your experience with developing A...,Our company has 15 years of experience in deve...,"[0.006856707856059074, -0.04714655876159668, 0...",[Q: What is your experience in developing AI-b...
1,26034283-0644-48b8-b788-3694f716c934,How do you maintain your AI applications with ...,We maintain a dedicated R&D team focused on in...,"[0.011783392168581486, 0.010354681871831417, 0...",[Q: How do you keep your AI applications curre...
6,71601973-e95d-479e-908f-634706f5fb6f,How do you evaluate the success of your AI app...,Success measurement is tailored to each projec...,"[0.014263585209846497, 0.022252684459090233, 0...",[Q: How do you assess the effectiveness and su...
9,2bd586ef-b46b-469d-91f6-395381253f1d,How do your LLMs continuously learn and update...,We implement advanced continuous learning mech...,"[-0.010829819366335869, 0.029368551447987556, ...",[Q: How do you ensure your LLMs continuously l...
15,f77af7fa-5bf3-4ec7-87f5-d9fc8280e78f,How does your AI strategy align with the NIST ...,Our AI solution is meticulously designed to al...,"[-0.0057174162939190865, 0.022510522976517677,...",[Q: How does your AI solution comply with the ...


## Setup Generation Model

In [12]:
from validmind.models import GenerationModel

# System message sent with every generation request; leading/trailing
# whitespace from the triple-quoted literal is stripped.
system_prompt = """
You are an expert RFP AI assistant.
You are tasked with answering new RFP questions based on existing RFP questions and answers.
You will be provided with the existing RFP questions and answer pairs that are the most relevant to the new RFP question.
After that you will be provided with a new RFP question.
You will generate an answer and respond only with the answer.
Ignore your pre-existing knowledge and answer the question based on the provided context.
""".strip()


def generate(question, contexts):
    """Answer ``question`` with gpt-3.5-turbo using the retrieved ``contexts``.

    Builds a three-message chat: the system prompt, one user message holding
    all contexts joined by blank lines, and one user message holding the new
    question. Returns the content of the first choice in the response.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": "\n\n".join(contexts)},
        {"role": "user", "content": question},
    ]
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

# Wrap `generate` as a ValidMind GenerationModel so it can be used on its own
# and as a component of the RAG pipeline.
vm_generator = GenerationModel(input_id="generation_model", predict_fn=generate)

In [13]:
# Store the generator's predictions on the test split under the model's output
# column (shown as `answer` in the displayed frame).
# NOTE(review): the generator presumably reads the `question` and `contexts`
# columns by matching `generate`'s parameter names — confirm.
test_df[vm_generator.output_column] = vm_generator.predict(test_df)
test_df.head()

Unnamed: 0,id,question,ground_truth,embedding,contexts,answer
0,e1d0102e-241c-435a-b126-fcbffd84f357,Please share your experience with developing A...,Our company has 15 years of experience in deve...,"[0.006856707856059074, -0.04714655876159668, 0...",[Q: What is your experience in developing AI-b...,Our company has 15 years of experience develop...
1,26034283-0644-48b8-b788-3694f716c934,How do you maintain your AI applications with ...,We maintain a dedicated R&D team focused on in...,"[0.011783392168581486, 0.010354681871831417, 0...",[Q: How do you keep your AI applications curre...,We maintain our AI applications by having a de...
6,71601973-e95d-479e-908f-634706f5fb6f,How do you evaluate the success of your AI app...,Success measurement is tailored to each projec...,"[0.014263585209846497, 0.022252684459090233, 0...",[Q: How do you assess the effectiveness and su...,Success measurement is tailored to each projec...
9,2bd586ef-b46b-469d-91f6-395381253f1d,How do your LLMs continuously learn and update...,We implement advanced continuous learning mech...,"[-0.010829819366335869, 0.029368551447987556, ...",[Q: How do you ensure your LLMs continuously l...,We implement advanced continuous learning mech...
15,f77af7fa-5bf3-4ec7-87f5-d9fc8280e78f,How does your AI strategy align with the NIST ...,Our AI solution is meticulously designed to al...,"[-0.0057174162939190865, 0.022510522976517677,...",[Q: How does your AI solution comply with the ...,Our AI strategy is meticulously crafted to ali...


## Setup RAG Model (Pipeline of "Component" Models)

In [14]:
from validmind.models import RAGModel

# Compose the three component models (embedder, retriever, generator) into a
# single RAGModel pipeline.
vm_rag_model = RAGModel(
    embedder=vm_embedder,
    retriever=vm_retriever,
    generator=vm_generator,
    input_id="rag_pipeline",
)

In [15]:
# Run the full pipeline on the test split; the displayed result carries the
# `embedding`, `contexts` and `answer` columns alongside the original ones.
result_df = vm_rag_model.predict(test_df)
result_df.head()

Unnamed: 0,id,question,ground_truth,embedding,contexts,answer
0,e1d0102e-241c-435a-b126-fcbffd84f357,Please share your experience with developing A...,Our company has 15 years of experience in deve...,"[0.006856707856059074, -0.04714655876159668, 0...",[Q: What is your experience in developing AI-b...,Our company has 15 years of experience in deve...
1,26034283-0644-48b8-b788-3694f716c934,How do you maintain your AI applications with ...,We maintain a dedicated R&D team focused on in...,"[0.011901024729013443, 0.010153321549296379, 0...",[Q: How do you keep your AI applications curre...,We maintain our AI applications with the lates...
6,71601973-e95d-479e-908f-634706f5fb6f,How do you evaluate the success of your AI app...,Success measurement is tailored to each projec...,"[0.014263585209846497, 0.022252684459090233, 0...",[Q: How do you assess the effectiveness and su...,Success measurement is tailored to each projec...
9,2bd586ef-b46b-469d-91f6-395381253f1d,How do your LLMs continuously learn and update...,We implement advanced continuous learning mech...,"[-0.010829819366335869, 0.029368551447987556, ...",[Q: How do you ensure your LLMs continuously l...,We implement advanced continuous learning mech...
15,f77af7fa-5bf3-4ec7-87f5-d9fc8280e78f,How does your AI strategy align with the NIST ...,Our AI solution is meticulously designed to al...,"[-0.0057174162939190865, 0.022510522976517677,...",[Q: How does your AI solution comply with the ...,Our AI strategy is crafted to closely align wi...


In [16]:
# Wrap the pipeline output as a ValidMind dataset; it is the input to the ragas
# tests below.
# NOTE(review): `__log=False` presumably disables logging for this dataset — confirm.
vm_ragas_ds = vm.init_dataset(result_df, __log=False)

2024-05-01 00:10:33,192 - INFO(validmind.client): Pandas dataset detected. Initializing VM Dataset instance...


In [17]:
import plotly.express as px

def plot_distribution(scores, title=None):
    """Show a histogram of metric scores.

    Args:
        scores: List of floats to plot; per the original note these are 0-1
            scores from a ragas metric.
        title: Optional figure title, e.g. the metric name, so that several
            histograms in one notebook can be told apart. Defaults to no title.
    """
    fig = px.histogram(
        x=scores,
        nbins=10,
        title=title,
        # Replace the default "x" axis label with something meaningful.
        labels={"x": "score"},
    )
    fig.show()

In [18]:
import warnings

# Silence all Python warnings for the remaining cells.
# NOTE(review): this blanket filter also hides deprecation warnings from the
# libraries used below — consider narrowing it to specific categories/modules.
warnings.filterwarnings("ignore")

In [19]:
# Run the ragas AnswerSimilarity test on the pipeline output and plot the data
# of the first summary result as a histogram.
# NOTE(review): `show=False` presumably suppresses the test's default rendering — confirm.
result = run_test(
    "validmind.model_validation.ragas.AnswerSimilarity",
    inputs={"dataset": vm_ragas_ds},
    show=False,
)
plot_distribution(result.metric.summary.results[0].data)

Evaluating:   0%|          | 0/23 [00:00<?, ?it/s]

In [None]:
# Run the ragas ContextEntityRecall test on the pipeline output and plot the
# data of the first summary result as a histogram.
# NOTE(review): `show=False` presumably suppresses the test's default rendering — confirm.
result = run_test(
    "validmind.model_validation.ragas.ContextEntityRecall",
    inputs={"dataset": vm_ragas_ds},
    show=False,
)
plot_distribution(result.metric.summary.results[0].data)

In [None]:
# Run the ragas ContextPrecision test on the pipeline output and plot the data
# of the first summary result as a histogram.
# NOTE(review): `show=False` presumably suppresses the test's default rendering — confirm.
result = run_test(
    "validmind.model_validation.ragas.ContextPrecision",
    inputs={"dataset": vm_ragas_ds},
    show=False,
)
plot_distribution(result.metric.summary.results[0].data)

In [None]:
# Run the ragas ContextRelevancy test on the pipeline output and plot the data
# of the first summary result as a histogram.
# NOTE(review): `show=False` presumably suppresses the test's default rendering — confirm.
result = run_test(
    "validmind.model_validation.ragas.ContextRelevancy",
    inputs={"dataset": vm_ragas_ds},
    show=False,
)
plot_distribution(result.metric.summary.results[0].data)