# RAG Model using the LangChain library

### Installation of libraries

In [4]:
# Install the Qdrant client (quietly, via -q) into the kernel's environment; a
# Qdrant vector store is created later in this notebook.
# NOTE(review): the package version is not pinned.
%pip install -q qdrant-client


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


### Read the OpenAI API key

In [8]:
# load openai api key
import os
from dotenv import load_dotenv
load_dotenv()
import validmind as vm
import pandas as pd

# Fail fast when the key is missing *or* blank: an empty value would pass a
# plain membership check and only fail later, on the first OpenAI request.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
if not OPENAI_API_KEY.strip():
    raise ValueError('OPENAI_API_KEY is not set')

## Dataset Loader

In [9]:
# Import the sample dataset from the library

from validmind.datasets.llm.rag import rfp
# Load the raw sample data from the `rfp` dataset module.
raw_df = rfp.load_data()
# `preprocess` returns two frames, used below as the train and test splits.
train_df, test_df = rfp.preprocess(raw_df)

In [10]:
# Both splits use the same column roles, so the keyword arguments are declared
# once and shared by the two `init_dataset` calls.
_dataset_kwargs = {
    "text_column": "question",
    "target_column": "ground_truth",
    "__log": False,
}

vm_train_ds = vm.init_dataset(train_df, **_dataset_kwargs)
vm_test_ds = vm.init_dataset(test_df, **_dataset_kwargs)

# Preview the first rows of the test split.
test_df.head()

2024-05-03 14:45:33,719 - INFO(validmind.client): Pandas dataset detected. Initializing VM Dataset instance...
2024-05-03 14:45:33,720 - INFO(validmind.client): Pandas dataset detected. Initializing VM Dataset instance...


Unnamed: 0,Project_Title,question,ground_truth,Area,Requester,Status,id
91,Implementation of AI Chatbots for Enhanced Cus...,How do you contribute to the ongoing improveme...,We actively participate in industry working gr...,AI Regulation,Bank C,Awarded,d17dc2d4-3d51-476e-b618-b9844f9ae5d7
1,Generative AI Solutions for Fraud Detection an...,How do you maintain your AI applications with ...,We maintain a dedicated R&D team focused on in...,General,Bank E,Under Review,2367fe74-b9a4-461a-a8d7-b87a370cdd78
75,Implementation of AI Chatbots for Enhanced Cus...,How do you assess the effectiveness and succes...,Success measurement is tailored to each projec...,General,Bank C,Awarded,3fa160f3-e827-4cae-b4f8-dc98aeb945b9
8,Generative AI Solutions for Fraud Detection an...,Please outline the training process for your L...,Our LLM training process begins with the metic...,Large Language Models,Bank E,Under Review,27a173e7-b16b-4df7-a3da-ddf3695dbf7e
54,Gen AI-Driven Financial Advisory System,Can you describe the process of training your ...,Our LLM training process begins with the metic...,Large Language Models,Bank A,Under Review,742b66b8-9e2d-4c09-a601-6bb225cb8212


## Embedding Model Selection

First, let's set up our embedding model and run some tests to make sure it's working well.

In [11]:
from langchain_openai import OpenAIEmbeddings

from validmind.models import EmbeddingModel

# Embeddings client used by `embed` below; `model` names the OpenAI embedding model.
embedding_client = OpenAIEmbeddings(model="text-embedding-3-small")

def embed(question):
    """Embed a single piece of text with the shared embedding client.

    Args:
        question: The text to embed.

    Returns:
        Whatever ``embedding_client.embed_query`` returns for ``question``.
    """
    embedding = embedding_client.embed_query(question)
    return embedding

# Wrap `embed` as a ValidMind embedding model. The `input_id` shows up in the
# dataset's prediction column name ("embedding_model_prediction").
vm_embedder = EmbeddingModel(input_id="embedding_model", predict_fn=embed)

In [12]:
# Run the embedder over the test set and store the result as a prediction column.
vm_test_ds.assign_predictions(vm_embedder)
# Print the dataset summary to confirm the new prediction column is present.
print(vm_test_ds)

2024-05-03 14:45:38,048 - INFO(validmind.vm_models.dataset): Running predict()... This may take a while


VMDataset object: 
Input ID: dataset
Target Column: ground_truth
Feature Columns: ['Project_Title', 'question', 'Area', 'Requester', 'Status', 'id']
Text Column: question
Extra Columns: {'prediction_columns': {'embedding_model': 'embedding_model_prediction'}, 'probability_columns': {}, 'group_by_column': None}
Type: generic
Target Class Labels: None
Columns: ['Project_Title', 'question', 'ground_truth', 'Area', 'Requester', 'Status', 'id', 'embedding_model_prediction']
Index Name: None
Index: [ 91   1  75   8  54   4  58  19  17  94  36  42  78 110  49 114  92  44
  56  86  93 108 106]



### Run tests

In [13]:
from validmind.tests import run_test

# Run the embedding stability test against the embedder and the test set.
# NOTE(review): `probability` is presumably the rate at which random noise is
# injected into the text — confirm against the test's documentation.
result = run_test(
    "validmind.model_validation.embeddings.StabilityAnalysisRandomNoise",
    inputs={"model": vm_embedder, "dataset": vm_test_ds},
    params={"probability": 0.3},
)

VBox(children=(HTML(value='\n            <h1>Stability Analysis Random Noise ✅</h1>\n            <p>Evaluate r…

## Set Up Vector Store

#### Generate embeddings for the questions

In [14]:
# Run the embedder over the training set and store the result as a prediction column.
vm_train_ds.assign_predictions(vm_embedder)
# Print the dataset summary to confirm the new prediction column is present.
print(vm_train_ds)

2024-05-03 14:46:29,024 - INFO(validmind.vm_models.dataset): Running predict()... This may take a while


VMDataset object: 
Input ID: dataset
Target Column: ground_truth
Feature Columns: ['Project_Title', 'question', 'Area', 'Requester', 'Status', 'id']
Text Column: question
Extra Columns: {'prediction_columns': {'embedding_model': 'embedding_model_prediction'}, 'probability_columns': {}, 'group_by_column': None}
Type: generic
Target Class Labels: None
Columns: ['Project_Title', 'question', 'ground_truth', 'Area', 'Requester', 'Status', 'id', 'embedding_model_prediction']
Index Name: None
Index: [ 28  61   6  99  13  50 113  25  65 111   5  20  38  21 100  71  87  29
  23  96  14  33  26  30  73  16  84  52  12  45  15  43 105 109  27  63
  85   2  53  74 102  39  80  98  59  90  89   3  82  66  60   0  55  40
  97  41  37 112  72  48  35  64  22 104   9  76  68  51 107  79  34  31
  67 103 101  88   7  69  47  70  24  83  18  10  81  11  77  32  46  95
  57  62]



#### Insert embeddings and questions into Vector DB

In [15]:
from langchain_community.vectorstores import Qdrant
from langchain_openai import OpenAIEmbeddings
from langchain_community.document_loaders import DataFrameLoader

# Build one document per training row, with the question text as page content.
# The other columns presumably end up in each document's metadata — `retrieve`
# later reads `metadata['ground_truth']`.
loader = DataFrameLoader(train_df, page_content_column="question")
docs = loader.load()
# Same model name as the embedding client created earlier in the notebook;
# this rebinds `embedding_client` to a fresh client instance.
embedding_client = OpenAIEmbeddings(model="text-embedding-3-small")

# Create the Qdrant collection from the documents, handing it the embedding client.
# NOTE(review): this appears to embed the questions again rather than reuse the
# embeddings already assigned to `vm_train_ds` — confirm that is intended.
qdrant = Qdrant.from_documents(
    docs,
    embedding_client,
    location=":memory:",  # Local mode with in-memory storage only
    collection_name="rfp_rag_collection",
)

## Set Up Retrieval Model

In [16]:
from validmind.models import RetrievalModel

def retrieve(question):
    """Return the stored question/answer pairs matching ``question``.

    Queries the ``qdrant`` vector store with a similarity search and formats
    each hit as a ``"Q: ...\\nA: ...\\n"`` string, where the answer comes from
    the document's ``ground_truth`` metadata entry.

    Args:
        question: The query text passed to the similarity search.

    Returns:
        A list of formatted context strings, in the order the store returned
        the hits (empty when there are no hits).
    """
    matches = qdrant.similarity_search_with_score(question)
    # Each match is a (document, score) pair; the score itself is not used.
    return [
        f"Q: {doc.page_content}\nA: {doc.metadata['ground_truth']}\n"
        for doc, _score in matches
    ]

# Wrap `retrieve` as a ValidMind retrieval model. The `input_id` shows up in the
# dataset's prediction column name ("retrieval_model_prediction").
vm_retriever = RetrievalModel(
    input_id="retrieval_model",
    predict_fn=retrieve,
)

In [17]:
# Run the retriever over the test set and store the retrieved contexts as a
# prediction column.
vm_test_ds.assign_predictions(model=vm_retriever)
# Print the dataset summary after the retrieval step.
print(vm_test_ds)

2024-05-03 14:49:04,189 - INFO(validmind.vm_models.dataset): Running predict()... This may take a while


['Q: How do you contribute to the ongoing development of AI risk management practices following the NIST AI RMF?\nA: We actively participate in industry working groups and public-private partnerships to contribute to the continual improvement of AI risk management practices. Our engagement in these collaborative efforts not only allows us to share our insights and strategies but also enables us to learn from the collective experiences of the industry, helping to elevate the standards of AI safety and reliability across the board. Additionally, we stay abreast of updates to the NIST AI Risk Management Framework (RMF) and adjust our practices accordingly. This commitment to staying current ensures that our risk management approaches align with the latest guidelines and best practices, reinforcing our dedication to leading-edge, responsible AI development and deployment.\n', 'Q: In what ways do you contribute to the continual improvement of AI risk management practices, as envisioned by t

## Set Up Generation Model

In [18]:
from validmind.models import GenerationModel
from openai import OpenAI


# System prompt sent as the first message of every generation request.
# `.strip()` removes the leading and trailing newlines of the triple-quoted literal.
system_prompt = """
You are an expert RFP AI assistant.
You are tasked with answering new RFP questions based on existing RFP questions and answers.
You will be provided with the existing RFP questions and answer pairs that are the most relevant to the new RFP question.
After that you will be provided with a new RFP question.
You will generate an answer and respond only with the answer.
Ignore your pre-existing knowledge and answer the question based on the provided context.
""".strip()

# OpenAI client used by `generate`; no API key is passed explicitly.
# NOTE(review): presumably the client reads OPENAI_API_KEY from the environment — confirm.
openai_client = OpenAI()

def generate(question, retrieval_model_prediction):
    """Answer ``question`` from the retrieved context using the chat model.

    The request carries three messages: the system prompt, one user message
    holding all retrieved contexts joined by blank lines, and one user message
    holding the new question.

    Args:
        question: The new question to answer.
        retrieval_model_prediction: Iterable of context strings to ground the
            answer on. NOTE(review): the parameter names appear to match
            dataset column names — confirm before renaming either one.

    Returns:
        The message content of the first choice in the completion response.
    """
    context_block = "\n\n".join(retrieval_model_prediction)
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": context_block},
        {"role": "user", "content": question},
    ]

    completion = openai_client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=chat_messages,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content

# Wrap `generate` as a ValidMind generation model. The `input_id` shows up in the
# dataset's prediction column name ("generation_model_prediction").
vm_generator = GenerationModel(input_id="generation_model", predict_fn=generate)

In [19]:
# Run the generator over the test set and store the answers as a prediction column.
vm_test_ds.assign_predictions(model=vm_generator)
# Print the dataset summary to confirm the new prediction column is present.
print(vm_test_ds)

2024-05-03 14:50:50,941 - INFO(validmind.vm_models.dataset): Running predict()... This may take a while


VMDataset object: 
Input ID: dataset
Target Column: ground_truth
Feature Columns: ['Project_Title', 'question', 'Area', 'Requester', 'Status', 'id']
Text Column: question
Extra Columns: {'prediction_columns': {'embedding_model': 'embedding_model_prediction', 'retrieval_model': 'retrieval_model_prediction', 'generation_model': 'generation_model_prediction'}, 'probability_columns': {}, 'group_by_column': None}
Type: generic
Target Class Labels: None
Columns: ['Project_Title', 'question', 'ground_truth', 'Area', 'Requester', 'Status', 'id', 'embedding_model_prediction', 'retrieval_model_prediction', 'generation_model_prediction']
Index Name: None
Index: [ 91   1  75   8  54   4  58  19  17  94  36  42  78 110  49 114  92  44
  56  86  93 108 106]



## Set Up RAG Model (Pipeline of "Component" Models)

In [20]:
from validmind.models import RAGModel

# Combine the three component models defined above (embedder, retriever,
# generator) into a single RAG model registered under its own input id.
vm_rag_model = RAGModel(
    embedder=vm_embedder,
    retriever=vm_retriever,
    generator=vm_generator,
    input_id="rag_pipeline",
)

In [21]:
# Run the combined RAG model over the test set.
vm_test_ds.assign_predictions(model=vm_rag_model)
# Print the dataset summary after the pipeline run.
print(vm_test_ds)

2024-05-03 14:52:01,759 - INFO(validmind.vm_models.dataset): Running predict()... This may take a while


['Q: How do you contribute to the ongoing development of AI risk management practices following the NIST AI RMF?\nA: We actively participate in industry working groups and public-private partnerships to contribute to the continual improvement of AI risk management practices. Our engagement in these collaborative efforts not only allows us to share our insights and strategies but also enables us to learn from the collective experiences of the industry, helping to elevate the standards of AI safety and reliability across the board. Additionally, we stay abreast of updates to the NIST AI Risk Management Framework (RMF) and adjust our practices accordingly. This commitment to staying current ensures that our risk management approaches align with the latest guidelines and best practices, reinforcing our dedication to leading-edge, responsible AI development and deployment.\n', 'Q: In what ways do you contribute to the continual improvement of AI risk management practices, as envisioned by t

# Run tests

In [22]:
import plotly.express as px

def plot_distribution(scores, nbins=10, title=None):
    """Show a histogram of metric scores.

    Args:
        scores: Sequence of numeric scores to plot. The ragas metrics used in
            this notebook are expected to fall in the 0-1 range.
        nbins: Bin count passed to the histogram. Defaults to 10, the value
            that used to be hard-coded.
        title: Optional figure title, so that histograms of different metrics
            can be told apart. Defaults to ``None`` (no title).

    Returns:
        None. The figure is rendered through ``fig.show()``.
    """
    fig = px.histogram(x=scores, nbins=nbins, title=title)
    fig.show()

In [23]:
import warnings

# Silence all Python warnings for the rest of the session.
# NOTE(review): this is a blanket filter, so it also hides warnings that are
# unrelated to the tests below.
warnings.filterwarnings("ignore")

In [33]:
# Run the ragas AnswerSimilarity test on the test set. This uses the `run_test`
# imported from `validmind.tests`, the same entry point as the other test cells
# in this notebook.
result = run_test(
    "validmind.model_validation.ragas.AnswerSimilarity",
    inputs={"dataset": vm_test_ds},
    show=False,
)
# Plot the data of the first summary result as a histogram.
# NOTE(review): assumes `results[0].data` holds the per-row scores — confirm.
plot_distribution(result.metric.summary.results[0].data)

Evaluating:   0%|          | 0/23 [00:00<?, ?it/s]

In [34]:
# Run the ragas ContextEntityRecall test on the test set.
# NOTE(review): `show=False` presumably suppresses the default result rendering — confirm.
result = run_test(
    "validmind.model_validation.ragas.ContextEntityRecall",
    inputs={"dataset": vm_test_ds},
    show=False,
)
# Plot the data of the first summary result as a histogram.
# NOTE(review): assumes `results[0].data` holds the per-row scores — confirm.
plot_distribution(result.metric.summary.results[0].data)

Evaluating:   0%|          | 0/23 [00:00<?, ?it/s]

In [37]:
# Run the ragas ContextPrecision test on the test set.
# NOTE(review): `show=False` presumably suppresses the default result rendering — confirm.
result = run_test(
    "validmind.model_validation.ragas.ContextPrecision",
    inputs={"dataset": vm_test_ds},
    show=False,
)
 
# Plot the data of the first summary result as a histogram.
# NOTE(review): assumes `results[0].data` holds the per-row scores — confirm.
plot_distribution(result.metric.summary.results[0].data)

Evaluating:   0%|          | 0/23 [00:00<?, ?it/s]

In [38]:
# Run the ragas ContextRelevancy test on the test set.
# NOTE(review): `show=False` presumably suppresses the default result rendering — confirm.
result = run_test(
    "validmind.model_validation.ragas.ContextRelevancy",
    inputs={"dataset": vm_test_ds},
    show=False,
)
# Plot the data of the first summary result as a histogram.
# NOTE(review): assumes `results[0].data` holds the per-row scores — confirm.
plot_distribution(result.metric.summary.results[0].data)

Evaluating:   0%|          | 0/23 [00:00<?, ?it/s]