# Initialize Chromadb and create the indexing

In [1]:
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.openai import OpenAIEmbedding

from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import Document

# Load or create your document

with open("./book.txt") as f:
    doc = f.read() 
text = doc


document = Document(text=text)

# Initialize Chroma client
chroma_client = chromadb.EphemeralClient()

# Create a collection for storing vectors
chroma_collection = chroma_client.get_or_create_collection("book_collection")

# Create the vector store
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

from llama_index.core import StorageContext

# Initialize the storage context
storage_context = StorageContext.from_defaults(vector_store=vector_store)

embed_model = OpenAIEmbedding(model="text-embedding-3-small")

# Create a sentence splitter for chunking text
parser = SentenceSplitter(chunk_size=1024, chunk_overlap=20)

# Build the index
index = VectorStoreIndex.from_documents([document], storage_context=storage_context, 
                                        transformations=[parser], show_progress=True)

Parsing nodes:   0%|          | 0/1 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/45 [00:00<?, ?it/s]

# Initialize RAG and run a query

In [2]:
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.llms.openai import OpenAI

retriever = VectorIndexRetriever(index, similarity_top_k=3, filter=None)
llm = OpenAI(model="gpt-3.5-turbo")
query_engine = RetrieverQueryEngine.from_args(retriever, llm=llm)

from llama_index.core import PromptTemplate

new_prompt_template_str = (
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the context and not prior knowledge, "
    "answer the query in less than 15 words.\n"
    "Query: {query_str}\n"
    "Answer: "
)

new_prompt_template = PromptTemplate(new_prompt_template_str)
query_engine.update_prompts({"response_synthesizer:text_qa_template": new_prompt_template})

query = """"How does Dickens establish Scrooge's character through 
environmental imagery rather than direct description? 
Make sure the answer does not exceed 300 characters."""

response = query_engine.query(query)
print(str(response))

Dickens uses settings like a bleak moor and a desolate lighthouse to reflect Scrooge's cold and isolated personality.


# Load the golden QnA data generated by Claude 3.7 Sonnet

In [4]:
import pandas as pd

df = pd.read_json("golden_data.json")

df.head(5)

Unnamed: 0,reference_question,reference_answer
0,What literary device does Dickens use in the o...,"Repetition (""Marley was dead"") and paradox (""d..."
1,What is the symbolic significance of Scrooge k...,It symbolizes Scrooge's inability to let go of...
2,How does Dickens establish Scrooge's character...,"Through cold imagery: he ""iced his office,"" ca..."
3,What is the thematic purpose of the contrast b...,It juxtaposes institutional cruelty with famil...
4,What narrative technique does Dickens use when...,"Contradictory descriptors (""like a child; yet ..."


# Lets call RAG to get the answers

In [5]:
import tqdm

reference_questions = df["reference_question"].tolist()
reference_answers = df["reference_answer"].tolist()
rag_answers = list()

for i in tqdm.tqdm(range(len(reference_questions))):
    rag_answer = query_engine.query(reference_questions[i])
    rag_answers.append(rag_answer.response)

df["simple_rag_answer"] = rag_answers
df.to_json("result_simple_rag.json")

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 98/98 [01:59<00:00,  1.22s/it]


# Define the evaluation code using GPT-4

In [6]:
import openai

# First, set the API key
openai_client = openai.OpenAI()  # <-- create a client

def evaluate_with_llm(question, golden, prediction):
    prompt = f"""
    Question: {question}
    Golden Answer: {golden}
    Model Answer: {prediction}

    Evaluate the model answer against the golden answer. 
    Respond with a score between 1 (poor) and 5 (perfect) based on accuracy, relevance, and completeness.
    """

    response = openai_client.chat.completions.create(
        model="gpt-4",  # or "gpt-3.5-turbo"
        messages=[
            {"role": "system", "content": "You are an expert evaluator."},
            {"role": "user", "content": prompt}
        ]
    )
    
    result_text = response.choices[0].message.content
    return result_text


# Call the Evaluation method for all the golden examples and store the scores

In [7]:
from tqdm import tqdm

df = pd.read_json("result_simple_rag.json")
reference_questions = df["reference_question"].tolist()
reference_answers = df["reference_answer"].tolist()
rag_answers = df["simple_rag_answer"].tolist()
eval_scores = list()

for reference_question,reference_answer,rag_answer in tqdm(zip(reference_questions,reference_answers,rag_answers)):
    eval_scores.append(evaluate_with_llm(reference_question,reference_answer,rag_answer))

df["gpt4_score"] = eval_scores
df.to_json("result_simple_rag_score.json")

98it [01:14,  1.32it/s]


# Mean score for all the examples in the golden dataset

In [8]:
df = pd.read_json("result_simple_rag_score.json")
df["gpt4_score"].mean()

np.float64(3.683928571428572)