In [1]:
%pip install -q llama-index deeplake openai cohere


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
from dotenv import load_dotenv


load_dotenv("../.env")
assert os.getenv("OPENAI_API_KEY")
assert os.getenv("ACTIVELOOP_TOKEN")

In [3]:
from llama_index.llms.openai import OpenAI


# build service context
llm = OpenAI(model="gpt-4", temperature=0.0)

# FaithfulnessEvaluator

In [4]:
from llama_index.vector_stores.deeplake import DeepLakeVectorStore
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex


vector_store = DeepLakeVectorStore(dataset_path="hub://yaroslava/tesla_quarterly_2023", overwrite=False)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

index = VectorStoreIndex.from_vector_store(
    vector_store, storage_context=storage_context
)

[S3] Failed to get bucket region for URL: snark-hub/protected/yaroslava/tesla_quarterly_2023/ with error: [S3] INVALID_ACCESS_KEY_ID snark-hub The AWS Access Key Id you provided does not exist in our records. 


In [5]:
from llama_index.core.evaluation import FaithfulnessEvaluator


# define evaluator
evaluator = FaithfulnessEvaluator(llm=llm)

In [6]:
# query index
query_engine = index.as_query_engine()
response = query_engine.query(
    "What are the trends in vehicle deliveries on quarter 3?"
)

eval_result = evaluator.evaluate_response(response=response)

In [7]:
print("> response: ", response)
print("> evaluator result: ", eval_result.passing)

> response:  The trends in vehicle deliveries in quarter 3 show a continued increase in the number of units delivered compared to previous quarters.
> evaluator result:  True


# RAGAS

In [41]:
%pip install html2text ragas llama-index-readers-web wikipedia

Collecting wikipedia
  Using cached wikipedia-1.4.0-py3-none-any.whl
Installing collected packages: wikipedia
Successfully installed wikipedia-1.4.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [8]:
from llama_index.core import Document
import wikipedia

def load_wikipedia_api_data():
    titles = ["New York City", "New York (city)", "New York, New York"]
    
    for title in titles:
        try:
            page = wikipedia.page(title, auto_suggest=False)
            return [Document(
                text=page.content,
                metadata={"title": page.title, "url": page.url, "summary": page.summary}
            )]
        except:
            continue
    
    # Fallback to search results
    for title in wikipedia.search("New York City")[1:]:
        try:
            page = wikipedia.page(title)
            return [Document(
                text=page.content,
                metadata={"title": page.title, "url": page.url, "summary": page.summary}
            )]
        except:
            continue

documents = load_wikipedia_api_data()
print(f"Loaded: {documents[0].metadata['title']}")
print(f"Length: {len(documents[0].text)} characters")

Loaded: New York City
Length: 93891 characters


In [9]:
documents

[Document(id_='89daa97c-2407-450e-a2ed-9bcedbfedafd', embedding=None, metadata={'title': 'New York City', 'url': 'https://en.wikipedia.org/wiki/New_York_City', 'summary': "New York, often called New York City (NYC), is the most populous city in the United States. It is located at the southern tip of New York State on one of the world's largest natural harbors. The city comprises five boroughs, each coextensive with its respective county. The city is the geographical and demographic center of both the Northeast megalopolis and the New York metropolitan area, the largest metropolitan area in the United States by both population and urban area. New York is a global center of finance and commerce, culture, technology, entertainment and media, academics and scientific output, the arts and fashion, and, as home to the headquarters of the United Nations, international diplomacy.\nWith an estimated population in July 2024 of 8,478,072, distributed over 300.46 square miles (778.2 km2), the city

In [10]:
from llama_index.core import VectorStoreIndex


vector_index = VectorStoreIndex.from_documents(
    documents, chunk_size=512
)
query_engine = vector_index.as_query_engine()

In [11]:
response_vector = query_engine.query("How did New York City get its name?")

print(response_vector)

New York City was named after King Charles II granted the lands to his brother, the Duke of York, in 1664. It was temporarily renamed New York and then permanently renamed New York in 1674.


In [12]:
from datasets import Dataset


eval_questions = [
    "What is the population of New York City as of 2020?",
    "Which borough of New York City has the highest population?",
    "What is the economic significance of New York City?",
    "How did New York City get its name?",
    "What is the significance of the Statue of Liberty in New York City?",
]

eval_answers = [
    "8,804,000",
    "Queens",
    "New York City's economic significance is vast, as it serves as the global financial capital, housing Wall Street and major financial institutions. Its diverse economy spans technology, media, healthcare, education, and more, making it resilient to economic fluctuations. NYC is a hub for international business, attracting global companies, and boasts a large, skilled labor force. Its real estate market, tourism, cultural industries, and educational institutions further fuel its economic prowess. The city's transportation network and global influence amplify its impact on the world stage, solidifying its status as a vital economic player and cultural epicenter.",
    "New York City got its name when it came under British control in 1664. King Charles II of England granted the lands to his brother, the Duke of York, who named the city New York in his own honor.",
    "The Statue of Liberty in New York City holds great significance as a symbol of the United States and its ideals of liberty and peace. It greeted millions of immigrants who arrived in the U.S. by ship in the late 19th and early 20th centuries, representing hope and freedom for those seeking a better life. It has since become an iconic landmark and a global symbol of cultural diversity and freedom.",
]

# Generate responses and contexts for evaluation
responses = []
contexts = []

for question in eval_questions:
    response = query_engine.query(question)
    responses.append(str(response))
    # Extract contexts from response source nodes
    context_list = [node.node.text for node in response.source_nodes] if hasattr(response, 'source_nodes') else str(response)[:500]
    contexts.append(context_list)

# Create dataset in the format expected by RAGAS v0.2+
eval_dataset = Dataset.from_dict({
    "question": eval_questions,
    "answer": responses,
    "contexts": contexts,
    "ground_truth": eval_answers
})

In [14]:
len(responses), len(contexts)

(5, 5)

In [19]:
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall,
)
from ragas import evaluate


# Define metrics to use
metrics = [
    faithfulness,
    answer_relevancy, 
    context_precision,
    context_recall,
]

# Run evaluation
result = evaluate(
    dataset=eval_dataset,
    metrics=metrics,
)

Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Exception raised in Job[1]: IndexError(list index out of range)
Evaluating:   5%|▌         | 1/20 [00:01<00:23,  1.24s/it]Exception raised in Job[5]: IndexError(list index out of range)
Exception raised in Job[13]: IndexError(list index out of range)
Evaluating:  15%|█▌        | 3/20 [00:01<00:06,  2.74it/s]Exception raised in Job[9]: IndexError(list index out of range)
Evaluating: 100%|██████████| 20/20 [00:33<00:00,  1.68s/it]


In [20]:

print("Evaluation Results:")
print(result)

Evaluation Results:
{'faithfulness': 0.6455, 'answer_relevancy': 0.8905, 'context_precision': 0.4000, 'context_recall': 0.0400}


In [21]:

# Convert to DataFrame for better visualization
result.to_pandas()

Unnamed: 0,user_input,retrieved_contexts,response,reference,faithfulness,answer_relevancy,context_precision,context_recall
0,What is the population of New York City as of ...,[== Demographics ==\n\nNew York City is the mo...,The population of New York City as of 2020 was...,8804000,1.0,,1.0,0.0
1,Which borough of New York City has the highest...,[The highest point in the city is Todt Hill on...,Brooklyn (Kings County) is the borough of New ...,Queens,1.0,,0.0,0.0
2,What is the economic significance of New York ...,[The Port of New York and New Jersey is a majo...,New York City is a global center for finance a...,"New York City's economic significance is vast,...",0.727273,,1.0,0.2
3,How did New York City get its name?,[The first documented visit to New York Harbor...,New York City was named after King Charles II'...,New York City got its name when it came under ...,0.0,,0.0,0.0
4,What is the significance of the Statue of Libe...,[Gateway National Recreation Area contains ove...,The Statue of Liberty in New York City is mana...,The Statue of Liberty in New York City holds g...,0.5,0.890456,0.0,0.0


# Custom RAG pipeline

In [24]:
!curl "https://raw.githubusercontent.com/idontcalculate/data-repo/main/venus_transmission.txt" -o "data/venus_transmission.txt"


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 19241  100 19241    0     0   182k      0 --:--:-- --:--:-- --:--:--  182k


In [None]:
from llama_index import SimpleDirectoryReader

reader = SimpleDirectoryReader(input_files=["/content/venus_transmission.txt"])

docs = reader.load_data()
print(f"Loaded {len(docs)} docs")

Loaded 1 docs


In [None]:
from llama_index.node_parser import SimpleNodeParser
from llama_index import VectorStoreIndex

# Build index with a chunk_size of 512
node_parser = SimpleNodeParser.from_defaults(chunk_size=512)
nodes = node_parser.get_nodes_from_documents(docs)
vector_index = VectorStoreIndex(nodes)

In [None]:
query_engine = vector_index.as_query_engine()

response_vector = query_engine.query("What was The first beings to inhabit the planet?")

In [None]:
response_vector.response

'The first beings to inhabit the planet were a dinoid and reptoid race from two different systems outside our solar system.'

In [None]:
# First retrieved node
response_vector.source_nodes[0].get_text()

"They had heard of this beautiful new planet. At this time, Earth had two moons to harmonize the weather conditions and control the tides of the large bodies of water.\nThe first beings to inhabit the planet were a dinoid and reptoid race from two different systems outside our solar system. They were intelligent and walked on two legs like humans and were war-like considering themselves to be superior to all other life forms. In the past, the four races of humans had conflicts with them before they outgrew such behavior. They arrived on Earth to rob it of its minerals and valuable gems. Soon they had created a terrible war. They were joined by re-\n1\nenforcements from their home planets. One set up its base on one of the Earth's moons, the other on Earth. It was a terrible war with advanced nuclear and laser weapons like you see in your science fiction movies. It lasted very long. Most of the life forms lay in singed waste and the one moon was destroyed. No longer interested in Earth,

In [None]:
# Second retrieved node
response_vector.source_nodes[1].get_text()

"Due to the radiation, the survivors of the dinoids and reptoids mutated into the Dinosaurs and giant reptilians you know of in your history. The humans that were trapped there mutated into what you call Neanderthals.\nThe Earth remained a devastated ruin, covered by a huge dark nuclear cloud and what vegetation was left was being devoured by the giant beings, also humans and animals by some. It was this way for hundreds of years before a giant comet crashed into one of the oceans and created another huge cloud. This created such darkness that the radiating heat of the Sun could not interact with Earth's gravitational field and an ice age was created. This destroyed the mutated life forms and gave the four races the chance to cleanse and heal the Earth with technology and their energy.\nOnce again, they brought various forms of life to the Earth, creating again a paradise, except for extreme weather conditions and extreme tidal activities.\nDuring this time they realized that their pla

In [None]:
from llama_index.llms import OpenAI
from llama_index.evaluation import generate_question_context_pairs

# Define an LLM
llm = OpenAI(model="gpt-3.5-turbo")

qa_dataset = generate_question_context_pairs(
    nodes,
    llm=llm,
    num_questions_per_chunk=2
)

queries = list(qa_dataset.queries.values())
print( queries )

100%|██████████| 13/13 [00:27<00:00,  2.13s/it]

['Explain the role of different alien races in the history of our solar system according to the information provided. How did these races contribute to the transformation process and why was Earth considered a special planet?', 'Describe the advanced abilities and technology possessed by the Masters and beings mentioned in the context. How did their understanding of creation and their eternal nature shape their perspective on life and death?', 'Explain the concept of creativity as understood by the advanced races mentioned in the context. How did they use their creative energy and what were the responsibilities associated with it?', 'Describe the initial state of Earth before it became a planet and settled into an orbit around the Sun. How did the four races contribute to the development of life on Earth?', 'How did the arrival of the dinoid and reptoid races on Earth lead to a devastating war? Discuss the reasons behind their conflict with the four races of humans and the impact it ha




In [None]:
retriever = vector_index.as_retriever(similarity_top_k=2)

In [None]:
from llama_index.evaluation import RetrieverEvaluator

retriever_evaluator = RetrieverEvaluator.from_metric_names(
    ["mrr", "hit_rate"], retriever=retriever
)

In [None]:
# Evaluate
eval_results = await retriever_evaluator.aevaluate_dataset(qa_dataset)

In [None]:
def display_results(name, eval_results):
    """Display results from evaluate."""

    metric_dicts = []
    for eval_result in eval_results:
        metric_dict = eval_result.metric_vals_dict
        metric_dicts.append(metric_dict)

    full_df = pd.DataFrame(metric_dicts)

    hit_rate = full_df["hit_rate"].mean()
    mrr = full_df["mrr"].mean()

    metric_df = pd.DataFrame(
        {"Retriever Name": [name], "Hit Rate": [hit_rate], "MRR": [mrr]}
    )

    return metric_df

In [None]:
import pandas as pd

display_results("OpenAI Embedding Retriever", eval_results)

Unnamed: 0,Retriever Name,Hit Rate,MRR
0,OpenAI Embedding Retriever,0.884615,0.788462


In [None]:
from llama_index import ServiceContext

# gpt-3.5-turbo
gpt35 = OpenAI(temperature=0, model="gpt-3.5-turbo")
service_context_gpt35 = ServiceContext.from_defaults(llm=gpt35)

# gpt-4
gpt4 = OpenAI(temperature=0, model="gpt-4")
service_context_gpt4 = ServiceContext.from_defaults(llm=gpt4)

In [None]:
vector_index = VectorStoreIndex(nodes, service_context = service_context_gpt35)
query_engine = vector_index.as_query_engine()

In [None]:
eval_query = queries[10]

response_vector = query_engine.query(eval_query)

In [None]:
print( "> eval_query: ", eval_query )
print( "> response_vector:", response_vector )

> eval_query:  How did the colonies respond to the declaration of war by the dark forces, and what measures did they take to protect their knowledge and technology?
> response_vector: The colonies did not fight back against the dark forces when they declared war. Instead, they sent most of their people into hiding in order to rebuild the colonies later. They also destroyed everything to ensure that their knowledge and technology would not fall into the hands of the dark forces. Additionally, Lemuria and Atlantis were destroyed by their inhabitants to prevent the misuse of their knowledge and technology by the dark forces.


In [None]:
from llama_index.evaluation import RelevancyEvaluator
from llama_index.evaluation import FaithfulnessEvaluator

relevancy_gpt4 = RelevancyEvaluator(service_context=service_context_gpt4)
faithfulness_gpt4 = FaithfulnessEvaluator(service_context=service_context_gpt4)

True

In [None]:
# Compute faithfulness evaluation

eval_result = faithfulness_gpt4.evaluate_response(response=response_vector)
# check passing parameter in eval_result if it passed the evaluation.
print( eval_result.passing )

# Relevancy evaluation
eval_result = relevancy_gpt4.evaluate_response(
    query=eval_query, response=response_vector
)
# You can check passing parameter in eval_result if it passed the evaluation.
print( eval_result.passing )

True
True


In [None]:
#Batch Evaluator:
#BatchEvalRunner to compute multiple evaluations in batch wise manner.

from llama_index.evaluation import BatchEvalRunner

# Let's pick top 10 queries to do evaluation
batch_eval_queries = queries[:10]

# Initiate BatchEvalRunner to compute FaithFulness and Relevancy Evaluation.
runner = BatchEvalRunner(
    {"faithfulness": faithfulness_gpt4, "relevancy": relevancy_gpt4},
    workers=8,
)

# Compute evaluation
eval_results = await runner.aevaluate_queries(
    query_engine, queries=batch_eval_queries
)
# Let's get faithfulness score
faithfulness_score = sum(result.passing for result in eval_results['faithfulness']) / len(eval_results['faithfulness'])
# get relevancy score
relevancy_score = sum(result.passing for result in eval_results['faithfulness']) / len(eval_results['relevancy'])

print( "> faithfulness_score", faithfulness_score )
print( "> relevancy_score", relevancy_score )

> faithfulness_score 1.0
> relevancy_score 1.0
