In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported
# local modules are picked up live without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [8]:
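# One-time setup (use %pip so the install targets this kernel's environment):
# %pip install ragas datasets langchain-ollama langchain-community langchain_openai python-dotenv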



In [14]:
from langchain_ollama import ChatOllama
from langchain_community.embeddings import OllamaEmbeddings

# Two handles on the same Ollama chat model tag, both at temperature 0.
# The first additionally sets format="json"; it is the one handed to the
# RAGAS `evaluate` calls further down in this notebook.
llm_json_mode = ChatOllama(
    model="llama3.1:latest",
    format="json",
    temperature=0,
)
# Plain-text variant of the same model (no `format` argument).
llm_generation = ChatOllama(
    model="llama3.1:latest",
    temperature=0,
)
# Embedding model handed to RAGAS for the embedding-based metrics (768 dims).
embedding_engine = OllamaEmbeddings(
    model="nomic-embed-text:latest",
)

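**Sample format** expected by RAGAS: a dict of parallel lists with one entry per question, where each `contexts` entry is itself a list of strings.
```python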
data_samples = {
    'question': ['When was the first super bowl?', 'Who won the most super bowls?'],
    'answer': ['The first superbowl was held on Jan 15, 1967', 'The most super bowls have been won by The New England Patriots'],
    'contexts' : [['The First AFL–NFL World Championship Game was an American football game played on January 15, 1967, at the Los Angeles Memorial Coliseum in Los Angeles,'], 
    ['The Green Bay Packers...Green Bay, Wisconsin.','The Packers compete...Football Conference']],
    'ground_truth': []
}
```

In [31]:
def reformat_dataset(dataset):
    """Reformat the output from the eval set to match the expected format for RAGAS.

    Each record is read with ``dict.get``, so a missing key becomes ``None``
    in the corresponding output list instead of raising.

    Args:
        dataset (Iterable[Mapping]): Eval-set records. The keys read from each
            record are ``question``, ``generated_answer``, ``retrieved_docs``
            and ``true_answer``. ``retrieved_docs`` may be a list of strings,
            a single string, or missing/``None``.

    Returns:
        dict[str, list]: Column-oriented dict with the keys ``question``,
            ``answer``, ``contexts`` and ``ground_truth``. All four lists have
            one entry per input record. Each ``contexts`` entry is a
            one-element list holding the record's retrieved documents joined
            with a single space (an empty string when there are none).
    """
    question = []
    answer = []
    contexts = []
    ground_truth = []
    for item in dataset:
        # A missing/None value would make str.join raise TypeError, and a bare
        # string would be joined character by character; normalise both.
        docs = item.get('retrieved_docs') or []
        if isinstance(docs, str):
            docs = [docs]

        question.append(item.get('question'))
        answer.append(item.get('generated_answer'))
        contexts.append([" ".join(docs)])
        ground_truth.append(item.get('true_answer'))

    return {
        'question': question,
        'answer': answer,
        'contexts': contexts,
        'ground_truth': ground_truth,
    }

In [32]:
# Load the eval sets
import json

# One JSON file per retrieval strategy, named "<key>.json" in the working
# directory. The tuple order fixes the key order of `eval_set`.
eval_set = {}
for _name in ("agent_test", "tfidf_test", "chroma_test", "faiss_test"):
    # Explicit UTF-8: the contexts contain non-ASCII punctuation, and the
    # platform default encoding is not UTF-8 everywhere.
    with open(f"{_name}.json", encoding="utf-8") as f:
        _records = json.load(f)
    # Reformat after the file is closed; only the parsed records are needed.
    eval_set[_name] = reformat_dataset(_records)

# Keep the individual names that the evaluation cells below refer to.
agent_test = eval_set["agent_test"]
tfidf_test = eval_set["tfidf_test"]
chroma_test = eval_set["chroma_test"]
faiss_test = eval_set["faiss_test"]

In [33]:
from datasets import Dataset 
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    answer_similarity,
    context_precision,
    context_recall,
)
from ragas import evaluate


# Score the agent eval set on answer relevancy and answer similarity,
# using the JSON-mode Ollama chat model and the Ollama embeddings.
dataset = Dataset.from_dict(agent_test)
score = evaluate(
    dataset,
    metrics=[answer_relevancy, answer_similarity],
    llm=llm_json_mode,
    embeddings=embedding_engine,
)
# Per-sample view: one row per question with a column per metric.
score.to_pandas()

Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]

Failed to parse output. Returning None.
Failed to parse output. Returning None.
Failed to parse output. Returning None.


Unnamed: 0,question,answer,contexts,ground_truth,answer_relevancy,answer_similarity
0,What type of camel was known to be an especial...,The heavy camel was known to be an especially ...,[The heavy camel was an especially experienced...,The heavy camel was an especially experienced ...,0.743739,0.934699
1,What are the standard ways to win a Random Map...,You can win a Random Map or Death Match game b...,[16 Chapter II - Setting Up a GameHow to win...,You can win any Random Map or Death Match game...,0.0,0.987623
2,What types of units can Non-upgraded Monks con...,Non-upgraded Monks can convert enemy villagers...,"[color) so you can control them. For example, ...",Non-upgraded Monks can convert enemy villagers...,0.831009,0.995968
3,What military tradition did the Byzantine army...,The Byzantine army carried on the military tra...,"[Armor — Scale Barding Armor, Chain Barding Ar...",The Byzantine army carried on many of the mili...,0.773122,0.95023
4,What countries did the Normans invade besides ...,The Normans invaded Wales and Ireland in addit...,[invasion of 1066. The Normans also invaded S...,The Normans also invaded Sicily and southern I...,0.869014,0.85891
5,What impact did Viking sea raiders have on the...,The Viking sea raiders disrupted the progress ...,[Late in the eighth century Viking sea raiders...,"The Vikings caused significant turmoil, which ...",0.792761,0.949424
6,In what state were Viking warriors known to en...,Viking warriors were known to enter battle in ...,[Vikings were known for their great seamanship...,Viking warriors were known to enter a state of...,0.720605,0.927416
7,What shape were guard towers usually in the co...,Guard towers were usually round in shape. This...,[for fighting defense. Whereas some earlier wa...,Guard towers were usually round.,0.825821,0.937901
8,What type of unit is created at the Stable in ...,"In Age of Empires 2, a Stable produces Cavalry...",[you set a gather\npoint there while the units...,Cavalry,0.728853,0.837939
9,What can move a Relic in this context?,,[],A Relic can only be moved by a Monk.,,0.736023


In [57]:
# Display the aggregate metric values for the agent eval set.
score

{'answer_relevancy': 0.7157, 'answer_similarity': 0.9047}

In [51]:
# Score the TF-IDF eval set.
# NOTE(review): the output recorded under this cell only has an
# `answer_similarity` column (25 evaluations), so it was apparently last
# executed with a single metric - re-run to refresh, or confirm which
# metric list is intended.
dataset_tfidf = Dataset.from_dict(tfidf_test)
score_tfidf = evaluate(
    dataset_tfidf,
    metrics=[answer_relevancy, answer_similarity],
    llm=llm_json_mode,
    embeddings=embedding_engine,
)
# Per-sample view: one row per question with a column per metric.
score_tfidf.to_pandas()

Evaluating:   0%|          | 0/25 [00:00<?, ?it/s]

Unnamed: 0,question,answer,contexts,ground_truth,answer_similarity
0,What type of camel was known to be an especial...,The Heavy Camel (page 78).,[The heavy camel was an especially experienced...,The heavy camel was an especially experienced ...,0.864201
1,What are the standard ways to win a Random Map...,"According to page 18, in a Random Map or Death...","[Death Match game\nIn a Death Match, all playe...",You can win any Random Map or Death Match game...,0.976388
2,What types of units can Non-upgraded Monks con...,Non-upgraded Monks can convert enemy buildings...,"[color) so you can control them. For example, ...",Non-upgraded Monks can convert enemy villagers...,0.947448
3,What military tradition did the Byzantine army...,The Byzantine army carried on many of the mili...,"[Armor — Scale Barding Armor, Chain Barding Ar...",The Byzantine army carried on many of the mili...,0.972137
4,What countries did the Normans invade besides ...,"According to page 86 of the context, the Norma...","[The lord could withdraw into the tower, putti...",The Normans also invaded Sicily and southern I...,0.9508
5,What impact did Viking sea raiders have on the...,The Viking sea raiders' raids for 150 years er...,[Late in the eighth century Viking sea raiders...,"The Vikings caused significant turmoil, which ...",0.931252
6,In what state were Viking warriors known to en...,"They would go ""berserk"" and attack with nearly...",[Vikings were known for their great seamanship...,Viking warriors were known to enter a state of...,0.807066
7,What shape were guard towers usually in the co...,Guard towers were usually round (Extracted fro...,[for fighting defense. Whereas some earlier wa...,Guard towers were usually round.,0.933264
8,What type of unit is created at the Stable in ...,Cavalry. \n\nExtracted from page 56,[type of unit in the building)\nHold down CTRL...,Cavalry,0.929457
9,What can move a Relic in this context?,A Monk (page 18) can move a relic.,[Chapter I: Getting Started\nWhat’s new in Age...,A Relic can only be moved by a Monk.,0.939438


In [54]:
# Display the aggregate metric values for the TF-IDF eval set.
score_tfidf

{'answer_similarity': 0.8958}

In [55]:
# Score the Chroma eval set on answer similarity only.
dataset_chroma = Dataset.from_dict(chroma_test)
score_chroma = evaluate(
    dataset_chroma,
    metrics=[answer_similarity],
    llm=llm_json_mode,
    embeddings=embedding_engine,
)
# Per-sample view: one row per question with a column per metric.
score_chroma.to_pandas()

Evaluating:   0%|          | 0/25 [00:00<?, ?it/s]

Unnamed: 0,question,answer,contexts,ground_truth,answer_similarity
0,What type of camel was known to be an especial...,A heavy camel. (Extracted from page 78),[The heavy camel was an especially experienced...,The heavy camel was an especially experienced ...,0.864825
1,What are the standard ways to win a Random Map...,"To win a Random Map or Death Match game, you c...",[the game. You can display the objectives agai...,You can win any Random Map or Death Match game...,0.975089
2,What types of units can Non-upgraded Monks con...,"Non-upgraded Monks can convert most buildings,...",[You can convert and use a building you have n...,Non-upgraded Monks can convert enemy villagers...,0.963749
3,What military tradition did the Byzantine army...,The Byzantine army carried on many of the mili...,"[Armor — Scale Barding Armor, Chain Barding Ar...",The Byzantine army carried on many of the mili...,0.972142
4,What countries did the Normans invade besides ...,"According to the context (page 86), besides En...",[invasion of 1066. The Normans also invaded S...,The Normans also invaded Sicily and southern I...,0.951751
5,What impact did Viking sea raiders have on the...,The Viking raids erased most of the progress m...,[Late in the eighth century Viking sea raiders...,"The Vikings caused significant turmoil, which ...",0.949386
6,In what state were Viking warriors known to en...,"Viking warriors would occasionally go ""berserk...",[Vikings were known for their great seamanship...,Viking warriors were known to enter a state of...,0.955507
7,What shape were guard towers usually in the co...,"Guard towers were usually round, as it elimina...",[for fighting defense. Whereas some earlier wa...,Guard towers were usually round.,0.923923
8,What type of unit is created at the Stable in ...,"According to page 56, mounted warriors are cre...",[type of unit in the building)\nHold down CTRL...,Cavalry,0.8061
9,What can move a Relic in this context?,"According to page 90, a Monk can move a relic.",[are special objects (similar to trophies) tha...,A Relic can only be moved by a Monk.,0.950778


In [56]:
# Display the aggregate metric values for the Chroma eval set.
score_chroma

{'answer_similarity': 0.9128}

In [59]:
# Score the FAISS eval set on answer similarity only.
dataset_faiss = Dataset.from_dict(faiss_test)
score_faiss = evaluate(
    dataset_faiss,
    metrics=[answer_similarity],
    llm=llm_json_mode,
    embeddings=embedding_engine,
)
# Per-sample view: one row per question with a column per metric.
score_faiss.to_pandas()

Evaluating:   0%|          | 0/25 [00:00<?, ?it/s]

Unnamed: 0,question,answer,contexts,ground_truth,answer_similarity
0,What type of camel was known to be an especial...,"According to Context from page 78, the ""Heavy ...",[The heavy camel was an especially experienced...,The heavy camel was an especially experienced ...,0.963969
1,What are the standard ways to win a Random Map...,To win a Random Map or Death Match game in Age...,[the game. You can display the objectives agai...,You can win any Random Map or Death Match game...,0.915963
2,What types of units can Non-upgraded Monks con...,Non-upgraded Monks can convert most buildings ...,[You can convert and use a building you have n...,Non-upgraded Monks can convert enemy villagers...,0.951167
3,What military tradition did the Byzantine army...,The Byzantine army carried on many of the mili...,"[Armor — Scale Barding Armor, Chain Barding Ar...",The Byzantine army carried on many of the mili...,0.977842
4,What countries did the Normans invade besides ...,"According to page 86 of the extracted text, th...",[invasion of 1066. The Normans also invaded S...,The Normans also invaded Sicily and southern I...,0.9568
5,What impact did Viking sea raiders have on the...,The Viking sea raiders caused most of the prog...,[Late in the eighth century Viking sea raiders...,"The Vikings caused significant turmoil, which ...",0.942184
6,In what state were Viking warriors known to en...,"Viking warriors would occasionally go ""berserk...",[Vikings were known for their great seamanship...,Viking warriors were known to enter a state of...,0.955511
7,What shape were guard towers usually in the co...,Guard towers were usually round. (Extracted fr...,[for fighting defense. Whereas some earlier wa...,Guard towers were usually round.,0.956817
8,What type of unit is created at the Stable in ...,"According to page 56, horses are bred and main...",[type of unit in the building)\nHold down CTRL...,Cavalry,0.781255
9,What can move a Relic in this context?,A Monk can move a relic. (Page 18),[are special objects (similar to trophies) tha...,A Relic can only be moved by a Monk.,0.935338


In [60]:
# Display the aggregate metric values for the FAISS eval set.
score_faiss

{'answer_similarity': 0.9227}