In [3]:
import pandas as pd
from sentence_transformers import SentenceTransformer

  from tqdm.autonotebook import tqdm, trange


In [4]:
# GitHub "blob" page address of the results CSV that the notebook evaluates.
github_url = (
    "https://github.com/DataTalksClub/llm-zoomcamp"
    "/blob/main/04-monitoring/data/results-gpt4o-mini.csv"
)

In [5]:
# Append the "raw=1" query parameter to the blob URL before handing it to
# pandas (presumably so GitHub serves the CSV file itself rather than the
# HTML page -- confirm if the download ever starts returning HTML).
url = github_url + '?raw=1'
df = pd.read_csv(url)

In [6]:
# Restrict the frame to its first 300 rows, then display it.
df = df.head(300)
df

Unnamed: 0,answer_llm,answer_orig,document,question,course
0,You can sign up for the course by visiting the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Where can I sign up for the course?,machine-learning-zoomcamp
1,You can sign up using the link provided in the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Can you provide a link to sign up?,machine-learning-zoomcamp
2,"Yes, there is an FAQ for the Machine Learning ...",Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Is there an FAQ for this Machine Learning course?,machine-learning-zoomcamp
3,The context does not provide any specific info...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Does this course have a GitHub repository for ...,machine-learning-zoomcamp
4,To structure your questions and answers for th...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,How can I structure my questions and answers f...,machine-learning-zoomcamp
...,...,...,...,...,...
295,An alternative way to load the data using the ...,Above users showed how to load the dataset dir...,8d209d6d,What is an alternative way to load the data us...,machine-learning-zoomcamp
296,You can directly download the dataset from Git...,Above users showed how to load the dataset dir...,8d209d6d,How can I directly download the dataset from G...,machine-learning-zoomcamp
297,You can fetch data for homework using the `req...,Above users showed how to load the dataset dir...,8d209d6d,Could you share a method to fetch data for hom...,machine-learning-zoomcamp
298,If the status code is 200 when downloading dat...,Above users showed how to load the dataset dir...,8d209d6d,What should I do if the status code is 200 whe...,machine-learning-zoomcamp


In [7]:
# Use the `answer_llm` value of the first row as a sample text.
answer_llm = df['answer_llm'].iloc[0]
answer_llm

'You can sign up for the course by visiting the course page at [http://mlzoomcamp.com/](http://mlzoomcamp.com/).'

In [9]:
# Build the sentence-embedding model from its name and embed the sample answer.
model_name = "multi-qa-mpnet-base-dot-v1"
embedding_model = SentenceTransformer(model_name)
embedding = embedding_model.encode(answer_llm)

# Display the first component of the resulting vector.
embedding[0]

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/212 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/8.71k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

-0.42244688

In [None]:
#1 -0.42  (first component of the answer_llm embedding; embedding[0] printed -0.42244688)

In [10]:
import numpy as np

# Dot product between the embeddings of each row's `answer_llm` and
# `answer_orig`, one score per row of `df`.
evaluations = [
    np.dot(embedding_model.encode(llm_text), embedding_model.encode(orig_text))
    for llm_text, orig_text in zip(df['answer_llm'], df['answer_orig'])
]

# 75th percentile of the dot-product scores.
print(np.percentile(evaluations, 75))

31.674306869506836


In [None]:
#2 31.67  (75th percentile of the answer_llm / answer_orig dot-product scores; printed 31.674306869506836)

In [11]:
def normalize(v):
    """Scale ``v`` to unit Euclidean (L2) length.

    Parameters
    ----------
    v : array-like
        Values to normalize. Converted with ``np.asarray``, so plain lists
        and tuples are accepted as well as NumPy arrays.

    Returns
    -------
    numpy.ndarray
        ``v`` divided by the square root of the sum of its squared elements.
        When that norm is zero, an all-zero array of the same shape is
        returned instead of NaNs.
    """
    v = np.asarray(v)
    norm = np.sqrt((v * v).sum())
    if norm == 0:
        # A zero vector has no direction; dividing would yield NaN (plus a
        # RuntimeWarning) and poison any later dot product or percentile.
        # Multiplying by the zero norm produces zeros with the same dtype
        # promotion the division would have applied.
        return v * norm
    return v / norm

# Cosine similarity per row: embed both answers, scale each embedding with
# normalize(), then take the dot product of the scaled vectors.
cosine_scores = []

for llm_text, orig_text in zip(df['answer_llm'], df['answer_orig']):
    llm_unit = normalize(embedding_model.encode(llm_text))
    orig_unit = normalize(embedding_model.encode(orig_text))
    cosine_scores.append(np.dot(llm_unit, orig_unit))

# 75th percentile of the cosine scores.
print(np.percentile(cosine_scores, 75))

0.8362348675727844


In [None]:
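#3 0.83  (75th percentile of the cosine similarities; printed 0.8362..., recorded here truncated to two decimals)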

In [23]:
from rouge import Rouge

rouge_scorer = Rouge()

# Index 10, doc_id=5170565b
r = df.iloc[10]
llm_answer, orig_answer = r['answer_llm'], r['answer_orig']

# Only the first element of the get_scores() result is used for this pair.
scores = rouge_scorer.get_scores(llm_answer, orig_answer)[0]

# F-score stored under the 'rouge-1' entry.
rouge_1 = scores['rouge-1']
print(rouge_1['f'])

0.45454544954545456


In [24]:
#4 0.45  (ROUGE-1 F-score for the row at index 10; printed 0.4545...)

In [25]:
# Score the same row `r` again so this cell does not depend on a stale `scores`.
llm_answer, orig_answer = r['answer_llm'], r['answer_orig']
scores = rouge_scorer.get_scores(llm_answer, orig_answer)[0]

# Arithmetic mean of the three F-scores (ROUGE-1, ROUGE-2, ROUGE-L).
f_rouge_1 = scores['rouge-1']['f']
f_rouge_2 = scores['rouge-2']['f']
f_rouge_l = scores['rouge-l']['f']
avg_f_score = (f_rouge_1 + f_rouge_2 + f_rouge_l) / 3
print(avg_f_score)

0.35490034990035496


In [26]:
#5 0.35  (mean of the ROUGE-1, ROUGE-2 and ROUGE-L F-scores for the row at index 10; printed 0.3549...)

In [27]:
# One score dict per row: first element of get_scores() for each
# (answer_llm, answer_orig) pair.
all_rouge_scores = [
    rouge_scorer.get_scores(llm_text, orig_text)[0]
    for llm_text, orig_text in zip(df['answer_llm'], df['answer_orig'])
]

rouge_df = pd.DataFrame(all_rouge_scores)

# Each 'rouge-2' cell is a dict; pull out its 'f' entry and average over rows.
rouge_2_f = rouge_df['rouge-2'].apply(lambda metrics: metrics['f'])
average_rouge_2 = rouge_2_f.mean()
print(average_rouge_2)

0.20696501983423318


In [None]:
#6 0.20  (mean ROUGE-2 F-score over all rows; printed 0.2069..., recorded here truncated to two decimals)