## Retrieval Evaluation

In [1]:
import pandas as pd
from helper import get_data_path, get_openai_api_key

# Resolve the dataset path through the project helper and load the CSV.
data_path = get_data_path()
df = pd.read_csv(data_path)

# Preview the first rows of the loaded frame.
df.head()


Unnamed: 0,question,answer,id,topic
0,How can I create an account?,"To create an account, click on the 'Sign Up' b...",a5825c73,Account & Registration
1,What payment methods do you accept?,"We accept major credit cards, debit cards, and...",db025979,Payments & Pricing
2,How can I track my order?,You can track your order by logging into your ...,f936d8f6,Order Management
3,What is your return policy?,Our return policy allows you to return product...,ae7b5c39,Returns & Refunds
4,Can I cancel my order?,You can cancel your order if it has not been s...,0ea444c1,Order Management


In [2]:
# Turn the frame into a list of dicts, one dict per row.
documents= df.to_dict(orient='records')
# Show the first record to check the available keys.
documents[0]

{'question': 'How can I create an account?',
 'answer': "To create an account, click on the 'Sign Up' button on the top right corner of our website and follow the instructions to complete the registration process.",
 'id': 'a5825c73',
 'topic': 'Account & Registration'}

In [3]:
# Prompt used to generate synthetic user questions for one record.
# Placeholders {question}, {answer} and {topic} are filled from a record dict;
# the doubled braces keep the JSON example literal through str.format().
prompt_template = """
You emulate a user of our customer support assistant.

Formulate 5 questions this user might ask based on the provided record.
The record should contain the answer to the questions, and the questions should
be complete and not too short. If possible, use as few words as possible from the record.

The record:

question: {question}
answer: {answer}
topic: {topic}

Provide the output in parsable JSON without using code blocks:

{{"question": ["question1", "question2", "question3", ..., "question5"]}}
""".strip()

# Render the prompt for the first record as a smoke test.
prompt = prompt_template.format(**documents[0])


In [8]:
from openai import OpenAI

# Build the OpenAI client from the key supplied by the project helper,
# then send the rendered prompt as a single user message.
openai_api_key = get_openai_api_key()
client = OpenAI(api_key=openai_api_key)
model = "gpt-4o-mini"

response = client.chat.completions.create(
    model=model,
    messages=[{"role": "user", "content": prompt}],
)

In [12]:
import json

# Parse the text of the first choice as JSON.
questions= json.loads(response.choices[0].message.content)

In [6]:
def generate_questions(doc, model="gpt-4o-mini"):
    """Ask the chat model for synthetic user questions about one record.

    Args:
        doc: Mapping used to fill the ``{question}``, ``{answer}`` and
            ``{topic}`` placeholders of ``prompt_template``.
        model: Name of the chat model to call. Defaults to the model the
            notebook used originally.

    Returns:
        The raw message content of the first choice (a JSON string that the
        caller is expected to parse).
    """
    prompt = prompt_template.format(**doc)

    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        # JSON mode: callers feed the result straight into json.loads, so ask
        # the API for syntactically valid JSON instead of relying on the
        # prompt wording alone (the prompt already mentions JSON, which this
        # mode requires).
        response_format={"type": "json_object"},
    )

    return response.choices[0].message.content

In [5]:
from tqdm.auto import tqdm

In [14]:
import json
# Generate questions for every document, keyed by document id.
results = {}
failed_doc_ids = []  # ids whose model response could not be parsed

for doc in tqdm(documents):
    doc_id = doc['id']
    if doc_id in results:
        # Same id seen earlier in this run: do not pay for a second call.
        continue

    questions_raw = generate_questions(doc)
    try:
        questions = json.loads(questions_raw)
        results[doc_id] = questions['question']
    except (json.JSONDecodeError, KeyError, TypeError):
        # Model output is not guaranteed to be valid JSON with the expected
        # key; record the failure instead of aborting the whole batch.
        failed_doc_ids.append(doc_id)

if failed_doc_ids:
    print(f"Could not parse questions for {len(failed_doc_ids)} document(s): {failed_doc_ids}")

# with open(data_path.parent / "retrieval_eval.json", "w") as f:
#     json.dump(results, f, indent=4)

  0%|          | 0/79 [00:00<?, ?it/s]

In [18]:
# Flatten the {doc_id: [question, ...]} mapping into (doc_id, question)
# pairs, one pair per generated question.
final_results = [
    (doc_id, question)
    for doc_id, doc_questions in results.items()
    for question in doc_questions
]

In [19]:
# Inspect the first (doc_id, question) pair.
final_results[0]

('a5825c73', 'What steps do I need to follow to create a new account?')

In [20]:
# Tabulate the (doc_id, question) pairs with explicit column names.
new_df= pd.DataFrame(final_results, columns=['id','question'])

In [21]:
# Persist the ground-truth pairs (relative path; index column omitted).
new_df.to_csv('ground-truth-retrieval.csv', index=False)

In [22]:
# Shell out to peek at the first lines of the file just written.
!head ground-truth-retrieval.csv

id,question
a5825c73,What steps do I need to follow to create a new account?
a5825c73,Where can I find the option to sign up for an account?
a5825c73,Is there a specific button I should click to start the registration process?
a5825c73,Can you guide me through the registration process for an account?
a5825c73,What is the first action I should take to register for an account?
db025979,What are the accepted payment options for my subscription?
db025979,Can I use PayPal for my online order?
db025979,Do you take debit cards as a payment method?
db025979,Are there any specific credit cards you do not accept?


In [27]:
# One dict per ground-truth row, with keys 'id' and 'question'.
ground_truth = new_df.to_dict(orient='records')

In [29]:
import minsearch
# Text fields are searched by content. 'id' is an opaque identifier, so it is
# registered only as a keyword field and not indexed as free text.
index = minsearch.Index(
    text_fields=['question', 'answer', 'topic'],
    keyword_fields=['id'],
)

In [30]:
# Build the index over all loaded records.
index.fit(documents)

<minsearch.Index at 0x16823d7d0>

In [31]:
def hit_rate(relevance_total):
    """Fraction of queries whose result list contains a relevant document.

    Args:
        relevance_total: One list of booleans per query; each boolean says
            whether the result at that rank is the expected document.

    Returns:
        Number of queries with at least one relevant result divided by the
        number of queries, or 0.0 when there are no queries.
    """
    if len(relevance_total) == 0:
        # Avoid ZeroDivisionError on an empty evaluation set.
        return 0.0

    hits = sum(1 for line in relevance_total if any(line))
    return hits / len(relevance_total)

def mrr(relevance_total):
    """Mean reciprocal rank of the first relevant result per query.

    Args:
        relevance_total: One list of booleans per query; each boolean says
            whether the result at that rank is the expected document.

    Returns:
        Average over queries of 1 / rank of the first relevant result
        (rank starts at 1; a query with no relevant result contributes 0),
        or 0.0 when there are no queries.
    """
    if len(relevance_total) == 0:
        # Avoid ZeroDivisionError on an empty evaluation set.
        return 0.0

    total_score = 0.0
    for line in relevance_total:
        for rank, is_relevant in enumerate(line, start=1):
            if is_relevant:
                # Only the first relevant hit counts for this query.
                total_score += 1 / rank
                break

    return total_score / len(relevance_total)

In [32]:
def minsearch_search(query, boost=None, num_results=10):
    """Search the module-level minsearch ``index`` for ``query``.

    Args:
        query: Free-text query string.
        boost: Optional mapping passed as ``boost_dict``; ``None`` (the
            default) means no boosts, matching the original behaviour.
        num_results: Maximum number of results to request (default 10).

    Returns:
        Whatever ``index.search`` returns for the query.
    """
    return index.search(
        query=query,
        filter_dict={},
        # Fresh dict per call so a shared default is never mutated.
        boost_dict=boost if boost is not None else {},
        num_results=num_results,
    )

In [33]:
def evaluate(ground_truth, search_function):
    """Score a search function against the ground-truth records.

    Args:
        ground_truth: Iterable of records; each must have an 'id' key holding
            the expected document id.
        search_function: Callable that receives a whole ground-truth record
            and returns an ordered collection of result dicts with an 'id' key.

    Returns:
        Dict with the 'hit_rate' and 'mrr' computed over all records.
    """
    relevance_total = []

    for record in tqdm(ground_truth):
        expected_id = record['id']
        retrieved = search_function(record)
        # One boolean per returned result, in rank order.
        relevance_total.append([hit['id'] == expected_id for hit in retrieved])

    return dict(
        hit_rate=hit_rate(relevance_total),
        mrr=mrr(relevance_total),
    )

In [34]:
from tqdm.auto import tqdm

# evaluate() hands the whole record to the search function, so the lambda
# extracts the question text before querying the index.
evaluate(ground_truth, lambda q: minsearch_search(q['question']))

  0%|          | 0/395 [00:00<?, ?it/s]

{'hit_rate': 0.8936708860759494, 'mrr': 0.6489963833634717}

## Optimise parameters for retrieval

In [38]:
# Import optimisation libraries
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

# split data into train, validation and test sets
df_validation= new_df[:100]
df_test = new_df[100:]