In [1]:
import pandas as pd

In [23]:
# Load the recipes dataset; the path is relative to the notebook's working directory.
df = pd.read_csv("../data/data.csv")
# Preview the first rows to check the columns loaded as expected.
df.head()

Unnamed: 0,id,recipe_name,type_of_dish,main_ingredient,cuisine,cooking_method,prep_time,cook_time,instructions
0,0,Spaghetti Bolognese,Main Course,Beef,Italian,Simmering,10 minutes,40 minutes,Heat olive oil in a large pan over medium heat...
1,1,Chicken Curry,Main Course,Chicken,Indian,Sautéing,15 minutes,30 minutes,Heat oil in a large pan over medium heat. Add ...
2,2,Caesar Salad,Appetizer,Lettuce,Western,Tossing,10 minutes,0 minutes,"In a large bowl, toss chopped romaine lettuce ..."
3,3,Chocolate Cake,Dessert,Chocolate,Western,Baking,20 minutes,35 minutes,Preheat oven to 180°C (350°F). In a large bowl...
4,4,Grilled Salmon,Main Course,Salmon,Western,Grilling,5 minutes,10 minutes,Preheat grill to high heat. Rub salmon fillets...


In [3]:
# !wget https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py

In [4]:
df.columns

Index(['id', 'recipe_name', 'type_of_dish', 'main_ingredient', 'cuisine',
       'cooking_method', 'prep_time', 'cook_time', 'instructions'],
      dtype='object')

### Ingestion

In [5]:
# One dict per recipe row, keyed by column name; this list is what gets indexed below.
documents = df.to_dict(orient="records")
# Displays the whole list of records in the cell output.
documents

[{'id': 0,
  'recipe_name': 'Spaghetti Bolognese',
  'type_of_dish': 'Main Course',
  'main_ingredient': 'Beef',
  'cuisine': 'Italian',
  'cooking_method': 'Simmering',
  'prep_time': '10 minutes',
  'cook_time': '40 minutes',
  'instructions': 'Heat olive oil in a large pan over medium heat. Add finely chopped onions, carrots, and celery. Cook for 5-7 minutes until softened. Add minced beef and cook until browned. Pour in canned tomatoes, beef broth, and Italian herbs. Simmer uncovered for 30-40 minutes. Stir occasionally and season with salt and pepper. Serve over cooked spaghetti and garnish with fresh basil.'},
 {'id': 1,
  'recipe_name': 'Chicken Curry',
  'type_of_dish': 'Main Course',
  'main_ingredient': 'Chicken',
  'cuisine': 'Indian',
  'cooking_method': 'Sautéing',
  'prep_time': '15 minutes',
  'cook_time': '30 minutes',
  'instructions': 'Heat oil in a large pan over medium heat. Add finely chopped onions and sauté until golden brown. Stir in minced garlic, ginger, and s

#### Minsearch

In [7]:
import minsearch

# Every descriptive column is indexed as searchable text; 'id' is registered as a
# keyword field. All searches in this notebook pass an empty filter_dict, so the
# keyword field is not used for filtering here.
index = minsearch.Index(
    text_fields=['recipe_name', 'type_of_dish', 'main_ingredient', 'cuisine',
       'cooking_method', 'prep_time', 'cook_time', 'instructions'],
    keyword_fields=['id']
)

In [8]:
index.fit(documents)

<minsearch.Index at 0x74cadd3373b0>

In [9]:
# Smoke test of the index: ask for the 10 best matches for a sample question.
# `query` is reused by the prompt-building cell further down.
query = "give me how to cook spaghetti bolognese"

index.search(query=query, num_results=10)

[{'id': 0,
  'recipe_name': 'Spaghetti Bolognese',
  'type_of_dish': 'Main Course',
  'main_ingredient': 'Beef',
  'cuisine': 'Italian',
  'cooking_method': 'Simmering',
  'prep_time': '10 minutes',
  'cook_time': '40 minutes',
  'instructions': 'Heat olive oil in a large pan over medium heat. Add finely chopped onions, carrots, and celery. Cook for 5-7 minutes until softened. Add minced beef and cook until browned. Pour in canned tomatoes, beef broth, and Italian herbs. Simmer uncovered for 30-40 minutes. Stir occasionally and season with salt and pepper. Serve over cooked spaghetti and garnish with fresh basil.'},
 {'id': 211,
  'recipe_name': 'Spaghetti Aglio e Olio',
  'type_of_dish': 'Main Course',
  'main_ingredient': 'Pasta',
  'cuisine': 'Italian',
  'cooking_method': 'Sautéing',
  'prep_time': '15 minutes',
  'cook_time': '15 minutes',
  'instructions': 'Cook spaghetti and toss with sautéed garlic, red pepper flakes, and olive oil. Garnish with parsley and Parmesan cheese.'},


### RAG flow

In [10]:
from openai import OpenAI

# No api_key is passed, so the client is expected to pick up credentials from the
# environment (OPENAI_API_KEY) — confirm it is set before running this cell.
client = OpenAI()

In [11]:
def search(query):
    """Return the top 10 index matches for `query`, with no filters and no boosts."""
    return index.search(
        query=query,
        filter_dict={},
        boost_dict={},
        num_results=10,
    )

In [12]:
# Outer prompt: instructions to the model plus slots for the question and the
# concatenated recipe entries.
prompt_template = """
You're a culinary expert. Answer the QUESTION based on the CONTEXT from our recipes database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT:
{context}
""".strip()


# How a single retrieved recipe is rendered inside the CONTEXT section.
entry_template = """
recipe_name: {recipe_name}
type_of_dish: {type_of_dish}
main_ingredient: {main_ingredient}
cuisine: {cuisine}
cooking_method: {cooking_method}
prep_time: {prep_time}
cook_time: {cook_time}
instructions: {instructions}
""".strip()

def build_prompt(query, search_results):
    """Render the final LLM prompt for `query` from the retrieved recipes.

    Each record in `search_results` must provide every field named in
    `entry_template`; extra keys (such as 'id') are ignored by `str.format`.
    Entries are separated by a blank line, and the surrounding whitespace of the
    finished prompt is stripped.
    """
    rendered_entries = [
        entry_template.format(**doc) + "\n\n" for doc in search_results
    ]
    context = "".join(rendered_entries)
    return prompt_template.format(question=query, context=context).strip()

In [13]:
# Build a prompt for the sample `query` defined in the index smoke-test cell.
search_results = search(query)
prompt = build_prompt(query, search_results)

In [14]:
print(prompt)

You're a culinary expert. Answer the QUESTION based on the CONTEXT from our recipes database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: give me how to cook spaghetti bolognese

CONTEXT:
recipe_name: Spaghetti Bolognese
type_of_dish: Main Course
main_ingredient: Beef
cuisine: Italian
cooking_method: Simmering
prep_time: 10 minutes
cook_time: 40 minutes
instructions: Heat olive oil in a large pan over medium heat. Add finely chopped onions, carrots, and celery. Cook for 5-7 minutes until softened. Add minced beef and cook until browned. Pour in canned tomatoes, beef broth, and Italian herbs. Simmer uncovered for 30-40 minutes. Stir occasionally and season with salt and pepper. Serve over cooked spaghetti and garnish with fresh basil.

recipe_name: Spaghetti Aglio e Olio
type_of_dish: Main Course
main_ingredient: Pasta
cuisine: Italian
cooking_method: Sautéing
prep_time: 15 minutes
cook_time: 15 minutes
instructions: Cook spaghetti and toss with sautéed g

In [15]:
def llm(prompt, model='gpt-4o-mini'):
    """Send `prompt` as a single user message and return the reply text.

    Uses the module-level `client`; only the first choice of the completion is
    returned.
    """
    messages = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(model=model, messages=messages)
    first_choice = completion.choices[0]
    return first_choice.message.content

In [16]:
def rag(query, model='gpt-4o-mini'):
    """Answer `query` with the full pipeline: retrieve, build the prompt, ask the LLM."""
    retrieved = search(query)
    return llm(build_prompt(query, retrieved), model=model)

In [17]:
# End-to-end check of the RAG pipeline on a two-part question.
question = 'What is the main ingredient of Spaghetti Bolognese? and how to make it?'
answer = rag(question)
print(answer)

The main ingredient of Spaghetti Bolognese is beef. 

To make Spaghetti Bolognese, follow these instructions:

1. Heat olive oil in a large pan over medium heat.
2. Add finely chopped onions, carrots, and celery, and cook for 5-7 minutes until softened.
3. Add minced beef and cook until browned.
4. Pour in canned tomatoes, beef broth, and Italian herbs.
5. Simmer uncovered for 30-40 minutes, stirring occasionally, and season with salt and pepper.
6. Serve over cooked spaghetti and garnish with fresh basil.


### Retrieval evaluation

In [18]:
# Ground-truth questions for retrieval evaluation; `evaluate` below treats each
# row's `id` as the id of the document that should be retrieved for its `question`.
df_question = pd.read_csv('../data/ground-truth-retrieval.csv')

In [19]:
df_question.head()

Unnamed: 0,id,question
0,0,"What should I do after cooking the onions, car..."
1,0,How long should I simmer the mixture after add...
2,0,What ingredients do I need to season the Bolog...
3,0,How long does it take to prepare the ingredien...
4,0,What type of pasta is recommended to serve wit...


In [20]:
# List of {'id', 'question'} dicts, the shape `evaluate` iterates over.
ground_truth = df_question.to_dict(orient='records')

In [21]:
ground_truth[0]

{'id': 0,
 'question': 'What should I do after cooking the onions, carrots, and celery?'}

In [22]:
def hit_rate(relevance_total):
    """Return the fraction of queries whose relevance list contains a hit.

    `relevance_total` holds one list of booleans per query (one flag per
    returned result). Raises ZeroDivisionError when `relevance_total` is empty.
    """
    queries_with_hit = sum(1 for line in relevance_total if True in line)
    return queries_with_hit / len(relevance_total)

def mrr(relevance_total):
    """Return the mean reciprocal rank over all queries.

    `relevance_total` holds one list of booleans per query, ordered by result
    rank. Each query contributes 1 / rank of its FIRST relevant result, or 0 if
    none of its results is relevant, so the score always lies in [0, 1].

    Raises ZeroDivisionError when `relevance_total` is empty.
    """
    total_score = 0.0

    for line in relevance_total:
        for rank, is_relevant in enumerate(line, start=1):
            # Equality (not truthiness) mirrors hit_rate's `True in line` check.
            if is_relevant == True:
                total_score += 1 / rank
                # Only the first hit counts; without this, a query with several
                # hits would add several reciprocal ranks and could exceed 1.
                break

    return total_score / len(relevance_total)

In [23]:
def minsearch_search(query, boost=None):
    """Return the top 10 minsearch results for `query`.

    Args:
        query: Free-text search string.
        boost: Optional dict mapping a text-field name to its boost weight,
            forwarded to the index as ``boost_dict``. When omitted, no boosting
            is applied (an empty dict is passed).

    Returns:
        Whatever ``index.search`` returns for the query.
    """
    # Build the empty dict per call rather than using a mutable default argument.
    return index.search(
        query=query,
        filter_dict={},
        boost_dict={} if boost is None else boost,
        num_results=10
    )

In [26]:
from tqdm.auto import tqdm

def evaluate(ground_truth, search_function):
    """Score `search_function` against `ground_truth` and return hit rate and MRR.

    Each ground-truth record is passed whole to `search_function`; a returned
    document is relevant when its 'id' equals the record's 'id'.
    """
    per_query_relevance = []

    for record in tqdm(ground_truth):
        expected_id = record["id"]
        per_query_relevance.append(
            [hit["id"] == expected_id for hit in search_function(record)]
        )

    scores = {}
    scores['hit_rate'] = hit_rate(per_query_relevance)
    scores['mrr'] = mrr(per_query_relevance)
    return scores

In [27]:
# Baseline: unboosted search scored over the whole ground truth.
evaluate(ground_truth, lambda q: minsearch_search(q['question']))

  0%|          | 0/910 [00:00<?, ?it/s]

{'hit_rate': 0.8626373626373627, 'mrr': 0.7083765044479331}

### Finding the best parameters

In [28]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [29]:
# Positional split: the first 100 rows are used for tuning, the rest are held out.
# NOTE(review): no shuffling is done, and the rows shown by head() appear to be
# grouped by recipe id, so the validation set presumably covers only the first few
# recipes — confirm, and consider sampling instead.
df_validation = df_question[:100]
df_test = df_question[100:]

In [30]:
# Validation records in the list-of-dicts form that `evaluate` expects.
gt_val = df_validation.to_dict(orient='records')

In [31]:
# Search wrapper with an optional per-field boost (this queries the real index; it is not a mock).
def minsearch_search(query, boost=None):
    """Return the top 10 index matches for `query`.

    `boost` maps text-field names to boost weights and is forwarded as
    `boost_dict`; when it is None an empty dict is used instead.
    """
    return index.search(
        query=query,
        filter_dict={},
        boost_dict={} if boost is None else boost,
        num_results=10,
    )

In [33]:
# Search space for hyperopt: one boost weight per text field. Each sampled value is
# handed to index.search as boost_dict (via objective -> minsearch_search), so these
# are field weights, not similarity scores or durations.
param_ranges = {
    'recipe_name': hp.uniform('recipe_name', 0.0, 3.0),   # boost weight for recipe_name
    'type_of_dish': hp.uniform('type_of_dish', 0.0, 3.0),
    'main_ingredient': hp.uniform('main_ingredient', 0.0, 3.0),
    'cuisine': hp.uniform('cuisine', 0.0, 3.0),
    'cooking_method': hp.uniform('cooking_method', 0.0, 3.0),
    'prep_time': hp.uniform('prep_time', 0.0, 120.0),     # NOTE(review): still a boost weight, not minutes; range is 40x wider than the other fields — confirm intended
    'cook_time': hp.uniform('cook_time', 0.0, 180.0),     # NOTE(review): still a boost weight, not minutes; range is 60x wider than the other fields — confirm intended
    'instructions': hp.uniform('instructions', 0.0, 3.0)  # boost weight for instructions
}

# Objective function for optimization
def objective(boost_params):
    """Score one set of boost weights on the validation questions.

    `boost_params` is the dict hyperopt samples from `param_ranges`. Returns the
    hyperopt result dict whose loss is the negated validation MRR.
    """
    metrics = evaluate(
        gt_val,
        lambda q: minsearch_search(q['question'], boost_params),
    )

    # Hyperopt minimizes the loss, so the MRR is negated to maximize it.
    return {'loss': -metrics['mrr'], 'status': STATUS_OK}

In [34]:
# Run hyperopt optimization: 20 TPE trials, each scoring one boost configuration
# on the validation questions through `objective`.
# NOTE(review): no random state is passed to fmin, so the sampled trials are
# presumably not reproducible across runs — confirm and pin a seed.
trials = Trials()

best = fmin(
    fn=objective, 
    space=param_ranges, 
    algo=tpe.suggest, 
    max_evals=20, 
    trials=trials
)

# `best` maps each field name to the boost weight of the best trial.
print("Best parameters found: ", best)

  0%|          | 0/20 [00:00<?, ?trial/s, best loss=?]

  0%|          | 0/100 [00:00<?, ?it/s]

  5%|▌         | 1/20 [00:00<00:08,  2.25trial/s, best loss: -0.6567500000000002]

  0%|          | 0/100 [00:00<?, ?it/s]

 10%|█         | 2/20 [00:00<00:08,  2.24trial/s, best loss: -0.6784722222222223]

  0%|          | 0/100 [00:00<?, ?it/s]

 15%|█▌        | 3/20 [00:01<00:07,  2.31trial/s, best loss: -0.6784722222222223]

  0%|          | 0/100 [00:00<?, ?it/s]

 20%|██        | 4/20 [00:01<00:06,  2.36trial/s, best loss: -0.6784722222222223]

  0%|          | 0/100 [00:00<?, ?it/s]

 25%|██▌       | 5/20 [00:02<00:06,  2.35trial/s, best loss: -0.6784722222222223]

  0%|          | 0/100 [00:00<?, ?it/s]

 30%|███       | 6/20 [00:02<00:06,  2.31trial/s, best loss: -0.6784722222222223]

  0%|          | 0/100 [00:00<?, ?it/s]

 35%|███▌      | 7/20 [00:03<00:05,  2.33trial/s, best loss: -0.6784722222222223]

  0%|          | 0/100 [00:00<?, ?it/s]

 40%|████      | 8/20 [00:03<00:05,  2.33trial/s, best loss: -0.6837063492063493]

  0%|          | 0/100 [00:00<?, ?it/s]

 45%|████▌     | 9/20 [00:03<00:04,  2.29trial/s, best loss: -0.6837063492063493]

  0%|          | 0/100 [00:00<?, ?it/s]

 50%|█████     | 10/20 [00:04<00:04,  2.31trial/s, best loss: -0.6866944444444445]

  0%|          | 0/100 [00:00<?, ?it/s]

 55%|█████▌    | 11/20 [00:04<00:03,  2.28trial/s, best loss: -0.6866944444444445]

  0%|          | 0/100 [00:00<?, ?it/s]

 60%|██████    | 12/20 [00:05<00:03,  2.24trial/s, best loss: -0.6866944444444445]

  0%|          | 0/100 [00:00<?, ?it/s]

 65%|██████▌   | 13/20 [00:05<00:03,  2.21trial/s, best loss: -0.6866944444444445]

  0%|          | 0/100 [00:00<?, ?it/s]

 70%|███████   | 14/20 [00:06<00:02,  2.22trial/s, best loss: -0.6866944444444445]

  0%|          | 0/100 [00:00<?, ?it/s]

 75%|███████▌  | 15/20 [00:06<00:02,  2.21trial/s, best loss: -0.6866944444444445]

  0%|          | 0/100 [00:00<?, ?it/s]

 80%|████████  | 16/20 [00:07<00:01,  2.16trial/s, best loss: -0.6866944444444445]

  0%|          | 0/100 [00:00<?, ?it/s]

 85%|████████▌ | 17/20 [00:07<00:01,  2.23trial/s, best loss: -0.6866944444444445]

  0%|          | 0/100 [00:00<?, ?it/s]

 90%|█████████ | 18/20 [00:07<00:00,  2.28trial/s, best loss: -0.6866944444444445]

  0%|          | 0/100 [00:00<?, ?it/s]

 95%|█████████▌| 19/20 [00:08<00:00,  2.27trial/s, best loss: -0.6866944444444445]

  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 20/20 [00:08<00:00,  2.26trial/s, best loss: -0.6866944444444445]
Best parameters found:  {'cook_time': np.float64(86.54318555259674), 'cooking_method': np.float64(0.039267515269042486), 'cuisine': np.float64(0.14584725698741952), 'instructions': np.float64(2.235829124458284), 'main_ingredient': np.float64(0.7923264133676281), 'prep_time': np.float64(97.16195429708318), 'recipe_name': np.float64(2.290315090834727), 'type_of_dish': np.float64(1.2413235584483393)}


In [36]:
def minsearch_improved(query):
    """Return the top 10 index matches for `query` using the tuned boost weights.

    The weights are hard-coded copies of the "Best parameters found" values
    printed by the hyperopt run above; they are not read from `best`, so a new
    tuning run does not change them.
    """
    tuned_boost = {
        'recipe_name': 2.290315090834727,
        'type_of_dish': 1.2413235584483393,
        'main_ingredient': 0.7923264133676281,
        'cuisine': 0.14584725698741952,
        'cooking_method': 0.039267515269042486,
        'prep_time': 97.16195429708318,
        'cook_time': 86.54318555259674,
        'instructions': 2.235829124458284,
    }

    return index.search(
        query=query,
        filter_dict={},
        boost_dict=tuned_boost,
        num_results=10,
    )

# Score the tuned boosts on the full ground truth, for comparison with the baseline.
# NOTE(review): ground_truth includes the first 100 rows that served as gt_val during
# tuning, and the held-out df_test split is not used in the cells shown here —
# consider evaluating on df_test for an unbiased comparison.
evaluate(ground_truth, lambda q: minsearch_improved(q['question']))

  0%|          | 0/910 [00:00<?, ?it/s]

{'hit_rate': 0.8681318681318682, 'mrr': 0.7364242107099256}

### RAG Evaluation

In [37]:
# LLM-as-judge prompt: the model grades how relevant a generated answer is to its
# question and replies with a JSON object. The doubled braces {{ }} are str.format
# escapes so the JSON skeleton survives .format(question=..., answer_llm=...).
prompt2_template = """
You are an expert evaluator for a RAG system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: {question}
Generated Answer: {answer_llm}

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}}
""".strip()

In [38]:
len(ground_truth)

910

In [39]:
# Try the judge flow on a single ground-truth record before running it in bulk.
record = ground_truth[0]
question = record['question']
answer_llm = rag(question)

In [40]:
print(answer_llm)

After cooking the onions, carrots, and celery, the next steps will depend on the specific recipe you are following, as the provided context does not include a recipe that specifically instructs on these ingredients being cooked together. However, if you are following a recipe that includes these ingredients (often referred to as a mirepoix), typically you would proceed by adding the next ingredient or liquid as specified in your recipe, such as broth or another main ingredient, to continue cooking the dish.


In [41]:
# Render the judge prompt for that record and inspect it (this rebinds the global `prompt`).
prompt = prompt2_template.format(question=question, answer_llm=answer_llm)
print(prompt)

You are an expert evaluator for a RAG system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: What should I do after cooking the onions, carrots, and celery?
Generated Answer: After cooking the onions, carrots, and celery, the next steps will depend on the specific recipe you are following, as the provided context does not include a recipe that specifically instructs on these ingredients being cooked together. However, if you are following a recipe that includes these ingredients (often referred to as a mirepoix), typically you would proceed by adding the next ingredient or liquid as specified in your recipe, such as broth or another main ingredient, to continue cooking the dish.

Please analyze the content and context of the generated answer in relation to the question
and provide you

In [42]:
import json

# Reproducible 10-question sample (fixed random_state).
# NOTE(review): the judge loop in the next cell iterates the full `ground_truth`;
# `sample` is not used by any cell shown here.
df_sample = df_question.sample(n=10, random_state=1)
sample = df_sample.to_dict(orient='records')
sample

[{'id': 38,
  'question': 'How long do I need to bake the stuffed bell peppers?'},
 {'id': 248,
  'question': 'What cooking method is used for making the Lemon Bars?'},
 {'id': 22,
  'question': 'How long does it take to prepare the Sweet and Sour Pork before cooking?'},
 {'id': 52,
  'question': 'What cooking method is used for preparing the Chicken Shawarma?'},
 {'id': 180, 'question': 'What should I serve with the Thai Green Curry?'},
 {'id': 236,
  'question': 'What is the main ingredient used in the Beef and Ale Pie recipe?'},
 {'id': 115,
  'question': 'What type of cheese do I need to use for this Margherita Pizza recipe?'},
 {'id': 45,
  'question': 'What filling ingredients are included in the beef mixture for the empanadas?'},
 {'id': 248, 'question': 'What should I do before serving the Lemon Bars?'},
 {'id': 41,
  'question': 'What is the main cuisine type for this Beef Kebabs recipe?'}]

In [46]:
# LLM-as-judge pass over every ground-truth question: generate an answer with the
# RAG pipeline, then ask the model to grade its relevance. Each iteration makes
# two LLM calls (one inside rag, one for the judge prompt).
evaluations = []

for record in tqdm(ground_truth):
    question = record['question']
    answer_llm = rag(question) 

    prompt = prompt2_template.format(
        question=question,
        answer_llm=answer_llm
    )

    evaluation = llm(prompt)
    # json.loads raises if the reply is not valid JSON, which aborts the loop
    # before the results collected so far are turned into df_eval.
    evaluation = json.loads(evaluation)

    # Keep the source record, the generated answer and the parsed verdict together.
    evaluations.append((record, answer_llm, evaluation))

  0%|          | 0/910 [00:00<?, ?it/s]

In [49]:
# One row per judged question, built from the (record, answer, evaluation) tuples.
df_eval = pd.DataFrame(evaluations, columns=['record', 'answer', 'evaluation'])

# Lift the nested dict fields into flat columns, then drop the raw dict columns.
df_eval = df_eval.assign(
    id=df_eval['record'].apply(lambda rec: rec['id']),
    question=df_eval['record'].apply(lambda rec: rec['question']),
    relevance=df_eval['evaluation'].apply(lambda verdict: verdict['Relevance']),
    explanation=df_eval['evaluation'].apply(lambda verdict: verdict['Explanation']),
).drop(columns=['record', 'evaluation'])

In [50]:
df_eval

Unnamed: 0,answer,id,question,relevance,explanation
0,"After cooking the onions, carrots, and celery,...",0,"What should I do after cooking the onions, car...",PARTLY_RELEVANT,The generated answer addresses the question by...
1,The context provided does not specify a simmer...,0,How long should I simmer the mixture after add...,PARTLY_RELEVANT,The generated answer addresses the question by...
2,"To season the Bolognese sauce, you need salt a...",0,What ingredients do I need to season the Bolog...,RELEVANT,The generated answer provides specific ingredi...
3,To prepare the ingredients for the Chicken and...,0,How long does it take to prepare the ingredien...,RELEVANT,The generated answer directly addresses the qu...
4,The context provided does not specify which ty...,0,What type of pasta is recommended to serve wit...,NON_RELEVANT,The generated answer does not address the ques...
...,...,...,...,...,...
905,The main ingredient used in the Chicken Tortil...,257,What is the main ingredient used in the Chicke...,RELEVANT,The generated answer directly addresses the qu...
906,"To prepare the Chicken Tortilla Soup, it takes...",257,How long does it take to prepare the Chicken T...,RELEVANT,The generated answer directly addresses the qu...
907,The cooking method used for making the Chicken...,257,What cooking method is used for making the Chi...,RELEVANT,The generated answer directly addresses the qu...
908,The suggested garnishes for the Chicken Tortil...,257,What garnishes are suggested for the Chicken T...,RELEVANT,The generated answer directly addresses the qu...


In [51]:
df_eval.relevance.value_counts()

relevance
RELEVANT           807
PARTLY_RELEVANT     91
NON_RELEVANT        12
Name: count, dtype: int64

In [52]:
df_eval.relevance.value_counts(normalize=True)

relevance
RELEVANT           0.886813
PARTLY_RELEVANT    0.100000
NON_RELEVANT       0.013187
Name: proportion, dtype: float64

In [53]:
# Persist the judged answers; index=False keeps the row index out of the file.
df_eval.to_csv('../data/rag-eval-gpt-4o-mini.csv', index=False)

In [54]:
# Inspect the answers the judge labelled NON_RELEVANT.
df_eval[df_eval.relevance == 'NON_RELEVANT']

Unnamed: 0,answer,id,question,relevance,explanation
4,The context provided does not specify which ty...,0,What type of pasta is recommended to serve wit...,NON_RELEVANT,The generated answer does not address the ques...
47,The cooking method used to make the sushi roll...,9,What cooking method is used to make the sushi ...,NON_RELEVANT,The generated answer incorrectly identifies 'R...
269,The Macaroni and Cheese should be baked until ...,53,At what temperature should the Macaroni and Ch...,NON_RELEVANT,The generated answer does not provide the spec...
362,The cooking method used for cooking the crepes...,73,What cooking method is used for cooking the cr...,NON_RELEVANT,The generated answer incorrectly states that c...
410,The context does not specify a specific durati...,84,How long should I marinate the salmon in teriy...,NON_RELEVANT,The generated answer does not provide any spec...
436,The provided recipes do not include specific i...,89,"How long should I sauté the onions, garlic, an...",NON_RELEVANT,The generated answer fails to address the ques...
527,The cooking method used for the Greek Salad is...,117,What cooking method is used for the Greek Salad?,NON_RELEVANT,The question asks about the cooking method use...
561,The CONTEXT does not provide specific informat...,129,How long should I bake the French fries to mak...,NON_RELEVANT,The generated answer does not provide any info...
623,The CONTEXT provided does not specify any ingr...,167,What ingredients are mixed with ricotta cheese...,NON_RELEVANT,The generated answer does not provide any rele...
629,The context provided does not specify a partic...,168,Is there a specific type of olive oil that you...,NON_RELEVANT,The generated answer does not address the spec...
