In [None]:
import json
# Load the cleaned baking recipes. The encoding is pinned to UTF-8 so the
# result does not depend on the platform's default locale encoding.
with open("../data/baking_cleaned.json", "r", encoding="utf-8") as f:
    baking_recipes = json.load(f)

In [44]:
from openai import OpenAI
import pandas as pd
from api_key import openAI_api_key 

# Shared OpenAI client for the notebook; the key is read from the local
# `api_key` module rather than being written into the notebook itself.
client = OpenAI(
    api_key=openAI_api_key,
)

In [60]:
# Prompt template asking the model for 5 user-style search questions about
# one recipe.
# - Single-brace fields such as {id} or {name} are placeholders that are
#   filled in with str.format().
# - The doubled braces {{ }} around the output example are escapes that
#   render as literal braces in the final prompt.
# NOTE(review): the placeholder names (e.g. prep_mins, rattings) presumably
# mirror the keys of each recipe record -- confirm against the data file
# before renaming any of them.
prompt_temp = """
You are given a structured recipe record in JSON format.

Your task: Generate 5 natural, user-style search questions that someone might ask in a cooking app when looking for this recipe.

Guidelines:
- Questions should sound like something a home cook or food enthusiast would type or say.
- Avoid quoting exact instructions or step-by-step details.
- Include 1–2 distinctive features from the recipe to make it identifiable among similar recipes. Distinctive features can include:
-- Specific ingredients or combination of ingredients
-- Cooking/prep/total time
-- Serving size or number of portions
-- Nutritional highlights (calories, protein, etc.)
-- Difficulty or skill level
-- Rating
-- Description or unique traits (flavor, texture, style)
- Make questions practical and search-friendly (but do not use recipes ID, since question should come for a normal user).
-The goal is to make questions realistic and helpful for finding this recipe in a search system.

Now generate 5 unique questions for the following recipe:  

  "id"                :   {id},
  "name"              :   {name},
  "description"       :   {description},
  "dish_type"         :   {dish_type},
  "difficult"         :   {difficult},
  "ingredients"       :   {ingredients},
  "preparation_min"   :   {prep_mins}, 
  "cooking_min"       :   {cook_mins}, 
  "total_cooking_min" :   {total_mins},
  "kcal"              :   {kcal}, 
  "fat"               :   {fat}, 
  "saturated fat"     :   {saturates}, 
  "carbohydrates"     :   {carbs}, 
  "sugars"            :   {sugars}, 
  "fibre"             :   {fibre}, 
  "protein"           :   {protein}, 
  "salt"              :   {salt},
  "ratting"           :   {rattings}

Return your output in JSON format (but not in a code block) as:  
{{
  "id": "<recipe id>",
  "questions": [
    "question 1",
    "question 2",
    "question 3",
    "question 4",
    "question 5"
  ]

}}
"""

In [61]:
def llm_prompt(prompt, model='gpt-5-mini', llm_client=None):
    """Send ``prompt`` as a single user message and return the reply parsed as JSON.

    Parameters
    ----------
    prompt : str
        Full prompt text, sent as the only user message.
    model : str, optional
        Chat-completions model name. Defaults to ``'gpt-5-mini'``.
    llm_client : optional
        Object exposing ``chat.completions.create(model=..., messages=...)``.
        When omitted, the notebook-level ``client`` is used.

    Returns
    -------
    object
        Whatever ``json.loads`` produces from the reply text.

    Raises
    ------
    ValueError
        If the reply carries no text content. ``json.JSONDecodeError`` (a
        ``ValueError`` subclass) is raised when the text is not valid JSON.
    """
    active_client = client if llm_client is None else llm_client
    response = active_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )

    content = response.choices[0].message.content
    if content is None or not content.strip():
        # Fail with a clear message instead of the TypeError json.loads(None) gives.
        raise ValueError("LLM reply contained no text content")

    text = content.strip()
    if text.startswith("```"):
        # Models sometimes wrap JSON in a Markdown fence even when told not to;
        # drop the opening fence line and the closing fence before parsing.
        fenced_lines = text.splitlines()[1:]
        if fenced_lines and fenced_lines[-1].strip() == "```":
            fenced_lines = fenced_lines[:-1]
        text = "\n".join(fenced_lines)

    return json.loads(text)


In [62]:
import time
from tqdm.auto import tqdm

format_error = []      # recipes whose fields could not fill the prompt template
prompt_unsuccess = []  # prompts whose LLM call or JSON parsing failed
results = []           # parsed LLM replies, one entry per successful recipe

for recipe in tqdm(baking_recipes):
    id_ = recipe["id"]

    # Step 1: build the prompt. A failure here is a data problem, so the
    # recipe itself is kept for inspection and no API call is made.
    try:
        prompt = prompt_temp.format(**recipe)
    except Exception as exc:
        format_error.append(recipe)
        print(f"prompt format error : {id_} ({exc!r})")
        continue

    # Step 2: query the model. `except Exception` (not a bare `except`) lets
    # KeyboardInterrupt through, so this long-running loop can be stopped.
    try:
        result_js = llm_prompt(prompt)
    except Exception as exc:
        prompt_unsuccess.append(prompt)
        print(f"LLM API error : {id_} ({exc!r})")
    else:
        if isinstance(result_js, dict):
            # The model is only asked to echo the id back; use the real one so
            # a mistyped or invented id cannot corrupt the ground truth.
            result_js["id"] = id_
        results.append(result_js)

    # Short pause between API calls.
    time.sleep(0.2)


  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 614/614 [2:17:00<00:00, 13.39s/it]  


In [68]:
# convert result json into dataframe for storage
df = pd.DataFrame(results) 
df_expand = df.explode("questions") # expand the questions column
df_expand.head(10)

Unnamed: 0,id,questions
0,16a94310-cea8-435f-90e8-10f8b02b7bfe,Easy white bread rolls for sandwiches or burge...
0,16a94310-cea8-435f-90e8-10f8b02b7bfe,Simple homemade rolls using strong white bread...
0,16a94310-cea8-435f-90e8-10f8b02b7bfe,Beginner-friendly bread rolls (easy) made with...
0,16a94310-cea8-435f-90e8-10f8b02b7bfe,Quick roll recipe with about 246 kcal per serv...
0,16a94310-cea8-435f-90e8-10f8b02b7bfe,Where can I find a simple soft roll recipe usi...
1,5edaf257-7a3b-4878-bf3c-99922f319e7a,"Easy pizza rolls made with crusty bread rolls,..."
1,5edaf257-7a3b-4878-bf3c-99922f319e7a,Quick snack for kids: pizza-style rolls using ...
1,5edaf257-7a3b-4878-bf3c-99922f319e7a,Can I make these pizza rolls with black olives...
1,5edaf257-7a3b-4878-bf3c-99922f319e7a,"Simple, kid-friendly bread roll pizza that's o..."
1,5edaf257-7a3b-4878-bf3c-99922f319e7a,Crowd-friendly pizza roll recipe that can be d...


In [67]:
# Persist the exploded question table as the retrieval ground-truth file.
ground_truth_path = "../data/ground_truth_retrieval.csv"
df_expand.to_csv(ground_truth_path)