In [1]:

import json
import os
import time
from typing import Dict, List, Optional

from dotenv import load_dotenv
from openai import OpenAI

# Load variables from a local .env file (overriding ones already set) and read the key.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Check the key BEFORE building the client: the OpenAI constructor raises when no
# key is available, which would otherwise hide the hint printed below.
if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key.strip() != api_key:
    # Whitespace is tested before the prefix so that a key with a leading space
    # is reported as a whitespace problem rather than as a wrong prefix.
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")

# Shared client used by the later cells (they refer to it by the name `openai`).
openai = OpenAI(api_key=api_key)

API key found and looks good so far!


In [2]:
# Smoke test: one tiny request to confirm that the client and key actually work
try:
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",  # cheaper model is enough for a connectivity check
        messages=[{"role": "user", "content": "Say hello"}],
        max_tokens=10,
    )
    print("✅ OpenAI connected successfully!")
    print("Response: {}".format(response.choices[0].message.content))
except Exception as err:
    # Any failure is reported inline so the notebook keeps running
    print("❌ Error: {}".format(err))

✅ OpenAI connected successfully!
Response: Hello! How can I assist you today?


In [3]:
# Reference search-state schema: this is the structure we want Qwen to learn
example_state = dict(
    name_or_description="",
    include=[],           # every listed item must be included
    include_groups=[],    # any one of the groups must be included (OR logic)
    exclude=[],           # none of these may be included
    tags=[],              # every listed tag must be present
    tags_or=[],           # any one of the groups must be present (OR logic)
    exclude_tags=[],      # none of these tags may be present
    count=5,              # number of results
)

print("Our JSON structure:")
print(json.dumps(example_state, indent=2))

Our JSON structure:
{
  "name_or_description": "",
  "include": [],
  "include_groups": [],
  "exclude": [],
  "tags": [],
  "tags_or": [],
  "exclude_tags": [],
  "count": 5
}


In [4]:
def create_prompt_for_tag(tag: str) -> str:
    """
    Build the instruction text sent to OpenAI for one recipe tag.

    The text asks for a 3-5 turn search-refinement conversation, returned as
    JSON in the tag / turns / state_after layout embedded in the prompt.
    """
    # Doubled braces are literal JSON braces; single braces interpolate `tag`.
    return f"""Generate a realistic multi-turn conversation about searching for '{tag}' recipes.

IMPORTANT: Return ONLY valid JSON, no other text.

The conversation should have 3-5 turns showing how a user refines their search.
Each turn should build on the previous state.

Use this exact JSON structure:
{{
    "tag": "{tag}",
    "turns": [
        {{
            "user_says": "what the user types",
            "state_after": {{
                "name_or_description": "",
                "include": [],
                "include_groups": [],
                "exclude": [],
                "tags": [],
                "tags_or": [],
                "exclude_tags": [],
                "count": 5
            }},
            "explanation": "what changed and why"
        }}
    ]
}}

Rules for the conversation:
1. Start simple (usually just asking for the {tag})
2. Add constraints progressively
3. Use natural language patterns like:
   - "vegetarian or vegan" → tags_or: [["vegetarian", "vegan"]]
   - "with chicken and tomato or beef and tomato" → include_groups: [["chicken", "tomato"], ["beef", "tomato"]]
   - "no nuts" → exclude: ["nuts"]
   - "show me 10" → count: 10

Example phrases users might say:
- "I want {tag} recipes"
- "make it quick" 
- "either X or Y"
- "no dairy, I'm lactose intolerant"
- "something healthy"
- "for dinner"
"""

# Preview: build the prompt for one tag and show only its first 500 characters
test_prompt = create_prompt_for_tag("vegetarian")
print("Sample prompt for 'vegetarian':")
print(f"{test_prompt[:500]}...")

Sample prompt for 'vegetarian':
Generate a realistic multi-turn conversation about searching for 'vegetarian' recipes.

IMPORTANT: Return ONLY valid JSON, no other text.

The conversation should have 3-5 turns showing how a user refines their search.
Each turn should build on the previous state.

Use this exact JSON structure:
{
    "tag": "vegetarian",
    "turns": [
        {
            "user_says": "what the user types",
            "state_after": {
                "name_or_description": "",
                "include": [],
...


In [5]:
def generate_conversation_with_openai(tag: str, model: str = "gpt-3.5-turbo", temperature: float = 0.8) -> Optional[Dict]:
    """
    Call OpenAI to generate one conversation for ``tag``.

    The prompt comes from ``create_prompt_for_tag`` and the request asks the
    API for JSON-object output.

    Args:
        tag: Recipe tag the conversation should be about.
        model: Chat model name passed to the API.
        temperature: Sampling temperature passed to the API.

    Returns:
        The parsed conversation dict, which always holds a non-empty
        ``"turns"`` list, or ``None`` when the request failed, the reply was
        empty or not valid JSON, or the reply did not have that shape.
        Failures are reported with ``print`` rather than raised.
    """
    prompt = create_prompt_for_tag(tag)

    try:
        # Call OpenAI (using the shared `openai` client)
        response = openai.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant that generates training data in JSON format."},
                {"role": "user", "content": prompt}
            ],
            temperature=temperature,
            response_format={"type": "json_object"}  # Force JSON output
        )

        raw_reply = response.choices[0].message.content
        if not raw_reply:
            # Report an empty reply explicitly instead of letting json.loads fail on it
            print(f"❌ Error generating conversation for '{tag}': empty response")
            return None

        conversation_data = json.loads(raw_reply)
    except Exception as e:
        # Best effort: one failed tag must not stop a longer generation run
        print(f"❌ Error generating conversation for '{tag}': {e}")
        return None

    # Callers index conversation["turns"][0] directly, so reject replies
    # that would make that lookup fail.
    turns = conversation_data.get("turns") if isinstance(conversation_data, dict) else None
    if not isinstance(turns, list) or not turns:
        print(f"❌ Invalid format for '{tag}': expected a JSON object with a non-empty 'turns' list")
        return None

    print(f"✅ Generated conversation for '{tag}' (temp={temperature})")
    return conversation_data

# Generate the same tag at several temperatures, keeping each successful result
temperatures = [0.2, 0.4, 0.6, 0.8]
test_conversations = {}

for temp in temperatures:
    print(f"\n--- Testing with temperature {temp} ---")
    conversation = generate_conversation_with_openai("breakfast", temperature=temp)

    if not conversation:
        continue

    test_conversations[temp] = conversation
    # The opening user message is a quick indicator of variety between runs
    first_user_msg = conversation["turns"][0]["user_says"]
    print("First user says: '{}'".format(first_user_msg))


--- Testing with temperature 0.2 ---
✅ Generated conversation for 'breakfast' (temp=0.2)
First user says: 'I want breakfast recipes'

--- Testing with temperature 0.4 ---
✅ Generated conversation for 'breakfast' (temp=0.4)
First user says: 'I want breakfast recipes'

--- Testing with temperature 0.6 ---
✅ Generated conversation for 'breakfast' (temp=0.6)
First user says: 'I want breakfast recipes'

--- Testing with temperature 0.8 ---
✅ Generated conversation for 'breakfast' (temp=0.8)
First user says: 'I want breakfast recipes'


In [6]:
# Summarise each temperature's conversation: turn count plus first two messages
print("\n=== COMPARING TEMPERATURE EFFECTS ===")
for temp, conv in test_conversations.items():
    print(f"\nTemperature {temp}:")
    turns = conv['turns']
    print(f"  Number of turns: {len(turns)}")
    print(f"  First message: {turns[0]['user_says']}")
    if len(turns) > 1:
        print(f"  Second message: {turns[1]['user_says']}")


=== COMPARING TEMPERATURE EFFECTS ===

Temperature 0.2:
  Number of turns: 3
  First message: I want breakfast recipes
  Second message: make it quick

Temperature 0.4:
  Number of turns: 3
  First message: I want breakfast recipes
  Second message: make it quick

Temperature 0.6:
  Number of turns: 3
  First message: I want breakfast recipes
  Second message: make it quick

Temperature 0.8:
  Number of turns: 3
  First message: I want breakfast recipes
  Second message: make it quick


In [7]:
# Walk through every turn of every conversation to see where differences appear
print("=== DETAILED COMPARISON ===\n")

for temp, conv in test_conversations.items():
    print(f"Temperature {temp}:")
    print("-" * 40)

    for turn_no, turn in enumerate(conv['turns'], start=1):
        print(f"Turn {turn_no}: {turn['user_says']}")

        # Report only the state fields that are populated
        state = turn['state_after']
        for label, key in (("Tags", "tags"), ("Include", "include"), ("Tags OR", "tags_or")):
            if state[key]:
                print(f"  {label}: {state[key]}")

    print()

=== DETAILED COMPARISON ===

Temperature 0.2:
----------------------------------------
Turn 1: I want breakfast recipes
Turn 2: make it quick
Turn 3: something healthy
  Tags: ['healthy']

Temperature 0.4:
----------------------------------------
Turn 1: I want breakfast recipes
Turn 2: make it quick
Turn 3: something healthy
  Tags: ['healthy']

Temperature 0.6:
----------------------------------------
Turn 1: I want breakfast recipes
Turn 2: make it quick
Turn 3: something healthy
  Tags: ['healthy']

Temperature 0.8:
----------------------------------------
Turn 1: I want breakfast recipes
Turn 2: make it quick
Turn 3: something healthy
  Tags: ['healthy']



In [8]:
# Allowed recipe tags. The later prompt builders join this list into the prompt
# text, and the validation cells treat any tag not found here as invalid.
# Note that the list itself contains 'spicy' and 'healthy', as well as a few
# near-duplicate spellings (e.g. 'rosh-hashana' / 'rosh-hashanah',
# 'super-bowl' / 'superbowl').
VALID_TAGS = [ '1-day-or-more', '15-minutes-or-less', '3-steps-or-less', '30-minutes-or-less', '4-hours-or-less', '5-ingredients-or-less', '60-minutes-or-less', 'a1-sauce', 'african', 'american', 'amish-mennonite', 'angolan', 'appetizers', 'apples', 'april-fools-day', 'argentine', 'artichoke', 'asian', 'asparagus', 'australian', 'austrian', 'avocado', 'bacon', 'baja', 'baking', 'bananas', 'bar-cookies', 'barbecue', 'bass', 'bath-beauty', 'bean-soup', 'beans', 'beans-soups', 'bear', 'beef', 'beef-liver', 'beef-organ-meats', 'beef-ribs', 'beef-sauces', 'beef-sausage', 'beginner-cook', 'beijing', 'belgian', 'berries', 'beverages', 'birthday', 'biscotti', 'bisques-cream-soups', 'bizarre', 'black-beans', 'blueberries', 'bok-choys', 'brazilian', 'bread-machine', 'bread-pudding', 'breads', 'breakfast', 'breakfast-eggs', 'brewing', 'brisket', 'british-columbian', 'broccoli', 'broil', 'brown-bag', 'brown-rice', 'brownies', 'brunch', 'burgers', 'cabbage', 'cajun', 'cake-fillings-and-frostings', 'cakes', 'californian', 'cambodian', 'camping', 'canadian', 'candy', 'canning', 'cantonese', 'caribbean', 'carrots', 'casseroles', 'casseroles-one-dish-meal', 'catfish', 'cauliflower',  'central-american', 'chard', 'cheese', 'cheesecake', 'cherries', 'chick-peas-garbanzos', 'chicken', 'chicken-breasts', 'chicken-livers', 'chicken-thighs-legs', 'chilean', 'chili', 'chinese', 'chinese-new-year', 'chocolate', 'chocolate-chip-cookies', 'chowders', 'christmas', 'chutneys', 'cinco-de-mayo', 'citrus', 'clams', 'clear-soups', 'cobblers-and-crisps', 'cocktails', 'coconut', 'cod', 'coffee-cakes', 'collard-greens', 'college', 'colombian', 'comfort-food', 'condiments-etc', 'congolese', 'cookies-and-brownies', 'cookies-and-brownies-nuts', 'cooking-mixes', 'copycat', 'corn', 'costa-rican', 'crab', 'crawfish', 'creole', 'crock-pot-main-dish', 'crock-pot-slow-cooker', 'crusts-pastry-dough-2', 'cuban', 'cuisine', 'cupcakes', 'curries', 'czech', 'dairy-free', 'danish', 'deep-fry', 'deer', 'dehydrator', 
'desserts', 'diabetic', 'dinner-party', 'dips', 'dips-lunch-snacks', 'drop-cookies', 'duck', 'duck-breasts', 'dutch', 'easter', 'easy', 'ecuadorean', 'egg-free', 'eggplant', 'eggs', 'eggs-breakfast', 'eggs-dairy', 'egyptian', 'elbow-macaroni', 'elk', 'english',  'ethiopian', 'european', 'fall', 'fathers-day', 'filipino', 'fillings-and-frostings-chocolate', 'fillings-and-frostings-fruit', 'finger-food', 'finnish', 'fish', 'flat-shapes', 'food-processor-blender', 'for-1-or-2', 'for-large-groups', 'fourth-of-july', 'freezer', 'french', 'freshwater-fish', 'from-scratch', 'frozen-desserts', 'fruit', 'fudge', 'garnishes', 'gelatin', 'gelatin-fruit', 'georgian', 'german', 'gifts', 'gluten-free', 'goose', 'grains', 'granola-and-porridge', 'grapes', 'greek', 'green-yellow-beans', 'greens', 'grilling', 'ground-beef', 'guatemalan', 'gumbo', 'halibut', 'halloween', 'halloween-cakes', 'halloween-cocktails', 'halloween-cupcakes', 'ham', 'hand-formed-cookies', 'hanukkah', 'hawaiian', 'healthy', 'healthy-2', 'heirloom-historical', 'heirloom-historical-recipes', 'herb-and-spice-mixes', 'hidden-valley-ranch', 'high-calcium', 'high-fiber', 'high-protein', 'holiday-event', 'homeopathy-remedies', 'honduran', 'household-cleansers', 'hunan', 'hungarian', 'ice-cream', 'icelandic', 'independence-day', 'indian', 'indonesian', 'inexpensive', 'infant-baby-friendly', 'iranian-persian', 'iraqi', 'irish', 'irish-st-patricks-day', 'italian', 'jams-and-preserves', 'japanese', 'jellies', 'jewish-ashkenazi', 'jewish-sephardi', 'kid-friendly', 'kiwifruit', 'korean', 'kosher', 'kwanzaa', 'labor-day', 'lactose', 'lamb-sheep', 'laotian', 'lasagna', 'lebanese', 'leftovers', 'lemon', 'lemon-cake', 'lemon-desserts', 'lentils', 'lettuces', 'libyan', 'lime', 'lobster', 'long-grain-rice', 'low-calorie', 'low-carb', 'low-cholesterol', 'low-fat', 'low-protein', 'low-saturated-fat', 'low-sodium', 'lunch', 'macaroni-and-cheese', 'mahi-mahi', 'main-dish', 'main-dish-beef', 'main-dish-chicken', 
'main-dish-crock-pot',  'malaysian', 'mango', 'manicotti', 'mardi-gras-carnival', 'margarita', 'marinades-and-rubs', 'mashed-potatoes', 'meat', 'meatballs', 'meatloaf', 'medium-grain-rice', 'melons', 'memorial-day', 'mexican', 'micro-melanesia', 'microwave', 'middle-eastern', 'midwestern', 'mixer', 'mongolian', 'moose', 'moroccan', 'mothers-day', 'muffins', 'mushrooms', 'mussels', 'namibian', 'native-american', 'navy-bean-soup', 'nepalese', 'new-years', 'new-zealand', 'nigerian', 'no-cook', 'no-shell-fish', 'non-alcoholic', 'non-food-products', 'north-american', 'northeastern-united-states', 'norwegian', 'novelty', 'nut-free', 'nuts', 'oamc-freezer-make-ahead', 'oatmeal', 'oaxacan', 'octopus', 'omelets-and-frittatas', 'one-dish-meal', 'onions', 'ontario', 'orange-roughy', 'oranges', 'oven', 'oysters', 'pacific-northwest', 'pakistani', 'palestinian', 'pancakes-and-waffles', 'papaya', 'passover', 'pasta', 'pasta-elbow-macaroni', 'pasta-rice-and-grains', 'pasta-shells', 'peaches', 'peanut-butter', 'pears', 'penne', 'pennsylvania-dutch', 'peppers', 'perch', 'peruvian', 'pet-food', 'pheasant', 'pickeral', 'picnic', 'pies', 'pies-and-tarts', 'pineapple', 'pitted-fruit', 'pizza', 'plums', 'polish', 'polynesian', 'pork', 'pork-chops', 'pork-loins', 'pork-ribs', 'pork-sausage', 'portuguese', 'pot-pie', 'pot-roast', 'potato-soup', 'potatoes', 'potluck', 'poultry',  'prepared-potatoes', 'pressure-canning', 'pressure-cooker', 'puddings-and-mousses', 'puerto-rican', 'pumpkin', 'pumpkin-bread', 'punch', 'quail', 'quebec', 'queso-for-all', 'quiche', 'quick-breads', 'rabbit', 'ragu-recipe-contest', 'ramadan', 'raspberries', 'ravioli-tortellini', 'refrigerator', 'reynolds-wrap', 'rice', 'roast', 'roast-beef', 'roast-beef-comfort-food', 'rolled-cookies', 'rolls-biscuits', 'romantic', 'rosh-hashana', 'rosh-hashanah', 'russian', 'salad-dressings', 'salads', 'salmon', 'salsas', 'saltwater-fish', 'sandwiches', 'sauces', 'saudi-arabian', 'savory', 'savory-pies', 'savory-sauces', 
'scallops', 'scandinavian', 'scones', 'scottish', 'seafood', 'seasonal', 'served-cold', 'served-hot', 'shakes', 'shellfish', 'short-grain-rice', 'shrimp', 'side-dishes', 'simply-potatoes', 'simply-potatoes2', 'small-appliance', 'smoker', 'smoothies', 'snacks', 'sole-and-flounder', 'somalian', 'soul', 'soups-stews', 'sourdough', 'south-african', 'south-american', 'south-west-pacific', 'southern-united-states', 'southwestern-united-states', 'soy-tofu', 'spaghetti', 'spaghetti-sauce', 'spanish', 'spicy', 'spinach', 'spreads', 'spreads-fruit', 'spring', 'squash', 'squid', 'st-patricks-day', 'steak', 'steaks', 'steam', 'stews', 'stir-fry', 'stocks', 'stove-top', 'strawberries', 'stuffings-dressings', 'sudanese', 'sugar-cookies', 'summer', 'super-bowl', 'superbowl', 'swedish', 'sweet', 'sweet-sauces', 'swiss', 'szechuan', 'tarts', 'tempeh', 'tex-mex', 'thai', 'thanksgiving', 'tilapia', 'toddler-friendly', 'tomatoes', 'tropical-fruit', 'trout', 'tuna', 'turkey', 'turkey-breasts', 'turkey-burgers', 'turkish', 'unprocessed-freezer', 'valentines-day', 'veal', 'vegan', 'vegetables', 'vegetarian', 'veggie-burgers', 'venezuelan', 'very-low-carbs', 'vietnamese', 'water-bath', 'wedding', 'weeknight', 'welsh', 'white-rice', 'whitefish', 'whole-chicken', 'whole-duck', 'whole-turkey', 'wild-game', 'wings', 'winter', 'yams-sweet-potatoes', 'yeast', 'zucchini'] 

In [9]:
def create_prompt_for_tag(tag: str) -> str:
    """
    Create a detailed prompt that restricts the model to VALID_TAGS.

    The full tag list is embedded in the prompt together with examples of how
    everyday wording maps onto listed tags.

    Args:
        tag: Recipe tag the conversation should start from.

    Returns:
        The prompt text to send as the user message.
    """
    # Convert list to string for prompt
    tags_list_str = ", ".join(VALID_TAGS)
    
    # The mapping examples must agree with VALID_TAGS: "spicy" IS a listed tag,
    # so the prompt must not tell the model that it is missing.
    prompt = f"""Generate a realistic multi-turn conversation about searching for '{tag}' recipes.

CRITICAL: ONLY use tags from this list. DO NOT create new tags:
{tags_list_str}

IMPORTANT: Return ONLY valid JSON, no other text.

When user says things like:
- "quick" → use "30-minutes-or-less" or "15-minutes-or-less" (NOT "quick")
- "fast" → use "15-minutes-or-less" (NOT "fast")
- "healthy" → use "healthy" (this IS in our list)
- "spicy" → use "spicy" (this IS in our list)
- a word with no matching tag in the list (e.g. "fancy") → DO NOT add any tag
- "Mexican food" → use "mexican" (lowercase, as in our list)

The conversation should start with '{tag}' and build from there using ONLY valid tags.

Return this exact JSON structure:
{{
    "tag": "{tag}",
    "turns": [
        {{
            "user_says": "what the user types",
            "state_after": {{
                "name_or_description": "",
                "include": [],
                "include_groups": [],
                "exclude": [],
                "tags": [],  // ONLY tags from the valid list!
                "tags_or": [],
                "exclude_tags": [],
                "count": 5
            }},
            "explanation": "what changed and why"
        }}
    ]
}}

Example conversation for '{tag}':
Turn 1: "I want {tag} recipes" → tags: ["{tag}"]  
Turn 2: Build on this with ONLY valid tags from our list
"""
    
    return prompt

In [10]:
# Generate once with the tag-restricted prompt, then audit each turn's tags
test_conversation = generate_conversation_with_openai("breakfast", temperature=0.6)

if test_conversation:
    print("\nChecking if tags are valid:")
    for turn_no, turn in enumerate(test_conversation['turns'], start=1):
        state_tags = turn['state_after']['tags']
        print(f"\nTurn {turn_no}: {turn['user_says']}")
        print(f"Tags used: {state_tags}")

        # Anything not present in VALID_TAGS counts as invalid
        invalid_tags = [candidate for candidate in state_tags if candidate not in VALID_TAGS]
        verdict = f"❌ INVALID TAGS FOUND: {invalid_tags}" if invalid_tags else "✅ All tags are valid!"
        print(verdict)

✅ Generated conversation for 'breakfast' (temp=0.6)

Checking if tags are valid:

Turn 1: I'm looking for some quick breakfast recipes.
Tags used: ['breakfast', '30-minutes-or-less']
✅ All tags are valid!


In [11]:
# Print the whole conversation turn by turn, with the populated state fields
print("\n=== FULL CONVERSATION ANALYSIS ===")
print("Starting tag: '{}'".format(test_conversation['tag']))
print("-" * 50)

for turn_no, turn in enumerate(test_conversation['turns'], start=1):
    print(f"\nTurn {turn_no}:")
    print('User: "{}"'.format(turn['user_says']))

    state = turn['state_after']
    print("State changes:")

    # Direct indexing on purpose: a missing key surfaces as a KeyError here
    for label, key in (("Tags", "tags"), ("Include", "include"),
                       ("Exclude", "exclude"), ("Tags OR", "tags_or")):
        if state[key]:
            print(f"  {label}: {state[key]}")

    print("Explanation: {}".format(turn['explanation']))


=== FULL CONVERSATION ANALYSIS ===
Starting tag: 'breakfast'
--------------------------------------------------

Turn 1:
User: "I'm looking for some quick breakfast recipes."
State changes:
  Tags: ['breakfast', '30-minutes-or-less']
Explanation: Added '30-minutes-or-less' tag based on user request for quick recipes.


In [12]:
# Dump the parsed response as-is to inspect its structure
print("Raw conversation data:", json.dumps(test_conversation, indent=2), sep="\n")

# Compare the number of generated turns with what the prompt was meant to produce
num_turns = len(test_conversation['turns'])
print("\nNumber of turns generated: {}".format(num_turns))
print("Expected: 3-4 turns")

Raw conversation data:
{
  "tag": "breakfast",
  "turns": [
    {
      "user_says": "I'm looking for some quick breakfast recipes.",
      "state_after": {
        "name_or_description": "",
        "include": [],
        "include_groups": [],
        "exclude": [],
        "tags": [
          "breakfast",
          "30-minutes-or-less"
        ],
        "tags_or": [],
        "exclude_tags": [],
        "count": 5
      },
      "explanation": "Added '30-minutes-or-less' tag based on user request for quick recipes."
    }
  ]
}

Number of turns generated: 1
Expected: 3-4 turns


In [13]:
def create_prompt_for_tag(tag: str) -> str:
    """
    Build a prompt that insists on a multi-turn conversation using VALID_TAGS.

    The prompt embeds the allowed tag list and a worked three-turn example.
    The example's state_after objects list only tags / include / exclude / count.
    """
    # Flatten the allowed tags into one comma-separated line for the prompt
    allowed_tags = ", ".join(VALID_TAGS)

    return f"""Generate a realistic MULTI-TURN conversation about searching for '{tag}' recipes.

CRITICAL: ONLY use tags from this list. DO NOT create new tags:
{allowed_tags}

IMPORTANT: 
1. Return ONLY valid JSON
2. Generate EXACTLY 3-4 turns (not just 1!)
3. Each turn builds on the previous state

Return this structure with MULTIPLE turns:
{{
    "tag": "{tag}",
    "turns": [
        {{  // Turn 1
            "user_says": "I want {tag} recipes",
            "state_after": {{"tags": ["{tag}"], "include": [], "exclude": [], "count": 5}},
            "explanation": "Started with {tag} tag"
        }},
        {{  // Turn 2 - USER REFINES THE SEARCH
            "user_says": "make it quick",
            "state_after": {{"tags": ["{tag}", "30-minutes-or-less"], "include": [], "exclude": [], "count": 5}},
            "explanation": "Added time constraint"
        }},
        {{  // Turn 3 - USER ADDS MORE CONSTRAINTS
            "user_says": "and healthy",
            "state_after": {{"tags": ["{tag}", "30-minutes-or-less", "healthy"], "include": [], "exclude": [], "count": 5}},
            "explanation": "Added healthy requirement"
        }}
    ]
}}

Create a conversation that shows:
- Turn 1: Initial request for '{tag}'
- Turn 2: Add a constraint (time, diet, etc.)
- Turn 3: Add another constraint or ingredient
- Turn 4 (optional): Further refinement
"""

# Re-run the generation now that the prompt spells out the multi-turn requirement
print("Testing with more explicit multi-turn prompt...")
test_conversation = generate_conversation_with_openai(tag="breakfast", temperature=0.6)

Testing with more explicit multi-turn prompt...
✅ Generated conversation for 'breakfast' (temp=0.6)


In [15]:
# Walk through the new conversation, tolerating state fields that are absent
print("\n=== FULL CONVERSATION ANALYSIS ===")
print("Starting tag: '{}'".format(test_conversation['tag']))
print("-" * 50)

# Turn count first
num_turns = len(test_conversation['turns'])
print(f"Number of turns: {num_turns} ✅")
print("-" * 50)

# (label, key) pairs in the order they should be reported
STATE_FIELDS_TO_SHOW = (
    ("Tags", "tags"),
    ("Include", "include"),
    ("Exclude", "exclude"),
    ("Tags OR", "tags_or"),
    ("Include Groups", "include_groups"),
    ("Exclude Tags", "exclude_tags"),
)

for turn_no, turn in enumerate(test_conversation['turns'], start=1):
    print(f"\nTurn {turn_no}:")
    print('User: "{}"'.format(turn['user_says']))

    state = turn['state_after']
    print("State changes:")

    # .get() keeps a missing key from raising KeyError; empty values are skipped
    for label, key in STATE_FIELDS_TO_SHOW:
        value = state.get(key)
        if value:
            print(f"  {label}: {value}")

    print("Explanation: {}".format(turn.get('explanation', 'No explanation provided')))


=== FULL CONVERSATION ANALYSIS ===
Starting tag: 'breakfast'
--------------------------------------------------
Number of turns: 4 ✅
--------------------------------------------------

Turn 1:
User: "I want breakfast recipes"
State changes:
  Tags: ['breakfast']
Explanation: Started with breakfast tag

Turn 2:
User: "make it quick"
State changes:
  Tags: ['breakfast', '30-minutes-or-less']
Explanation: Added time constraint

Turn 3:
User: "and healthy"
State changes:
  Tags: ['breakfast', '30-minutes-or-less', 'healthy']
Explanation: Added healthy requirement

Turn 4:
User: "with fruits"
State changes:
  Tags: ['breakfast', '30-minutes-or-less', 'healthy', 'fruit']
Explanation: Added fruits as an ingredient


In [16]:
# Debug aid: list the keys the model actually returned in the first state
print("\n=== CHECKING STATE STRUCTURE ===")
first_state = test_conversation['turns'][0]['state_after']
print("Keys in state:", list(first_state))
print("\nFull first state:", json.dumps(first_state, indent=2), sep="\n")


=== CHECKING STATE STRUCTURE ===
Keys in state: ['tags', 'include', 'exclude', 'count']

Full first state:
{
  "tags": [
    "breakfast"
  ],
  "include": [],
  "exclude": [],
  "count": 5
}


In [17]:
def create_prompt_for_tag(tag: str) -> str:
    """
    Build the generation prompt, spelling out the complete state schema.

    The allowed tags and an empty eight-field state are embedded in the
    prompt, which states that every state_after must carry all of those fields.
    """
    allowed_tags = ", ".join(VALID_TAGS)

    # Blank template naming every field a state_after object must contain
    blank_state = dict(
        name_or_description="",
        include=[],
        include_groups=[],
        exclude=[],
        tags=[],
        tags_or=[],
        exclude_tags=[],
        count=5,
    )
    pretty_state = json.dumps(blank_state, indent=2)  # multi-line form for the field list
    inline_state = json.dumps(blank_state)            # single-line form for the turn template

    return f"""Generate a realistic MULTI-TURN conversation about searching for '{tag}' recipes.

CRITICAL: ONLY use tags from this list:
{allowed_tags}

EVERY state_after MUST have ALL these fields (even if empty):
{pretty_state}

Generate EXACTLY 3-4 turns. Return ONLY this JSON:
{{
    "tag": "{tag}",
    "turns": [
        {{
            "user_says": "user input here",
            "state_after": {inline_state},
            "explanation": "what changed"
        }}
    ]
}}

Example conversation flow:
Turn 1: "I want {tag} recipes" → tags: ["{tag}"]
Turn 2: "make it quick" → tags: ["{tag}", "30-minutes-or-less"]
Turn 3: "vegetarian or vegan" → tags_or: [["vegetarian", "vegan"]]
Turn 4: "no nuts" → exclude: ["nuts"]

Remember: ALL 8 fields must be present in EVERY state_after!
"""

# Regenerate with the full-schema prompt
print("Testing with complete state structure...")
test_conversation = generate_conversation_with_openai("breakfast", temperature=0.6)

# Confirm that the first turn's state carries every expected field
if test_conversation:
    print("\nChecking first turn's state has all fields:")
    first_state = test_conversation['turns'][0]['state_after']
    required_fields = [
        "name_or_description", "include", "include_groups", "exclude",
        "tags", "tags_or", "exclude_tags", "count",
    ]

    for field in required_fields:
        report = f"✅ {field}: {first_state[field]}" if field in first_state else f"❌ {field}: MISSING!"
        print(report)

Testing with complete state structure...
✅ Generated conversation for 'breakfast' (temp=0.6)

Checking first turn's state has all fields:
✅ name_or_description: 
✅ include: []
✅ include_groups: []
✅ exclude: []
✅ tags: ['breakfast']
✅ tags_or: []
✅ exclude_tags: []
✅ count: 5


In [18]:
# Final walkthrough of the conversation, listing only the populated state fields
print("\n=== COMPLETE CONVERSATION (FINAL CHECK) ===")
print("Starting tag: '{}'".format(test_conversation['tag']))
print("-" * 50)

for turn_no, turn in enumerate(test_conversation['turns'], start=1):
    print('\nTurn {}: "{}"'.format(turn_no, turn['user_says']))
    state = turn['state_after']

    print("State changes:")
    # A plain truthiness test already skips empty lists and empty strings
    for key, value in state.items():
        if value:
            print(f"  {key}: {value}")

    print("Explanation: {}".format(turn['explanation']))

# Re-check every tag used anywhere in the conversation against VALID_TAGS
print("\n" + "=" * 50)
all_tags = [used for turn in test_conversation['turns'] for used in turn['state_after']['tags']]

unique_tags = list(set(all_tags))
invalid = [candidate for candidate in unique_tags if candidate not in VALID_TAGS]

if not invalid:
    print("✅ All tags are valid!")
else:
    print(f"⚠️  Unknown tags used: {invalid}")
    print("Add these to VALID_TAGS if they should be valid")


=== COMPLETE CONVERSATION (FINAL CHECK) ===
Starting tag: 'breakfast'
--------------------------------------------------

Turn 1: "I'm looking for some breakfast recipe ideas"
State changes:
  tags: ['breakfast']
  count: 5
Explanation: User is initiating the conversation about breakfast recipes

Turn 2: "I need something that can be prepared in under 30 minutes"
State changes:
  tags: ['breakfast', '30-minutes-or-less']
  count: 5
Explanation: User requested quick breakfast recipes

Turn 3: "I prefer vegetarian or vegan options"
State changes:
  tags: ['breakfast', '30-minutes-or-less']
  tags_or: [['vegetarian', 'vegan']]
  count: 5
Explanation: User specified a preference for vegetarian or vegan breakfast recipes

Turn 4: "Please exclude any recipes with nuts"
State changes:
  exclude: ['nuts']
  tags: ['breakfast', '30-minutes-or-less']
  tags_or: [['vegetarian', 'vegan']]
  count: 5
Explanation: User wants to exclude recipes with nuts

✅ All tags are valid!


In [19]:
def generate_all_conversations(tags_list, model="gpt-3.5-turbo", temperature=0.6, batch_size=10, pause_seconds=1.0):
    """
    Generate one conversation per tag, printing progress along the way.

    Args:
        tags_list: Tags to generate conversations for, processed in order.
        model: Model name forwarded to generate_conversation_with_openai.
        temperature: Sampling temperature forwarded to the same call.
        batch_size: A progress summary is printed (followed by a pause) each
            time this many tags have been processed. Zero, negative or None
            disables both the summary and the pause.
        pause_seconds: Length of that pause in seconds (default 1.0, as
            before). Zero or a negative value skips the sleep.

    Returns:
        A tuple ``(all_conversations, failed_tags)``: the conversations that
        were generated, and the tags for which generation returned nothing
        or raised.
    """
    all_conversations = []
    failed_tags = []
    total = len(tags_list)
    
    print(f"🚀 Generating conversations for {total} tags...")
    print(f"   Model: {model}")
    print(f"   Temperature: {temperature}")
    print("="*50)
    
    # Guard once up front so that batch_size=0 cannot cause a modulo-by-zero below
    batching_enabled = bool(batch_size) and batch_size > 0
    
    for i, tag in enumerate(tags_list):
        # Progress indicator
        if batching_enabled and i > 0 and i % batch_size == 0:
            print(f"\n--- Progress: {i}/{total} completed ---")
            print(f"Failed so far: {len(failed_tags)}")
            if pause_seconds > 0:
                time.sleep(pause_seconds)  # Small delay to avoid rate limits
        
        print(f"[{i+1}/{total}] {tag}...", end=" ")
        
        try:
            conversation = generate_conversation_with_openai(tag, model, temperature)
            if conversation:
                all_conversations.append(conversation)
                print("✅")
            else:
                failed_tags.append(tag)
                print("❌ Empty response")
        except Exception as e:
            # Keep going: a single bad tag should not abort the whole run
            failed_tags.append(tag)
            print(f"❌ Error: {str(e)[:50]}...")
    
    # Summary
    print("\n" + "="*50)
    print(f"✅ Successfully generated: {len(all_conversations)}/{total}")
    if failed_tags:
        print(f"❌ Failed tags ({len(failed_tags)}): {failed_tags[:5]}...")
    
    return all_conversations, failed_tags

# Dry run on a handful of tags before committing to the full list
test_tags = ["breakfast", "lunch", "dinner", "vegetarian", "mexican"]
print("Testing with 5 tags first...")

test_conversations, test_failed = generate_all_conversations(tags_list=test_tags)

Testing with 5 tags first...
🚀 Generating conversations for 5 tags...
   Model: gpt-3.5-turbo
   Temperature: 0.6
[1/5] breakfast... ✅ Generated conversation for 'breakfast' (temp=0.6)
✅
[2/5] lunch... ✅ Generated conversation for 'lunch' (temp=0.6)
✅
[3/5] dinner... ✅ Generated conversation for 'dinner' (temp=0.6)
✅
[4/5] vegetarian... ✅ Generated conversation for 'vegetarian' (temp=0.6)
✅
[5/5] mexican... ✅ Generated conversation for 'mexican' (temp=0.6)
✅

✅ Successfully generated: 5/5


In [20]:
import json
from datetime import datetime

def save_conversations(conversations, filename=None):
    """
    Write the conversations to disk as indented JSON and return the file name.

    When no filename is given, one of the form
    recipe_conversations_<YYYYMMDD_HHMMSS>.json is generated from the current time.
    """
    if filename is None:
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = "recipe_conversations_{}.json".format(stamp)

    # Serialise straight into the open file handle
    with open(filename, 'w') as out_file:
        json.dump(conversations, out_file, indent=2)

    print("💾 Saved {} conversations to {}".format(len(conversations), filename))
    return filename

# Persist the dry-run results, but only if anything was generated
if test_conversations:
    save_conversations(test_conversations, filename="test_conversations.json")

💾 Saved 5 conversations to test_conversations.json


In [None]:
# Update the generate function to use GPT-4.1
def generate_conversation_with_openai(tag: str, model="gpt-4.1", temperature=0.6) -> dict:
    """
    Generate a single conversation for a given tag using the OpenAI chat API.

    Builds the prompt with create_prompt_for_tag(tag), sends it together with a
    fixed system message, strips an optional Markdown code fence from the
    reply and parses the remainder as JSON. Defaults to GPT-4.1.

    Args:
        tag: Recipe tag the conversation should be about.
        model: Chat model name passed to the API.
        temperature: Sampling temperature passed to the API.

    Returns:
        The parsed conversation dict when it contains both 'tag' and 'turns'.
        None when the reply is empty, is not valid JSON, has the wrong shape,
        or the API call raises. Failures are printed, never raised.
    """
    prompt = create_prompt_for_tag(tag)

    try:
        response = openai.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant that generates realistic recipe search conversations. Always return valid JSON."},
                {"role": "user", "content": prompt}
            ],
            temperature=temperature,
            max_tokens=2000
        )

        content = response.choices[0].message.content
        if not content:
            # The message content can be None/empty; report it plainly instead of
            # letting .strip() fail with an opaque AttributeError.
            print(f"❌ Error for '{tag}': empty response")
            return None

        # Extract JSON from response: accept ```json, ```JSON and a bare ``` fence
        json_str = content.strip()
        if json_str.startswith("```"):
            json_str = json_str[3:]
            if json_str[:4].lower() == "json":
                json_str = json_str[4:]
        if json_str.endswith("```"):
            json_str = json_str[:-3]
        json_str = json_str.strip()

        # Parse and validate
        conversation = json.loads(json_str)

        # Quick validation: must be an object carrying the two top-level keys
        if isinstance(conversation, dict) and 'tag' in conversation and 'turns' in conversation:
            print(f"✅ Generated conversation for '{tag}' (temp={temperature})")
            return conversation
        else:
            print(f"❌ Invalid format for '{tag}'")
            return None

    except Exception as e:
        # Best-effort generation: report API and parse errors and signal failure with None
        print(f"❌ Error for '{tag}': {str(e)}")
        return None

# Generate the same 'breakfast' conversation with each model so the two
# results can be compared.
print("=== COMPARING MODELS ===\n")

# First run: GPT-3.5-turbo
print("Testing GPT-3.5-turbo:")
conv_35 = generate_conversation_with_openai(
    tag="breakfast",
    model="gpt-3.5-turbo",
    temperature=0.6,
)

# Second run: GPT-4.1 (the leading newline separates the two progress logs)
print("\nTesting GPT-4.1:")
conv_41 = generate_conversation_with_openai(
    tag="breakfast",
    model="gpt-4.1",
    temperature=0.6,
)

# Compare state management
def check_state_progression(conversation, model_name):
    """Print the tags held after each turn and flag any tag that disappears.

    Args:
        conversation: Parsed conversation dict; its 'turns' list is walked in
            order and each turn's state_after['tags'] is compared with the
            previous turn's.
        model_name: Label used in the section header.

    A turn with no 'state_after' or no 'tags' entry is treated as holding no
    tags, so everything held before it is reported as lost, instead of
    raising KeyError. The conversations are model-generated, so per-turn keys
    cannot be assumed to be present.
    """
    print(f"\n=== {model_name} State Progression ===")
    prev_tags = []

    for turn_number, turn in enumerate(conversation['turns'], start=1):
        current_tags = (turn.get('state_after') or {}).get('tags') or []
        print(f"Turn {turn_number}: {len(current_tags)} tags - {current_tags}")

        # Tags present after the previous turn but absent now were dropped.
        # On the first turn prev_tags is empty, so nothing can be reported.
        lost = set(prev_tags) - set(current_tags)
        if lost:
            print(f"  ❌ Lost: {lost}")

        prev_tags = current_tags

# Run the progression check for each model that returned a conversation
for _label, _conv in (("GPT-3.5-turbo", conv_35), ("GPT-4.1", conv_41)):
    if _conv:
        check_state_progression(_conv, _label)

=== COMPARING MODELS ===

Testing GPT-3.5-turbo:
✅ Generated conversation for 'breakfast' (temp=0.6)

Testing GPT-4.1:
✅ Generated conversation for 'breakfast' (temp=0.6)

=== GPT-3.5-turbo State Progression ===
Turn 1: 1 tags - ['breakfast']
Turn 2: 2 tags - ['breakfast', '30-minutes-or-less']
Turn 3: 2 tags - ['breakfast', 'vegetarian']
  ❌ Lost: {'30-minutes-or-less'}

=== GPT-4.1 State Progression ===
Turn 1: 1 tags - ['breakfast']
Turn 2: 2 tags - ['breakfast', '30-minutes-or-less']
Turn 3: 2 tags - ['breakfast', '30-minutes-or-less']


In [22]:
# Let's see what each model actually generated
def display_conversation(conversation, model_name):
    """Pretty-print every turn of a generated conversation.

    Under a banner carrying ``model_name``, prints for each turn the user
    utterance, every non-empty field of ``state_after`` and the explanation.
    """
    banner = '=' * 60
    print(f"\n{banner}")
    print(f"{model_name} FULL CONVERSATION")
    print(banner)

    turn_number = 0
    for turn in conversation['turns']:
        turn_number += 1
        print(f"\nTurn {turn_number}:")
        print(f"User: \"{turn['user_says']}\"")

        state = turn['state_after']
        print("State changes:")

        # Skip empty lists, empty strings and any other falsy value
        populated = ((field, content) for field, content in state.items() if content)
        for field, content in populated:
            print(f"  {field}: {content}")

        print(f"Explanation: {turn['explanation']}")

# Show the full transcript for every model that produced a conversation
model_outputs = [("GPT-3.5-turbo", conv_35), ("GPT-4.1", conv_41)]
for model, conv in model_outputs:
    if conv:
        display_conversation(conv, model)

# Report, per model, whether any turn populated the tags_or field
divider = "=" * 60
print("\n" + divider)
print("CHECKING FOR tags_or USAGE:")
print(divider)

for model, conv in model_outputs:
    if not conv:
        continue
    uses_tags_or = False
    for turn in conv['turns']:
        if turn['state_after'].get('tags_or', []):
            uses_tags_or = True
            break
    verdict = '✅ Uses tags_or' if uses_tags_or else '❌ No tags_or usage'
    print(f"{model}: {verdict}")


GPT-3.5-turbo FULL CONVERSATION

Turn 1:
User: "I'm looking for some breakfast recipes"
State changes:
  tags: ['breakfast']
  count: 5
Explanation: Initial search for breakfast recipes

Turn 2:
User: "I prefer recipes that are ready in 30 minutes or less"
State changes:
  tags: ['breakfast', '30-minutes-or-less']
  count: 5
Explanation: Filtered for breakfast recipes that can be made in 30 minutes or less

Turn 3:
User: "I'd like some vegetarian breakfast options"
State changes:
  tags: ['breakfast', 'vegetarian']
  count: 5
Explanation: Narrowed down to vegetarian breakfast recipes

GPT-4.1 FULL CONVERSATION

Turn 1:
User: "I want breakfast recipes"
State changes:
  tags: ['breakfast']
  count: 5
Explanation: Added the 'breakfast' tag to search for breakfast recipes.

Turn 2:
User: "Can you show me ones that are quick to make?"
State changes:
  tags: ['breakfast', '30-minutes-or-less']
  count: 5
Explanation: Added the '30-minutes-or-less' tag to filter for quick breakfast recipes.


In [23]:
def create_prompt_for_tag(tag: str) -> str:
    """
    Build the generation prompt for one recipe tag.

    The prompt lists every entry of VALID_TAGS, states the accumulation rules,
    shows a worked four-turn example and ends with the JSON skeleton the model
    must return. ``tag`` is interpolated into the opening line, the skeleton's
    "tag" field and the closing instruction.
    """
    allowed_tags = ", ".join(VALID_TAGS)

    # Doubled braces below are literal braces in the rendered prompt.
    return f"""Generate a realistic MULTI-TURN conversation about searching for '{tag}' recipes.

VALID TAGS (ONLY use these): {allowed_tags}

CRITICAL RULES:
1. Each turn MUST add something new (tags, exclude, tags_or, etc.)
2. NEVER remove previous state - only add to it
3. Use tags_or for "or" conditions (e.g., "vegetarian or vegan")
4. Generate EXACTLY 4 turns

GOOD EXAMPLE showing proper state progression:
Turn 1: "I want dinner recipes" 
  → tags: ["dinner"]
  
Turn 2: "make it quick"
  → tags: ["dinner", "30-minutes-or-less"]
  
Turn 3: "vegetarian or vegan please"
  → tags: ["dinner", "30-minutes-or-less"]
  → tags_or: [["vegetarian", "vegan"]]
  
Turn 4: "no nuts"
  → tags: ["dinner", "30-minutes-or-less"]
  → tags_or: [["vegetarian", "vegan"]]
  → exclude: ["nuts"]

Return ONLY this JSON structure:
{{
    "tag": "{tag}",
    "turns": [
        {{
            "user_says": "string",
            "state_after": {{
                "name_or_description": "",
                "include": [],
                "include_groups": [],
                "exclude": [],
                "tags": [],
                "tags_or": [],
                "exclude_tags": [],
                "count": 5
            }},
            "explanation": "string"
        }}
    ]
}}

Generate a conversation for '{tag}' recipes with 4 turns, each adding something new.
"""

# Test with the improved prompt
print("=== TESTING IMPROVED PROMPT ===\n")

# Test GPT-4.1 with new prompt
print("GPT-4.1 with improved prompt:")
conv_improved = generate_conversation_with_openai("breakfast", model="gpt-4.1", temperature=0.6)

# Analyze the result
if conv_improved:
    # Every field of the search state paired with its "empty" value.
    # Tracking all of them matters: a turn that only touches exclude_tags,
    # include_groups, name_or_description or count would otherwise be
    # reported as "No additions". The first four keep their original order
    # so existing output lines are unchanged.
    tracked_fields = [
        ("tags", []),
        ("tags_or", []),
        ("exclude", []),
        ("include", []),
        ("exclude_tags", []),
        ("include_groups", []),
        ("name_or_description", ""),
        ("count", 5),
    ]

    print("\n=== ANALYSIS ===")
    for i, turn in enumerate(conv_improved['turns']):
        print(f"\nTurn {i+1}: \"{turn['user_says']}\"")

        state = turn['state_after']
        changes = []

        for key, default in tracked_fields:
            value = state.get(key)
            # Show a field only when it is populated and differs from its empty value
            if value and value != default:
                changes.append(f"{key}: {value}")

        print("  → " + ", ".join(changes))

    # Check if each turn adds something
    print("\n=== TURN-BY-TURN ADDITIONS ===")
    prev_state = {}

    for i, turn in enumerate(conv_improved['turns']):
        curr_state = turn['state_after']
        additions = []

        for key, default in tracked_fields:
            # A missing key counts as its empty value, so the first turn is
            # compared against an empty search state.
            curr_val = curr_state.get(key, default)
            prev_val = prev_state.get(key, default)

            if curr_val != prev_val:
                additions.append(f"{key} changed")

        if additions:
            print(f"Turn {i+1}: ✅ Added: {', '.join(additions)}")
        else:
            print(f"Turn {i+1}: ❌ No additions")

        prev_state = curr_state

=== TESTING IMPROVED PROMPT ===

GPT-4.1 with improved prompt:
✅ Generated conversation for 'breakfast' (temp=0.6)

=== ANALYSIS ===

Turn 1: "I'm looking for breakfast recipes."
  → tags: ['breakfast']

Turn 2: "Can you show me options that are easy to make?"
  → tags: ['breakfast', 'easy']

Turn 3: "I'd like them to be ready in 15 minutes or less."
  → tags: ['breakfast', 'easy', '15-minutes-or-less']

Turn 4: "No eggs, please."
  → tags: ['breakfast', 'easy', '15-minutes-or-less']

=== TURN-BY-TURN ADDITIONS ===
Turn 1: ✅ Added: tags changed
Turn 2: ✅ Added: tags changed
Turn 3: ✅ Added: tags changed
Turn 4: ❌ No additions


In [24]:
# First, check what invalid tags were used
print("=== CHECKING TAGS VALIDITY ===")
if not conv_improved:
    # Generation returns None on API or parse failure; there is nothing to validate.
    print("⚠️ No conversation available - skipping tag validity check")
else:
    all_tags_used = []
    for turn in conv_improved['turns']:
        turn_state = turn['state_after']
        # Tags can appear in three fields; all of them must come from VALID_TAGS.
        all_tags_used.extend(turn_state.get('tags', []))
        all_tags_used.extend(turn_state.get('exclude_tags', []))
        for group in turn_state.get('tags_or', []):
            # Each tags_or entry is expected to be a list of alternatives;
            # tolerate a bare string so it is not split into characters.
            if isinstance(group, str):
                all_tags_used.append(group)
            else:
                all_tags_used.extend(group)

    # Sorted so the printed list is stable from run to run
    unique_tags = sorted(set(all_tags_used))
    print(f"Tags used: {unique_tags}")

    valid_tag_set = set(VALID_TAGS)
    invalid_tags = [tag for tag in unique_tags if tag not in valid_tag_set]
    if invalid_tags:
        print(f"❌ Invalid tags: {invalid_tags}")
    else:
        print("✅ All tags valid")

# Now let's make an ULTRA-EXPLICIT prompt
def create_explicit_prompt_for_tag(tag: str) -> str:
    """
    Build an ultra-explicit prompt: rules, a turn-by-turn template and a fully
    worked four-turn JSON example, all specialised to ``tag``.

    Only the first 20 entries of VALID_TAGS are listed in the prompt.

    Returns:
        The rendered prompt string.
    """
    # NOTE(review): rule 2 shows only VALID_TAGS[:20] and refers to "others from
    # the list", which the model is never given. Confirm this is intended before
    # relying on the model to stay inside the valid tag set.
    # Doubled braces below are literal braces in the rendered prompt.
    prompt = f"""Generate a conversation for '{tag}' recipes.

RULES:
1. Generate EXACTLY 4 turns
2. ONLY use these exact tags: {', '.join(VALID_TAGS[:20])}... (and others from the list)
3. Each turn MUST add something new to state_after
4. NEVER remove previous values

TURN-BY-TURN TEMPLATE for '{tag}':

Turn 1: User asks for {tag} → ADD tags: ["{tag}"]

Turn 2: User adds time constraint → ADD "30-minutes-or-less" or "5-ingredients-or-less" to tags

Turn 3: User mentions dietary preference → ADD tags_or: [["vegetarian", "vegan"]] (keep previous tags!)

Turn 4: User excludes ingredient → ADD exclude: ["nuts"] or ["dairy"] (keep everything else!)

EXACT JSON FORMAT:
{{
    "tag": "{tag}",
    "turns": [
        {{
            "user_says": "I need {tag} recipes",
            "state_after": {{
                "name_or_description": "",
                "include": [],
                "include_groups": [],
                "exclude": [],
                "tags": ["{tag}"],
                "tags_or": [],
                "exclude_tags": [],
                "count": 5
            }},
            "explanation": "Initial {tag} request"
        }},
        {{
            "user_says": "something quick please",
            "state_after": {{
                "name_or_description": "",
                "include": [],
                "include_groups": [],
                "exclude": [],
                "tags": ["{tag}", "30-minutes-or-less"],
                "tags_or": [],
                "exclude_tags": [],
                "count": 5
            }},
            "explanation": "Added time constraint"
        }},
        {{
            "user_says": "vegetarian or vegan options",
            "state_after": {{
                "name_or_description": "",
                "include": [],
                "include_groups": [],
                "exclude": [],
                "tags": ["{tag}", "30-minutes-or-less"],
                "tags_or": [["vegetarian", "vegan"]],
                "exclude_tags": [],
                "count": 5
            }},
            "explanation": "Added dietary preference"
        }},
        {{
            "user_says": "no nuts please",
            "state_after": {{
                "name_or_description": "",
                "include": [],
                "include_groups": [],
                "exclude": ["nuts"],
                "tags": ["{tag}", "30-minutes-or-less"],
                "tags_or": [["vegetarian", "vegan"]],
                "exclude_tags": [],
                "count": 5
            }},
            "explanation": "Excluded nuts"
        }}
    ]
}}

Generate similar but VARIED conversation for '{tag}'. Change the wording but follow the exact pattern!
"""

    return prompt

# Update the generation function to use the new prompt
def generate_conversation_with_openai(tag: str, model="gpt-4.1", temperature=0.6) -> dict:
    """Generate one conversation for ``tag`` using the explicit prompt.

    Builds the prompt with create_explicit_prompt_for_tag(tag), calls the
    OpenAI chat API, strips an optional Markdown code fence from the reply
    and parses the remainder as JSON.

    Args:
        tag: Recipe tag the conversation should be about.
        model: Chat model name passed to the API.
        temperature: Sampling temperature passed to the API.

    Returns:
        The parsed conversation dict when it contains both 'tag' and 'turns'.
        None when the reply is empty, is not valid JSON, has the wrong shape,
        or the API call raises. Failures are printed, never raised.
    """
    prompt = create_explicit_prompt_for_tag(tag)  # Use the new explicit prompt

    try:
        response = openai.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "Generate the exact JSON requested. Follow the template precisely."},
                {"role": "user", "content": prompt}
            ],
            temperature=temperature,
            max_tokens=2000
        )

        content = response.choices[0].message.content
        if not content:
            # The message content can be None/empty; report it plainly instead of
            # letting .strip() fail with an opaque AttributeError.
            print(f"❌ Error for '{tag}': empty response")
            return None

        # Accept ```json, ```JSON and a bare ``` fence around the payload
        json_str = content.strip()
        if json_str.startswith("```"):
            json_str = json_str[3:]
            if json_str[:4].lower() == "json":
                json_str = json_str[4:]
        if json_str.endswith("```"):
            json_str = json_str[:-3]
        json_str = json_str.strip()

        conversation = json.loads(json_str)

        # Must be a JSON object carrying the two top-level keys
        if isinstance(conversation, dict) and 'tag' in conversation and 'turns' in conversation:
            print(f"✅ Generated conversation for '{tag}'")
            return conversation
        else:
            print(f"❌ Invalid format for '{tag}'")
            return None

    except Exception as e:
        # Best-effort generation: report API and parse errors and signal failure with None
        print(f"❌ Error for '{tag}': {str(e)}")
        return None

# Exercise the explicit prompt once on 'lunch' at temperature 0.3
print("\n=== TESTING EXPLICIT PROMPT ===")
test_explicit = generate_conversation_with_openai(tag="lunch", model="gpt-4.1", temperature=0.3)

if test_explicit:
    print("\nGenerated conversation:")
    for i, turn in enumerate(test_explicit['turns']):
        print(f"\nTurn {i+1}: \"{turn['user_says']}\"")
        state = turn['state_after']
        # Print each of these three fields only when it is non-empty
        for field in ('tags', 'tags_or', 'exclude'):
            if state.get(field):
                print(f"  {field}: {state[field]}")

=== CHECKING TAGS VALIDITY ===
Tags used: ['breakfast', 'easy', '15-minutes-or-less']
✅ All tags valid

=== TESTING EXPLICIT PROMPT ===
✅ Generated conversation for 'lunch'

Generated conversation:

Turn 1: "Show me some ideas for lunch"
  tags: ['lunch']

Turn 2: "I don't have much time to cook"
  tags: ['lunch', '5-ingredients-or-less']

Turn 3: "Can you make it vegan or vegetarian?"
  tags: ['lunch', '5-ingredients-or-less']
  tags_or: [['vegetarian', 'vegan']]

Turn 4: "Please avoid dairy"
  tags: ['lunch', '5-ingredients-or-less']
  tags_or: [['vegetarian', 'vegan']]
  exclude: ['dairy']


In [25]:
# Install tiktoken if needed
try:
    import tiktoken
except ImportError:
    %pip install tiktoken
    import tiktoken

# Calculate token usage
def calculate_token_usage(tags_list, model="gpt-4"):
    """Estimate how many prompt tokens the tag list costs.

    Args:
        tags_list: Tags to measure; they are joined with ", " exactly as they
            appear in the generation prompts.
        model: Model name whose tokenizer is used for counting. Defaults to
            "gpt-4" (the previous hard-coded value). A name tiktoken cannot
            map to a tokenizer falls back to the "gpt-4" encoding.

    Returns:
        dict with keys:
            num_tags: number of tags in tags_list
            tags_tokens: token count of the joined tag string
            base_prompt_tokens: token count of the abbreviated prompt skeleton
            total_prompt_tokens: tags_tokens + base_prompt_tokens (an
                estimate, because the skeleton below is a shortened stand-in
                for the real prompt)
            tags_string_length: character length of the joined tag string
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # tiktoken raises KeyError for model names it has no mapping for
        encoding = tiktoken.encoding_for_model("gpt-4")

    # Join all tags as they would appear in prompt
    tags_string = ", ".join(tags_list)

    # Count tokens
    num_tokens = len(encoding.encode(tags_string))

    # Shortened stand-in for the fixed part of the prompt
    base_prompt = """Generate a realistic MULTI-TURN conversation about searching for 'TAG' recipes.

VALID TAGS: [TAGS_HERE]

CRITICAL RULES:
1. Each turn MUST add something new to state_after
2. NEVER remove previous values
3. Use tags_or for "or" conditions
4. Generate EXACTLY 4 turns

[REST OF PROMPT...]"""

    # Estimate full prompt size
    base_tokens = len(encoding.encode(base_prompt))
    total_tokens = base_tokens + num_tokens

    return {
        "num_tags": len(tags_list),
        "tags_tokens": num_tokens,
        "base_prompt_tokens": base_tokens,
        "total_prompt_tokens": total_tokens,
        "tags_string_length": len(tags_string)
    }

# Measure the tag list and compare the estimate with each model's token limit
token_info = calculate_token_usage(VALID_TAGS)
total_prompt_tokens = token_info['total_prompt_tokens']

print("=== TOKEN USAGE ANALYSIS ===")
print(f"Number of tags: {token_info['num_tags']}")
print(f"Tags string length: {token_info['tags_string_length']} characters")
print(f"Tags tokens: {token_info['tags_tokens']:,}")
print(f"Base prompt tokens: {token_info['base_prompt_tokens']:,}")
print(f"Total prompt tokens: {total_prompt_tokens:,}")

# Token limit per model
print("\n=== MODEL COMPATIBILITY ===")
models = dict([
    ("gpt-3.5-turbo", 16_385),
    ("gpt-4o", 128_000),
    ("gpt-4.1", 1_047_576),
])

for model, limit in models.items():
    if total_prompt_tokens >= limit:
        print(f"❌ {model}: EXCEEDS {limit:,} token limit!")
        continue
    percentage = (total_prompt_tokens / limit) * 100
    print(f"✅ {model}: Uses {percentage:.1f}% of {limit:,} token limit")

# The all-tags prompt is only defined and measured when the estimate fits GPT-4.1
if total_prompt_tokens >= models['gpt-4.1']:
    print("\n❌ Tags are too many even for GPT-4.1! Need chunking strategy.")
else:
    print("\n✅ All tags fit in GPT-4.1! Creating optimized prompt...")

    def create_complete_prompt_for_tag(tag: str) -> str:
        """Build a compact prompt for ``tag`` that lists every valid tag."""
        tag_catalog = ", ".join(VALID_TAGS)

        # Doubled braces are literal braces in the rendered prompt
        return f"""Generate a conversation for '{tag}' recipes.

VALID TAGS (use ONLY these exact tags): {tag_catalog}

RULES:
1. Generate EXACTLY 4 turns
2. Each turn MUST add something new to state_after
3. NEVER remove previous values
4. Use exact tags from the list above

TURN STRUCTURE:
Turn 1: Initial request → tags: ["{tag}"]
Turn 2: Add time/ease constraint → add a tag like "30-minutes-or-less"
Turn 3: Add dietary preference → use tags_or: [["vegetarian", "vegan"]]
Turn 4: Exclude ingredient → use exclude: ["nuts"] or similar

Generate this exact JSON:
{{
    "tag": "{tag}",
    "turns": [4 turns following the structure above]
}}"""

    # Count the tokens of one fully rendered prompt
    test_prompt = create_complete_prompt_for_tag("breakfast")
    gpt4_encoding = tiktoken.encoding_for_model("gpt-4")
    test_tokens = len(gpt4_encoding.encode(test_prompt))
    print(f"\nActual complete prompt tokens: {test_tokens:,}")

=== TOKEN USAGE ANALYSIS ===
Number of tags: 516
Tags string length: 6281 characters
Tags tokens: 1,913
Base prompt tokens: 75
Total prompt tokens: 1,988

=== MODEL COMPATIBILITY ===
✅ gpt-3.5-turbo: Uses 12.1% of 16,385 token limit
✅ gpt-4o: Uses 1.6% of 128,000 token limit
✅ gpt-4.1: Uses 0.2% of 1,047,576 token limit

✅ All tags fit in GPT-4.1! Creating optimized prompt...

Actual complete prompt tokens: 2,079


In [30]:
import random

def create_prompt_with_reset_option(tag: str, include_reset: bool = False) -> str:
    """Build a five-turn generation prompt for ``tag``.

    With ``include_reset`` False the prompt asks for a conversation in which
    every turn adds to the previous state. With it True the prompt asks for a
    conversation in which the user discards the search at turn 3 and starts
    again. Both variants list all of VALID_TAGS and end with the same JSON
    skeleton, whose "conversation_type" is "accumulating" or "reset".
    """
    all_tags = ", ".join(VALID_TAGS)
    conversation_type = 'reset' if include_reset else 'accumulating'

    if not include_reset:
        # Accumulating variant: each turn adds constraints
        body = f"""Generate a conversation for '{tag}' recipes where each turn ADDS to the previous state.

VALID TAGS (use ONLY these exact tags): {all_tags}

CONVERSATION PATTERN (ACCUMULATING):
Each turn builds on the previous state, adding new constraints.

ACCUMULATING PHRASES:
- "also make it..."
- "and I prefer..."
- "oh, and exclude..."
- "add some ... cuisine"

EXAMPLE PROGRESSION:
Turn 1: "I want {tag} recipes" → tags: ["{tag}"]
Turn 2: "also make it quick" → tags: ["{tag}", "30-minutes-or-less"]
Turn 3: "and vegetarian or vegan" → tags: same, tags_or: [["vegetarian", "vegan"]]
Turn 4: "oh, and no nuts please" → exclude: ["nuts"]
Turn 5: "maybe italian style" → tags: add "italian"
"""
    else:
        # Reset variant: the user starts over at turn 3
        body = f"""Generate a conversation for '{tag}' recipes where the user RESETS their search midway.

VALID TAGS (use ONLY these exact tags): {all_tags}

CONVERSATION PATTERN WITH RESET:
- Turns 1-2: Build up search normally
- Turn 3: User says something like "actually, forget all that" or "let's start over" 
- Turn 3 state: RESET to empty state or just one new tag
- Turns 4-5: Build from the new starting point

RESET PHRASES EXAMPLES:
- "Actually, never mind all that. Let me start fresh with..."
- "Forget everything I said. Just show me..."
- "Let's clear and start over. I want..."
- "Scratch that. Instead, I'd like..."

EXAMPLE PROGRESSION:
Turn 1: "I want {tag} recipes" → tags: ["{tag}"]
Turn 2: "make it healthy" → tags: ["{tag}", "healthy"]
Turn 3: "Actually, forget all that. Just show me quick desserts" → tags: ["desserts", "30-minutes-or-less"]
Turn 4: "vegetarian options" → tags: ["desserts", "30-minutes-or-less", "vegetarian"]
Turn 5: "exclude chocolate" → tags: same, exclude: ["chocolate"]
"""

    # JSON skeleton shared by both variants (doubled braces render as literal braces)
    schema = f"""
Generate EXACTLY 5 turns:
{{
    "tag": "{tag}",
    "conversation_type": "{conversation_type}",
    "turns": [
        {{
            "user_says": "string",
            "state_after": {{
                "name_or_description": "",
                "include": [],
                "include_groups": [],
                "exclude": [],
                "tags": [],
                "tags_or": [],
                "exclude_tags": [],
                "count": 5
            }},
            "explanation": "string"
        }}
    ]
}}"""

    return body + schema

def generate_mixed_conversations(tags_list, reset_percentage=0.2):
    """Generate one conversation per tag, mixing accumulating and reset patterns.

    Args:
        tags_list: Tags to generate conversations for, processed in order.
        reset_percentage: Probability in [0, 1] that a given tag gets the
            "reset" prompt variant instead of the accumulating one.

    Returns:
        List of parsed conversation dicts. Each carries a 'conversation_type'
        key ("reset" or "accumulating") recording which prompt variant was
        used. Tags whose reply could not be parsed, or that did not yield an
        object with a 'turns' key, are reported on stdout and left out.
    """
    conversations = []

    # round() rather than int(): 0.29 * 100 is 28.999... in floating point and
    # int() would report it as 28%. The complement keeps the pair summing to 100.
    reset_pct = round(reset_percentage * 100)
    print(f"\n🎲 Generating mixed conversations")
    print(f"   {100 - reset_pct}% accumulating, {reset_pct}% with resets")
    print("="*60)

    for i, tag in enumerate(tags_list):
        # Randomly decide if this should have a reset
        include_reset = random.random() < reset_percentage

        conversation_type = "reset" if include_reset else "accumulating"
        print(f"[{i+1}/{len(tags_list)}] {tag} ({conversation_type})...", end=" ", flush=True)

        prompt = create_prompt_with_reset_option(tag, include_reset)

        try:
            response = openai.chat.completions.create(
                model="gpt-4.1",
                messages=[
                    {"role": "system", "content": "Generate realistic recipe search conversations."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.4,
                max_tokens=2000
            )

            # Content may be None; treat it as empty so the JSON parser reports it
            json_str = (response.choices[0].message.content or "").strip()
            # Accept ```json, ```JSON and a bare ``` fence around the payload
            if json_str.startswith("```"):
                json_str = json_str[3:]
                if json_str[:4].lower() == "json":
                    json_str = json_str[4:]
            if json_str.endswith("```"):
                json_str = json_str[:-3]
            json_str = json_str.strip()

            conversation = json.loads(json_str)
            if not isinstance(conversation, dict) or 'turns' not in conversation:
                # Anything without a 'turns' entry is not a usable conversation
                print("❌ Invalid format")
                continue

            # Record the variant that was actually requested instead of
            # trusting the model to echo it back.
            conversation['conversation_type'] = conversation_type
            conversations.append(conversation)
            print("✅")

        except Exception as e:
            # Best-effort batch: report the failure and move on to the next tag
            print(f"❌ {str(e)[:30]}")

    return conversations

# Generate a small mixed batch from the first ten valid tags
test_tags = VALID_TAGS[:10]
test_conversations = generate_mixed_conversations(test_tags, reset_percentage=0.3)

# Tally conversation types; for reset conversations, show the first turn that drops a tag
print("\n=== ANALYZING CONVERSATION PATTERNS ===")
reset_count = 0
accumulating_count = 0

for conv in test_conversations:
    if conv.get('conversation_type') != 'reset':
        accumulating_count += 1
        continue

    reset_count += 1
    turns = conv['turns']
    # Walk consecutive turn pairs; i is the 0-based index of the later turn
    for i, (before, after) in enumerate(zip(turns, turns[1:]), start=1):
        prev_tags = set(before['state_after'].get('tags', []))
        curr_tags = set(after['state_after'].get('tags', []))

        # A tag held before but missing now marks the reset point
        if prev_tags - curr_tags:
            print(f"\n🔄 Reset in '{conv['tag']}' at turn {i+1}:")
            print(f"   User said: \"{after['user_says']}\"")
            print(f"   Tags before: {list(prev_tags)}")
            print(f"   Tags after: {list(curr_tags)}")
            break

print(f"\n📊 Summary: {accumulating_count} accumulating, {reset_count} with resets")


🎲 Generating mixed conversations
   70% accumulating, 30% with resets
[1/10] 1-day-or-more (accumulating)... ✅
[2/10] 15-minutes-or-less (reset)... ✅
[3/10] 3-steps-or-less (accumulating)... ✅
[4/10] 30-minutes-or-less (accumulating)... ✅
[5/10] 4-hours-or-less (accumulating)... ✅
[6/10] 5-ingredients-or-less (accumulating)... ✅
[7/10] 60-minutes-or-less (accumulating)... ✅
[8/10] a1-sauce (reset)... ✅
[9/10] african (accumulating)... ✅
[10/10] american (accumulating)... ✅

=== ANALYZING CONVERSATION PATTERNS ===

🔄 Reset in '15-minutes-or-less' at turn 3:
   User said: "Actually, never mind all that. Let me start fresh with breakfast recipes."
   Tags before: ['vegetarian', '15-minutes-or-less']
   Tags after: ['breakfast']

🔄 Reset in 'a1-sauce' at turn 3:
   User said: "Actually, never mind all that. Let me start fresh with quick appetizers."
   Tags before: ['a1-sauce', 'beef']
   Tags after: ['appetizers', '30-minutes-or-less']

📊 Summary: 8 accumulating, 2 with resets


In [37]:
# Tag Categories for 516 Recipe Tags

# Complete Tag Grouping - All 516 Recipe Tags Categorized

TAG_GROUPS = {
    "TIME_DURATION": [
        '15-minutes-or-less', '30-minutes-or-less', '60-minutes-or-less', 
        '4-hours-or-less', '1-day-or-more'
    ],
    
    "COMPLEXITY_EASE": [
        '3-steps-or-less', '5-ingredients-or-less', 'easy', 'beginner-cook',
        'for-1-or-2', 'for-large-groups', 'one-dish-meal', 'from-scratch'
    ],
    
    "DIETARY_RESTRICTIONS": [
        'vegan', 'vegetarian', 'gluten-free', 'dairy-free', 'egg-free', 
        'nut-free', 'lactose', 'kosher', 'diabetic', 'no-shell-fish'
    ],
    
    "HEALTH_NUTRITION": [
        'healthy', 'healthy-2', 'low-calorie', 'low-carb', 'low-cholesterol',
        'low-fat', 'low-protein', 'low-saturated-fat', 'low-sodium', 
        'high-calcium', 'high-fiber', 'high-protein', 'very-low-carbs'
    ],
    
    "CUISINES": [
        'african', 'american', 'asian', 'australian', 'austrian', 'belgian',
        'brazilian', 'british-columbian', 'cajun', 'californian', 'cambodian',
        'canadian', 'cantonese', 'caribbean', 'central-american', 'chilean',
        'chinese', 'colombian', 'congolese', 'costa-rican', 'creole', 'cuban',
        'czech', 'danish', 'dutch', 'ecuadorean', 'egyptian', 'english',
        'ethiopian', 'european', 'filipino', 'finnish', 'french', 'georgian',
        'german', 'greek', 'guatemalan', 'hawaiian', 'honduran', 'hungarian',
        'icelandic', 'indian', 'indonesian', 'iranian-persian', 'iraqi', 'irish',
        'italian', 'japanese', 'jewish-ashkenazi', 'jewish-sephardi', 'korean',
        'laotian', 'lebanese', 'libyan', 'malaysian', 'mexican', 'middle-eastern',
        'mongolian', 'moroccan', 'namibian', 'native-american', 'nepalese',
        'new-zealand', 'nigerian', 'north-american', 'norwegian', 'oaxacan',
        'pakistani', 'palestinian', 'pennsylvania-dutch', 'peruvian', 'polish',
        'polynesian', 'portuguese', 'puerto-rican', 'russian', 'saudi-arabian',
        'scandinavian', 'scottish', 'somalian', 'soul', 'south-african',
        'south-american', 'south-west-pacific', 'spanish', 'sudanese', 'swedish',
        'swiss', 'thai', 'turkish', 'venezuelan', 'vietnamese', 'welsh',
        'amish-mennonite', 'angolan', 'argentine', 'baja', 'beijing', 'hunan',
        'midwestern', 'northeastern-united-states', 'southern-united-states',
        'southwestern-united-states', 'ontario', 'pacific-northwest', 'quebec',
        'szechuan', 'tex-mex'
    ],
    
    "MEAL_TYPES": [
        'appetizers', 'breakfast', 'brunch', 'lunch', 'dinner-party', 
        'desserts', 'snacks', 'beverages', 'cocktails', 'main-dish'
    ],
    
    "PROTEINS": [
        'beef', 'chicken', 'pork', 'lamb-sheep', 'turkey', 'duck', 'fish',
        'seafood', 'shellfish', 'eggs', 'soy-tofu', 'tempeh', 'beans',
        'beef-liver', 'beef-organ-meats', 'chicken-breasts', 'chicken-livers',
        'chicken-thighs-legs', 'duck-breasts', 'ground-beef', 'ham', 'veal',
        'whole-chicken', 'whole-duck', 'whole-turkey', 'turkey-breasts', 'goose'
    ],
    
    "SEAFOOD_FISH": [
        'bass', 'catfish', 'clams', 'cod', 'crab', 'crawfish', 'halibut',
        'lobster', 'mahi-mahi', 'mussels', 'octopus', 'orange-roughy', 'oysters',
        'perch', 'pickeral', 'salmon', 'scallops', 'shrimp', 'sole-and-flounder',
        'squid', 'tilapia', 'trout', 'tuna', 'whitefish', 'freshwater-fish',
        'saltwater-fish'
    ],
    
    "VEGETABLES": [
        'artichoke', 'asparagus', 'avocado', 'bok-choys', 'broccoli', 'cabbage',
        'carrots', 'cauliflower', 'chard', 'collard-greens', 'corn', 'eggplant',
        'green-yellow-beans', 'greens', 'lettuces', 'mushrooms', 'onions', 'peppers',
        'potatoes', 'spinach', 'squash', 'tomatoes', 'yams-sweet-potatoes', 'zucchini'
    ],
    
    "FRUITS": [
        'apples', 'bananas', 'berries', 'blueberries', 'cherries', 'citrus',
        'coconut', 'grapes', 'kiwifruit', 'lemon', 'lime', 'mango', 'melons',
        'oranges', 'papaya', 'peaches', 'pears', 'pineapple', 'plums',
        'raspberries', 'strawberries', 'tropical-fruit', 'pumpkin', 'fruit'
    ],
    
    "GRAINS_PASTA": [
        'pasta', 'rice', 'grains', 'brown-rice', 'white-rice', 'long-grain-rice',
        'medium-grain-rice', 'short-grain-rice', 'elbow-macaroni', 'penne',
        'spaghetti', 'lasagna', 'manicotti', 'ravioli-tortellini', 'pasta-shells',
        'pasta-elbow-macaroni', 'flat-shapes'
    ],
    
    "COOKING_METHODS": [
        'baking', 'barbecue', 'broil', 'deep-fry', 'grilling', 'oven', 'roast',
        'steam', 'stir-fry', 'stove-top', 'no-cook'
    ],
    
    "EQUIPMENT_APPLIANCES": [
        'bread-machine', 'crock-pot-slow-cooker', 'dehydrator', 'food-processor-blender',
        'freezer', 'microwave', 'mixer', 'pressure-cooker', 'refrigerator',
        'small-appliance', 'smoker', 'crock-pot-main-dish'
    ],
    
    "OCCASIONS_HOLIDAYS": [
        'april-fools-day', 'birthday', 'christmas', 'cinco-de-mayo', 'easter',
        'fathers-day', 'fourth-of-july', 'halloween', 'hanukkah', 'independence-day',
        'irish-st-patricks-day', 'kwanzaa', 'labor-day', 'mardi-gras-carnival',
        'memorial-day', 'mothers-day', 'new-years', 'passover', 'ramadan',
        'rosh-hashana', 'rosh-hashanah', 'st-patricks-day', 'super-bowl',
        'superbowl', 'thanksgiving', 'valentines-day', 'wedding', 'chinese-new-year',
        'holiday-event'
    ],
    
    "SEASONS": [
        'fall', 'spring', 'summer', 'winter'
    ],
    
    "DISH_TYPES": [
        'breads', 'cakes', 'casseroles', 'chili', 'chowders', 'cookies-and-brownies',
        'cupcakes', 'curries', 'dips', 'finger-food', 'fudge', 'ice-cream', 'jellies',
        'jams-and-preserves', 'meatballs', 'meatloaf', 'muffins', 'omelets-and-frittatas',
        'pancakes-and-waffles', 'pies', 'pizza', 'pot-pie', 'pot-roast', 'puddings-and-mousses',
        'quiche', 'salads', 'sandwiches', 'sauces', 'smoothies', 'soups-stews',
        'stews', 'stuffings-dressings', 'tarts', 'wings'
    ],
    
    "SPECIFIC_DISHES": [
        'biscotti', 'brownies', 'burgers', 'cheesecake', 'chocolate-chip-cookies',
        'cobblers-and-crisps', 'coffee-cakes', 'drop-cookies', 'gumbo',
        'hand-formed-cookies', 'macaroni-and-cheese', 'margarita', 'rolled-cookies',
        'sugar-cookies', 'turkey-burgers', 'veggie-burgers'
    ],
    
    "SOUPS": [
        'bean-soup', 'bisques-cream-soups', 'clear-soups', 'navy-bean-soup',
        'potato-soup', 'beans-soups'
    ],
    
    "BAKED_GOODS": [
        'bar-cookies', 'bread-pudding', 'crusts-pastry-dough-2', 'lemon-cake',
        'pies-and-tarts', 'pumpkin-bread', 'quick-breads', 'rolls-biscuits',
        'savory-pies', 'scones', 'sourdough', 'yeast', 'cookies-and-brownies-nuts'
    ],
    
    "INGREDIENTS_MISC": [
        'a1-sauce', 'bacon', 'black-beans', 'cheese', 'chick-peas-garbanzos',
        'chocolate', 'hidden-valley-ranch', 'lentils', 'nuts', 'oatmeal',
        'peanut-butter', 'pitted-fruit', 'prepared-potatoes', 'simply-potatoes',
        'simply-potatoes2', 'stocks'
    ],
    
    "SPECIAL_CATEGORIES": [
        'brown-bag', 'camping', 'college', 'comfort-food', 'copycat',
        'gifts', 'heirloom-historical', 'heirloom-historical-recipes', 'inexpensive',
        'kid-friendly', 'leftovers', 'novelty', 'oamc-freezer-make-ahead', 'picnic',
        'potluck', 'romantic', 'toddler-friendly', 'weeknight', 'infant-baby-friendly'
    ],
    
    "PREPARATION_STYLES": [
        'casseroles-one-dish-meal', 'served-cold', 'served-hot', 'spicy', 'sweet',
        'savory', 'bizarre'
    ],
    
    "CONDIMENTS_SAUCES": [
        'beef-sauces', 'chutneys', 'condiments-etc', 'dips-lunch-snacks',
        'fillings-and-frostings-chocolate', 'fillings-and-frostings-fruit',
        'garnishes', 'marinades-and-rubs', 'salad-dressings', 'salsas',
        'spaghetti-sauce', 'spreads', 'spreads-fruit', 'sweet-sauces',
        'savory-sauces', 'cake-fillings-and-frostings'
    ],
    
    "BEVERAGES_DRINKS": [
        'brewing', 'non-alcoholic', 'punch', 'shakes'
    ],
    
    "PROCESSING_PRESERVATION": [
        'canning', 'pressure-canning', 'unprocessed-freezer', 'water-bath'
    ],
    
    "WILD_GAME": [
        'bear', 'deer', 'elk', 'moose', 'pheasant', 'quail', 'rabbit', 'wild-game'
    ],
    
    "MISC_MEAT_CUTS": [
        'beef-ribs', 'beef-sausage', 'brisket', 'pork-chops', 'pork-loins',
        'pork-ribs', 'pork-sausage', 'roast-beef', 'roast-beef-comfort-food',
        'steak', 'steaks'
    ],
    
    "NON_FOOD": [
        'bath-beauty', 'cooking-mixes', 'herb-and-spice-mixes', 'homeopathy-remedies',
        'household-cleansers', 'non-food-products', 'pet-food'
    ],
    
    "BREAKFAST_SPECIFIC": [
        'breakfast-eggs', 'eggs-breakfast', 'eggs-dairy', 'granola-and-porridge',
        'mashed-potatoes'
    ],
    
    "DESSERT_TYPES": [
        'candy', 'frozen-desserts', 'gelatin', 'gelatin-fruit', 'lemon-desserts'
    ],
    
    "REGIONAL_US": [
        'micro-melanesia'
    ],
    
    "CONTEST_SPONSORED": [
        'queso-for-all', 'ragu-recipe-contest', 'reynolds-wrap'
    ],
    
    "MISC_CATEGORIES": [
        'cuisine', 'halloween-cakes', 'halloween-cocktails', 'halloween-cupcakes',
        'main-dish-beef', 'main-dish-chicken', 'main-dish-crock-pot',
        'meat', 'pasta-rice-and-grains', 'poultry', 'seasonal', 'side-dishes',
        'vegetables'
    ]
}

# Verify that TAG_GROUPS covers exactly the tags in VALID_TAGS (expected: 516).
# Flatten every group's tag list into one list, keeping repeats so that a tag
# assigned to more than one group (or listed twice in a group) stays detectable.
all_grouped_tags = []
for group, tags in TAG_GROUPS.items():
    all_grouped_tags.extend(tags)

print(f"Total tags grouped: {len(all_grouped_tags)}")
print(f"Total unique tags: {len(set(all_grouped_tags))}")
print(f"Number of categories: {len(TAG_GROUPS)}")

# Compare against the reference tag list in both directions:
#   missing    -> tags in VALID_TAGS that no group contains
#   unexpected -> tags placed in a group that VALID_TAGS does not contain
#                 (e.g. a misspelled tag), which a one-way check would not report
original_tags = set(VALID_TAGS)  # Your original 516 tags
grouped_tags = set(all_grouped_tags)
missing = original_tags - grouped_tags
unexpected = grouped_tags - original_tags

# Tally occurrences in a single pass instead of calling list.count() once per
# element, which rescans the whole list each time (quadratic overall).
tag_counts = {}
for tag in all_grouped_tags:
    tag_counts[tag] = tag_counts.get(tag, 0) + 1
# Same contents as before: every occurrence of any tag that appears more than once.
duplicates = [tag for tag in all_grouped_tags if tag_counts[tag] > 1]

if missing:
    print(f"\nMissing tags: {missing}")
if unexpected:
    print(f"\nUnexpected tags (not in VALID_TAGS): {unexpected}")
if duplicates:
    print(f"\nDuplicate tags: {set(duplicates)}")

Total tags grouped: 516
Total unique tags: 516
Number of categories: 33
