In [2]:
import os
import json
import sys
from typing import List, Dict, Any
from dotenv import load_dotenv
from datasets import load_dataset

notebook_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(notebook_dir, '..'))
sys.path.append(project_root)
from agents.gpt_analysis_agent import GPTAnalysisAgent

def load_recipe_dataset() -> List[Dict[str, Any]]:
    """Fetch the recipe dataset from Hugging Face as a list of dicts.

    Returns:
        One dict per row of the 'train' split, with the keys 'title',
        'ingredients' and 'instructions' (read from the 'recipeName',
        'ingredients' and 'steps' columns respectively). If anything
        fails, the error is printed and an empty list is returned
        instead of raising.
    """
    try:
        frame = load_dataset("Ashikan/diabetic-friendly-recipes")['train'].to_pandas()

        # Rename the dataset's columns to the keys used by the rest of the cell.
        recipes = [
            {
                'title': record['recipeName'],
                'ingredients': record['ingredients'],
                'instructions': record['steps'],
            }
            for _, record in frame.iterrows()
        ]

        print(f"Loaded {len(recipes)} recipes")
        return recipes

    except Exception as err:
        # Best-effort loader: report the problem and hand back nothing.
        print(f"Error loading dataset: {err!s}")
        return []

def _join_field(value: Any, separator: str) -> str:
    """Return a recipe field as one string, joining its items with *separator*."""
    # A plain string is already a single piece of text; joining it would
    # insert the separator between every character.
    if isinstance(value, str):
        return value
    # str() lets non-string items (numbers, numpy scalars, ...) be joined too.
    return separator.join(str(item) for item in value)


def create_training_example(recipe: Dict[str, Any], analysis: Dict[str, Any]) -> Dict[str, Any]:
    """Create a training example for RoBERTa.

    Args:
        recipe: Mapping with 'title', 'ingredients' and 'instructions'.
            'ingredients' and 'instructions' may each be either a single
            string or an iterable of items.
        analysis: Mapping that contains at least 'glycemic_load' and
            'gl_analysis'; any other keys are ignored.

    Returns:
        A dict with a 'text' entry (title, ingredients and instructions
        rendered on three lines) and a 'label' entry holding the two
        analysis values.

    Raises:
        KeyError: If a required key is missing from either mapping.
        TypeError: If 'ingredients' or 'instructions' is neither a string
            nor an iterable.
    """
    ingredients = _join_field(recipe['ingredients'], ', ')
    instructions = _join_field(recipe['instructions'], ' ')
    return {
        "text": f"Recipe: {recipe['title']}\nIngredients: {ingredients}\nInstructions: {instructions}",
        "label": {
            "glycemic_load": analysis["glycemic_load"],
            "gl_analysis": analysis["gl_analysis"]
        }
    }


# Read environment variables from a local .env file
# (presumably the API key the GPT agent needs -- confirm against the agent).
load_dotenv()
output_path = os.path.join("processed_data", "roberta_training_data.json")

# Agent that produces the analysis for each recipe
gpt_agent = GPTAnalysisAgent()

# Fetch the recipes; this is an empty list if loading failed
print("Loading recipes...")
recipes = load_recipe_dataset()

# Build the training examples
print("Creating training examples...")
training_examples = []

# Only the first 100 recipes are analysed
for recipe in recipes[:100]:
    try:
        analysis = gpt_agent.analyze_recipe(recipe)

        # Nothing came back for this recipe: move on without an example
        if not analysis:
            continue

        example = create_training_example(recipe, analysis)
        training_examples.append(example)
        print(f"Created example for: {recipe['title']}")
    except Exception as e:
        # One failing recipe must not abort the remaining ones
        print(f"Error processing {recipe['title']}: {e!s}")

# Write the examples as JSON, creating the target folder if it is missing
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, mode='w', encoding='utf-8') as f:
    json.dump(training_examples, f, indent=2)

print(f"Saved {len(training_examples)} examples to {output_path}")

Loading recipes...
Loaded 718 recipes
Creating training examples...
Created example for: one pot creamy chicken pasta
Created example for: lemon blueberry poke cake
Created example for: tomato and red pepper risotto
Created example for: breakfast cookies
Created example for: spiced pear strudel bites
Created example for: tiramisu-style oat pot
Created example for: crispy sesame seed fish
Created example for: vegetable, lentil garbanzo stew
Created example for: smashed pea bruschetta
Created example for: crunchy carrot and apple salad
Created example for: moroccan okra stew with sweet potatoes
Created example for: spiced lentil soup
Created example for: kob stuffed with fennel and orange
Created example for: green vegan chicken risotto
Created example for: 6 grain hot cereal
Created example for: balsamic roasted vegetables with red lentil pasta
Created example for: mediterranean couscous salad
Created example for: oat bran pancake
Created example for: roasted spiced broccoli soup
Create