In [None]:
import pandas as pd
import numpy as np

class FoodCategorizer:
    """Label foods in a nutrition table with nutrient tags, a macro profile and a health score.

    The table must provide the columns read by the row-level methods:
    ``Calories``, ``Protein(g)``, ``Carbs(g)``, ``Fat(g)``, ``Fiber(g)``,
    ``Sugar(g)`` and ``Vitamin C(mg)``. A ``Category`` column is additionally
    required for ``'Original_Category'`` queries.
    """

    # (source column, key in self.thresholds for the "High" cutoff,
    #  hard-coded "Moderate" cutoff, label suffix)
    _TIERED_NUTRIENTS = (
        ('Protein(g)', 'high_protein', 5, 'Protein'),
        ('Carbs(g)', 'high_carb', 10, 'Carb'),
        ('Fat(g)', 'high_fat', 3, 'Fat'),
        ('Fiber(g)', 'high_fiber', 2, 'Fiber'),
        ('Vitamin C(mg)', 'high_vitamin_c', 10, 'Vitamin-C'),
    )

    # category_type accepted by get_foods_by_category -> column that is filtered
    _CATEGORY_COLUMNS = {
        'Nutrient_Categories': 'Nutrient_Categories',
        'Macronutrient_Profile': 'Macronutrient_Profile',
        'Health_Category': 'Health_Category',
        'Original_Category': 'Category',
    }

    def __init__(self, data_path=None, df=None):
        """Load the food table from ``df`` (copied) or from the CSV at ``data_path``.

        ``df`` takes precedence when both are given.

        Raises:
            ValueError: if ``df`` is None and ``data_path`` is falsy.
        """
        if df is not None:
            self.df = df.copy()
        elif data_path:
            self.df = pd.read_csv(data_path)
        else:
            raise ValueError("Either data_path or df must be provided.")

        # Cutoffs for the "High-*" tags, plus the upper bound for "Low-Calorie".
        self.thresholds = {
            'high_protein': 15,
            'high_carb': 20,
            'high_fat': 10,
            'high_fiber': 5,
            'high_vitamin_c': 30,
            'low_calorie': 100
        }

    def nutrient_based_categorization(self, food_row):
        """Return a comma-separated string of nutrient tags for one food.

        ``food_row`` is any mapping-like object indexable by column name
        (a DataFrame row or a plain dict). Every food gets one
        High/Moderate/Low tag per tiered nutrient and one calorie tag;
        ``Low-Sugar``, ``Lean`` and ``High-Fiber-Carb`` are appended only
        when their conditions hold.
        """
        categories = []

        for column, high_key, moderate_cutoff, suffix in self._TIERED_NUTRIENTS:
            amount = food_row[column]
            if amount >= self.thresholds[high_key]:
                level = 'High'
            elif amount > moderate_cutoff:
                level = 'Moderate'
            else:
                level = 'Low'
            categories.append(f'{level}-{suffix}')

        # Calories run the other way round: the configured threshold marks "Low".
        calories = food_row['Calories']
        if calories <= self.thresholds['low_calorie']:
            categories.append('Low-Calorie')
        elif calories > 300:
            categories.append('High-Calorie')
        else:
            categories.append('Moderate-Calorie')

        if food_row['Sugar(g)'] < 5 and food_row['Carbs(g)'] < 10:
            categories.append('Low-Sugar')

        if food_row['Fat(g)'] < 3 and calories < 150:
            categories.append('Lean')

        if food_row['Fiber(g)'] >= 8 and food_row['Carbs(g)'] >= 20:
            categories.append('High-Fiber-Carb')

        return ', '.join(categories)

    def macronutrient_profile(self, food_row):
        """Classify a food by the relative gram share of protein, carbs and fat.

        Returns:
            ``"<Nutrient>-Dominated"`` when the largest share exceeds 0.6,
            ``"Balanced-<A>-<B>"`` when the two largest shares together
            exceed 0.8, otherwise ``"Mixed-Macronutrients"``.
        """
        protein = food_row['Protein(g)']
        carbs = food_row['Carbs(g)']
        fat = food_row['Fat(g)']
        # The small epsilon keeps the division defined when all three are zero.
        total = protein + carbs + fat + 0.001

        ratios = {
            'Protein': protein / total,
            'Carbohydrates': carbs / total,
            'Fat': fat / total
        }

        dominant_nutrient = max(ratios, key=ratios.get)
        if ratios[dominant_nutrient] > 0.6:
            return f"{dominant_nutrient}-Dominated"

        # Rank by share so the check uses the two LARGEST ratios; taking the
        # first two dict entries would always test Protein + Carbohydrates.
        top_two = sorted(ratios, key=ratios.get, reverse=True)[:2]
        if ratios[top_two[0]] + ratios[top_two[1]] > 0.8:
            return f"Balanced-{top_two[0]}-{top_two[1]}"

        return "Mixed-Macronutrients"

    def health_score_calculator(self, food_row):
        """Compute a heuristic health score for one food, clamped to [0, 100].

        Starting from a baseline of 50, capped bonuses are added for protein,
        fiber and vitamin C, capped penalties are subtracted for sugar and
        fat, and a further 5 points are removed when calories per gram of
        protein + carbs + fiber exceed 5.
        """
        score = 0

        # Bonuses (each capped).
        score += min(food_row['Protein(g)'] * 2, 20)
        score += min(food_row['Fiber(g)'] * 3, 15)
        score += min(food_row['Vitamin C(mg)'] / 10, 10)

        # Penalties (each capped).
        score -= min(food_row['Sugar(g)'], 15)
        score -= min(food_row['Fat(g)'] * 0.5, 10)

        # Denominator is floored at 1 to avoid dividing by zero.
        nutrient_mass = (
            food_row['Protein(g)'] + food_row['Carbs(g)'] + food_row['Fiber(g)']
        )
        calorie_density = food_row['Calories'] / max(nutrient_mass, 1)
        if calorie_density > 5:
            score -= 5

        return max(0, min(100, score + 50))

    @staticmethod
    def _health_category(score):
        """Map a numeric health score to 'Excellent', 'Good', 'Average' or 'Poor'."""
        if score >= 75:
            return 'Excellent'
        if score >= 60:
            return 'Good'
        if score >= 45:
            return 'Average'
        return 'Poor'

    def categorize_all_foods(self):
        """Return a copy of the table with the four derived columns added.

        Added columns: ``Nutrient_Categories``, ``Macronutrient_Profile``,
        ``Health_Score`` and ``Health_Category``. ``self.df`` is not modified.
        """
        result_df = self.df.copy()

        result_df['Nutrient_Categories'] = result_df.apply(
            self.nutrient_based_categorization, axis=1
        )
        result_df['Macronutrient_Profile'] = result_df.apply(
            self.macronutrient_profile, axis=1
        )
        result_df['Health_Score'] = result_df.apply(
            self.health_score_calculator, axis=1
        )
        result_df['Health_Category'] = result_df['Health_Score'].apply(
            self._health_category
        )

        return result_df

    def get_foods_by_category(self, category_type, category_value):
        """Return the categorized rows that match ``category_value``.

        Args:
            category_type: one of ``'Nutrient_Categories'``,
                ``'Macronutrient_Profile'``, ``'Health_Category'`` or
                ``'Original_Category'`` (which filters the ``Category`` column).
            category_value: value to look for. For ``'Nutrient_Categories'``
                it is matched as a literal substring of the tag string; for
                the other types it must equal the column value.

        Raises:
            ValueError: if ``category_type`` is not one of the names above.
        """
        # Validate first so a bad category_type does not pay for a full recompute.
        if category_type not in self._CATEGORY_COLUMNS:
            raise ValueError("Invalid category type")

        categorized_df = self.categorize_all_foods()
        column = categorized_df[self._CATEGORY_COLUMNS[category_type]]

        if category_type == 'Nutrient_Categories':
            # regex=False: the value is a tag name, not a pattern, so characters
            # such as '.', '|' or '(' must be matched literally.
            mask = column.str.contains(category_value, regex=False)
        else:
            mask = column == category_value

        return categorized_df[mask]

    def analyze_category_nutrition(self, category_type, category_value):
        """Summarize the average nutrition of the foods in one category.

        Returns:
            A dict with the category name, the number of foods and the mean
            of each nutrient column and of ``Health_Score``. When no food
            matches, a message string is returned instead of a dict.

        Raises:
            ValueError: propagated from ``get_foods_by_category`` for an
                invalid ``category_type``.
        """
        category_foods = self.get_foods_by_category(category_type, category_value)

        if category_foods.empty:
            return f"No foods found in category: {category_value}"

        analysis = {
            'Category': category_value,
            'Number_of_Foods': len(category_foods),
            'Avg_Calories': category_foods['Calories'].mean(),
            'Avg_Protein': category_foods['Protein(g)'].mean(),
            'Avg_Carbs': category_foods['Carbs(g)'].mean(),
            'Avg_Fat': category_foods['Fat(g)'].mean(),
            'Avg_Fiber': category_foods['Fiber(g)'].mean(),
            'Avg_Vitamin_C': category_foods['Vitamin C(mg)'].mean(),
            'Avg_Health_Score': category_foods['Health_Score'].mean()
        }

        return analysis


def main():
    """Demonstrate FoodCategorizer on a built-in ten-food sample and print the results.

    Prints the full categorized table, two example category queries, the
    nutrition summary of the original 'Fruit' category and the five foods
    with the highest health score.
    """
    sample = {
        'Food': [
            'Apple', 'Banana', 'Chicken Breast', 'Salmon', 'Broccoli',
            'Almonds', 'Brown Rice', 'Milk', 'Lentils', 'Avocado',
        ],
        'Category': [
            'Fruit', 'Fruit', 'Protein', 'Protein', 'Vegetable',
            'Protein', 'Grain', 'Dairy', 'Protein', 'Fruit',
        ],
        'Calories': [95, 105, 165, 208, 31, 164, 216, 122, 230, 320],
        'Protein(g)': [0.5, 1.3, 31, 20, 2.6, 6, 5, 8, 18, 4],
        'Carbs(g)': [25, 27, 0, 0, 6, 6, 45, 12, 40, 17],
        'Fat(g)': [0.3, 0.4, 3.6, 13, 0.3, 14, 1.8, 5, 1, 29],
        'Fiber(g)': [4.4, 3.1, 0, 0, 2.4, 3.5, 3.5, 0, 16, 13],
        'Sugar(g)': [19, 14, 0, 0, 1.5, 1.2, 0.7, 12, 4, 1.3],
        'Vitamin C(mg)': [8.4, 10.3, 0, 0, 81, 0, 0, 0, 3, 20],
    }

    engine = FoodCategorizer(df=pd.DataFrame(sample))
    labelled = engine.categorize_all_foods()

    # Full table with every derived column.
    overview_columns = [
        'Food', 'Category', 'Nutrient_Categories',
        'Macronutrient_Profile', 'Health_Score', 'Health_Category',
    ]
    print("FOOD CATEGORIZATION RESULTS")
    print("\nCategorized Foods:")
    print(labelled[overview_columns].to_string(index=False))

    print("\nEXAMPLE QUERIES")

    # Query by nutrient tag.
    protein_rich = engine.get_foods_by_category('Nutrient_Categories', 'High-Protein')
    print(f"\nHigh-Protein Foods ({len(protein_rich)} found):")
    protein_view = protein_rich[['Food', 'Protein(g)', 'Health_Category']]
    print(protein_view.to_string(index=False))

    # Query by macronutrient profile.
    carb_heavy = engine.get_foods_by_category('Macronutrient_Profile', 'Carbohydrates-Dominated')
    print(f"\nCarbohydrate-Dominated Foods ({len(carb_heavy)} found):")
    carb_view = carb_heavy[['Food', 'Carbs(g)', 'Health_Category']]
    print(carb_view.to_string(index=False))

    # Per-category averages for the original 'Fruit' label.
    print("\nCATEGORY ANALYSIS")
    fruit_summary = engine.analyze_category_nutrition('Original_Category', 'Fruit')
    print("Fruit Category Analysis:")
    for label, figure in fruit_summary.items():
        print(f"{label}: {figure}")

    # Ranking by health score.
    print("\nTop 5 Healthiest Foods:")
    top_five = labelled.nlargest(5, 'Health_Score')
    ranking = top_five[['Food', 'Health_Score', 'Health_Category']]
    print(ranking.to_string(index=False))

# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

=== FOOD CATEGORIZATION RESULTS ===

Categorized Foods:
          Food  Category                                                                              Nutrient_Categories   Macronutrient_Profile  Health_Score Health_Category
         Apple     Fruit                Low-Protein, High-Carb, Low-Fat, Moderate-Fiber, Low-Vitamin-C, Low-Calorie, Lean Carbohydrates-Dominated         49.89         Average
        Banana     Fruit      Low-Protein, High-Carb, Low-Fat, Moderate-Fiber, Moderate-Vitamin-C, Moderate-Calorie, Lean Carbohydrates-Dominated         48.73         Average
Chicken Breast   Protein      High-Protein, Low-Carb, Moderate-Fat, Low-Fiber, Low-Vitamin-C, Moderate-Calorie, Low-Sugar       Protein-Dominated         63.20            Good
        Salmon   Protein          High-Protein, Low-Carb, High-Fat, Low-Fiber, Low-Vitamin-C, Moderate-Calorie, Low-Sugar       Protein-Dominated         58.50         Average
      Broccoli Vegetable     Low-Protein, Low-Carb, Low-Fat, Mod