# Import Libraries & Load Data

In [33]:
import pandas as pd
import os
import matplotlib.pyplot as plt
from PIL import Image
import io

In [34]:
# # Mount Google Drive
from google.colab import drive
# drive.mount('/content/drive')

save_path = r'/content/drive/My Drive/research/unzipped'
image_df = pd.read_pickle(os.path.join(save_path, 'dish_images.pkl'))
dishes = pd.read_excel(os.path.join(save_path, 'dishes.xlsx'))
dish_ingredients = pd.read_excel(os.path.join(save_path, 'dish_ingredients.xlsx'))
ingredients = pd.read_excel(os.path.join(save_path, 'ingredients.xlsx'))

In [35]:
import os

directory_path = '/content/drive/My Drive/research/unzipped'

# Check if the directory exists
if os.path.exists(directory_path):
    # List the contents of the directory
    print(f"Contents of {directory_path}:")
    for item in os.listdir(directory_path):
        print(item)
else:
    print(f"Directory not found: {directory_path}")

Contents of /content/drive/My Drive/research/unzipped:
dish_images.pkl
dish_ingredients.xlsx
dishes.xlsx
ingredients.xlsx


# Merge Nutritional Information

In [37]:
image_df = pd.merge(image_df, dishes, left_on='dish', right_on='dish_id', how='left').drop('dish_id', axis=1)
print("image_df updated with nutritional information. Displaying info to verify:")
image_df.info()

image_df updated with nutritional information. Displaying info to verify:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3490 entries, 0 to 3489
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   dish            3490 non-null   object 
 1   rgb_image       3490 non-null   object 
 2   depth_image     3490 non-null   object 
 3   total_mass      3490 non-null   float64
 4   total_calories  3490 non-null   int64  
 5   total_fat       3490 non-null   float64
 6   total_carb      3490 non-null   float64
 7   total_protein   3490 non-null   float64
dtypes: float64(4), int64(1), object(3)
memory usage: 218.3+ KB


# Convert Nutritional Columns to Numeric

In [40]:
image_df = pd.merge(image_df, dishes, left_on='dish', right_on='dish_id', how='left').drop('dish_id', axis=1)
nutritional_columns = ['total_mass', 'total_calories', 'total_fat', 'total_carb', 'total_protein']
for col in nutritional_columns:
    image_df[col] = pd.to_numeric(image_df[col], errors='coerce')

print("Nutritional columns converted to numeric types. Displaying info to verify:")
image_df[nutritional_columns].info()

Nutritional columns converted to numeric types. Displaying info to verify:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3490 entries, 0 to 3489
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   total_mass      3490 non-null   float64
 1   total_calories  3490 non-null   int64  
 2   total_fat       3490 non-null   float64
 3   total_carb      3490 non-null   float64
 4   total_protein   3490 non-null   float64
dtypes: float64(4), int64(1)
memory usage: 136.5 KB


In [41]:
image_df['calories_from_fat'] = image_df['total_fat'] * 9
image_df['calories_from_carb'] = image_df['total_carb'] * 4
image_df['calories_from_protein'] = image_df['total_protein'] * 4

# Calculate percentage of calories from each macronutrient, handling division by zero
image_df['fat_pc'] = (image_df['calories_from_fat'] / image_df['total_calories']).fillna(0) * 100
image_df['carb_pc'] = (image_df['calories_from_carb'] / image_df['total_calories']).fillna(0) * 100
image_df['protein_pc'] = (image_df['calories_from_protein'] / image_df['total_calories']).fillna(0) * 100

# Replace any inf values (if total_calories was zero and macro calories were non-zero) with 0
image_df.replace([float('inf'), -float('inf')], 0, inplace=True)

print("Calculated calories from macronutrients and their percentages. Displaying first few rows with new columns:")
print(image_df[['dish', 'total_calories', 'calories_from_fat', 'calories_from_carb', 'calories_from_protein', 'fat_pc', 'carb_pc', 'protein_pc']].head())

Calculated calories from macronutrients and their percentages. Displaying first few rows with new columns:
              dish  total_calories  calories_from_fat  calories_from_carb  \
0  dish_1559243887              50         222.750000           45.400000   
1  dish_1563568319             347         259.064874          212.594528   
2  dish_1558641007              57           1.026000           22.800000   
3  dish_1558109945             143           2.574000            1.716000   
4  dish_1563216412             205           6.426000          146.440004   

   calories_from_protein      fat_pc    carb_pc  protein_pc  
0              44.800004  445.500000  90.800000   89.600008  
1             150.904100   74.658465  61.266435   43.488213  
2               2.052000    1.800000  40.000000    3.600000  
3              80.080000    1.800000   1.200000   56.000000  
4              20.952000    3.134634  71.434148   10.220488  


In [42]:
dishes

Unnamed: 0,dish_id,total_mass,total_calories,total_fat,total_carb,total_protein
0,dish_1561662216,300.794281,193,12.387489,28.218290,18.633970
1,dish_1562688426,137.569992,88,8.256000,5.190000,10.297000
2,dish_1561662054,419.438782,292,23.838249,26.351543,25.910593
3,dish_1562008979,382.936646,290,22.224644,10.173570,35.345387
4,dish_1560455030,20.590000,103,0.148000,4.625000,0.956000
...,...,...,...,...,...,...
5001,dish_1571934465,0.000000,232,0.000000,0.000000,0.000000
5002,dish_1573073666,0.000000,15,0.000000,0.000000,0.000000
5003,dish_1575924356,0.000000,103,0.000000,0.000000,0.000000
5004,dish_1574359199,0.000000,329,0.000000,0.000000,0.000000


In [43]:
dish_ingredients

Unnamed: 0,dish_id,ingr_id,ingr_name,grams,calories,fat,carb,protein
0,dish_1561662216,ingr_0000000508,soy sauce,3.398568,1.801241,0.020391,0.166530,0.275284
1,dish_1561662216,ingr_0000000122,garlic,2.124105,3.164916,0.010621,0.700955,0.135943
2,dish_1561662216,ingr_0000000026,white rice,8.496420,11.045346,0.025489,2.378998,0.229403
3,dish_1561662216,ingr_0000000524,parsley,0.213397,0.079170,0.001707,0.013657,0.006189
4,dish_1561662216,ingr_0000000094,onions,1.707173,0.682869,0.001707,0.153646,0.018779
...,...,...,...,...,...,...,...,...
28450,dish_1576171157,ingr_0000000077,almonds,0.294420,1.702042,0.145738,0.066833,0.065950
28451,dish_1576171157,ingr_0000000161,olive oil,1.430858,12.648785,1.430858,0.000000,0.000000
28452,dish_1576171157,ingr_0000000203,chili,0.953905,0.381562,0.003816,0.084898,0.017170
28453,dish_1576171157,ingr_0000000291,salt,0.008412,0.000000,0.000000,0.000000,0.000000


In [44]:
%%capture
!pip install pulp
print("PuLP library installed.")

# Define ILP Dish Selection Function

In [45]:
from pulp import LpProblem, LpMinimize, LpVariable, lpSum, LpStatus

def select_dishes_with_ilp(daily_calorie_target, num_meals, ilp_dishes_data):
    """
    Selects a set of dishes for all meals in a day using Integer Linear Programming
    to minimize deviation from the overall daily calorie target.

    Args:
        daily_calorie_target (int): The target total daily calorie count.
        num_meals (int): The number of meals to plan for.
        ilp_dishes_data (pd.DataFrame): DataFrame with 'dish' and 'total_calories'.

    Returns:
        list: A list of dictionaries, where each dictionary represents a selected dish
              and contains its 'dish' ID and 'total_calories'.
    """

    # Initialize the ILP problem for minimization
    prob = LpProblem("Daily Meal Plan", LpMinimize)

    # Create LpVariable for each dish: 1 if selected, 0 otherwise
    dish_selection_vars = LpVariable.dicts(
        "SelectDish", ilp_dishes_data['dish'].tolist(), 0, 1, 'Binary'
    )

    # Objective Function Variables (to minimize absolute deviation)
    # We want to minimize |sum(calories) - daily_calorie_target|
    # This is achieved by introducing two non-negative variables: over_target and under_target
    over_target = LpVariable("OverTarget", 0)
    under_target = LpVariable("UnderTarget", 0)

    # Objective: Minimize the sum of deviations from the target
    prob += over_target + under_target, "Minimize Calorie Deviation"

    # Constraint 1: Exactly num_meals dishes must be selected
    prob += lpSum(dish_selection_vars[dish_id] for dish_id in ilp_dishes_data['dish']) == num_meals, \
            "SelectExactlyNumMeals"

    # Constraint 2: Link calorie sum with deviations and daily_calorie_target
    # Sum of selected dish calories - daily_calorie_target = over_target - under_target
    total_calories_selected = lpSum(
        ilp_dishes_data.loc[ilp_dishes_data['dish'] == dish_id, 'total_calories'].iloc[0] *
        dish_selection_vars[dish_id] for dish_id in ilp_dishes_data['dish']
    )

    prob += total_calories_selected - daily_calorie_target == over_target - under_target, \
            "CalorieBalance"

    # Solve the problem
    prob.solve()

    selected_dishes = []
    if LpStatus[prob.status] == 'Optimal' or LpStatus[prob.status] == 'Feasible':
        for dish_id in ilp_dishes_data['dish']:
            if dish_selection_vars[dish_id].varValue == 1:
                calories = ilp_dishes_data.loc[ilp_dishes_data['dish'] == dish_id, 'total_calories'].iloc[0]
                selected_dishes.append({'dish': dish_id, 'total_calories': calories})
    else:
        print(f"No optimal solution found. Status: {LpStatus[prob.status]}")

    return selected_dishes

print("The `select_dishes_with_ilp` function has been defined using PuLP.")


The `select_dishes_with_ilp` function has been defined using PuLP.


# Creating Test Set

In [46]:
test_daily_calorie_targets = list(range(2000, 3001, 5))

print(f"First 5 elements of test_daily_calorie_targets: {test_daily_calorie_targets[:5]}")
print(f"Last 5 elements of test_daily_calorie_targets: {test_daily_calorie_targets[-5:]}")

First 5 elements of test_daily_calorie_targets: [2000, 2005, 2010, 2015, 2020]
Last 5 elements of test_daily_calorie_targets: [2980, 2985, 2990, 2995, 3000]


# Prepare for ILP Evaluation

In [47]:
num_meals = 3
def evaluate_ilp_meal_plan(daily_calorie_target):
    """
    Runs the ILP meal planning algorithm for a given daily calorie target
    and returns the actual total calories of the generated plan.

    Args:
        daily_calorie_target (int): The target daily calorie count.

    Returns:
        float: The actual total calories of the generated meal plan.
    """
    # Call the ILP dish selection function
    ilp_meal_plan_evaluation = select_dishes_with_ilp(
        daily_calorie_target,
        num_meals, # num_meals is a global variable from previous steps
        ilp_dishes_data.copy() # Use a copy to ensure fresh data for each evaluation
    )

    # Calculate total calories for the generated plan
    actual_total_calories = sum(meal['total_calories'] for meal in ilp_meal_plan_evaluation)

    return actual_total_calories

print("The `evaluate_ilp_meal_plan` function has been defined.")

The `evaluate_ilp_meal_plan` function has been defined.


In [48]:
available_dishes = image_df[image_df['total_calories'] > 0].copy()

print(f"Original image_df shape: {image_df.shape}")
print(f"Available dishes (excluding zero-calorie dishes) shape: {available_dishes.shape}")
print("First 5 rows of available_dishes:")
print(available_dishes.head())

Original image_df shape: (3490, 24)
Available dishes (excluding zero-calorie dishes) shape: (3490, 24)
First 5 rows of available_dishes:
              dish                                          rgb_image  \
0  dish_1559243887  b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\...   
1  dish_1563568319  b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\...   
2  dish_1558641007  b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\...   
3  dish_1558109945  b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\...   
4  dish_1563216412  b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\...   

                                         depth_image  total_mass_x  \
0  b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\...    289.050018   
1  b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\...    604.323303   
2  b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\...     23.369999   
3  b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\...     74.360001   
4  b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\...    172.449982   

   

In [49]:
feature_matrix = available_dishes[['total_calories']]

available_dishes = available_dishes.reset_index(drop=True)

print("Feature matrix created with 'total_calories':")
print(feature_matrix.head())
print("\nAvailable dishes DataFrame re-indexed. Displaying info to verify:")
print(available_dishes.info())

Feature matrix created with 'total_calories':
   total_calories
0              50
1             347
2              57
3             143
4             205

Available dishes DataFrame re-indexed. Displaying info to verify:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3490 entries, 0 to 3489
Data columns (total 24 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   dish                   3490 non-null   object 
 1   rgb_image              3490 non-null   object 
 2   depth_image            3490 non-null   object 
 3   total_mass_x           3490 non-null   float64
 4   total_calories_x       3490 non-null   int64  
 5   total_fat_x            3490 non-null   float64
 6   total_carb_x           3490 non-null   float64
 7   total_protein_x        3490 non-null   float64
 8   total_mass_y           3490 non-null   float64
 9   total_calories_y       3490 non-null   int64  
 10  total_fat_y            3490 non-null   floa

In [50]:
ilp_dishes_data = available_dishes[['dish', 'total_calories']].copy()

print("Head of ilp_dishes_data DataFrame:")
print(ilp_dishes_data.head())

print("\nInfo of ilp_dishes_data DataFrame to verify structure and data types:")
ilp_dishes_data.info()

Head of ilp_dishes_data DataFrame:
              dish  total_calories
0  dish_1559243887              50
1  dish_1563568319             347
2  dish_1558641007              57
3  dish_1558109945             143
4  dish_1563216412             205

Info of ilp_dishes_data DataFrame to verify structure and data types:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3490 entries, 0 to 3489
Data columns (total 2 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   dish            3490 non-null   object
 1   total_calories  3490 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 54.7+ KB


# Run ILP Simulations and Evaluate Accuracy

In [51]:
ilp_results = []
for target in test_daily_calorie_targets:
    actual_calories = evaluate_ilp_meal_plan(target)
    ilp_results.append({'target_calories': target, 'actual_calories': actual_calories})

print(f"Generated ILP results for {len(ilp_results)} daily calorie targets. Displaying first 5 results:")
print(ilp_results[:5])



Generated ILP results for 201 daily calorie targets. Displaying first 5 results:
[{'target_calories': 2000, 'actual_calories': np.int64(2000)}, {'target_calories': 2005, 'actual_calories': np.int64(2005)}, {'target_calories': 2010, 'actual_calories': np.int64(2010)}, {'target_calories': 2015, 'actual_calories': np.int64(2015)}, {'target_calories': 2020, 'actual_calories': np.int64(2020)}]


In [52]:
import warnings

ilp_results = []
for target in test_daily_calorie_targets:
    # Temporarily suppress the PuLP UserWarning about spaces in names
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        actual_calories = evaluate_ilp_meal_plan(target)
    ilp_results.append({'target_calories': target, 'actual_calories': actual_calories})

print(f"Generated ILP results for {len(ilp_results)} daily calorie targets. Displaying first 5 results:")
print(ilp_results[:5])

Generated ILP results for 201 daily calorie targets. Displaying first 5 results:
[{'target_calories': 2000, 'actual_calories': np.int64(2000)}, {'target_calories': 2005, 'actual_calories': np.int64(2005)}, {'target_calories': 2010, 'actual_calories': np.int64(2010)}, {'target_calories': 2015, 'actual_calories': np.int64(2015)}, {'target_calories': 2020, 'actual_calories': np.int64(2020)}]


In [53]:
ilp_within_tolerance_count = 0
total_ilp_plans = len(ilp_results)

for result in ilp_results:
    target = result['target_calories']
    actual = result['actual_calories']
    lower_bound = target
    upper_bound = target

    if lower_bound <= actual <= upper_bound:
        ilp_within_tolerance_count += 1

percentage_ilp_within_tolerance = (ilp_within_tolerance_count / total_ilp_plans) * 100

print(f"--- ILP Meal Plan Accuracy Summary (Range {test_daily_calorie_targets[0]}-{test_daily_calorie_targets[-1]} kcal) ---")
print(f"Total plans evaluated: {total_ilp_plans}")
print(f"Meal Plans with actual calories within +/-0% of target: {ilp_within_tolerance_count}")
print(f"Percentage of plans within +/-0% tolerance: {percentage_ilp_within_tolerance:.2f}%\n")


--- ILP Meal Plan Accuracy Summary (Range 2000-3000 kcal) ---
Total plans evaluated: 201
Meal Plans with actual calories within +/-0% of target: 170
Percentage of plans within +/-0% tolerance: 84.58%



# Predicting meal Plan for daily_calorie_target = 2000

In [54]:
daily_calorie_target = 2000
ilp_meal_plan = select_dishes_with_ilp(daily_calorie_target, num_meals, ilp_dishes_data)

print("ILP Generated Meal Plan (Dish IDs and Calories):")
for meal in ilp_meal_plan:
    print(f"  Dish ID: {meal['dish']}, Calories: {meal['total_calories']:.1f} kcal")



ILP Generated Meal Plan (Dish IDs and Calories):
  Dish ID: dish_1566402232, Calories: 841.0 kcal
  Dish ID: dish_1561739805, Calories: 1102.0 kcal
  Dish ID: dish_1559245920, Calories: 57.0 kcal


In [55]:
full_ilp_meal_plan_details = []

# Get all details for the selected dishes by merging with the original image_df
# Use 'dish' to merge, which is the dish ID
ilp_meal_plan_df = pd.DataFrame(ilp_meal_plan)
merged_ilp_dishes = pd.merge(ilp_meal_plan_df, image_df, on='dish', how='left', suffixes=('_ilp', ''))

for index, meal_detail in merged_ilp_dishes.iterrows():
    dish_id = meal_detail['dish']

    # Filter dish_ingredients for the current dish_id to get ingredient names
    ingredients_for_dish = dish_ingredients[dish_ingredients['dish_id'] == dish_id]['ingr_name'].tolist()

    # Convert the pandas Series to a dictionary for easier modification
    meal_dict = meal_detail.to_dict()
    meal_dict['ingredients_list'] = ingredients_for_dish

    full_ilp_meal_plan_details.append(meal_dict)

print("Full ILP Meal Plan Details with Ingredients and Nutrition:")
for i, meal_detail in enumerate(full_ilp_meal_plan_details):
    print(f"Meal {i+1} (Dish ID: {meal_detail['dish']}):")
    print(f"  Calories: {meal_detail['total_calories']:.1f} kcal")
    print(f"  Ingredients: {', '.join(meal_detail['ingredients_list'])}")
    print(f"  Fat: {meal_detail['total_fat']:.1f}g | Carbs: {meal_detail['total_carb']:.1f}g | Protein: {meal_detail['total_protein']:.1f}g\n")

Full ILP Meal Plan Details with Ingredients and Nutrition:
Meal 1 (Dish ID: dish_1566402232):
  Calories: 841.0 kcal
  Ingredients: watermelon, cantaloupe, salsa, egg whites, yam, berries, sweet potato, olive oil
  Fat: 9.0g | Carbs: 54.1g | Protein: 48.9g

Meal 2 (Dish ID: dish_1561739805):
  Calories: 1102.0 kcal
  Ingredients: raspberries, broccoli, berries, sweet potato, scrambled eggs, cantaloupe, vinaigrette, mixed greens, blackberries, granola
  Fat: 23.3g | Carbs: 101.3g | Protein: 29.1g

Meal 3 (Dish ID: dish_1559245920):
  Calories: 57.0 kcal
  Ingredients: cauliflower
  Fat: 0.2g | Carbs: 2.9g | Protein: 1.0g



In [56]:
print("\n--- Daily Meal Plan (ILP) ---")
for i, meal_detail in enumerate(full_ilp_meal_plan_details):
    print(f"Meal {i+1} (Dish ID: {meal_detail['dish']}):")
    print(f"  Calories: {meal_detail['total_calories']:.1f} kcal")
    print(f"  Ingredients: {', '.join(meal_detail['ingredients_list'])}")
    print(f"  Fat: {meal_detail['total_fat']:.1f}g ({meal_detail['fat_pc']:.1f}%) | Carbs: {meal_detail['total_carb']:.1f}g ({meal_detail['carb_pc']:.1f}%) | Protein: {meal_detail['total_protein']:.1f}g ({meal_detail['protein_pc']:.1f}%)\n")

# Calculate daily totals for the ILP plan
total_ilp_plan_calories = sum(meal['total_calories'] for meal in full_ilp_meal_plan_details)
total_ilp_plan_fat = sum(meal['total_fat'] for meal in full_ilp_meal_plan_details)
total_ilp_plan_carb = sum(meal['total_carb'] for meal in full_ilp_meal_plan_details)
total_ilp_plan_protein = sum(meal['total_protein'] for meal in full_ilp_meal_plan_details)

print("\n--- Daily Summary (ILP) ---")
print(f"Target Daily Calories: {daily_calorie_target:.1f} kcal")
print(f"Actual Plan Calories (ILP): {total_ilp_plan_calories:.1f} kcal\n")

print("Macronutrient Breakdown (ILP Plan):")

# Calculate actual macronutrient percentages for the ILP meal plan
if total_ilp_plan_calories > 0:
    actual_ilp_fat_pc = (total_ilp_plan_fat * 9 / total_ilp_plan_calories) * 100
    actual_ilp_carb_pc = (total_ilp_plan_carb * 4 / total_ilp_plan_calories) * 100
    actual_ilp_protein_pc = (total_ilp_plan_protein * 4 / total_ilp_plan_calories) * 100
else:
    actual_ilp_fat_pc = 0
    actual_ilp_carb_pc = 0
    actual_ilp_protein_pc = 0

print(f"  Fat: Actual {actual_ilp_fat_pc:.1f}% ({total_ilp_plan_fat:.1f}g)")
print(f"  Carbs: Actual {actual_ilp_carb_pc:.1f}% ({total_ilp_plan_carb:.1f}g)")
print(f"  Protein: Actual {actual_ilp_protein_pc:.1f}% ({total_ilp_plan_protein:.1f}g)")


--- Daily Meal Plan (ILP) ---
Meal 1 (Dish ID: dish_1566402232):
  Calories: 841.0 kcal
  Ingredients: watermelon, cantaloupe, salsa, egg whites, yam, berries, sweet potato, olive oil
  Fat: 9.0g (9.6%) | Carbs: 54.1g (25.7%) | Protein: 48.9g (23.2%)

Meal 2 (Dish ID: dish_1561739805):
  Calories: 1102.0 kcal
  Ingredients: raspberries, broccoli, berries, sweet potato, scrambled eggs, cantaloupe, vinaigrette, mixed greens, blackberries, granola
  Fat: 23.3g (19.1%) | Carbs: 101.3g (36.8%) | Protein: 29.1g (10.6%)

Meal 3 (Dish ID: dish_1559245920):
  Calories: 57.0 kcal
  Ingredients: cauliflower
  Fat: 0.2g (2.7%) | Carbs: 2.9g (20.0%) | Protein: 1.0g (7.2%)


--- Daily Summary (ILP) ---
Target Daily Calories: 2000.0 kcal
Actual Plan Calories (ILP): 2000.0 kcal

Macronutrient Breakdown (ILP Plan):
  Fat: Actual 14.6% (32.5g)
  Carbs: Actual 31.6% (158.2g)
  Protein: Actual 15.8% (79.0g)


### Notebook Steps Overview

This notebook processes food image and nutritional data to create meal plans using Integer Linear Programming (ILP). Here's a step-by-step breakdown of what has been done:

1.  **Import Libraries : Essential Python libraries like `pandas` for data manipulation, `os` for file paths, `matplotlib.pyplot` for plotting, `PIL.Image` for image handling, and `io` for byte streams were imported.

2.  **Load Data : DataFrames were loaded from pickled and Excel files located in a specified Google Drive path. This includes `image_df` (containing dish image data), `dishes` (nutritional information for dishes), `dish_ingredients` (linking dishes to ingredients), and `ingredients` (details about individual ingredients).

3.  **Verify Data Loading : The contents of the `unzipped` directory were listed to confirm files were accessible. The `dishes` and `dish_ingredients` DataFrames were displayed to inspect their initial structure.

4.  **Merge Nutritional Information : The `image_df` was merged with the `dishes` DataFrame based on `dish_id` to integrate nutritional values directly into the image data. The `image_df.info()` was displayed to verify the merge.

5.  **Convert Nutritional Columns to Numeric : Key nutritional columns (`total_mass`, `total_calories`, `total_fat`, `total_carb`, `total_protein`) in `image_df` were converted to numeric types, handling potential conversion errors by coercing them to `NaN` (which would typically be filled or dropped later, though not explicitly shown in this snippet). The `info()` method was used to confirm the data types.

6.  **Calculate Macronutrient Percentages : Calories derived from fat, carbohydrates, and protein were calculated. Subsequently, the percentage of total calories from each macronutrient (`fat_pc`, `carb_pc`, `protein_pc`) was computed, with safeguards for division by zero.

7.  **Install PuLP : The `pulp` library, a Python package for linear programming, was installed to facilitate Integer Linear Programming (ILP) for meal planning.

8.  **Define ILP Dish Selection Function : A Python function `select_dishes_with_ilp` was defined. This function uses PuLP to select a specified number of dishes from a given dataset to minimize the deviation from a daily calorie target.

9.  **Prepare for ILP Evaluation :
    *   `test_daily_calorie_targets` was created, a list of calorie targets for evaluation.
    *   `evaluate_ilp_meal_plan` function was defined to run the ILP solver for a given target and return the actual calories.
    *   `available_dishes` was created by filtering `image_df` for dishes with `total_calories > 0`.
    *   A `feature_matrix` was created with `total_calories`, and `available_dishes` was re-indexed.
    *   `ilp_dishes_data` was prepared as a subset of `available_dishes` containing only `dish` ID and `total_calories`, which is used by the ILP solver.

10. **Run ILP Simulations and Evaluate Accuracy :
    *   The `select_dishes_with_ilp` function was run for each `daily_calorie_target` in `test_daily_calorie_targets` to generate `ilp_results`.
    *   A `UserWarning` from PuLP was suppressed during this loop.
    *   The accuracy of the ILP plans was evaluated by checking how many generated plans matched their target calories exactly.

11. **Generate and Display a Specific ILP Meal Plan :
    *   An ILP meal plan was generated for a specific `daily_calorie_target` (e.g., 2000 kcal).
    *   The selected dishes were merged with `image_df` and `dish_ingredients` to gather full nutritional details and ingredient lists.
    *   The full details of the generated meal plan, including macronutrient breakdowns and ingredient lists for each dish, were printed. A daily summary with target vs. actual calories and overall macronutrient percentages was also displayed.

### AI Approach: Integer Linear Programming (ILP) for Meal Planning

The approach implemented in this notebook for creating meal plans is **Integer Linear Programming (ILP)**. ILP is a mathematical optimization technique used in AI to find the best possible outcome (maximum or minimum) of a mathematical model whose requirements are represented by linear relationships, and where some or all of the variables are restricted to be integers.

In the context of meal planning, here's how it's applied:

1.  **Objective**: The primary objective is to select a specific number of dishes (`num_meals`) such that the *total calorie count* of the selected dishes is as close as possible to a `daily_calorie_target`. This is formulated as minimizing the absolute deviation from the target.

2.  **Variables**: For each available dish, an **integer variable** (specifically, a binary variable) is created. This variable can take a value of 1 if the dish is selected for the meal plan, and 0 if it is not.

3.  **Constraints**: Constraints are defined to guide the selection process:
    *   **Number of Meals**: A constraint ensures that exactly `num_meals` are selected in total.
    *   **Calorie Balance**: This constraint links the sum of calories from the selected dishes to the `daily_calorie_target`, using auxiliary variables to capture any over-target or under-target calorie deviation.

4.  **Solver**: The `PuLP` library is used to define and solve this ILP problem. The solver explores various combinations of dishes, respecting the defined constraints, to find an optimal solution that minimizes the calorie deviation from the target.

This method allows for a systematic and mathematically rigorous way to create meal plans that adhere to specific nutritional goals, making it a powerful tool for dietary management and food recommendation systems.