## Data Preprocessing

In [1]:
# Library imports
import pandas as pd
import numpy as np
from pulp import *

In [2]:
# Importing and checking out the dataset
df = pd.read_csv("servings.csv")
df.head()

Unnamed: 0,Day,Group,Food Name,Amount,Energy (kcal),Alcohol (g),Caffeine (mg),Water (g),B1 (Thiamine) (mg),B2 (Riboflavin) (mg),...,Leucine (g),Lysine (g),Methionine (g),Phenylalanine (g),Protein (g),Threonine (g),Tryptophan (g),Tyrosine (g),Valine (g),Category
0,2024-03-21,Uncategorized,"Eggs, Cooked",3.00 large,232.5,0.0,0.0,111.93,0.1,0.77,...,1.61,1.36,0.59,1.0,18.87,0.91,0.23,0.77,1.15,Dairy and Egg Products
1,2024-03-21,Uncategorized,Argentina Corned Beef,1.00 can,180.0,,,,,,...,,,,,12.0,,,,,
2,2024-03-21,Uncategorized,"Quaker, Instant Oatmeal, Chocolate",32.00 g,129.94,,,,,,...,,,,,3.88,,,,,Breakfast Cereals
3,2024-03-21,Uncategorized,"Banana, Fresh","1.00 medium - 7"" to 7 7/8"" long",105.02,0.0,0.0,88.39,0.04,0.09,...,0.08,0.06,0.01,0.06,1.29,0.03,0.01,0.01,0.06,Fruits and Fruit Juices
4,2024-03-21,Uncategorized,C-Lium Fibre,1.00 Pack,30.0,,,,,,...,,,,,0.0,,,,,


In [3]:
# Group by 'Food Name' and collect unique 'Amount' for each group
grouped_df = df.groupby('Food Name')['Amount'].unique().reset_index()

# Expand the DataFrame so each unique 'Food Name' and 'Amount' is on a separate row
expanded_df = grouped_df.explode('Amount')

# Export the DataFrame to a CSV file
expanded_df.to_csv('grouped_food_names_amounts.csv')
expanded_df

Unnamed: 0,Food Name,Amount
0,"7-Eleven, Fresh, Breakfast Burger, Bacon, Egg ...",1.00 pack
1,"Apple, Fresh, With Skin","1.00 medium - 3"" diameter"
1,"Apple, Fresh, With Skin","0.25 large - 3 1/4"" diameter"
1,"Apple, Fresh, With Skin","0.75 large - 3 1/4"" diameter"
1,"Apple, Fresh, With Skin","1.00 large - 3 1/4"" diameter"
...,...,...
79,"Turo-turo Gourmet, Sisig Fully Cooked Stir Fri...",2.00 serving
80,"White Rice, Steamed",1.00 cup
80,"White Rice, Steamed",2.00 cup
80,"White Rice, Steamed",3.00 cup


In [4]:
df_cost = pd.read_csv("cost.csv").dropna()
df_cost.head()

Unnamed: 0,Food Name,Amount,Price
0,"7-Eleven, Fresh, Breakfast Burger, Bacon, Egg ...",1.00 pack,125.0
1,"Apple, Fresh, With Skin","1.00 medium - 3"" diameter",40.0
2,"Apple, Fresh, With Skin","0.25 large - 3 1/4"" diameter",20.0
3,"Apple, Fresh, With Skin","0.75 large - 3 1/4"" diameter",32.5
4,"Apple, Fresh, With Skin","1.00 large - 3 1/4"" diameter",47.5


In [5]:
merged_df = pd.merge(df, df_cost, on=['Food Name', 'Amount'], how='inner')

specified_columns = ['Food Name', 'Amount', 'Energy (kcal)', 'Fiber (g)', 'Protein (g)', 'Price']
final_df = merged_df[specified_columns].drop_duplicates()
final_df.fillna(0, inplace=True)
final_df.head()

Unnamed: 0,Food Name,Amount,Energy (kcal),Fiber (g),Protein (g),Price
0,"Eggs, Cooked",3.00 large,232.5,0.0,18.87,30.0
1,Argentina Corned Beef,1.00 can,180.0,2.0,12.0,35.0
2,"Quaker, Instant Oatmeal, Chocolate",32.00 g,129.94,2.72,3.88,30.0
3,"Banana, Fresh","1.00 medium - 7"" to 7 7/8"" long",105.02,3.07,1.29,12.5
4,C-Lium Fibre,1.00 Pack,30.0,5.0,0.0,23.0


In [6]:
# Concatenate Amount into Food Name
final_df['Food Name'] = final_df['Food Name'] + ' ' + final_df['Amount'].astype(str)
food_names = final_df['Food Name'].tolist()

# Create dictionaries for 'Energy', 'Fiber', 'Protein', and 'Price'
energy_dict = final_df.set_index('Food Name')['Energy (kcal)'].to_dict()
fiber_dict = final_df.set_index('Food Name')['Fiber (g)'].to_dict()
fiber_dict['Gardenia, High Fiber Wheat Raisin Loaf 1.00 Slice'] = 3
fiber_dict['Gardenia, High Fiber Wheat Raisin Loaf 2.00 Slice'] = 6
protein_dict = final_df.set_index('Food Name')['Protein (g)'].to_dict()
price_dict = final_df.set_index('Food Name')['Price'].to_dict()

# Display the results
print("Food Names Array:", food_names)
print("Energy Dictionary:", energy_dict)
print("Fiber Dictionary:", fiber_dict)
print("Protein Dictionary:", protein_dict)
print("Price Dictionary:", price_dict)

Food Names Array: ['Eggs, Cooked 3.00 large', 'Argentina Corned Beef 1.00 can', 'Quaker, Instant Oatmeal, Chocolate 32.00 g', 'Banana, Fresh 1.00 medium - 7" to 7 7/8" long', 'C-Lium Fibre 1.00 Pack', 'Gardenia, High Fiber Wheat Raisin Loaf 2.00 Slice', 'Peanut Butter, Regular, Salted 1.00 tbsp', 'Hosen Baked Beans 1.00 can', 'Gardenia, High Fiber Wheat Raisin Loaf 1.00 Slice', 'Chicken Nuggets or Sticks, Store Bought 3.00 nugget', 'Pancake, Plain or Buttermilk, Homemade 3.00 each - 4" diameter', 'Collagen Vit C Zinc 1.00  capsule', 'Beef Steak, Sirloin, No Visible Fat Eaten 100.00 g', 'White Rice, Steamed 1.00 cup', 'Quaker, Oaties Mini Oat Cookies, Honey Nuts 1.00 Package', 'Smart C 1.00 x 350.0 ml', 'Bacon, Pork 8.00 slice - 6" long', 'Chicken with Gravy 1.00 cup', 'Nescafe, Instant Coffee  1.00 tsp', 'Century Tuna, Flakes in Oil 2.00 serving', 'Chicken patty, frozen, cooked 4.00 patty', 'Hamburger Bun, White 2.00 medium - 3 1/2" diameter', 'Kalbe, Fitbar, Chocolate 1.00 bar', 'Sard

## Optimization

In [7]:
# Set variables
min_protein = 120
min_fiber = 40
max_energy = 1500

# Just read the case study at https://coin-or.github.io/pulp/CaseStudies/a_blending_problem.html. They explain it way better than I ever could.
prob = LpProblem("Meal Optimization", LpMinimize)
food_vars = LpVariable.dicts("Food", food_names, 0)
prob += (
    lpSum([price_dict[i] * food_vars[i] for i in food_names]),
    "Total Cost of Food daily",
)
prob += (
    lpSum([energy_dict[i] * food_vars[i] for i in food_names]) <= max_energy,
    "EnergyRequirement",
)
prob += (
    lpSum([fiber_dict[i] * food_vars[i] for i in food_names]) >= min_fiber,
    "FiberRequirement",
)
prob += (
    lpSum([protein_dict[i] * food_vars[i] for i in food_names]) >= min_protein,
    "ProteinRequirement",
)
prob.writeLP("MealOptimization.lp")
prob.solve()
print("Status:", LpStatus[prob.status])
for v in prob.variables():
    if v.varValue > 0:
        print(v.name, "=", v.varValue)
print("Total Cost of Food per day = ", value(prob.objective))




Status: Optimal
Food_Chicken_Breast,_Skin_Removed_Before_Cooking_100.00_g = 2.6869179
Food_Mung_Beans,_Cooked_from_Dried_100.00_g = 5.2631579
Total Cost of Food per day =  138.391596
