### Imports

In [1]:
import gurobipy as gp
import pandas as pd
import os

### Functions

In [2]:
def vprint_factory(verbose: bool = False):
    """
    Build a print-like callable whose output can be switched off.

    :verbose: when truthy, the built-in ``print`` is returned unchanged;
              otherwise a function that accepts any arguments and does nothing.
    :return: a callable with ``print``'s calling convention.
    """
    def _silent(*x, **y):
        # Swallow every positional / keyword argument; mirrors print's "returns None".
        return None

    return print if verbose else _silent

In [3]:
def extract(data, row, fact, maximize=True):
    """
    Read one nutrition value from `data` as a number.

    :data: table indexed positionally by `row` and by column label `fact`.
    :row: positional row number (looked up with ``iloc``).
    :fact: column label of the nutrient to read.
    :maximize: value used when the cell is missing: ``maximize * 1_000_000``,
               i.e. 1_000_000 when truthy and 0 when falsy.
    :return: the cell as a float once any leading / trailing 'm', 'g', '%' or '?'
             characters are removed, or the missing-value placeholder above.
    """
    cell = data.iloc[row][fact]
    cleaned = str(cell).strip("mg%?")  # strips unit / marker characters from both ends
    is_missing = cleaned == "nan" or not cleaned
    if is_missing:
        return maximize * 1_000_000
    return float(cleaned)

In [4]:
def classify_age(a: int, year=2020) -> str:
    """
    Map an age in years to the age-group label used by the dietary guidelines.

    :a: age in years; it is rounded to the nearest whole number before lookup.
    :year: guideline edition. 2020 selects the 2020 labels (youngest group "2-3");
           any other value selects the 2015 labels (youngest group "1-3").
    :return: the matching label, e.g. "19-30" or "51+".
    :raises ValueError: when the rounded age is not covered by the selected edition.

    The bounds are parsed from the labels of the *selected* edition, so an age of 1
    is rejected for 2020 instead of being reported as "2-3".
    """
    a = round(a, 0)
    age_strings_2015 = ["1-3", "4-8", "9-13", "14-18", "19-30", "31-50", "51+"]
    age_strings_2020 = ["2-3", "4-8", "9-13", "14-18", "19-30", "31-50", "51+"]
    age_strings = age_strings_2020 if year == 2020 else age_strings_2015
    for label in age_strings:
        if "-" in label:
            low, high = (int(bound) for bound in label.split("-"))
        else:
            # Open-ended group such as "51+": capped at 110 years.
            low, high = int(label.strip("+")), 110
        if low <= a <= high:
            return label
    raise ValueError(f"{a} is not a valid age in the {year} guidelines.")

### File Input

In [5]:
def load_ref_files(nutrition_name="Nutritional Facts - Categorized", guidelines_name="Dietary Guidelines", year=2020):
    """
    Read the two reference workbooks used by the notebook.

    :nutrition_name: file name (without extension) of the ``.xlsm`` nutrition workbook.
    :guidelines_name: file name (without extension) of the ``.xlsx`` guidelines workbook.
    :year: selects the sheet named "Dietary Guidelines <year>".
    :return: ``(df, guide)`` - the nutrition table indexed by its first column, and the
             guideline table indexed by its columns 1 and 2 with "Unnamed: 0" removed.
    """
    nutrition_path = nutrition_name + ".xlsm"
    df = pd.read_excel(nutrition_path, index_col=0)

    guidelines_path = guidelines_name + ".xlsx"
    guidelines_sheet = f"Dietary Guidelines {year}"
    # Row 1 holds the header; row 2 is skipped; columns 1 and 2 become a two-level index.
    guide_raw = pd.read_excel(guidelines_path,
                              sheet_name=guidelines_sheet,
                              header=1,
                              index_col=[1, 2],
                              skiprows=[2])
    guide = guide_raw.drop("Unnamed: 0", axis=1)
    return df, guide

### Requirement Selection

In [6]:
def guide_lookup(gender: str, age: int, guide: pd.DataFrame, columns=None, year: int = 2020):
    """
    Select one person's guideline row and express it in the nutrition dataset's column names.

    :gender: any string starting with 'm' or 'f' (case-insensitive), e.g. 'M', 'male', 'Female'.
    :age: age in years; converted to an age-group label by ``classify_age``.
    :guide: guideline table indexed by (gender, age group), as returned by ``load_ref_files``.
    :columns: optional list of output labels to keep (and their order). Empty / None keeps all.
    :year: guideline edition forwarded to ``classify_age``. Default: 2020
    :return: Series of requirements indexed by nutrition-dataset names
             ("Protein", "Sodium", "Sugars", "Calories From Fat", ...).
    :raises KeyError: when `gender` does not start with 'm' / 'f', or a label is missing.
    """
    ff_nutrition_to_guidelines = {"Protein": "Protein (g)",
                                  "Vitamin A %": "Vitamin A (mcg RAEd)",
                                  "Sodium": "Sodium (mg)",
                                  "Total Carbohydrates": "Carbohydrate (g)",
                                  "Dietary Fiber": "Fiber (g)",
                                  "Calories": "Calorie Level Assessed", }
    guidelines_to_ff_nutrition = {value: key for key, value in ff_nutrition_to_guidelines.items()}
    guideline_kcals_to_ff_nutrition = {# "Total lipid (% kcal)": "Total Fat",
                                       "Added Sugars (% kcal)": "Sugars",
                                       "Saturated Fatty Acids (% kcal)": "Saturated Fat",
                                       "Calorie Level Assessed": "Calories From Fat"}
    genders = {"m": "Male", "f": "Female"}
    gender = genders[gender[0].lower()]  # 'm' / 'M' / 'male' / 'Male' -> 'Male'

    # Selecting the appropriate row
    filtered = guide.loc[gender, classify_age(age, year)]

    # Handling kcal measurements: "% of kcal" limits are converted to an absolute amount as
    # percent / 100 * calorie level / divisor (divisors presumably kcal per gram - confirm).
    # For "Calories From Fat" the divisor is chosen so the result is 10% of the calorie level.
    cal_level = filtered["Calorie Level Assessed"]
    nutrient_cals = {"Total Fat": 9, "Saturated Fat": 9, "Sugars": 4, "Calories From Fat": cal_level / 10}
    kcal_entries = {}
    for kcal_nutrient, out_name in guideline_kcals_to_ff_nutrition.items():
        # Entries such as "<10" or "5-10" reduce to their upper number.
        percent = float(str(filtered[kcal_nutrient]).strip("<>").split("-")[-1])
        kcal_entries[out_name] = percent / 100 * cal_level / nutrient_cals[out_name]
    # Series.append no longer exists in pandas 2.x; a single concat adds all derived entries.
    filtered = pd.concat([filtered, pd.Series(kcal_entries)])

    # Keep only labels that map onto the nutrition dataset. The source "% kcal" labels are
    # discarded here, while "Calorie Level Assessed" survives and is renamed to "Calories".
    keep = set(guidelines_to_ff_nutrition) | set(guideline_kcals_to_ff_nutrition.values())
    filtered = filtered[filtered.index.isin(keep)]

    # Translate the remaining guideline labels into nutrition-dataset names so that the
    # result can be indexed with `columns`.
    filtered = filtered.rename(guidelines_to_ff_nutrition)
    return filtered[columns] if columns else filtered

### Model Building: Variables, Constraints, and Objective

In [97]:
def create_model(subset: pd.DataFrame, less_thans: list, guide: pd.DataFrame, costs: dict, filter_relaxations: set = None,
                 objective: str = "all", meals: int = 2, cat_limit: int = 0, min_cal_cutoff: int = 1, verbose: bool = False):
    """
    Generates the Gurobi model according to many available filters and options, discussed below

    :subset: menu items to consider, frequently broken down by restaurant
    :less_thans: list of nutrients in guide where the sum of food nutritions must be less than the guideline (such as sodium) rather than more (protein)
    :guide: dietary recommendations taken from the government's '20-'25 Dietary Guidelines For Americans.
            Iterated as (nutrient, requirement) pairs; callers in this notebook pass the Series returned by guide_lookup.
    :costs: dictionary of penalties associated with exceeding / falling short of the recommendations. Must include objective nutrients
    :filter_relaxations: recommendations in guide which may be relaxed. Never modified; a private copy is used. Default: None (no extra relaxations)
    :objective: which nutrient to rank by. Defaults to multiobjective ('all'), in which case every nutrient in costs is relaxed and penalized
    :meals: requires the solution to meet only (1 / meals) of each nutrient recommendation. Meals=1 & cat_limit=1 usually infeasible. Default: 2
    :cat_limit: requires no more than cat_limit of any one food type in the solution. Prevents 31 apple juice box solutions. Default: 0 (no limit)
    :min_cal_cutoff: when non-zero, omits foods from the solution with fewer calories than the cutoff. Default: 1
    :verbose: provides detailed constraining / solving progress updates. Disable for more concise output. Default: False
    :return: the un-optimized gurobipy Model; food variables are unnamed, relaxation variables are named "<nutrient>_rel"
    """
    vprint = vprint_factory(verbose)

    # Private copy: neither the caller's set nor a shared default object is mutated.
    relaxed = set(filter_relaxations) if filter_relaxations else set()
    if objective == "all":
        # Multi-objective mode penalizes the overage of every nutrient that has a cost,
        # so each of those nutrients needs a relaxation variable.
        relaxed.update(costs.keys())

    m = gp.Model()
    xis = [m.addVar(vtype=gp.GRB.CONTINUOUS) for _ in subset.index]  # GRB.BINARY / GRB.INTEGER, whether to include a food in the meal
    f_rel = {fact: m.addVar(name=f"{fact}_rel") if fact in relaxed else 0 for fact in guide.index}  # excess variable

    for fact, req in guide.items():  # Nutrition Requirements
        if objective != "all" and fact == objective:
            continue  # the single objective nutrient is minimized instead of constrained
        upper_bounded = fact in less_thans
        vprint(f"Constraining {fact}".ljust(35),
               f"{'<=' if upper_bounded else '>='} {round(req, 2)}".ljust(10),
               f"across {meals} meals")
        # Missing data counts as 1_000_000 for upper bounds and 0 for lower bounds (see extract),
        # so a food with unknown content can never help satisfy a constraint.
        total = gp.quicksum(x * extract(subset, r, fact, maximize=upper_bounded) for r, x in enumerate(xis))
        if upper_bounded:
            m.addConstr(total - f_rel[fact] <= req / meals)
        else:
            m.addConstr(total - f_rel[fact] >= req / meals)

    if min_cal_cutoff:
        for i in range(len(subset)):  # Excludes zero calorie (from fat) entries
            if extract(subset, i, "Calories From Fat", maximize=True) < min_cal_cutoff:
                m.addConstr(xis[i] == 0)  # If a food has fewer than the min_cal_cutoff, require 0 of it in the solution

    if cat_limit:
        vprint()
        categories = subset["Common Category"]
        for cat in pd.unique(categories):
            vprint(f"Constraining only {cat_limit} or fewer {str(cat).strip('s')} items.")
            # Positional lookup, consistent with extract(); does not require a 0..n-1 index on subset.
            m.addConstr(gp.quicksum(x for i, x in enumerate(xis) if categories.iloc[i] == cat) <= cat_limit)

    m.setParam("OutputFlag", verbose)
    m.ModelSense = gp.GRB.MINIMIZE
    # TODO: penalize caller-supplied filter_relaxations that have no entry in costs (currently a KeyError in 'all' mode).
    # NOTE: relaxation variables are subtracted in both constraint directions. With Gurobi's default
    # non-negative lower bound they therefore only loosen '<=' constraints; relaxing or penalizing a
    # '>=' nutrient is not supported by this convention. Changing it would mean adding the variable
    # for '>=' constraints and checking membership in less_thans when reading the solution.
    if objective == "all":
        m.setObjective(gp.quicksum(difference * costs[nutrient]
                                   for nutrient, difference in f_rel.items()
                                   if isinstance(difference, gp.Var)))
    else:
        m.setObjective(gp.quicksum(x * extract(subset, r, objective, maximize=False) for r, x in enumerate(xis)) / meals)
    return m

### Next Steps


### Status
#### Finished

* Understand why various alternate objectives aren't all feasible / infeasible together
* Finish single objective optimization (omitting actual meals, just assessing feasibility)
* Talk about appropriate Calorie / Sugar / Sodium Balancing
* Add ability to introspect any particular model to analyze food choices and relaxations
* Update guide to 2020-2025 recommendations
    * Add conversion dictionary to import function to allow various constraints from new dataset
* Constrain no more than 10% of the calories to come from fat

### Broken

### In Progress

* Simplify model: trim down to only constraints mentioned in paper
* Drop Vitamin A because so few foods have data

* 

### Next

* Work towards general metric capable of ranking restaurants against one another - do for each objective
* Write all optimal diets out to a spreadsheet
* Individual rankings (splitting multiobjective)

### Future

* Consider adding other relaxations
* Get list of nutrients by which we are actually constraining
* Simplify model to obtain more feasible solutions (ignore vitamins) (use protein, fiber, calories, iron, sugars, sodium, etc.) - potentially unnecessary given recent improvements
* Work with C.C. to display and analyze selected menus
* Multicriteria optimization so that the sum of all three overages is minimized, rather than just one plus the overages of the other two

### Individual Analysis

In [101]:
# Solve the model for a single restaurant and show which foods (and how much of each) were chosen.
# NOTE: this cell reads df, less_thans, filtered_guide and columns, which are defined in the
# "Bulk Run" cell further down - run that cell's setup first on a fresh kernel.
res = "McDonald's"

costs = {"Sugars": 1, "Sodium": 50, "Calories From Fat": 4}   # Note units: Sugars (g) vs Sodium (mg)
results = pd.DataFrame(columns=["Overage Penalty"] + list(costs.keys()), index=[res])
subset = df.loc[df["Restaurant"] == res].reset_index(drop=True)

verbose = False
model_args = {"subset": subset, "less_thans": less_thans, "guide": filtered_guide, "filter_relaxations": set(),
              "objective": "Calories From Fat", "costs": costs, "meals": 1, "cat_limit": False, "min_cal_cutoff": 1, "verbose": verbose, }
m = create_model(**model_args)
m.optimize()

if m.status == gp.GRB.OPTIMAL:
    print(f"The {res} model is feasible!")
    results.loc[res, "Overage Penalty"] = m.ObjVal
    # Branch on the objective this model was actually built with, not on a notebook-level variable.
    if model_args["objective"] == "all":
        results.loc[res, list(costs.keys())] = [m.getVarByName(name + "_rel").x for name in costs.keys()]
    # Food variables are the ones without the "_rel" suffix; they are in the same order as subset's rows.
    food_vars = [var for var in m.getVars() if "_rel" not in var.varName]
    choices = [var.x > 0 for var in food_vars]
    values = [var.x for var in food_vars if var.x > 0]
    food_choice = subset.loc[choices, ["Food"] + columns].assign(Amount=values)
    display(results)
    display(food_choice)
else:
    print(f"The {res} model is infeasible.\t\t<-----------!")
    # results.loc[res, ["Overage Penalty", "Sugars", "Sodium", "Calories From Fat"]] = "-"

The McDonald's model is feasible!


Unnamed: 0,Overage Penalty,Sugars,Sodium,Calories From Fat
McDonald's,448.046,,,


Unnamed: 0,Food,Protein,Total Carbohydrates,Dietary Fiber,Sodium,Sugars,Saturated Fat,Calories From Fat,Amount
92,McDonald's Fruit & Maple Oatmeal w/o Brown Sugar,6g,49g,5g,115mg,18g,1.5g,40,1.109137
238,McDonald's Premium Asian Salad w/o Chicken,7g,13g,5g,20mg,7g,0.5g,70,5.291878
245,McDonald's Premium Southwest Salad w/ Grilled ...,37g,27g,6g,1070mg,9g,4.5g,100,0.332487


### Bulk Run

In [68]:
# Build and solve one model per restaurant, collecting the objective value (or "-" when no
# optimal solution was found) into `results`, then write the table to an Excel workbook.
df, guide = load_ref_files()
age, gender = 24, "Male"
columns = ["Protein", "Total Carbohydrates", "Dietary Fiber", "Sodium",
           "Sugars", "Saturated Fat", "Calories From Fat"]
less_thans = ["Sodium", "Sugars", "Saturated Fat", "Calories From Fat"]  # These correspond to columns from Fast Food Nutrition dataset
filtered_guide = guide_lookup(gender, age, guide, columns)
objective = "Calories From Fat"

verbose = False                     # Solving, solution, and output solutions
out_name = "Separate"         # Result output filename

costs = {"Sugars": 1, "Sodium": 50, "Calories From Fat": 4}   # Note units: Sugars (g) vs Sodium (mg)
restaurants = pd.unique(df["Restaurant"])
# Result columns for multi-objective runs are derived from costs so the two cannot drift apart.
penalty_columns = ["Overage Penalty"] + list(costs.keys())
if objective == "all":
    results = pd.DataFrame(columns=penalty_columns, index=restaurants)
else:
    results = pd.DataFrame(columns=[objective], index=restaurants)
vprint = vprint_factory(verbose)

for res in restaurants:
    subset = df.loc[df["Restaurant"] == res].reset_index(drop=True)
    model_args = {"subset": subset, "less_thans": less_thans, "guide": filtered_guide, "filter_relaxations": set(),
                  "objective": objective, "costs": costs, "meals": 1, "cat_limit": False, "min_cal_cutoff": False, "verbose": verbose, }
    m = create_model(**model_args)
    m.optimize()

    if not verbose:
        print(res, end=", ")  # compact progress indicator
    if m.status == gp.GRB.OPTIMAL:
        vprint(f"The {res} model is feasible!")
        if objective == "all":
            results.loc[res, "Overage Penalty"] = m.ObjVal
            results.loc[res, list(costs.keys())] = [m.getVarByName(name + "_rel").x for name in costs.keys()]
        else:
            # Other nutrient requirements should be strictly enforced. No relaxations -> no penalty
            results.loc[res, objective] = m.ObjVal
    else:
        vprint(f"The {res} model is infeasible.\t\t<-----------!")
        if objective == "all":
            results.loc[res, penalty_columns] = "-"
        else:
            results.loc[res, objective] = "-"
# NOTE: overwrites any existing workbook with the same name.
results.to_excel(out_name + f"-{objective}.xlsx")
results

Arby's, Baskin-Robbins, Blimpie, Boston Market, Buffalo Wild Wings, Burger King, Carl's Jr, Chipotle, Culvers, Dairy Queen, Del Taco, Domino's Pizza, Dunkin Donuts, Five Guys, Godfather's Pizza, Hardee's, In-N-Out Burger, Jack in the Box, Jimmy Johns, KFC, Little Caesars, Long John Silver's, Olive Garden, Panda Express, Papa John's, Pizza Hut, Popeyes, Quiznos, Red Lobster, Smashburger, Sonic, Subway, Taco Bell, Taco John's, Wendy's, Whataburger, Zaxby's, Chick-fil-A, McDonald's, 

Unnamed: 0,Calories From Fat
Arby's,-
Baskin-Robbins,-
Blimpie,90.911
Boston Market,347.669
Buffalo Wild Wings,180.867
Burger King,301.552
Carl's Jr,-
Chipotle,0
Culvers,0
Dairy Queen,0
