### Imports

In [3]:
import gurobipy as gp
import pandas as pd
import os

### Functions

In [4]:
def extract(data, row, fact, maximize=True):
    """
    Reads a single nutrition value out of a table and converts it to a number.

    :data: table of menu items; the row is selected by position, the column by label
    :row: positional index of the menu item
    :fact: column label of the nutrition fact to read
    :maximize: multiplier for the missing-value placeholder. A missing entry yields
               maximize * 1_000_000, i.e. 1,000,000 when True and 0 when False
    :return: the cell as a float, after the characters 'm', 'g', '%' and '?' have been
             stripped from both ends of its text
    """
    raw_value = data.iloc[row][fact]
    cleaned = str(raw_value).strip("mg%?")
    # A NaN cell stringifies to "nan"; a cell made only of unit characters strips down to "".
    is_missing = cleaned in ("nan", "")
    return maximize * 1_000_000 if is_missing else float(cleaned)

In [5]:
def classify_age(a: int, year=2020) -> str:
    """
    Maps an age in years onto the age-group label used by the dietary guidelines.

    The age is rounded to the nearest whole number first. The group boundaries are read
    from the labels of the requested edition, so an age that the edition does not cover
    (e.g. 1 year old in the 2020 edition, whose first group is "2-3") is rejected
    instead of being silently assigned to the nearest group.

    :a: age in years
    :year: guideline edition; 2020 selects the 2020 labels, any other value the 2015 labels
    :return: the matching label, e.g. "19-30" or "51+"
    :raises ValueError: when the rounded age falls outside every group of the edition
    """
    a = round(a, 0)
    age_strings_2015 = ["1-3", "4-8", "9-13", "14-18", "19-30", "31-50", "51+"]
    age_strings_2020 = ["2-3", "4-8", "9-13", "14-18", "19-30", "31-50", "51+"]
    labels = age_strings_2020 if year == 2020 else age_strings_2015
    for label in labels:
        if "-" in label:
            low, high = (int(bound) for bound in label.split("-"))
        else:
            # Open-ended group such as "51+": capped at 110 years.
            low, high = int(label.strip("+")), 110
        if low <= a <= high:
            return label
    raise ValueError(f"{a} is not a valid age in the {year} guidelines.")

### File Input

In [6]:
def load_ref_files(nutrition_name="Nutritional Facts - Categorized", guidelines_name="Dietary Guidelines", year=2020):
    """
    Reads the two reference workbooks from the current working directory.

    :nutrition_name: file name, without the ".xlsm" extension, of the nutrition workbook
    :guidelines_name: file name, without the ".xlsx" extension, of the guidelines workbook
    :year: selects the guidelines sheet named "Dietary Guidelines {year}"
    :return: (nutrition table, guidelines table). The guidelines table is indexed by its
             second and third spreadsheet columns and has the "Unnamed: 0" column removed
    """
    nutrition_path = nutrition_name + ".xlsm"
    guidelines_path = guidelines_name + ".xlsx"

    nutrition = pd.read_excel(nutrition_path, index_col=0)
    guidelines = pd.read_excel(
        guidelines_path,
        sheet_name=f"Dietary Guidelines {year}",
        header=1,
        skiprows=[2],
        index_col=[1, 2],
    )
    guidelines = guidelines.drop("Unnamed: 0", axis=1)
    return nutrition, guidelines

### Requirement Selection

In [142]:
def guide_lookup(gender: str, age: int, guide: pd.DataFrame, columns=[]):
    """
    Selects the guideline row for one person and converts its "% kcal" limits into
    absolute amounts named after the nutrition-table columns.

    :gender: any string starting with 'm' or 'f' (case-insensitive), e.g. "Male", "f"
    :age: age in years; mapped to an age-group label by classify_age()
    :guide: guideline table indexed by (gender, age group)
    :columns: labels to return, in that order. When empty, every retained label is returned
    :return: Series holding the guideline values that have a nutrition-table counterpart,
             plus the derived "Sugars", "Saturated Fat" and "Calories From Fat" entries
    :raises KeyError: when gender starts with neither 'm' nor 'f'
    """
    ff_nutrition_to_guidelines = {"Protein": "Protein (g)",
                                  "Vitamin A %": "Vitamin A (mcg RAEd)",
                                  "Sodium": "Sodium (mg)",
                                  "Total Carbohydrates": "Carbohydrate (g)",
                                  "Dietary Fiber": "Fiber (g)",
                                  "Calories": "Calorie Level Assessed", }
    guidelines_to_ff_nutrition = {value: key for key, value in ff_nutrition_to_guidelines.items()}
    guideline_kcals_to_ff_nutrition = {# "Total lipid (% kcal)": "Total Fat",
                                       "Added Sugars (% kcal)": "Sugars",
                                       "Saturated Fatty Acids (% kcal)": "Saturated Fat",
                                       "Calorie Level Assessed": "Calories From Fat"}
    genders = {"m": "Male", "f": "Female"}
    gender = genders[gender[0].lower()] # 'm' / 'M' / 'male' / 'Male' -> 'Male'
    # Selecting the appropriate Row
    filtered = guide.loc[gender, classify_age(age)]
    # Handling kcal measurements
    cal_level = filtered["Calorie Level Assessed"]
    # Divisor applied to (percentage / 100 * cal_level) for each derived entry.
    # "Calories From Fat" reads the calorie level itself as its "percentage", so dividing
    # by cal_level / 10 leaves 10% of the calorie level (Cals from fat <= 10% cal_level).
    nutrient_cals = {"Total Fat": 9, "Saturated Fat": 9, "Sugars": 4, "Calories From Fat": cal_level / 10}
    converted = {}
    for kcal_nutrient, out_name in guideline_kcals_to_ff_nutrition.items():
        print(kcal_nutrient, "-", out_name)
        # Entries look like "<10" or "20-35"; the upper bound is the number that is used.
        percentage = float(str(filtered[kcal_nutrient]).strip("<>").split("-")[-1])
        converted[out_name] = percentage / 100 * cal_level / nutrient_cals[out_name]
    # Series.append() was removed in pandas 2.0; a single concat adds all derived entries.
    filtered = pd.concat([filtered, pd.Series(converted)])
    # Keep only labels that map onto a nutrition-table column. This intentionally keeps
    # "Calorie Level Assessed", which is the guideline counterpart of "Calories".
    keep = set(guidelines_to_ff_nutrition) | set(guideline_kcals_to_ff_nutrition.values())
    filtered = filtered.loc[[label in keep for label in filtered.index]]
    return filtered[columns] if columns else filtered

### Model Building: Variables, Constraints, and Objective

In [8]:
def create_model(subset: pd.DataFrame, filters: dict, less_thans: list, guide: pd.DataFrame,
                 objective: str = "Calories From Fat", min_obj: bool = True, min_cal_cutoff: int = 1,
                 real_meal: bool = False, cat_limit: bool = False, meals: int = 2, verbose: bool = True,
                 filter_relaxations: list = []):
    """
    Builds (but does not solve) the Gurobi diet model for one set of menu items.

    One continuous variable is created per row of subset, in row order, followed by one
    relaxation variable named "<fact>_rel" for every fact listed in filter_relaxations.

    :subset: menu items to consider, frequently broken down by restaurant
    :filters: maps each nutrition column of subset to the key of its target in guide
    :less_thans: facts whose total must stay below the target (sodium) rather than reach it (protein).
                 Only these upper-bound constraints receive the relaxation variable
    :guide: guideline targets, looked up as guide[filters[fact]] and converted with float()
    :objective: nutrition column whose total is optimized
    :min_obj: whether the objective is [True] minimized (sodium, sugar, calories, etc.) or [False] maximized
    :min_cal_cutoff: with real_meal, foods whose "Calories From Fat" is at or below this value are forced to zero
    :real_meal: enforces the cutoff above. Primarily to omit condiments with 'free' nutrition
    :cat_limit: when truthy, the largest total quantity allowed from any one "Common Category"
    :meals: each target is divided by this number, so one solution covers 1 / meals of the daily amounts
    :verbose: prints every constraint as it is added and enables Gurobi's own output
    :filter_relaxations: facts whose requirement may be exceeded at a cost added to the objective
    :return: the populated gurobipy model
    """
    if verbose:
        vprint = print
    else:
        vprint = lambda *x, **y: None
    m = gp.Model()
    xis = [m.addVar(vtype=gp.GRB.CONTINUOUS) for _ in subset.index]  # GRB.BINARY / GRB.INTEGER, whether to include a food in the meal
    f_rel = {fact: m.addVar(name=f"{fact}_rel") if fact in filter_relaxations else 0 for fact in filters}  # excess variable

    for fact, req in filters.items():  # Nutrition Requirements
        target = guide[req]
        upper_bound = fact in less_thans
        sign = "<=" if upper_bound else ">="
        vprint(f"Constraining {fact}".ljust(35), f"{sign} {round(target, 2)}".ljust(10), f"across {meals} meals")
        # Missing nutrition values count as 0 here (maximize=False).
        intake = sum((x * extract(subset, r, fact, maximize=False) for r, x in enumerate(xis)))
        if upper_bound:
            m.addConstr(intake - f_rel[fact] <= float(target) / meals)
        else:
            m.addConstr(intake >= float(target) / meals)

    if real_meal:
        for i in range(len(subset)):  # Excludes zero calorie (from fat) entries
            if extract(subset, i, "Calories From Fat", maximize=True) <= min_cal_cutoff:
                m.addConstr(xis[i] <= 0)
    if cat_limit:
        vprint()
        # xis is ordered by row position, so categories must be read by position as well;
        # label-based lookup breaks as soon as subset keeps the index of its parent table.
        categories = list(subset["Common Category"])
        for cat in pd.unique(subset["Common Category"].dropna()):
            vprint(f"Constraining only {cat_limit} or fewer {cat.strip('s')} items.")
            m.addConstr(sum((x for x, item_cat in zip(xis, categories) if item_cat == cat)) <= cat_limit)

    m.setParam("OutputFlag", int(verbose))
    m.ModelSense = gp.GRB.MINIMIZE if min_obj else gp.GRB.MAXIMIZE  # Less concise, more clear than '2 * min_obj - 1'
    costs = {"Sugars": 1, "Sodium": 50, "Calories From Fat": 4}  # Note units: Sugars (g) vs Sodium (mg)
    overage_cost = [f_rel[fact] * costs[fact] if fact in costs else f_rel[fact] for fact in filters]
    # Missing objective values count as 1,000,000 here (maximize=True).
    m.setObjective(sum((x * extract(subset, r, objective, maximize=True) for r, x in enumerate(xis))) + sum(overage_cost))
    return m

### Meal Output

In [9]:
def display_details(m, model_args, subset):
    """
    Prints the relaxations used by a solved model and displays the foods it selected.

    Food rows are picked by variable position, which relies on create_model() adding one
    food variable per row of subset before any relaxation ("..._rel") variable.

    :m: solved model; every variable must expose its value as .x and its name as .VarName
    :model_args: arguments the model was built with; only 'meals' is read
    :subset: menu items the model was built from
    """
    solved_vars = m.getVars()
    best_choices = [i for i, x in enumerate(solved_vars) if x.x > 0 and "rel" not in x.VarName]
    # Relaxation variables with a positive value, i.e. requirements that were exceeded.
    overages = [x for x in solved_vars if x.x > 0 and "rel" in x.VarName]
    selection = subset.iloc[best_choices]
    print(f"The following foods will satisfy your requirements across {model_args['meals']} meals, subject to these overages:", overages)
    display(selection)

### Solving

In [10]:
def solve_models(filter_relaxations: list, objective: str, feasibility: bool=True, full_run: bool=False, verbose: bool=True):
    """
    Builds and solves one model per restaurant and sorts the restaurants by outcome.

    Reads the notebook-level names df and guide, and calls filter_subset(),
    get_requirements(), create_model() and display_details().

    :filter_relaxations: which constraints may be relaxed. Require an associated cost, set in create_model()
    :objective: which nutrient to rank by
    :feasibility: retry with looser settings when the strict model is not solved; otherwise skip the restaurant
    :full_run: suppress interactive prompts and run the full dataset
    :verbose: display operation messages
    :return: dict with the keys "Feasible", "Relaxed", "Weakened" and "Infeasible", each a set of restaurant names
    """
    def vprint(*args, **kwargs):
        if verbose:
            print(*args, **kwargs)

    solved = 2  # model status value this function treats as "a solution was found"
    feasible, rel_feasible, weak_feasible, infeasible = set(), set(), set(), set()

    for res in pd.unique(df["Restaurant"]):
        vprint()
        subset = filter_subset(df, res)
        filters, less_thans = get_requirements(subset, guide)
        model_args = dict(subset=subset, filters=filters, less_thans=less_thans,
                          objective=objective, min_obj=True, guide=guide,
                          verbose=False, filter_relaxations=filter_relaxations, meals=2,
                          cat_limit=200, real_meal=False)
        m = create_model(**model_args)
        m.optimize()

        if m.status != solved:
            vprint(f"The strict {res} model is infeasible.")
            if not feasibility:
                continue
            if full_run:
                verbose_retry = "R"
            else:
                verbose_retry = input(f"""'S'kip {res};
                                              'R'etry with loosened constraints (default);
                                              'V'erbose retry; [S/R/V] """).upper()
            if verbose_retry == "S":
                continue
            # Same arguments as the strict attempt apart from the three overrides below.
            weak_model_args = dict(model_args, min_cal_cutoff=-1,
                                   verbose=verbose_retry == "V", cat_limit=10)
            m = create_model(**weak_model_args)
            m.optimize()
            if m.status == solved:
                weak_feasible.add(res)

        if m.status != solved:
            vprint(f"The {res} model is still infeasible. Skipping.")
            infeasible.add(res)
        else:
            overages = [var for var in m.getVars() if var.x > 0 and "rel" in var.VarName]
            if sum([v.x for v in overages]) == 0:
                vprint(f"The {res} model is feasible:", "\t" * 5, "<" + "-" * 10)
                bucket = feasible
            else:
                vprint(f"The {res} model is feasible with the following relaxations:\t\t", overages)
                bucket = rel_feasible
            # Restaurants solved only by the loosened retry are reported under "Weakened" alone.
            if res not in weak_feasible:
                bucket.add(res)

        if full_run:
            directions = "S"
        else:
            directions = input(f"'S'olve next restaurant (default); 'E'xit this loop; 'D'etails about current solution; [S/E/D] ").upper()
        choice = directions[:1]
        if choice in ("", "S"):
            continue
        if choice == "D":
            if m.status == solved:
                display_details(m, model_args, subset)
            else:
                vprint("The model is infeasible - no details are available")
        elif choice == "E":
            break

    return {"Feasible": feasible, "Relaxed": rel_feasible, "Weakened": weak_feasible, "Infeasible": infeasible}

### Partial Evaluation

In [None]:
# Rank every restaurant once per objective and record the outcome group in `results`.
# NOTE(review): relies on `verbose`, `results`, `out_name`, `attempt_feasibility` and
# `full_run` already existing in the kernel; none of them is assigned in this cell.
for objective in ["Sugars", "Sodium", "Calories From Fat"]:
    # Every candidate nutrient except the objective itself may be relaxed. The list is
    # built before the progress message so the message never reads an undefined or stale value.
    filter_relaxations = [fact for fact in ["Sugars", "Sodium", "Calories From Fat"] if fact != objective]
    if verbose:
        print(f"Ranking by {objective} and relaxing by {filter_relaxations}:")

    groups = solve_models(filter_relaxations, objective,
                          feasibility = attempt_feasibility,
                          full_run = full_run, verbose = verbose)
    for t, restaurants in groups.items():
        for restaurant in restaurants:
            results.loc[restaurant, objective] = t

    if verbose:
        print(f"\n\nRanking by {objective} and relaxing by {filter_relaxations}:")
        for t, res in groups.items():
            print(t, "\t:\t", res)

if verbose:
    print(f"\n\nWriting results to {out_name}.xlsx...")
# An existing workbook is never overwritten; say so instead of silently reporting success.
if not os.path.exists(f"./{out_name}.xlsx"):
    results.to_excel(out_name + ".xlsx")
elif verbose:
    print(f"{out_name}.xlsx already exists; leaving it untouched.")
if verbose:
    print("Done!")

### Next Steps


### Status
#### Finished

* Understand why various alternate objectives aren't all feasible / infeasible together
* Finish single objective optimization (omitting actual meals, just assessing feasibility)
* Talk about appropriate Calorie / Sugar / Sodium Balancing
* Add ability to introspect any particular model to analyze food choices and relaxations
* Update guide to 2020-2025 recommendations
    * Add conversion dictionary to import function to allow various constraints from new dataset
* Constrain no more than 10% of the calories to come from fat

### Broken

### In Progress

* Simplify model: trim down to only constraints mentioned in paper
* Drop Vitamin A because so few foods have data

* 

### Next

* Work towards general metric capable of ranking restaurants against one another - do for each objective
* Write all optimal diets out to a spreadsheet

### Future

* Consider adding other relaxations
* Get list of nutrients by which we are actually constraining
* Simplify model to obtain more feasible solutions (ignore vitamins) (use protein, fiber, calories, iron, sugars, sodium, etc.) - potentially unnecessary given recent improvements
* Work with C.C. to display and analyze selected menus
* Multicriteria optimization so that the sum of all three overages is minimized, rather than just one objective plus the overages of the other two

In [154]:
# Single-restaurant experiment: load the reference tables, derive the guideline targets
# for one person, then build and solve one model for the restaurant named in `res`.
#
# FIXME(review): this cell cannot compile as written. Everything from `if m.status != 2:`
# down to the `return` mirrors the loop body of solve_models(), so its `continue`,
# `break` and `return` statements sit outside any loop or function.
# NOTE(review): `filters`, `filter_relaxations`, `objective`, `vprint`, `feasibility`,
# `full_run` and the feasible / rel_feasible / weak_feasible / infeasible sets are read
# below but never assigned in this cell - confirm where they are meant to come from.
df, guide = load_ref_files()
age, gender = 24, "Male"
# Guideline entries to keep, in this order (passed to guide_lookup as `columns`).
columns = ["Protein (g)", "Carbohydrate (g)", "Fiber (g)", "Sodium (mg)",
           "Sugars", "Saturated Fat", "Calories From Fat"]
# NOTE(review): create_model() tests `fact in less_thans` against the keys of `filters`;
# confirm these names match those keys.
less_thans = ["Sodium (mg)", "Sugars", "Saturated Fat", "Calories From Fat"]
filtered_guide = guide_lookup(gender, age, guide, columns)

verbose = True                   # Solving, solution, and output solutions
out_name = "NewApproach"         # Result output filename

# One row per restaurant; nothing in this cell writes into it before it is saved.
results = pd.DataFrame(columns=["Objective"], index=pd.unique(df["Restaurant"]))

res = "Arby's"
subset = df.loc[df["Restaurant"] == res]

# Strict attempt: a single meal must cover the full targets, with no category limit.
model_args = {"subset": subset, "filters": filters, "less_thans": less_thans,
              "guide": filtered_guide, "filter_relaxations": filter_relaxations,
              "meals": 1, "cat_limit": False, "real_meal": False, "verbose": True,}
m = create_model(**model_args)
m.optimize()
if m.status != 2:
    vprint(f"The strict {res} model is infeasible.")
    if feasibility:
        if full_run:
            verbose_retry = "R"
        else:
            verbose_retry = input(f"""'S'kip {res};
                                      'R'etry with loosened constraints (default);
                                      'V'erbose retry; [S/R/V] """).upper()
    else:
        continue
    if verbose_retry == "S":
        continue
    # NOTE(review): the retry passes the full `guide` table, whereas the strict attempt
    # above passes `filtered_guide` - confirm which one is intended.
    weak_model_args = {"subset": subset, "filters": filters, "less_thans": less_thans,
                       "objective": objective, "min_obj": True, "min_cal_cutoff": -1,
                       "guide": guide, "verbose": verbose_retry == "V",
                       "filter_relaxations": filter_relaxations, "meals": 2,
                       "real_meal": False, "cat_limit": 10}
    m = create_model(**weak_model_args)
    m.optimize()
    if m.status == 2:
        weak_feasible.add(res)
if m.status == 2:
#         objval = sum([x.x * subset.loc[:, "Calories From Fat"].iloc[i] if "rel" not in x.VarName else 0 for i, x in enumerate(m.getVars())])
    # Relaxation variables ("..._rel") that ended up with a positive value.
    overages = [x for i, x in enumerate(m.getVars()) if x.x > 0 and "rel" in x.VarName]
    if sum([v.x for v in overages]) == 0:
        vprint(f"The {res} model is feasible:", "\t" * 5, "<" + "-" * 10)
        if res not in weak_feasible:
            feasible.add(res)
    else:
        vprint(f"The {res} model is feasible with the following relaxations:\t\t", overages)
        if res not in weak_feasible:
            rel_feasible.add(res)
else:
    vprint(f"The {res} model is still infeasible. Skipping.")
    infeasible.add(res)
if full_run:
    directions = "S"
else:
    directions = input(f"'S'olve next restaurant (default); 'E'xit this loop; 'D'etails about current solution; [S/E/D] ").upper()
if len(directions) == 0 or directions[0] == "S":
    continue
elif directions[0] == "D":
    if m.status == 2:
        display_details(m, model_args, subset)
    else:
        vprint("The model is infeasible - no details are available")
elif directions[0] == "E":
    break
# print("Return reached...")
return {"Feasible": feasible, "Relaxed": rel_feasible, "Weakened": weak_feasible, "Infeasible": infeasible}

# Save the results table, but never overwrite an existing workbook.
if verbose:
    print(f"\n\nWriting results to {out_name}.xlsx...")
if not os.path.exists(f"./{out_name}.xlsx"):
    results.to_excel(out_name + ".xlsx")
if verbose:
    print("Done!")

Added Sugars (% kcal) - Sugars
Saturated Fatty Acids (% kcal) - Saturated Fat
Calorie Level Assessed - Calories From Fat


NameError: name 'get_requirements' is not defined

In [52]:
# Show the guideline table for inspection (`guide` is the second value returned by load_ref_files()).
guide

Unnamed: 0_level_0,Unnamed: 1_level_0,Calorie Level Assessed,Protein (% kcal),Protein (g),Carbohydrate (% kcal),Carbohydrate (g),Fiber (g),Added Sugars (% kcal),Total lipid (% kcal),Saturated Fatty Acids (% kcal),18:2 Linoleic acid (g),...,Vitamin D (IUd),Vitamin C (mg),Thiamin (mg),Riboflavin (mg),Niacin (mg),Vitamin B-6 (mg),Vitamin B-12 (mcg),Choline (mg),Vitamin K (mcg),Folate (mcg DFEd)
Gender,Age,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Female,2-3,1000,5-20,13,45-65,130,14,<10,30-40,<10,7,...,600,15,0.5,0.5,6,0.5,0.9,200,30,150
Female,4-8,1200,10-30,19,45-65,130,17,<10,25-35,<10,10,...,600,25,0.6,0.6,8,0.6,1.2,250,55,200
Female,9-13,1600,10-30,34,45-65,130,22,<10,25-35,<10,10,...,600,45,0.9,0.9,12,1.0,1.8,375,60,300
Female,14-18,1800,10-30,46,45-65,130,25,<10,25-35,<10,11,...,600,65,1.0,1.0,14,1.2,2.4,400,75,400
Female,19-30,2000,10-35,46,45-65,130,28,<10,20-35,<10,12,...,600,75,1.1,1.1,14,1.3,2.4,425,90,400
Female,31-50,1800,10-35,46,45-65,130,25,<10,20-35,<10,12,...,600,75,1.1,1.1,14,1.3,2.4,425,90,400
Female,51+,1600,10-35,46,45-65,130,22,<10,20-35,<10,11,...,600c,75,1.1,1.1,14,1.5,2.4,425,90,400
Male,2-3,1000,5-20,13,45-65,130,14,<10,30-40,<10,7,...,600,15,0.5,0.5,6,0.5,0.9,200,30,150
Male,4-8,1400,10-30,19,45-65,130,20,<10,25-35,<10,10,...,600,25,0.6,0.6,8,0.6,1.2,250,55,200
Male,9-13,1800,10-30,34,45-65,130,25,<10,25-35,<10,12,...,600,45,0.9,0.9,12,1.0,1.8,375,60,300


Added Sugars (% kcal) - Sugars
Saturated Fatty Acids (% kcal) - Saturated Fat
Calorie Level Assessed - Calories From Fat


In [153]:
# Show the per-person targets produced by guide_lookup() for the selected columns.
filtered_guide

Protein (g)               56
Carbohydrate (g)         130
Fiber (g)                 34
Sodium (mg)             2300
Sugars                    60
Saturated Fat        26.6667
Calories From Fat        240
dtype: object