###### An approach to determining correspondence between constraints and nutrients dataframes

In [1]:
import difflib

Algo:

1) Find constraint closest to each nutrient
2) Find nutrient closest to each constraint
3) Find which constraints have an identical match and which do not.
4) Rename either the relevant nutrient or constraint, or create a new nutrient column, so that an identical match exists.


Using difflib to find 'close' matches is too fiddly and so I'll instead use regex to find exact matches.

In [None]:
def find_closest_string(source_list, target_list, cutoff=0.6):
    """Map every string in ``source_list`` to its closest match in ``target_list``.

    Closeness is decided by ``difflib.get_close_matches``.

    Parameters
    ----------
    source_list : iterable of str
        Strings to look up; each becomes a key of the result.
    target_list : iterable of str
        Candidate strings to match against. Any iterable is accepted
        (list, pandas Index, generator, ...).
    cutoff : float, default 0.6
        Minimum similarity ratio in [0, 1] a candidate must reach to count
        as a match. The default equals difflib's own default, so existing
        two-argument calls behave as before.

    Returns
    -------
    dict
        ``{source_string: best_match_or_None}``; ``None`` marks sources for
        which no candidate reached ``cutoff``.
    """
    # Materialise once: a one-shot iterator would otherwise be exhausted by
    # the first lookup and every later source would map to None.
    candidates = list(target_list)

    closest_strings = {}
    for source_string in source_list:
        closest_match = difflib.get_close_matches(
            source_string, candidates, n=1, cutoff=cutoff
        )
        closest_strings[source_string] = closest_match[0] if closest_match else None
    return closest_strings

# For every column name of nv_df, the closest entry of all_constraints.index
# (None when difflib finds no sufficiently close match).
comp_nvdf_cnstr = find_closest_string(nv_df.columns, all_constraints.index)

In [188]:
# For each key of comp_nvdf_cnstr that is also a label of all_constraints.index,
# display the value stored under that key in comp_cnstr_nvdf.
# NOTE(review): comp_cnstr_nvdf is not assigned in any cell visible here -- presumably
# the reverse mapping (constraint -> closest nutrient); define it before this cell
# so the notebook survives Restart & Run All.
[comp_cnstr_nvdf[item] for item in comp_nvdf_cnstr if item in 
 all_constraints.index]

['protein',
 'calories',
 'saturated_fat',
 'cholesterol',
 'vitamin_b6',
 'vitamin_b12',
 'biotin',
 'choline',
 'folate',
 'pantothenic_acid',
 'vitamin_c',
 'vitamin_k',
 'calcium',
 'chloride',
 'chromium',
 'copper',
 'magnesium',
 'manganese',
 'molybdenum',
 'potassium',
 'selenium',
 'sodium',
 'histidine',
 'isoleucine',
 'leucine',
 'lysine',
 'methionine',
 'phenylalanine',
 'threonine',
 'tryptophan',
 'valine']

Does the constraint's name exist as a nutrient name?

```{python}
# Keep the constraint names whose find_exact_matches entry is falsy: `1 - flag`
# is truthy only where the flag is 0/False.
# NOTE(review): assumes find_exact_matches returns one 0/1 (or boolean) flag per
# constraint name -- it is not defined in this part of the notebook; confirm.
all_constraints.index.values[[bool(val) for val in 1- np.array(find_exact_matches(all_constraints.index.values, nv_df.columns.values))]]
```

###### From lpp_formulation_affine

In [None]:
# Display the index labels of `foods` as a sorted list.
sorted(foods.index.values)

In [None]:
# Third element of every entry returned by model.coefficients(), truncated to
# one value per row of `foods`.
servings = [entry[2] for entry in model.coefficients()][:foods.shape[0]]

In [None]:
# 109,835 calories under constraints -- this result requires investigation.
# Element-wise product of `servings` and the `calories` column of `foods`, summed
# with the builtin sum().
sum(servings*foods.calories)

In [None]:
# Positions 0..39 that are absent from `candidates`.
[idx for idx in range(40) if idx not in candidates]

In [None]:
# Constraint labels at the positions 0..39 that are absent from `candidates`.
constraints.index.values[[idx for idx in range(40) if idx not in candidates]]

In [None]:
# Restrict the constraints to everything except the macronutrients, which may
# have been putting a floor under total calories and limiting the optimization.

macros = [
    'protein',
    'fat_total',
    'saturated_fat',
    'carbohydrates',
]
constraints_micros_aa = constraints.loc[
    [label for label in constraints.index.values if label not in macros]
]

'copper', 'chloride', 'biotin', 'chromium', 'cysteine_methionine', 'histidine', 'leucine', 'lysine', 'methionine', 'phenylalanine_tyrosine', and 'threonine' are not solvable when all constraints are used in search.

In [None]:
# NOTE: despite the name, this list also contains minerals (calcium, copper, ...).
vitamins = ['choline', 'folate', 'niacin', 'pantothenic_acid', 'vitamin_b2',
       'vitamin_b1', 'vitamin_a', 'vitamin_b12', 'vitamin_b6', 'vitamin_c',
       'vitamin_d_mcg', 'vitamin_k', 'calcium', 'copper', 'iron', 'magnesium',
       'manganese', 'phosphorus', 'potassium', 'selenium', 'zinc']

# Add the lower and upper bound of the first five entries to `min_cals`.
# A bound that is NaN means "no limit" and is skipped.
for v in vitamins[0:5]:
    bounds = constraints.loc[v]
    # Position 0 holds the minimum, position 1 the maximum. `.iloc` keeps the
    # lookup explicitly positional: `Series[int]` on a label-indexed Series is
    # deprecated in pandas >= 2.1 and will become a label lookup.
    for position, sense, prefix in (
        (0, plp.LpConstraintGE, 'min'),
        (1, plp.LpConstraintLE, 'max'),
    ):
        bound = bounds.iloc[position]
        if np.isnan(bound):
            continue
        min_cals += LpConstraint(e=lpSum([foods.loc[food, v] * food_vars[food]
                                          for food in foods.index]),
                                 sense=sense,
                                 rhs=bound,
                                 name=f'{prefix}_{v}'
                                 )

In [None]:
# Solve the model, then display the value of its objective.
min_cals.solve()
min_cals.objective.value()

In [None]:
# Add the lower and upper bound of every nutrient in `constraints` to `min_cals`.
# The original loop header (`constraints.index[]`) was a syntax error and the
# intended subset cannot be recovered from the notebook, so the whole index is
# used. NOTE(review): narrow the iterable here if only a subset was meant.
for c in constraints.index:
    bounds = constraints.loc[c]
    # Position 0 holds the minimum, position 1 the maximum; `.iloc` keeps the
    # lookup explicitly positional (plain `Series[int]` is deprecated).
    for position, sense, prefix in (
        (0, plp.LpConstraintGE, 'min'),
        (1, plp.LpConstraintLE, 'max'),
    ):
        bound = bounds.iloc[position]
        constraint_name = f'{prefix}_{c}'
        # NaN means "no limit". Names already in the model (added by an earlier
        # cell or a re-run of this one) are skipped, because PuLP rejects
        # duplicate constraint names.
        if np.isnan(bound) or constraint_name in min_cals.constraints:
            continue
        min_cals += LpConstraint(e=lpSum([foods.loc[food, c] * food_vars[food]
                                          for food in foods.index]),
                                 sense=sense,
                                 rhs=bound,
                                 name=constraint_name
                                 )

In [None]:
# Protein plus the amino-acid constraints.
core_constraints = [
    'protein',
    'cysteine_methionine',
    'histidine',
    'leucine',
    'lysine',
    'methionine',
    'phenylalanine_tyrosine',
    'threonine',
    'valine',
]

In [None]:
# Display the row labels of `constraints`.
constraints.index

In [None]:
# Run the solver on `min_cals`; the cell displays whatever solve() returns.
min_cals.solve()

In [None]:
# Scratch cell: list the names exposed by `plp`. The trailing `;` suppresses the
# cell output, so nothing is displayed as written.
dir(plp);

In [None]:
# Constraints

# Scratch attribute lookup on `plp`.
# NOTE(review): looks like an unfinished name (e.g. LpConstraint); if `plp` has no
# attribute called `Lp` this cell raises AttributeError -- confirm or delete.
plp.Lp

In [None]:
# Scratch cell: display the lpSum object.
# NOTE(review): the other cells refer to the library as `plp`; this one needs the
# name `pulp` to be bound in the kernel as well -- confirm.
pulp.lpSum

In [None]:
# Protein-to-calorie ratio for every food, in the row order of `foods`.
# The original applied .sort_values() to the calories column only; pandas
# re-aligns both operands on their index when dividing, so that sort never
# ordered the ratio, and the trailing .copy() was redundant because the division
# already returns a new Series. Sorting is done where the ratio is consumed.
# Foods with 0 calories produce inf (or NaN for 0/0).
protein_calories = foods['protein'] / foods['calories']

In [None]:
# Display the protein/calorie ratios from largest to smallest.
protein_calories.sort_values(ascending=False)

In [None]:
# Take the 40 foods with the highest protein-to-calorie ratio, then keep only
# those with more than 50 calories.
protein_foods = (
    foods
    .loc[protein_calories.sort_values(ascending=False).iloc[:40].index, :]
    .query("calories > 50")
)

In [None]:
# Display a copy of `constraints` without the two listed rows; the result is not
# assigned, so `constraints` itself is left unchanged.
constraints.drop(['saturated_fat','cholesterol'])

In [None]:
# Every constraint label except saturated fat and cholesterol.
necessary_nutrients = [
    label
    for label in constraints.index
    if label not in ['saturated_fat', 'cholesterol']
]

In [None]:
# Labels of `constraints` other than saturated fat and cholesterol, as an array.
constraints.index[~constraints.index.isin(['saturated_fat','cholesterol'])].values

In [None]:
# For each necessary nutrient column, the index label of the food with the
# largest value in that column.
foods[necessary_nutrients].idxmax()

In [None]:
# One row per food that holds the maximum of at least one necessary nutrient.
# .unique() removes duplicates while keeping first-seen order. The original
# list(set(...)) yields an arbitrary order that, for string labels, can differ
# between kernel sessions (hash randomization), so the row order of max_foods
# was not reproducible.
max_foods = foods.loc[foods[necessary_nutrients].idxmax().unique(), :]

In [None]:

# Candidate foods and their per-serving nutrient values.
# Labels and nutrient columns now both come from `max_foods`. The original took
# the labels from `max_foods` but the columns from `protein_foods`, which either
# raises on a length mismatch or pairs food names with another food's nutrients.
# NOTE(review): if `protein_foods` was the intended candidate set, swap BOTH
# references below.
data_foods = {
    'Food': [food for food in max_foods.index],
}

data_foods.update({col: max_foods[col] for col in max_foods.columns})

df_foods = pd.DataFrame(data_foods)

# Nutrient bounds: one row per constraint with its minimum and maximum.
data_constraints = {
    'Nutrient': [nutrient for nutrient in constraints.index],
    'MinValue': [val for val in constraints['min']],
    'MaxValue': [val for val in constraints['max']],
}

df_constraints = pd.DataFrame(data_constraints)

# Create the LP minimization problem
problem = LpProblem("Nutritionally_Complete_Foods", LpMinimize)

# Decision variables: a non-negative whole number of servings per food
food_vars = LpVariable.dicts("Servings", df_foods['Food'], lowBound=0, cat='Integer')

# Objective: minimize the total number of servings
problem += lpSum(food_vars[food] for food in df_foods['Food'])

# Add constraints based on nutrient requirements
for nutrient in df_constraints['Nutrient']:
    min_value = df_constraints.loc[df_constraints['Nutrient'] == nutrient, 'MinValue'].values[0]
    max_value = df_constraints.loc[df_constraints['Nutrient'] == nutrient, 'MaxValue'].values[0]

    # Build the food -> amount table once per nutrient instead of scanning the
    # whole frame with a boolean mask for every single food (quadratic work).
    per_serving = dict(zip(df_foods['Food'], df_foods[nutrient]))

    # A NaN bound means the nutrient has no limit on that side.
    if not pd.isna(min_value):
        problem += lpSum(food_vars[food] * per_serving[food]
                         for food in df_foods['Food']) >= min_value
    if not pd.isna(max_value):
        problem += lpSum(food_vars[food] * per_serving[food]
                         for food in df_foods['Food']) <= max_value

# Solve the problem
problem.solve()

# Print the results
print("Status:", problem.status)

if problem.status == 1:  # 1 is PuLP's "Optimal" status code
    for food in df_foods['Food']:
        servings = food_vars[food].value()
        if servings > 0:
            print(f"{food}: {servings} servings")
else:
    print("No solution found.")

Next I will solve to minimize calories since minimizing for serving size has no solution.

In [None]:

# All foods and their per-serving nutrient values; the frame keeps the index of
# `foods` and gains a 'Food' column holding the same labels.
data_foods = {
    'Food': [food for food in foods.index],
}

data_foods.update({col: foods[col] for col in foods.columns})

df_foods = pd.DataFrame(data_foods)

# Nutrient bounds: one row per constraint with its minimum and maximum.
data_constraints = {
    'Nutrient': [nutrient for nutrient in constraints.index],
    'MinValue': [val for val in constraints['min']],
    'MaxValue': [val for val in constraints['max']],
}

df_constraints = pd.DataFrame(data_constraints)

# Create the LP minimization problem
problem = LpProblem("Nutritionally_Complete_Foods", LpMinimize)

# Decision variables: a non-negative, possibly fractional, number of servings per food
food_vars = LpVariable.dicts("Servings", df_foods['Food'], lowBound=0, cat='Continuous')

# Objective: minimize the total number of servings
problem += lpSum(food_vars[food] for food in df_foods['Food'])

# Add constraints based on nutrient requirements
for nutrient in df_constraints['Nutrient']:
    min_value = df_constraints.loc[df_constraints['Nutrient'] == nutrient, 'MinValue'].values[0]
    max_value = df_constraints.loc[df_constraints['Nutrient'] == nutrient, 'MaxValue'].values[0]

    # Build the food -> amount table once per nutrient instead of scanning the
    # whole frame with a boolean mask for every single food (quadratic work).
    per_serving = dict(zip(df_foods['Food'], df_foods[nutrient]))

    # A NaN bound means the nutrient has no limit on that side.
    if not pd.isna(min_value):
        problem += lpSum(food_vars[food] * per_serving[food]
                         for food in df_foods['Food']) >= min_value
    if not pd.isna(max_value):
        problem += lpSum(food_vars[food] * per_serving[food]
                         for food in df_foods['Food']) <= max_value

# Solve the problem
problem.solve()

# Print the results
print("Status:", problem.status)

if problem.status == 1:  # 1 is PuLP's "Optimal" status code
    for food in df_foods['Food']:
        servings = food_vars[food].value()
        if servings > 0:
            print(f"{food}: {servings} servings")
else:
    print("No solution found.")

Looking to minimize calories:

In [None]:

# All foods and their per-serving nutrient values; the frame keeps the index of
# `foods` and gains a 'Food' column holding the same labels.
data_foods = {
    'Food': [food for food in foods.index],
}

data_foods.update({col: foods[col] for col in foods.columns})

df_foods = pd.DataFrame(data_foods)

# Nutrient bounds: one row per constraint with its minimum and maximum.
data_constraints = {
    'Nutrient': [nutrient for nutrient in constraints.index],
    'MinValue': [val for val in constraints['min']],
    'MaxValue': [val for val in constraints['max']],
}

df_constraints = pd.DataFrame(data_constraints)

# Create the LP minimization problem
problem = LpProblem("Nutritionally_Complete_Foods", LpMinimize)

# Decision variables: a non-negative, possibly fractional, number of servings per food
food_vars = LpVariable.dicts("Servings", df_foods['Food'], lowBound=0, cat='Continuous')

# Objective: minimize total calories, i.e. each food's servings weighted by its
# 'calories' value (the earlier comment said "servings", which was wrong here).
ae_calories = LpAffineExpression([(food_vars[food],df_foods.loc[food,'calories']) for food in df_foods['Food']])
problem += ae_calories

# Add constraints based on nutrient requirements
for nutrient in df_constraints['Nutrient']:
    min_value = df_constraints.loc[df_constraints['Nutrient'] == nutrient, 'MinValue'].values[0]
    max_value = df_constraints.loc[df_constraints['Nutrient'] == nutrient, 'MaxValue'].values[0]

    # Build the food -> amount table once per nutrient instead of scanning the
    # whole frame with a boolean mask for every single food (quadratic work).
    per_serving = dict(zip(df_foods['Food'], df_foods[nutrient]))

    # A NaN bound means the nutrient has no limit on that side.
    if not pd.isna(min_value):
        problem += lpSum(food_vars[food] * per_serving[food]
                         for food in df_foods['Food']) >= min_value
    if not pd.isna(max_value):
        problem += lpSum(food_vars[food] * per_serving[food]
                         for food in df_foods['Food']) <= max_value

# Solve the problem
problem.solve()

# Print the results
print("Status:", problem.status)

if problem.status == 1:  # 1 is PuLP's "Optimal" status code
    for food in df_foods['Food']:
        servings = food_vars[food].value()
        if servings > 0:
            print(f"{food}: {servings} servings")
else:
    print("No solution found.")

In [None]:
# Remove the calories row so calories appear only in the objective.
# errors='ignore' plus re-assignment keeps the cell re-runnable: the original
# in-place drop raised KeyError on a second run, once the row was already gone.
constraints = constraints.drop(index=['calories'], errors='ignore')

# All foods and their per-serving nutrient values; the frame keeps the index of
# `foods` and gains a 'Food' column holding the same labels.
data_foods = {
    'Food': [food for food in foods.index],
}

data_foods.update({col: foods[col] for col in foods.columns})

df_foods = pd.DataFrame(data_foods)

# Nutrient bounds: one row per constraint with its minimum and maximum.
data_constraints = {
    'Nutrient': [nutrient for nutrient in constraints.index],
    'MinValue': [val for val in constraints['min']],
    'MaxValue': [val for val in constraints['max']],
}

df_constraints = pd.DataFrame(data_constraints)

# Create the LP minimization problem
problem = LpProblem("Nutritionally_Complete_Foods", LpMinimize)

# Decision variables: a non-negative, possibly fractional, number of servings per food
food_vars = LpVariable.dicts("Servings", df_foods['Food'], lowBound=0, cat='Continuous')

# Objective: minimize total calories, i.e. each food's servings weighted by its
# 'calories' value (the earlier comment said "servings", which was wrong here).
ae_calories = LpAffineExpression([(food_vars[food],df_foods.loc[food,'calories']) for food in df_foods['Food']])
problem += ae_calories

# Add constraints based on nutrient requirements
for nutrient in df_constraints['Nutrient']:
    min_value = df_constraints.loc[df_constraints['Nutrient'] == nutrient, 'MinValue'].values[0]
    max_value = df_constraints.loc[df_constraints['Nutrient'] == nutrient, 'MaxValue'].values[0]

    # Build the food -> amount table once per nutrient instead of scanning the
    # whole frame with a boolean mask for every single food (quadratic work).
    per_serving = dict(zip(df_foods['Food'], df_foods[nutrient]))

    # A NaN bound means the nutrient has no limit on that side.
    if not pd.isna(min_value):
        problem += lpSum(food_vars[food] * per_serving[food]
                         for food in df_foods['Food']) >= min_value
    if not pd.isna(max_value):
        problem += lpSum(food_vars[food] * per_serving[food]
                         for food in df_foods['Food']) <= max_value

# Solve the problem
problem.solve()

# Print the results
print("Status:", problem.status)

if problem.status == 1:  # 1 is PuLP's "Optimal" status code
    for food in df_foods['Food']:
        servings = food_vars[food].value()
        if servings > 0:
            print(f"{food}: {servings} servings")
else:
    print("No solution found.")