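- Calculate randomized base ratios according to https://www.omnicalculator.com/health/meal-calorie. Randomize every meal except the last one (the first 2 meals of a 3-meal day, or the first 3 meals of a 4-meal day); the last meal takes whatever share remains.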
- Calculate the remaining nutrition based on already chosen meals.
- Re-normalize the remaining ratios.
- Train the model
- Output the predictions. Randomize the top 3 choices and output one of them.

# Setup

In [23]:
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
import io
import os
import boto3
import csv
import json
import random

# Load the dataset from S3 (Wait to solve problems on AWS first)

In [7]:
# Pull the AWS credentials and region from the process environment.
# A variable that is not set raises KeyError here.
AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION = (
    os.environ[variable]
    for variable in (
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        "AWS_DEFAULT_REGION",
    )
)

In [8]:
# S3 client built from the credentials read above, plus the location
# (bucket and key) of the recipe dataset.
s3_client = boto3.client(
    "s3",
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    region_name=AWS_DEFAULT_REGION,
)
bucket_name, object_key = "toeat-mlbucket", "recipes.csv"

In [9]:
# Fetch the CSV object from S3 and parse it from memory; nothing is written
# to the local disk. "title" and "mealType" are forced to be read as strings.
csv_obj = s3_client.get_object(Key=object_key, Bucket=bucket_name)
body = csv_obj["Body"]
csv_string = str(body.read(), encoding="utf-8")
df = pd.read_csv(
    io.StringIO(csv_string),
    dtype=dict.fromkeys(("title", "mealType"), str),
)

In [10]:
df.head()

Unnamed: 0,id,title,nutriScore,mealType,kcal,sodium,sugars,carbs,protein,fat,saturates,fibre
0,-7350875131303561697,Tuna Pasta Salad,81.82,Breakfast,486.67,0.207,5.0,54.67,24.67,16.0,1.67,3.33
1,-3720415708234728595,Air Fryer Hot Dogs,74.55,Lunch,222.75,0.50225,2.5,25.5,8.75,9.0,2.75,1.5
2,4730887800733880567,Steak and potato,80.0,Dinner,710.0,0.38,3.0,49.0,50.0,31.0,13.0,6.0
3,-8968252886510129462,Crocket,74.55,Snack,242.0,0.055,35.0,38.0,15.0,0.0,0.0,3.0
4,-5195237535349084959,Broccoli Cheese Soup,70.91,Lunch,141.0,0.21733,1.75,9.33,2.5,9.92,6.08,0.5


In [170]:
list(df.loc[:, "kcal":].columns)

['kcal', 'sodium', 'sugars', 'carbs', 'protein', 'fat', 'saturates', 'fibre']

# Load the dataset locally

In [3]:
# Local alternative to the S3 load: parse recipes.csv from the working
# directory, reading "title" and "mealType" as strings, then preview it.
df = pd.read_csv(
    "recipes.csv",
    dtype=dict.fromkeys(("title", "mealType"), str),
)
df.head()

Unnamed: 0,id,title,nutriScore,mealType,kcal,sodium,sugars,carbs,protein,fat,saturates,fibre
0,-7350875131303561697,Tuna Pasta Salad,81.82,Breakfast,486.67,0.207,5.0,54.67,24.67,16.0,1.67,3.33
1,-3720415708234728595,Air Fryer Hot Dogs,74.55,Lunch,222.75,0.50225,2.5,25.5,8.75,9.0,2.75,1.5
2,4730887800733880567,Steak and potato,80.0,Dinner,710.0,0.38,3.0,49.0,50.0,31.0,13.0,6.0
3,-8968252886510129462,Crocket,74.55,Snack,242.0,0.055,35.0,38.0,15.0,0.0,0.0,3.0
4,-5195237535349084959,Broccoli Cheese Soup,70.91,Lunch,141.0,0.21733,1.75,9.33,2.5,9.92,6.08,0.5


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22 entries, 0 to 21
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   id          22 non-null     int64  
 1   title       22 non-null     object 
 2   nutriScore  22 non-null     float64
 3   mealType    22 non-null     object 
 4   kcal        22 non-null     float64
 5   sodium      22 non-null     float64
 6   sugars      22 non-null     float64
 7   carbs       22 non-null     float64
 8   protein     22 non-null     float64
 9   fat         22 non-null     float64
 10  saturates   22 non-null     float64
 11  fibre       22 non-null     float64
dtypes: float64(9), int64(1), object(2)
memory usage: 2.2+ KB


In [5]:
len(df.columns)

12

# Train models

In [155]:
# Split the recipes by meal type.
# breakfast_df keeps every column (id, title, ...); the other three keep only
# the nutrient columns from "kcal" onward.
# Fix: snack_df and dinner_df were previously filtered on each other's label.
breakfast_df = df.loc[df["mealType"].str.contains("Breakfast"), :]
lunch_df = df.loc[df["mealType"].str.contains("Lunch"), "kcal":]
snack_df = df.loc[df["mealType"].str.contains("Snack"), "kcal":]
dinner_df = df.loc[df["mealType"].str.contains("Dinner"), "kcal":]

In [14]:
# Nutrient matrices (columns "kcal" onward) per meal type, as NumPy arrays.
# Fix: snack_arr and dinner_arr were previously filtered on each other's label.
breakfast_arr = df.loc[df["mealType"].str.contains("Breakfast"), "kcal":].to_numpy()
lunch_arr = df.loc[df["mealType"].str.contains("Lunch"), "kcal":].to_numpy()
snack_arr = df.loc[df["mealType"].str.contains("Snack"), "kcal":].to_numpy()
dinner_arr = df.loc[df["mealType"].str.contains("Dinner"), "kcal":].to_numpy()

In [162]:
dict(breakfast_df.iloc[0,:])

{'id': -7350875131303561697,
 'title': 'Tuna Pasta Salad',
 'nutriScore': 81.82,
 'mealType': 'Breakfast',
 'kcal': 486.67,
 'sodium': 0.207,
 'sugars': 5.0,
 'carbs': 54.67,
 'protein': 24.67,
 'fat': 16.0,
 'saturates': 1.67,
 'fibre': 3.33}

In [16]:
# Fit one 3-nearest-neighbour index per meal type on its nutrient matrix.
breakfast_neigh, lunch_neigh, snack_neigh, dinner_neigh = [
    NearestNeighbors(n_neighbors=3).fit(nutrient_matrix)
    for nutrient_matrix in (breakfast_arr, lunch_arr, snack_arr, dinner_arr)
]

# Load sample request locally

In [17]:
# Read the example request from disk and list its top-level keys.
with open("sample_request.json", "r") as request_file:
    sample_request = json.loads(request_file.read())
sample_request.keys()

dict_keys(['x-access-token', 'suggestNutriIntake', 'numMeals', 'Breakfast', 'Lunch', 'Dinner', 'Snack'])

In [18]:
sample_request["suggestNutriIntake"]

{'kcal': 2816,
 'sodium': 2300,
 'sugars': 70,
 'carbs': 420,
 'protein': 109,
 'fat': 78,
 'saturates': 31,
 'fibre': 39}

# Calculate randomized ratio

In [123]:
# Number of meals per day asked for by the sample request.
num_meals = sample_request["numMeals"]

# Allowed (min, max) share of the daily intake for each meal, keyed by the
# number of meals per day. Positions follow the order of the meals in a day.
org_ratios = {
    3: [
        (0.30, 0.35),
        (0.35, 0.4),
        (0.25, 0.35),
    ],
    4: [
        (0.25, 0.3),
        (0.35, 0.4),
        (0.05, 0.1),
        (0.25, 0.3),
    ],
}
def randomize_meal_ratios(org_ratios: dict, num_meals: int, smallest_step = 0.01) -> list:
    """
    Pick a random share of the daily intake for each meal.

    org_ratios maps a number of meals to a list of (min_ratio, max_ratio)
    ranges, one per meal. Every meal except the last gets a value drawn
    uniformly from min_ratio, min_ratio + smallest_step, ..., max_ratio
    (both ends included). The last meal is not randomized: it receives
    1 - sum(of the others), so the returned list sums to 1.

    For example [(0.3, 0.35), ...] -> the first entry is one of
    0.3, 0.31, ..., 0.35 with the default step.

    Returns the list of ratios, or {"error": ...} when num_meals is not a
    key of org_ratios. Raises ValueError when smallest_step is not positive.
    """
    if smallest_step <= 0:
        raise ValueError("smallest_step must be positive")
    if num_meals not in org_ratios:
        return {"error": f"{num_meals} meals is not implemented"}

    # Keep at least two decimals, more when the step itself is finer.
    decimals = max(2, int(np.ceil(-np.log10(smallest_step))))

    randomized_ratios = []
    # The last meal is not randomized.
    for min_ratio, max_ratio in org_ratios[num_meals][:-1]:
        # Count whole steps with integers: a float np.arange over
        # [min, max + step) can overshoot and yield max + step.
        n_steps = int(np.floor((max_ratio - min_ratio) / smallest_step + 1e-9))
        step_index = np.random.randint(0, n_steps + 1)
        rand_ratio = round(float(min_ratio + step_index * smallest_step), decimals)
        randomized_ratios.append(rand_ratio)

    randomized_ratios.append(round(1 - sum(randomized_ratios), decimals))
    return randomized_ratios

# Draw one set of per-meal ratios for the requested number of meals and print
# them together with their sum.
# NOTE: for a meal count missing from org_ratios the function returns an error
# dict instead of a list; sum() on that dict would raise TypeError.
randomized_ratios = randomize_meal_ratios(org_ratios, num_meals)
print(randomized_ratios, sum(randomized_ratios))

[0.28, 0.35, 0.05, 0.32] 1.0


# Calculate remaining nutrition based on already chosen meals

In [124]:
def subtract_nutri(total_nutri: dict, target_nutri: dict) -> dict:
    """
    Return what is left of total_nutri after taking target_nutri away.

    Each key of total_nutri is looked up in target_nutri and the two values
    are subtracted. Keys that only exist in target_nutri are ignored. As soon
    as a key of total_nutri is missing from target_nutri, an {"error": ...}
    dict naming that key is returned instead.
    """
    remaining = {}
    for name, available in total_nutri.items():
        try:
            consumed = target_nutri[name]
        except KeyError:
            return {"error": f"{name} does not exist in the recipe"}
        remaining[name] = available - consumed
    return remaining

In [125]:
subtract_nutri(sample_request["suggestNutriIntake"], sample_request["Breakfast"])

{'kcal': 2552,
 'sodium': 1902,
 'sugars': 69,
 'carbs': 408,
 'protein': 96,
 'fat': 63,
 'saturates': 24,
 'fibre': 38}

In [154]:
# Names of the meals of a day, in order, keyed by the number of meals per day.
# These names are also the keys used to look meals up in a request.
meal_names = {
    3: [
        "Breakfast",
        "Lunch",
        "Dinner",
    ],
    4: [
        "Breakfast",
        "Lunch",
        "Snack",
        "Dinner",
    ],
}
def remain_total_NutriRatio(meal_request: dict, meal_names: dict, meal_ratios: list) -> tuple:
    """
    Work out what is still unplanned for the day.

    Walks the meals listed in meal_names[meal_request["numMeals"]]. A meal
    whose entry in meal_request is empty still has to be recommended and is
    skipped. For every meal that is already chosen, its share
    (meal_ratios[position of the meal]) is taken off a running ratio that
    starts at 1.0, and its nutrition is subtracted from
    meal_request["suggestNutriIntake"] with subtract_nutri.

    Returns (remaining ratio rounded to 2 decimals, remaining nutrition dict).
    If subtract_nutri reports an error, the walk stops and that error dict is
    returned as the second element, so the first error message is preserved
    instead of being replaced by later subtractions.
    """
    remaining_nutri = meal_request["suggestNutriIntake"]
    num_meals = meal_request["numMeals"]
    remaining_ratio = 1.0
    for meal_index, meal in enumerate(meal_names[num_meals]):
        meal_nutri = meal_request[meal]
        # Empty meals need to be recommended, so they consume nothing yet.
        if not meal_nutri:
            continue
        remaining_ratio -= meal_ratios[meal_index]
        remaining_nutri = subtract_nutri(remaining_nutri, meal_nutri)
        # Subtracting again from an error dict would only produce a
        # misleading "error does not exist in the recipe" message.
        if "error" in remaining_nutri:
            break
    return np.round(remaining_ratio, 2), remaining_nutri

In [153]:
# Share of the day and nutrient amounts still unplanned, given the meals that
# are already filled in on the sample request; the second line displays both.
remain_ratio, remain_nutrition = remain_total_NutriRatio(sample_request, meal_names, randomized_ratios)
remain_ratio, remain_nutrition

(0.4,
 {'kcal': 1842,
  'sodium': 1522,
  'sugars': 66,
  'carbs': 359,
  'protein': 46,
  'fat': 32,
  'saturates': 11,
  'fibre': 32})

In [165]:
remain_total_NutriRatio({"suggestNutriIntake": sample_request["suggestNutriIntake"], 
                         "numMeals": 4,
                        "Breakfast": {}, 
                        "Lunch": {},
                        "Snack": {},
                        "Dinner": {}}, meal_names, randomized_ratios)

(1.0,
 {'kcal': 2816,
  'sodium': 2300,
  'sugars': 70,
  'carbs': 420,
  'protein': 109,
  'fat': 78,
  'saturates': 31,
  'fibre': 39})

# Renormalize Ratios

In [150]:
def reNormalize_ratio(base_ratios: list, remain_ratio: float) -> list:
    """
    Rescale base ratios so they are expressed relative to remain_ratio.

    Every entry of base_ratios is divided by remain_ratio and rounded to
    2 decimals; the result is returned as a list of plain floats. Because of
    the rounding, the rescaled entries need not add up exactly.

    When remain_ratio is 0 (nothing of the day is left to plan) the division
    is undefined, so a list of 0.0 of the same length is returned instead of
    inf/nan values.
    """
    if remain_ratio == 0:
        return [0.0] * len(base_ratios)
    rescaled = np.round(np.asarray(base_ratios, dtype=float) / remain_ratio, 2)
    return rescaled.tolist()

In [164]:
# Rescale the per-meal shares by the share of the day that is still unplanned.
# Every entry is divided, including those of meals that are already chosen.
normalized_ratios = reNormalize_ratio(randomized_ratios, remain_ratio)
normalized_ratios

[0.7, 0.87, 0.12, 0.8]

# Train models and recommend recipes

In [167]:
np.array(remain_nutrition)

array({'kcal': 1842, 'sodium': 1522, 'sugars': 66, 'carbs': 359, 'protein': 46, 'fat': 32, 'saturates': 11, 'fibre': 32},
      dtype=object)

In [183]:
def recommend_recipes(df: pd.DataFrame, meal_request: dict, total_nutrition: dict, meal_names: dict, meal_ratios: list, n_neighbors = 3) -> dict:
    """
    Recommend one recipe for every meal of the request that is still empty.

    For each meal in meal_names[meal_request["numMeals"]] whose entry in
    meal_request is empty:
      1. select the rows of df whose "mealType" contains the meal name;
      2. build the target vector meal_ratios[position] * total_nutrition,
         ordered like the df columns from "kcal" onward;
      3. fit a nearest-neighbour index on those rows, take the closest
         n_neighbors rows to the target and pick one of them at random.

    Returns {meal name: recipe row as a dict}. Meals that are already chosen
    are not in the result. A meal type with no recipe in df is also left out,
    and when fewer than n_neighbors recipes exist all of them are candidates.
    Both cases previously made the neighbour search raise.

    NOTE(review): distances are computed on the raw columns. The sample data
    shows sodium around 0.2 per recipe against a requested total of 2300, so
    the units may differ between the two sources - confirm.
    """
    num_meals = meal_request["numMeals"]
    result = {}
    for meal_index, meal in enumerate(meal_names[num_meals]):
        # Only empty meals need a recommendation.
        if meal_request[meal]:
            continue

        candidates = df.loc[df["mealType"].str.contains(meal), :]
        if candidates.empty:
            continue
        nutrient_frame = candidates.loc[:, "kcal":]

        # Order the target values exactly like the nutrient columns.
        totals = np.array([total_nutrition[nutri] for nutri in nutrient_frame.columns])
        meal_target = meal_ratios[meal_index] * totals

        # The search cannot ask for more neighbours than there are recipes.
        k = min(n_neighbors, len(candidates))
        top_rec_indx = (
            NearestNeighbors(n_neighbors=k)
            .fit(nutrient_frame.to_numpy())
            .kneighbors(meal_target.reshape(1, -1), return_distance=False)
        )
        randomized_rec_indx = np.random.choice(top_rec_indx[0])
        result[meal] = dict(candidates.iloc[randomized_rec_indx, :])
    return result

In [187]:
0.87 * 1842, 0.12 * 1842

(1602.54, 221.04)

In [194]:
recommend_recipes(df, sample_request, remain_nutrition, meal_names, normalized_ratios)

{'Lunch': {'id': -7951924760067307772,
  'title': 'Cashew Chicken',
  'nutriScore': 69.09,
  'mealType': 'Lunch',
  'kcal': 683.5,
  'sodium': 0.78325,
  'sugars': 0.5,
  'carbs': 65.25,
  'protein': 60.0,
  'fat': 16.75,
  'saturates': 3.0,
  'fibre': 1.25},
 'Snack': {'id': -6847128501804771164,
  'title': 'Cookies and milk',
  'nutriScore': 72.73,
  'mealType': 'Snack',
  'kcal': 254.0,
  'sodium': 0.169,
  'sugars': 13.0,
  'carbs': 27.0,
  'protein': 5.0,
  'fat': 12.0,
  'saturates': 4.0,
  'fibre': 0.0}}