# Setup

In [1]:
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
import io
import os
import boto3
import csv

# Load the dataset from S3 (on hold until the AWS problems are resolved)

In [3]:
# AWS credentials and region come from environment variables, never from the notebook.
# A variable that is not set raises KeyError in this cell.
AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION = (
    os.environ[variable]
    for variable in (
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        "AWS_DEFAULT_REGION",
    )
)

In [8]:
# Bucket and key of the CSV object to load.
bucket_name = "our-recipe-recipes-table"
object_key = "recipes.csv"

# S3 client configured with the region and credentials read from the environment.
s3_client = boto3.client(
    "s3",
    region_name=AWS_DEFAULT_REGION,
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
)

In [14]:
# Fetch the CSV object from S3 and parse it entirely in memory;
# nothing is written to the local machine.
csv_obj = s3_client.get_object(Bucket=bucket_name, Key=object_key)
body = csv_obj["Body"]
csv_string = body.read().decode("utf-8")

# Force the two text columns to str instead of letting pandas infer their dtype.
with io.StringIO(csv_string) as csv_buffer:
    df = pd.read_csv(csv_buffer, dtype={"title": str, "mealType": str})

In [15]:
# Preview the first five rows of the frame loaded from S3.
df.head(n=5)

Unnamed: 0,id,title,nutriScore,mealType,kcal,sodium,sugars,carbs,protein,fat,saturates,fibre
0,-297529017418066210,Coffee Protein Smoothie,60.0,Snack,356.4,0.14072,25.94,34.59,34.21,10.16,5.15,2.6
1,2984524080050943643,Sear tuna and potatoes,69.09,Dinner,368.5,1.55038,10.12,40.6,26.65,8.26,6.1,7.54
2,-134860378839050690,Sear tuna and potatoes,69.09,Dinner,368.5,1.55038,10.12,40.6,26.65,8.26,6.1,7.54
3,-6989815825829559279,Burrito Jars,81.82,Lunch,505.25,0.28775,6.25,15.25,54.5,21.5,3.25,8.75
4,1662862941331579196,NEW APPLE,72.73,Breakfast,115.0,0.002,23.0,25.0,0.0,0.0,0.0,5.0


# Load the dataset locally

In [4]:
# Read the recipes CSV from the local data/ folder; this rebinds `df`.
# The two text columns are forced to str instead of being inferred.
df = pd.read_csv(
    "data/recipes.csv",
    dtype={"title": str, "mealType": str},
)
df.head(n=5)

Unnamed: 0,id,title,nutriScore,mealType,kcal,sodium,sugars,carbs,protein,fat,saturates,fibre
0,-297529017418066210,Coffee Protein Smoothie,60.0,Snack,356.4,0.14072,25.94,34.59,34.21,10.16,5.15,2.6
1,2984524080050943643,Sear tuna and potatoes,69.09,Dinner,368.5,1.55038,10.12,40.6,26.65,8.26,6.1,7.54
2,-134860378839050690,Sear tuna and potatoes,69.09,Dinner,368.5,1.55038,10.12,40.6,26.65,8.26,6.1,7.54
3,-6989815825829559279,Burrito Jars,81.82,Lunch,505.25,0.28775,6.25,15.25,54.5,21.5,3.25,8.75
4,1662862941331579196,NEW APPLE,72.73,Breakfast,115.0,0.002,23.0,25.0,0.0,0.0,0.0,5.0


In [5]:
# Print column names, non-null counts, dtypes and memory usage of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   id          8 non-null      int64  
 1   title       8 non-null      object 
 2   nutriScore  8 non-null      float64
 3   mealType    8 non-null      object 
 4   kcal        8 non-null      float64
 5   sodium      8 non-null      float64
 6   sugars      8 non-null      float64
 7   carbs       8 non-null      float64
 8   protein     8 non-null      float64
 9   fat         8 non-null      float64
 10  saturates   8 non-null      float64
 11  fibre       8 non-null      float64
dtypes: float64(9), int64(1), object(2)
memory usage: 896.0+ bytes


In [9]:
# Number of columns in the loaded frame (second entry of the shape tuple).
df.shape[1]

12

# Train models

In [16]:
# One feature frame per meal type: keep the rows whose mealType mentions the
# meal, and the columns from "kcal" through the last column.
# Each variable is filtered on its own label (snack -> "Snack", dinner -> "Dinner").
# na=False makes a missing mealType count as "no match" instead of putting NaN
# into the boolean mask, which .loc would reject.
breakfast_df = df.loc[df["mealType"].str.contains("Breakfast", na=False), "kcal":]
lunch_df = df.loc[df["mealType"].str.contains("Lunch", na=False), "kcal":]
snack_df = df.loc[df["mealType"].str.contains("Snack", na=False), "kcal":]
dinner_df = df.loc[df["mealType"].str.contains("Dinner", na=False), "kcal":]

In [17]:
# NumPy feature matrix per meal type, used to fit the nearest-neighbour models:
# rows whose mealType mentions the meal, columns from "kcal" through the last column.
# Each variable is filtered on its own label (snack -> "Snack", dinner -> "Dinner").
# na=False makes a missing mealType count as "no match" instead of putting NaN
# into the boolean mask, which .loc would reject.
breakfast_arr = df.loc[df["mealType"].str.contains("Breakfast", na=False), "kcal":].to_numpy()
lunch_arr = df.loc[df["mealType"].str.contains("Lunch", na=False), "kcal":].to_numpy()
snack_arr = df.loc[df["mealType"].str.contains("Snack", na=False), "kcal":].to_numpy()
dinner_arr = df.loc[df["mealType"].str.contains("Dinner", na=False), "kcal":].to_numpy()

In [18]:
# Display the breakfast feature frame (kcal through the last column).
breakfast_df

Unnamed: 0,kcal,sodium,sugars,carbs,protein,fat,saturates,fibre
4,115.0,0.002,23.0,25.0,0.0,0.0,0.0,5.0
5,58.0,0.001,0.0,0.0,1.0,3.0,0.0,4.0
7,58.32,0.00192,0.0,0.93,1.98,3.69,0.4,4.13


In [19]:
# Display the breakfast feature matrix as a NumPy array.
breakfast_arr

array([[1.150e+02, 2.000e-03, 2.300e+01, 2.500e+01, 0.000e+00, 0.000e+00,
        0.000e+00, 5.000e+00],
       [5.800e+01, 1.000e-03, 0.000e+00, 0.000e+00, 1.000e+00, 3.000e+00,
        0.000e+00, 4.000e+00],
       [5.832e+01, 1.920e-03, 0.000e+00, 9.300e-01, 1.980e+00, 3.690e+00,
        4.000e-01, 4.130e+00]])

In [20]:
def _fit_neighbors(features):
    """Fit and return a NearestNeighbors model (2 neighbours per query) on `features`."""
    return NearestNeighbors(n_neighbors=2).fit(features)


# One nearest-neighbour model per meal type, each fitted on that meal's feature matrix.
breakfast_neigh = _fit_neighbors(breakfast_arr)
lunch_neigh = _fit_neighbors(lunch_arr)
snack_neigh = _fit_neighbors(snack_arr)
dinner_neigh = _fit_neighbors(dinner_arr)

In [21]:
# Query the breakfast model with one of its own rows (index 1).
# kneighbors expects a 2-D input, so the single row is reshaped to (1, n_features).
# Only neighbour indices are requested, not distances.
breakfast_sample = breakfast_arr[1]
breakfast_pred = breakfast_neigh.kneighbors(
    breakfast_sample.reshape(1, -1),
    return_distance=False,
)
breakfast_pred

array([[1, 2]])

In [None]:
def prepare_output(days: list, meals: list) -> dict:
    """
    Prepare an empty output dict with one slot per (day, meal) pair.

    NOTE(review): the target layout is specified in output_modeling.txt, which
    is not visible from this notebook. This implementation assumes
    day -> meal -> list of predictions; confirm against that file.

    Parameters
    ----------
    days : list
        Keys of the outer dict, one per day.
    meals : list
        Keys of each inner dict, one per meal.

    Returns
    -------
    dict
        ``{day: {meal: []}}`` for every day and meal. Each slot gets its own
        list object, so filling one slot never affects another. Empty ``days``
        yields an empty dict.
    """
    # The lists are created inside the comprehension so no two slots share one object.
    return {day: {meal: [] for meal in meals} for day in days}

In [None]:
# Predictions collected per day; every day starts with an empty list.
# NOTE(review): "Tuesday" is given an empty list to match "Monday" — confirm.
result_preds = {"Monday": [], "Tuesday": []}