In [19]:
import pandas as pd
import numpy as np

from ast import literal_eval
from collections import Counter
from tqdm.auto import tqdm
import warnings

pd.set_option('display.max_colwidth', 1000)
warnings.filterwarnings("ignore")
tqdm.pandas()

In [20]:
food_df = pd.read_csv("recommended.csv")
food_df

Unnamed: 0,Name,Calories,FatContent,SaturatedFatContent,CholesterolContent,SodiumContent,CarbohydrateContent,FiberContent,SugarContent,ProteinContent,foodnumber,recommended,tags
0,"Aloo Tikki, Aloo Tikki, Aloo Tikki, Aloo Tikki, Aloo Tikki",1249.0,4.0,1.0,0.0,3451.0,276.5,33.5,14.0,33.5,0,1,"['15-minutes-or-less', 'time-to-make', 'course', 'main-ingredient', 'cuisine', 'preparation', 'occasion', 'lunch', 'side-dishes', 'snacks', 'asian', 'indian', 'easy', 'beginner-cook', 'dinner-party', 'kid-friendly', 'vegan', 'vegetarian', 'grains', 'stove-top', 'dietary', 'inexpensive', 'toddler-friendly', 'pasta-rice-and-grains', 'equipment', 'presentation', 'served-hot']"
1,Kadhi Pakora,275.0,17.0,3.0,60.0,450.0,21.0,3.0,5.0,12.0,1,0,"['30-minutes-or-less', 'time-to-make', 'course', 'main-ingredient', 'cuisine', 'preparation', 'appetizers', 'eggs-dairy', 'asian', 'indian']"
2,"Kadhi Pakora, Palak Paneer",495.3,27.7,9.0,77.4,896.8,46.1,13.7,12.3,25.8,2,1,"['time-to-make', 'course', 'low-cholesterol', 'low-carb', 'appetizers', 'low-saturated-fat', 'main-ingredient', 'low-calorie', 'dietary', 'asian', 'low-protein', 'low-in-something', 'preparation', 'indian', 'cuisine', 'healthy-2', 'low-sodium', 'vegetarian', 'eggs-dairy', '60-minutes-or-less']"
3,Bhindi Masala,175.0,10.0,2.0,0.0,600.0,15.0,5.0,3.0,7.0,3,0,"['30-minutes-or-less', 'time-to-make', 'course', 'main-ingredient', 'cuisine', 'preparation', 'low-protein', 'healthy', 'side-dishes', 'vegetables', 'asian', 'indian', 'vegan', 'vegetarian', 'dietary', 'low-sodium', 'low-cholesterol', 'low-calorie', 'low-carb', 'healthy-2', 'low-in-something']"
4,Kadhi Pakora,275.0,17.0,3.0,60.0,450.0,21.0,3.0,5.0,12.0,4,0,"['30-minutes-or-less', 'time-to-make', 'course', 'main-ingredient', 'cuisine', 'preparation', 'appetizers', 'eggs-dairy', 'asian', 'indian']"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,Chai,69.3,1.1,0.7,4.3,23.0,14.8,0.1,13.0,1.1,145,1,"['15-minutes-or-less', 'time-to-make', 'course', 'cuisine', 'preparation', 'beverages', 'asian', 'indian', 'dietary', 'presentation', 'served-hot']"
146,Chai,69.3,1.1,0.7,4.3,23.0,14.8,0.1,13.0,1.1,146,1,"['15-minutes-or-less', 'time-to-make', 'course', 'cuisine', 'preparation', 'beverages', 'asian', 'indian', 'dietary', 'presentation', 'served-hot']"
147,Paratha,511.6,28.7,4.3,0.0,4.0,58.1,9.8,0.3,11.0,147,0,"['30-minutes-or-less', 'time-to-make', 'course', 'cuisine', 'preparation', 'flat-shapes', 'breads', 'unhealthy', 'lunch', 'snacks', 'asian', 'indian', 'easy', 'beginner-cook', 'vegetarian', 'dietary', 'egg-free', 'free-of-something', 'presentation', 'served-hot', '3-steps-or-less']"
148,"Paratha, Paratha",1023.2,57.4,8.6,0.0,8.0,116.2,19.6,0.6,22.0,148,1,"['30-minutes-or-less', 'time-to-make', 'course', 'cuisine', 'preparation', 'flat-shapes', 'breads', 'unhealthy', 'lunch', 'snacks', 'asian', 'indian', 'easy', 'beginner-cook', 'vegetarian', 'dietary', 'egg-free', 'free-of-something', 'presentation', 'served-hot', '3-steps-or-less']"


In [21]:
from surprise import Reader, Dataset, NMF, accuracy
from surprise.model_selection import train_test_split

In [22]:
ratings_df = pd.read_csv('ratings.csv')
ratings_df

Unnamed: 0,foodnumber,User,Rating
0,0,111,1
1,1,112,4
2,2,113,3
3,3,114,4
4,4,115,4
...,...,...,...
145,145,1505,3
146,146,1506,3
147,147,1507,3
148,148,1508,2


In [23]:
ratings_df['Rating'].value_counts()

Rating
3    59
4    37
2    35
5    15
1     4
Name: count, dtype: int64

In [24]:
reader = Reader(rating_scale=(0, 5))
data = Dataset.load_from_df(ratings_df[['foodnumber','User','Rating']], reader)
trainset, testset = train_test_split(data, test_size=0.25)

In [25]:
#Non-negative Matrix Factorization  
model = NMF()

In [26]:
# Train the model
model.fit(trainset)

# Evaluate the model
predictions = model.test(testset)
mse = accuracy.mse(predictions)
rmse = accuracy.rmse(predictions)

MSE: 1.2048
RMSE: 1.0976


In [27]:
tags_count = Counter()
food_df["tags"].progress_apply(
    lambda tags: tags_count.update(literal_eval(tags))
)

100%|██████████| 150/150 [00:00<00:00, 17285.17it/s]


0      None
1      None
2      None
3      None
4      None
       ... 
145    None
146    None
147    None
148    None
149    None
Name: tags, Length: 150, dtype: object

In [28]:
tags_count

Counter({'time-to-make': 148,
         'cuisine': 148,
         'preparation': 148,
         'course': 143,
         'dietary': 124,
         'indian': 120,
         'main-ingredient': 117,
         'asian': 100,
         'main-dish': 67,
         'low-in-something': 65,
         'easy': 62,
         'vegetarian': 56,
         'meat': 53,
         '30-minutes-or-less': 46,
         'equipment': 44,
         'low-sodium': 41,
         'stove-top': 39,
         'occasion': 38,
         'lunch': 38,
         'vegetables': 38,
         'poultry': 37,
         'chicken': 37,
         '60-minutes-or-less': 36,
         'presentation': 35,
         '3-steps-or-less': 35,
         '15-minutes-or-less': 32,
         'low-carb': 32,
         'side-dishes': 30,
         'pasta-rice-and-grains': 30,
         'healthy': 30,
         'served-hot': 27,
         'appetizers': 26,
         'low-protein': 26,
         'beginner-cook': 24,
         'north-american': 24,
         '5-ingredients-or-less': 

In [30]:
TIME_TAGS = [
    '15-minutes-or-less',
    '30-minutes-or-less',
    '60-minutes-or-less',
    '4-hours-or-less',
]
VEGAN_TAGS = ['vegan']
MEAT_TAGS =[
    'beef',
    'chicken',
]

FEATURE_COLS = TIME_TAGS+VEGAN_TAGS+MEAT_TAGS
FEATURE_COLS

['15-minutes-or-less',
 '30-minutes-or-less',
 '60-minutes-or-less',
 '4-hours-or-less',
 'vegan',
 'beef',
 'chicken']

In [31]:
def fe_tags(food_tags):
    values = []
    
    for group_tag in [TIME_TAGS, VEGAN_TAGS]:
        for tag in group_tag:
            values.append(True) if tag in food_tags else values.append(False)

    for tag in MEAT_TAGS:
        values.append(True) if tag in food_tags else values.append(False)

    return values   

In [32]:
food_df['tmp'] = food_df["tags"].progress_apply(
    lambda food_tags: fe_tags(food_tags))
food_df[FEATURE_COLS] = pd.DataFrame(food_df['tmp'].tolist(), index=food_df.index)
food_df.drop(columns='tmp', inplace=True)
food_df

100%|██████████| 150/150 [00:00<00:00, 119541.25it/s]


Unnamed: 0,Name,Calories,FatContent,SaturatedFatContent,CholesterolContent,SodiumContent,CarbohydrateContent,FiberContent,SugarContent,ProteinContent,foodnumber,recommended,tags,15-minutes-or-less,30-minutes-or-less,60-minutes-or-less,4-hours-or-less,vegan,beef,chicken
0,"Aloo Tikki, Aloo Tikki, Aloo Tikki, Aloo Tikki, Aloo Tikki",1249.0,4.0,1.0,0.0,3451.0,276.5,33.5,14.0,33.5,0,1,"['15-minutes-or-less', 'time-to-make', 'course', 'main-ingredient', 'cuisine', 'preparation', 'occasion', 'lunch', 'side-dishes', 'snacks', 'asian', 'indian', 'easy', 'beginner-cook', 'dinner-party', 'kid-friendly', 'vegan', 'vegetarian', 'grains', 'stove-top', 'dietary', 'inexpensive', 'toddler-friendly', 'pasta-rice-and-grains', 'equipment', 'presentation', 'served-hot']",True,False,False,False,True,False,False
1,Kadhi Pakora,275.0,17.0,3.0,60.0,450.0,21.0,3.0,5.0,12.0,1,0,"['30-minutes-or-less', 'time-to-make', 'course', 'main-ingredient', 'cuisine', 'preparation', 'appetizers', 'eggs-dairy', 'asian', 'indian']",False,True,False,False,False,False,False
2,"Kadhi Pakora, Palak Paneer",495.3,27.7,9.0,77.4,896.8,46.1,13.7,12.3,25.8,2,1,"['time-to-make', 'course', 'low-cholesterol', 'low-carb', 'appetizers', 'low-saturated-fat', 'main-ingredient', 'low-calorie', 'dietary', 'asian', 'low-protein', 'low-in-something', 'preparation', 'indian', 'cuisine', 'healthy-2', 'low-sodium', 'vegetarian', 'eggs-dairy', '60-minutes-or-less']",False,False,True,False,False,False,False
3,Bhindi Masala,175.0,10.0,2.0,0.0,600.0,15.0,5.0,3.0,7.0,3,0,"['30-minutes-or-less', 'time-to-make', 'course', 'main-ingredient', 'cuisine', 'preparation', 'low-protein', 'healthy', 'side-dishes', 'vegetables', 'asian', 'indian', 'vegan', 'vegetarian', 'dietary', 'low-sodium', 'low-cholesterol', 'low-calorie', 'low-carb', 'healthy-2', 'low-in-something']",False,True,False,False,True,False,False
4,Kadhi Pakora,275.0,17.0,3.0,60.0,450.0,21.0,3.0,5.0,12.0,4,0,"['30-minutes-or-less', 'time-to-make', 'course', 'main-ingredient', 'cuisine', 'preparation', 'appetizers', 'eggs-dairy', 'asian', 'indian']",False,True,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,Chai,69.3,1.1,0.7,4.3,23.0,14.8,0.1,13.0,1.1,145,1,"['15-minutes-or-less', 'time-to-make', 'course', 'cuisine', 'preparation', 'beverages', 'asian', 'indian', 'dietary', 'presentation', 'served-hot']",True,False,False,False,False,False,False
146,Chai,69.3,1.1,0.7,4.3,23.0,14.8,0.1,13.0,1.1,146,1,"['15-minutes-or-less', 'time-to-make', 'course', 'cuisine', 'preparation', 'beverages', 'asian', 'indian', 'dietary', 'presentation', 'served-hot']",True,False,False,False,False,False,False
147,Paratha,511.6,28.7,4.3,0.0,4.0,58.1,9.8,0.3,11.0,147,0,"['30-minutes-or-less', 'time-to-make', 'course', 'cuisine', 'preparation', 'flat-shapes', 'breads', 'unhealthy', 'lunch', 'snacks', 'asian', 'indian', 'easy', 'beginner-cook', 'vegetarian', 'dietary', 'egg-free', 'free-of-something', 'presentation', 'served-hot', '3-steps-or-less']",False,True,False,False,False,False,False
148,"Paratha, Paratha",1023.2,57.4,8.6,0.0,8.0,116.2,19.6,0.6,22.0,148,1,"['30-minutes-or-less', 'time-to-make', 'course', 'cuisine', 'preparation', 'flat-shapes', 'breads', 'unhealthy', 'lunch', 'snacks', 'asian', 'indian', 'easy', 'beginner-cook', 'vegetarian', 'dietary', 'egg-free', 'free-of-something', 'presentation', 'served-hot', '3-steps-or-less']",False,True,False,False,False,False,False


In [33]:
conds = [
    (food_df['4-hours-or-less']),
    (food_df['60-minutes-or-less']),
    (food_df['30-minutes-or-less']),
    (food_df['15-minutes-or-less']),
]
choices = [4,3,2,1]
food_df['time'] = np.select(conds, choices, default=5)
food_df["time"].value_counts()

time
2    46
3    36
1    32
5    19
4    17
Name: count, dtype: int64

In [34]:
def recommend_meal(model, uid, filtered_ids, topk):
    preds = []
    for iid in filtered_ids:
        pred_rating = model.predict(uid=uid, iid=iid).est
        preds.append([iid, pred_rating])
    preds.sort(key=lambda x:x[1], reverse=True)
    
    return preds[:topk]

In [36]:
# 1. vegan
filtered_ids = food_df[(food_df['vegan'])]['foodnumber'].to_list()
random_user = ratings_df['User'].sample(1).values[0]
filtered_df = food_df[food_df['recommended'] == 0]
preds = recommend_meal(model, uid=random_user, filtered_ids=filtered_ids, topk=10)
selected_columns = ['Name', 'Calories', 'SaturatedFatContent', 'CholesterolContent', 'FiberContent']
filtered_df[filtered_df['foodnumber'].isin([x[0] for x in preds])][selected_columns]

Unnamed: 0,Name,Calories,SaturatedFatContent,CholesterolContent,FiberContent
3,Bhindi Masala,175.0,2.0,0.0,5.0
17,Bhindi Masala,175.0,2.0,0.0,5.0
38,Bhindi Masala,175.0,2.0,0.0,5.0
44,Aloo Matar,184.3,1.3,0.0,4.5
45,Bhindi Masala,175.0,2.0,0.0,5.0
58,Aloo Matar,184.3,1.3,0.0,4.5
74,Bhindi Masala,175.0,2.0,0.0,5.0


In [37]:
# 2. chicken
filtered_ids = food_df[(food_df['chicken'])]['foodnumber'].to_list()
random_user = ratings_df['User'].sample(1).values[0]
filtered_df = food_df[food_df['recommended'] == 0]
preds = recommend_meal(model, uid=random_user, filtered_ids=filtered_ids, topk=10)
filtered_df[filtered_df['foodnumber'].isin([x[0] for x in preds])][['Name','Calories','SaturatedFatContent','CholesterolContent','FiberContent']] 

Unnamed: 0,Name,Calories,SaturatedFatContent,CholesterolContent,FiberContent
7,Chicken Wings,84.0,1.7,29.1,0.0
8,"Onion Rings, Chicken Wings",293.1,3.1,58.8,4.0
9,Biryani,400.0,3.0,30.0,2.0
14,Biryani,400.0,3.0,30.0,2.0
22,Chicken Wings,84.0,1.7,29.1,0.0
23,Butter Chicken,308.1,7.6,99.4,0.9
24,Biryani,400.0,3.0,30.0,2.0
26,Biryani,400.0,3.0,30.0,2.0
28,"Chicken Wings, Chicken Wings, Onion Rings",377.1,4.8,87.9,4.0


In [38]:
# 2. less time to make
filtered_ids = food_df[(food_df['time']<=1)]['foodnumber'].to_list()
random_user = ratings_df['User'].sample(1).values[0]
filtered_df = food_df[food_df['recommended'] == 0]
preds = recommend_meal(model, uid=random_user, filtered_ids=filtered_ids, topk=10)
filtered_df[filtered_df['foodnumber'].isin([x[0] for x in preds])][['Name','Calories','SaturatedFatContent','CholesterolContent','FiberContent']] 

Unnamed: 0,Name,Calories,SaturatedFatContent,CholesterolContent,FiberContent
18,Sandwich,192.8,0.6,0.6,3.1
20,Sandwich,192.8,0.6,0.6,3.1
25,Sandwich,192.8,0.6,0.6,3.1
30,Rice,183.9,0.4,0.0,1.6
31,Sandwich,192.8,0.6,0.6,3.1
47,Rice,183.9,0.4,0.0,1.6
