# Recipe Simple Embedding Data Setup



In [148]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import ast
from sentence_transformers import SentenceTransformer
from IPython.display import clear_output

In [149]:
# Load the first 10,000 raw recipes, derive flat-text versions of the
# list-valued columns, and keep only recipes with enough content to embed.
data = (
    pd.read_csv("/media/michael/Delta/Data/Data_Science_Masters/MSDS_498/RAW_recipes.csv", nrows=10000)
    # 'steps' and 'ingredients' are stored as string representations of Python
    # lists; parse each one and join its items into a single piece of text
    .assign(
        steps_text=lambda df: df['steps'].map(lambda raw: ". ".join(ast.literal_eval(raw))),
        ingredients_text=lambda df: df['ingredients'].map(lambda raw: ", ".join(ast.literal_eval(raw))),
    )
    # keep descriptions containing at least 9 spaces (roughly 10 or more words)
    .loc[lambda df: df['description'].str.count(" ") >= 9]
    # keep recipes that use 3 or more ingredients
    .query("n_ingredients >= 3")
    # keep recipes whose joined instructions are longer than 10 characters
    .loc[lambda df: df['steps_text'].str.len() > 10]
    # renumber the rows 0..n-1 so the later horizontal concatenation lines up
    .reset_index(drop=True)
)
data

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,steps_text,ingredients_text
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7,make a choice and proceed with recipe. dependi...,"winter squash, mexican seasoning, mixed spice,..."
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6,preheat oven to 425 degrees f. press dough int...,"prepared pizza crust, sausage patty, eggs, mil..."
2,all in the kitchen chili,112140,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",13,brown ground beef in large pot. add chopped on...,"ground beef, yellow onions, diced tomatoes, to..."
3,alouette potatoes,59389,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11,place potatoes in a large pot of lightly salte...,"spreadable cheese with garlic and herbs, new p..."
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8,"mix all ingredients& boil for 2 1 / 2 hours , ...","tomato juice, apple cider vinegar, sugar, salt..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8337,asparagus with mustard seed dressing,194343,20,319738,2006-11-06,"['30-minutes-or-less', 'time-to-make', 'course...","[89.1, 3.0, 15.0, 12.0, 14.0, 1.0, 4.0]",7,"['mix lemon juice , vinegar , and country dijo...",i think this is based on a cooking light recip...,"['asparagus', 'whole mustard seeds', 'lemon ju...",5,"mix lemon juice , vinegar , and country dijon ...","asparagus, whole mustard seeds, lemon juice, w..."
8338,asparagus with no cook creamy mustard sauce,216762,13,17803,2007-03-13,"['15-minutes-or-less', 'time-to-make', 'course...","[136.9, 10.0, 17.0, 70.0, 18.0, 3.0, 4.0]",7,"['for sauce , combine in a bowl yogurt , mayon...",you may want to play with some different seaso...,"['plain nonfat yogurt', 'light mayonnaise', 'd...",8,"for sauce , combine in a bowl yogurt , mayonna...","plain nonfat yogurt, light mayonnaise, dijon m..."
8339,asparagus with nutmeg butter,362466,11,865936,2009-03-23,"['15-minutes-or-less', 'time-to-make', 'course...","[92.7, 13.0, 3.0, 2.0, 3.0, 27.0, 1.0]",7,"['in a skillet , bring enough water to cover a...",adapted from an old cookbook which suggested i...,"['asparagus spears', 'butter', 'lemon', 'nutme...",5,"in a skillet , bring enough water to cover asp...","asparagus spears, butter, lemon, nutmeg, salt ..."
8340,asparagus with olive oil herbs,87478,25,52543,2004-03-27,"['30-minutes-or-less', 'time-to-make', 'course...","[124.3, 14.0, 9.0, 1.0, 9.0, 6.0, 2.0]",9,"['mix parsley , garlic& oil in a 9 x 13 inch b...",i love the spring when fresh asparagus is avai...,"['fresh parsley', 'garlic', 'extra virgin oliv...",7,"mix parsley , garlic& oil in a 9 x 13 inch bak...","fresh parsley, garlic, extra virgin olive oil,..."


In [150]:
# instantiate the sentence-transformer model used to embed the text columns
model = SentenceTransformer('all-MiniLM-L6-v2')

# give every column to embed its own empty placeholder frame, so that if the
# embedding step fails part-way only the affected columns have to be rerun
embedding_dict = {
    col_name: pd.DataFrame()
    for col_name in ("name", "description", "steps_text", "ingredients_text")
}

In [151]:
# embed every text column and store the result under that column's name
cols_to_embed = ["name", "description", "steps_text", "ingredients_text"]

for curr_col in cols_to_embed:
    print(curr_col)  # progress marker: which column is being encoded
    vectors = model.encode(data[curr_col].tolist(), normalize_embeddings=True)
    # one output column per embedding dimension, labelled with a zero-padded
    # three-digit index, e.g. "name_embedding_000"
    dim_labels = [f"{curr_col}_embedding_{dim:03d}" for dim in range(vectors.shape[1])]
    embedding_dict[curr_col] = pd.DataFrame(vectors, columns=dim_labels)

name
description
steps_text
ingredients_text


In [152]:
# place the per-column embedding frames side by side
all_embeddings = pd.concat(list(embedding_dict.values()), axis=1)

# drop any embedding columns already present in `data` (e.g. from an earlier
# run of this cell) before appending the fresh ones column-wise
data = pd.concat(
    [
        data.drop(columns=[col for col in all_embeddings.columns if col in data.columns]),
        all_embeddings,
    ],
    axis=1,
)
data

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,...,ingredients_text_embedding_374,ingredients_text_embedding_375,ingredients_text_embedding_376,ingredients_text_embedding_377,ingredients_text_embedding_378,ingredients_text_embedding_379,ingredients_text_embedding_380,ingredients_text_embedding_381,ingredients_text_embedding_382,ingredients_text_embedding_383
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,...,0.021177,0.007456,0.110229,-0.068143,0.088277,-0.052553,-0.054856,-0.009026,0.012120,0.082861
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,...,0.080783,0.074230,0.089201,0.006190,0.099552,0.022226,-0.062413,0.064582,0.013286,-0.014859
2,all in the kitchen chili,112140,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,...,0.051172,0.082343,0.116373,-0.058827,0.041265,-0.043046,-0.054921,0.040403,0.032460,0.043497
3,alouette potatoes,59389,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...",...,-0.001429,0.025129,0.088833,-0.009907,0.080626,-0.029695,-0.061843,0.024248,0.030216,0.069643
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,...,0.029602,0.018748,0.086573,-0.064266,0.005418,-0.051273,-0.052236,0.014114,0.075585,0.041810
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8337,asparagus with mustard seed dressing,194343,20,319738,2006-11-06,"['30-minutes-or-less', 'time-to-make', 'course...","[89.1, 3.0, 15.0, 12.0, 14.0, 1.0, 4.0]",7,"['mix lemon juice , vinegar , and country dijo...",i think this is based on a cooking light recip...,...,0.035330,0.021276,0.036673,-0.047971,0.016466,-0.042152,-0.010107,0.009911,0.024444,0.045421
8338,asparagus with no cook creamy mustard sauce,216762,13,17803,2007-03-13,"['15-minutes-or-less', 'time-to-make', 'course...","[136.9, 10.0, 17.0, 70.0, 18.0, 3.0, 4.0]",7,"['for sauce , combine in a bowl yogurt , mayon...",you may want to play with some different seaso...,...,0.052594,0.004721,0.069222,-0.012268,0.008866,-0.021207,0.032476,-0.016295,0.032342,0.015713
8339,asparagus with nutmeg butter,362466,11,865936,2009-03-23,"['15-minutes-or-less', 'time-to-make', 'course...","[92.7, 13.0, 3.0, 2.0, 3.0, 27.0, 1.0]",7,"['in a skillet , bring enough water to cover a...",adapted from an old cookbook which suggested i...,...,0.035120,0.010410,0.046741,-0.028349,0.033499,0.037990,-0.015982,-0.030320,-0.030035,0.112288
8340,asparagus with olive oil herbs,87478,25,52543,2004-03-27,"['30-minutes-or-less', 'time-to-make', 'course...","[124.3, 14.0, 9.0, 1.0, 9.0, 6.0, 2.0]",9,"['mix parsley , garlic& oil in a 9 x 13 inch b...",i love the spring when fresh asparagus is avai...,...,-0.006233,-0.001013,0.040290,-0.009347,0.093195,-0.067350,-0.042218,0.027918,0.027484,0.061431


In [153]:
# Preview the raw review data.
# The frame is loaded here (first 1,000 rows) so this cell also works on a
# fresh kernel: `review_data` is otherwise only assigned in a later cell, which
# makes a top-to-bottom run fail with a NameError at this point.
review_data = pd.read_csv("/media/michael/Delta/Data/Data_Science_Masters/MSDS_498/RAW_interactions.csv", nrows=1000)
review_data

Unnamed: 0,user_id,recipe_id,date,rating,review
0,38094,40893,2003-02-17,4,Great with a salad. Cooked on top of stove for...
1,1293707,40893,2011-12-21,5,"So simple, so delicious! Great for chilly fall..."
2,8937,44394,2002-12-01,4,This worked very well and is EASY. I used not...
3,126440,85009,2010-02-27,5,I made the Mexican topping and took it to bunk...
4,57222,85009,2011-10-01,5,"Made the cheddar bacon topping, adding a sprin..."
...,...,...,...,...,...
995,1329782,305675,2010-01-26,5,Very good.\nI used a combination of 4 breasts ...
996,1800103922,305675,2013-10-05,3,Very good! Not quite like the ones you get on ...
997,1802533639,305675,2014-02-27,5,"Absolutely yummy. It tastes as good, if not be..."
998,806937,310344,2009-08-16,4,Very good and moist. I made a 1/2 recipe and w...


In [154]:
# Load the review data and summarise it into one row of rating counts per recipe
review_data = pd.read_csv("/media/michael/Delta/Data/Data_Science_Masters/MSDS_498/RAW_interactions.csv", nrows=1000)

# count how often each recipe received each rating value (recipes as rows,
# rating values as columns, missing combinations filled with 0)
review_summary = (
    review_data.groupby(['recipe_id']).rating.value_counts()
    .unstack(fill_value=0)
    # always emit one column per rating 0-5, even when a rating value never
    # occurs in the loaded sample, so the output schema does not depend on the data
    # NOTE(review): assumes ratings are the integers 0-5, as in the displayed output
    .reindex(columns=range(6), fill_value=0)
)
review_summary.columns = ["rating_count_" + str(x) for x in review_summary.columns]

# total number of ratings per recipe (sum across the per-rating counts)
review_summary['rating_count_total'] = review_summary.sum(axis=1)

# turn recipe_id back into a regular column and rename it to `id`
review_summary = review_summary.reset_index().rename(columns={"recipe_id": "id"})
review_summary

Unnamed: 0,id,rating_count_0,rating_count_1,rating_count_2,rating_count_3,rating_count_4,rating_count_5,rating_count_total
0,5139,1,0,0,0,1,3,5
1,8846,4,0,1,0,3,8,16
2,14813,1,0,0,0,1,2,4
3,16127,0,0,0,0,0,1,1
4,18204,0,0,0,0,2,1,3
...,...,...,...,...,...,...,...,...
263,507226,1,0,0,0,0,2,3
264,512986,0,1,0,0,0,0,1
265,523223,0,0,0,1,0,1,2
266,523426,0,0,0,0,0,1,1


In [155]:
# names of the rating-count columns that come from the review summary
rating_cols = ["rating_count_" + str(score) for score in range(6)] + ["rating_count_total"]

data = (
    data
    # remove rating columns left over from an earlier run of this cell
    .drop(columns=rating_cols, errors="ignore")
    # attach the per-recipe rating counts; recipes without a match are kept
    .merge(review_summary, on=['id'], how='left')
    # rows without a match in the summary get a count of 0 instead of NaN
    .fillna(dict.fromkeys(rating_cols, 0))
)
data

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,...,ingredients_text_embedding_381,ingredients_text_embedding_382,ingredients_text_embedding_383,rating_count_0,rating_count_1,rating_count_2,rating_count_3,rating_count_4,rating_count_5,rating_count_total
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,...,-0.009026,0.012120,0.082861,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,...,0.064582,0.013286,-0.014859,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,all in the kitchen chili,112140,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,...,0.040403,0.032460,0.043497,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,alouette potatoes,59389,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...",...,0.024248,0.030216,0.069643,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,...,0.014114,0.075585,0.041810,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8337,asparagus with mustard seed dressing,194343,20,319738,2006-11-06,"['30-minutes-or-less', 'time-to-make', 'course...","[89.1, 3.0, 15.0, 12.0, 14.0, 1.0, 4.0]",7,"['mix lemon juice , vinegar , and country dijo...",i think this is based on a cooking light recip...,...,0.009911,0.024444,0.045421,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8338,asparagus with no cook creamy mustard sauce,216762,13,17803,2007-03-13,"['15-minutes-or-less', 'time-to-make', 'course...","[136.9, 10.0, 17.0, 70.0, 18.0, 3.0, 4.0]",7,"['for sauce , combine in a bowl yogurt , mayon...",you may want to play with some different seaso...,...,-0.016295,0.032342,0.015713,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8339,asparagus with nutmeg butter,362466,11,865936,2009-03-23,"['15-minutes-or-less', 'time-to-make', 'course...","[92.7, 13.0, 3.0, 2.0, 3.0, 27.0, 1.0]",7,"['in a skillet , bring enough water to cover a...",adapted from an old cookbook which suggested i...,...,-0.030320,-0.030035,0.112288,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8340,asparagus with olive oil herbs,87478,25,52543,2004-03-27,"['30-minutes-or-less', 'time-to-make', 'course...","[124.3, 14.0, 9.0, 1.0, 9.0, 6.0, 2.0]",9,"['mix parsley , garlic& oil in a 9 x 13 inch b...",i love the spring when fresh asparagus is avai...,...,0.027918,0.027484,0.061431,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [156]:
# the dataset is complete: write it out as a parquet file for the next notebook to pick up
data.to_parquet(path="/media/michael/Delta/Data/Data_Science_Masters/MSDS_498/recipes_embedded.parquet")