# Ingredient App Builder
### Uses cosine similarity between vectorized ingredient lists and the user's input to recommend recipes
App can be found in web_apps/ingred_app.py

In [3]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from nltk.corpus import stopwords

In [5]:
# Base English stop words from NLTK.
stop = stopwords.words('english')

# Extra recipe-specific noise words: measurement units, size adjectives,
# preparation verbs and filler from free-text input. Every entry is lowercase
# because the vectorizer lower-cases text before stop words are filtered, so
# an upper-case entry could never match a token. Each word is listed once.
stop_ws = stop + [
    # units and quantities
    'cup', 'cups', 'tbsp', 'tsp', 'tablespoon', 'tablespoons',
    'teaspoon', 'teaspoons', 'dash', 'pound', 'kg', 'kilogram',
    'ounce', 'ounces', 'temperature',
    # sizes and portions
    'large', 'medium', 'small', 'piece', 'slice',
    # preparation verbs and qualifiers
    'chop', 'julienne', 'blend', 'mix', 'knead',
    'preferably', 'coarsely', 'fresh', 'freshly',
    # filler from conversational queries such as "I have ..."
    'i', 'have',
]

In [6]:
# Prompt for a free-text query; input() blocks until a line is entered.
# NOTE(review): the same prompt is repeated later in the notebook, right
# before the value is vectorized, so this earlier value gets overwritten.
user_in = input("Enter string: ")

In [9]:
# Load the recipe table (name, date, servings, ingredient list, image URL).
# NOTE(review): the frame carries an 'Unnamed: 0' column, which looks like an
# index written by an earlier to_csv call — confirm before relying on it.
df = pd.read_csv('ingred.csv')

In [5]:
# Preview the loaded recipe table (pandas truncates the display automatically).
df

Unnamed: 0.1,Unnamed: 0,name,date,servs,ings,img_url
0,0,Nan-e Berenji,"December 1, 2020",Makes about 50,"['tsp. baking powder', 'tsp. ground cardamom',...",https://assets.bonappetit.com/photos/5fb7e3af5...
1,1,Ginger-Citrus Cookies,"December 1, 2020",Makes about 28,"['cups 1""–2""-wide strips lemon, lime, orange, ...",https://assets.bonappetit.com/photos/5fb7e3ae5...
2,2,"Tequila, Beer, and Citrus Cocktail","December 1, 2020",Makes 1,"['oz. fresh grapefruit juice', 'oz. fresh lime...",https://assets.bonappetit.com/photos/5fb7e3b35...
3,3,Corn and Crab Beignets With Yaji Aioli,"December 1, 2020",8 - 10 Servings,"['cup unsalted dry-roasted peanuts', 'Tbsp. gr...",https://assets.bonappetit.com/photos/5fbc26d57...
4,4,Swedish Glögg,"December 1, 2020",Makes about 1½ quarts,"['cinnamon sticks, broken into pieces', 'tsp. ...",https://assets.bonappetit.com/photos/5fbd604d2...
...,...,...,...,...,...,...
4984,4984,Vegetable Stock,"December 10, 2012",2 quarts,"['tablespoon olive oil', 'medium onions, unpee...",https://assets.bonappetit.com/photos/57ae399f5...
4985,4985,Roast Beef Tenderloin with Horseradish Cream,"November 13, 2012",8 to 10 Servings,['cup plus 2 tablespoons crème fraîche or sour...,https://assets.bonappetit.com/photos/57ae3cda5...
4986,4986,Roasted Fresh Ham with Citrus and Rye,"November 13, 2012",14 Servings,"['12-pound fresh ham, cut from the shank end',...",https://assets.bonappetit.com/photos/57ae3cfff...
4987,4987,Nutmeg and Black Pepper Popovers,"November 13, 2012",Makes 12 Servings,"['cups all-purpose flour', 'tablespoons finely...",https://assets.bonappetit.com/photos/57ae3c8bf...


In [6]:
#df = df.append({'name': 'user', 'ings':user_in}, ignore_index= True)

In [6]:
# Sanity check: input() always returns a str.
type(user_in)

str

In [10]:
# Bag-of-words settings for the ingredient text. Despite the "tf" in the
# variable names, this is a plain count vectorizer, not TF-IDF.
vectorizer_opts = dict(
    lowercase = True,                       # fold case before tokenizing
    strip_accents = 'unicode',              # e.g. "crème" -> "creme"
    stop_words = stop_ws,                   # NLTK English + recipe noise words
    token_pattern = r'\b[a-zA-Z]{3,}\b',    # alphabetic tokens of 3+ letters only
    ngram_range = (1,3),                    # unigrams, bigrams and trigrams
    min_df = 0.01,                          # drop terms in fewer than 1% of recipes
    max_df = 0.8,                           # drop terms in more than 80% of recipes
)
tf_vectorizer = CountVectorizer(**vectorizer_opts)

# Learn the vocabulary and build the recipe-by-term count matrix in one pass.
dtm_tf = tf_vectorizer.fit_transform(df['ings'])
print(dtm_tf.shape)

(4989, 1119)


In [11]:
# Vocabulary terms, in the same order as the columns of dtm_tf.
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# it is used only as a fallback on older releases that lack the replacement.
if hasattr(tf_vectorizer, 'get_feature_names_out'):
    column_names = tf_vectorizer.get_feature_names_out().tolist()
else:
    column_names = tf_vectorizer.get_feature_names()

# Label the sparse document-term matrix with its terms; the frame stays
# sparse-backed, so no dense copy of the matrix is made here.
vectors = pd.DataFrame.sparse.from_spmatrix(dtm_tf, columns=column_names)
vectors

Unnamed: 0,active,active dry,active dry yeast,agave,aleppo,aleppo style,aleppo style pepper,allspice,almond,almonds,...,yellow,yogurt,yolk,yolks,yukon,yukon gold,yukon gold potatoes,zest,zest lemon,zest lemon juice
0,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4984,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4985,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4986,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4987,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [23]:
import pickle

# Persist the fitted artefacts so they can be reloaded without refitting:
# the recipe document-term matrix and the vectorizer that produced it.
# NOTE(review): presumably consumed by web_apps/ingred_app.py — confirm.
artefacts = (
    ('dtm_tf.pickle', dtm_tf),
    ('tf_vec.pickle', tf_vectorizer),
)
for pickle_path, payload in artefacts:
    with open(pickle_path, 'wb') as to_write:
        pickle.dump(payload, to_write)

In [117]:
# Prompt for the query that is vectorized and matched against the recipes.
# input() blocks, so this cell needs an interactive session to run.
user_in = input("Enter string: ")

In [12]:
# Project the query onto the already-fitted vocabulary. transform() expects an
# iterable of documents, hence the one-element list; the result has one row.
vec_user = tf_vectorizer.transform([user_in])

In [72]:
# Dense view of the query vector, for inspection only.
vec_user.toarray()

array([[0, 0, 0, ..., 0, 0, 0]])

In [13]:
from sklearn.metrics.pairwise import cosine_similarity

## Sort cosine similarity scores in descending order

In [14]:
# Similarity of the query to every recipe, highest first: negate, sort
# ascending, negate back. [0] selects the single row belonging to the query.
-np.sort(-cosine_similarity(vec_user, dtm_tf))[0]

array([0.31622777, 0.2773501 , 0.27216553, ..., 0.        , 0.        ,
       0.        ])

In [15]:
# Position of the recipe most similar to the query; with a single query row
# the flattened argmax is the recipe's row position in df.
best_match_pos = cosine_similarity(vec_user, dtm_tf).argmax()
df.iloc[best_match_pos], best_match_pos

(Unnamed: 0                                                 2455
 name                                    Whole Corn on the Grill
 date                                               May 17, 2016
 servs                                               4  Servings
 ings          ['ears of corn, in husk', 'Charred Spring Onio...
 img_url       https://assets.bonappetit.com/photos/57acbaf31...
 Name: 2455, dtype: object,
 2455)

In [17]:
num = 20            # how many of the closest recipes to consider
min_score = 0.20    # matches scoring below this similarity are not printed

# Similarity of the query to every recipe; computed once and reused.
# Shape is (1, n_recipes) because there is a single query row.
locs = cosine_similarity(vec_user, dtm_tf)

# Row positions of the `num` most similar recipes, best first, and their
# scores. Taking the scores through the same positions keeps each
# (position, score) pair aligned and both sequences the same length.
top_positions = (-locs[0]).argsort()[:num]
scores = locs[0][top_positions]

# Materialise the pairs as a list: a bare zip() is a one-shot iterator, so it
# would be exhausted by the loop and the final print would always show [].
idxs = list(zip(top_positions, scores))

for idx, score in idxs:
    if score >= min_score:
        print(df.iloc[idx])
        print(score)
print(idxs)

Unnamed: 0                                                 2455
name                                    Whole Corn on the Grill
date                                               May 17, 2016
servs                                               4  Servings
ings          ['ears of corn, in husk', 'Charred Spring Onio...
img_url       https://assets.bonappetit.com/photos/57acbaf31...
Name: 2455, dtype: object
0.3162277660168379
Unnamed: 0                                                 2380
name                                             Miso Corn Soup
date                                              June 21, 2016
servs                                               4  Servings
ings          ['ears of corn', 'tablespoons butter', 'large ...
img_url       https://assets.bonappetit.com/photos/57aca8db5...
Name: 2380, dtype: object
0.2773500981126146
Unnamed: 0                                                 3339
name                                     Ludo Lefebvre's Omelet
date          

In [67]:
# Spot-check a single recipe row.
test = df.iloc[9]
# Select the second field by position explicitly. A bare integer key on a
# string-labelled Series relies on a positional fallback that pandas has
# deprecated; .iloc states the intent and keeps working.
test.iloc[1]

'Pretzel and Potato Chip Moon Pies'

In [68]:
# Load the second table, which is joined onto the recipes by name below.
# NOTE(review): judging by the file name it holds recipe page URLs — confirm.
urls = pd.read_csv('w_urls.csv')

In [69]:
# Join the two tables on recipe name. pd.merge defaults to an inner join, so
# recipes missing from either table are dropped.
# NOTE(review): duplicate names on either side would multiply rows — confirm
# that 'name' is unique in both frames.
dfx = pd.merge(df, urls, on = 'name')

In [71]:
# Write the merged table to disk.
# NOTE(review): to_csv writes the index by default, which shows up as an extra
# 'Unnamed: 0' column when the file is read back; pass index=False if the
# consumers of super.csv do not need it.
dfx.to_csv('super.csv')

In [92]:
# Dense copies of the recipe matrix and the query vector.
# NOTE(review): toarray() allocates the full recipes-by-terms matrix; fine at
# this size, but keep it sparse if the corpus grows.
arrayd = dtm_tf.toarray()
user_array = vec_user.toarray()

In [97]:
# One query row, with one column per vocabulary term.
user_array.shape

(1, 1119)