In [11]:
#Import Library
from time import time
import matplotlib.pyplot as plt

from sklearn.naive_bayes import GaussianNB
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors.nearest_centroid import NearestCentroid
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier

import pandas as pd
import numpy as np
import scipy as sp

from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

In [25]:
# ------------------------------- similarity approach------------------------------

food_df = pd.read_csv('ZenHealthAppEngine/dataset/fooditem.csv')
food_df.columns = ['fooditem' , 'Course','Cuisine','Ingredients' , 'Name' , 'Nutrients.Calories' , 
                      'Nutrients.Carbohydrates','Nutrients.Sugar' 
                   ,'allowedAllergy','allowedDiet','allowedIngredient','sugarLevel']
food_df.head(3)

Unnamed: 0,fooditem,Course,Cuisine,Ingredients,Name,Nutrients.Calories,Nutrients.Carbohydrates,Nutrients.Sugar,allowedAllergy,allowedDiet,allowedIngredient,sugarLevel
0,0,['Main Dishes'],,"['4 cups zucchini, cubed', '1 cup green peas',...",Zucchini and Green Peas Coconut Curry,302.13 kcal,19.55 grams,10.08 grams,Dairy-Free,Pescetarian,,High
1,1,['Main Dishes'],,"['1 tablespoon chile powder', '1 tablespoon pa...",10-Minute Maple-Crusted Salmon,409.81 kcal,13.59 grams,10.83 grams,Dairy-Free,Pescetarian,,High
2,2,['Main Dishes'],['Asian'],"['2 cups dried shiitakes (12-16 mushrooms)', '...",Crispy & Chewy Sesame Shiitake,331.11 kcal,38.23 grams,11.2 grams,Dairy-Free,Pescetarian,,High


In [26]:
 to_drop = ['Course','Cuisine' , 'allowedAllergy' , 'allowedDiet', 'allowedIngredient'
           ]
food_df.drop(to_drop, inplace=True, axis=1)

food_df.head(3)

Unnamed: 0,fooditem,Ingredients,Name,Nutrients.Calories,Nutrients.Carbohydrates,Nutrients.Sugar,sugarLevel
0,0,"['4 cups zucchini, cubed', '1 cup green peas',...",Zucchini and Green Peas Coconut Curry,302.13 kcal,19.55 grams,10.08 grams,High
1,1,"['1 tablespoon chile powder', '1 tablespoon pa...",10-Minute Maple-Crusted Salmon,409.81 kcal,13.59 grams,10.83 grams,High
2,2,"['2 cups dried shiitakes (12-16 mushrooms)', '...",Crispy & Chewy Sesame Shiitake,331.11 kcal,38.23 grams,11.2 grams,High


In [44]:
t = pd.DataFrame()
t = food_df.loc[ food_df['sugarLevel'] == 'High']
t.shape

(471, 7)

In [27]:
import re
def clean_df(column):
    column = re.sub(r'([^\.\s\w]|_)+', '', column).replace(".", ". ")    
    return column

def clean_units(column):
    column = column.replace("grams","")
    column = column.replace("kcal","")
    return column


In [28]:

food_df['Ingredients'] = food_df['Ingredients'].map(lambda j: clean_df(j))
food_df['Name'] = food_df['Name'].map(lambda j: clean_df(j))
food_df['Nutrients.Calories'] = food_df['Nutrients.Calories'].map(lambda j: clean_units(str(j)))
food_df['Nutrients.Carbohydrates'] = food_df['Nutrients.Carbohydrates'].map(lambda j: clean_units(str(j)))
food_df['Nutrients.Sugar'] = food_df['Nutrients.Sugar'].map(lambda j: clean_units(str(j)))
food_df.head(3)

Unnamed: 0,fooditem,Ingredients,Name,Nutrients.Calories,Nutrients.Carbohydrates,Nutrients.Sugar,sugarLevel
0,0,4 cups zucchini cubed 1 cup green peas 2 cups ...,Zucchini and Green Peas Coconut Curry,302.13,19.55,10.08,High
1,1,1 tablespoon chile powder 1 tablespoon paprika...,10Minute MapleCrusted Salmon,409.81,13.59,10.83,High
2,2,2 cups dried shiitakes 1216 mushrooms Boiling ...,Crispy Chewy Sesame Shiitake,331.11,38.23,11.2,High


In [29]:
le = preprocessing.LabelEncoder()
df_encoded = food_df.apply(le.fit_transform)
df_encoded.columns = ['fooditem' , 'e_infredients' , 'e_Name' , 'e_Nutrients.Calories' , 
                      'e_Nutrients.Carbohydrates','e_Nutrients.Sugar' ,'e_sugarLevel']
df_encoded.shape
df_encoded.head(3)

Unnamed: 0,fooditem,e_infredients,e_Name,e_Nutrients.Calories,e_Nutrients.Carbohydrates,e_Nutrients.Sugar,e_sugarLevel
0,0,705,902,395,271,132,0
1,1,280,0,550,138,198,0
2,2,458,269,440,537,233,0


In [31]:
df = pd.merge(food_df, df_encoded, on ='fooditem')
df.head(3)

Unnamed: 0,fooditem,Ingredients,Name,Nutrients.Calories,Nutrients.Carbohydrates,Nutrients.Sugar,sugarLevel,e_infredients,e_Name,e_Nutrients.Calories,e_Nutrients.Carbohydrates,e_Nutrients.Sugar,e_sugarLevel
0,0,4 cups zucchini cubed 1 cup green peas 2 cups ...,Zucchini and Green Peas Coconut Curry,302.13,19.55,10.08,High,705,902,395,271,132,0
1,1,1 tablespoon chile powder 1 tablespoon paprika...,10Minute MapleCrusted Salmon,409.81,13.59,10.83,High,280,0,550,138,198,0
2,2,2 cups dried shiitakes 1216 mushrooms Boiling ...,Crispy Chewy Sesame Shiitake,331.11,38.23,11.2,High,458,269,440,537,233,0


In [32]:
# *************** cosine similarity *********************

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity  

tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(df_encoded)
#print(tfidf_matrix)
cosine = cosine_similarity(df_encoded[0:1], df_encoded)
#print(cosine)
cosine.sort()
print(cosine[0][-5:])

[ 0.97855364  0.97936525  0.98843895  0.99157941  1.        ]


In [33]:
#************************* neighbourhood similarity *********************

import numpy as np
from sklearn.neighbors import NearestNeighbors



# Next we will instantiate a nearest neighbor object, and call it nbrs. Then we will fit it to dataset X.
model = NearestNeighbors(n_neighbors=5, algorithm='ball_tree').fit(df_encoded)

# Let's find the k-neighbors of each point in object X. To do that we call the kneighbors() function on object X.
distances, indices = model.kneighbors(df_encoded)

# Let's print out the indices of neighbors for each record in object X.
indices

array([[  0,  91, 183,  77, 129],
       [  1,  92, 171, 106, 117],
       [  2,  67,  34,  19,  75],
       ..., 
       [938, 859, 684, 752, 804],
       [939, 781, 754, 914, 867],
       [940, 937, 924, 931, 830]], dtype=int64)

In [34]:
#sample
# u, i = nbrs.kneighbors([[2,
# 370,
# 216,
# 370,
# 458,
# 216,
# 0]])
     
# print(i)
already_consumed_fooditems = [ 'INDIAN CHOLE', 'Grilled Marinated Flank Steak']
result = []
for item in already_consumed_fooditems:
    row = df.loc[df['Name'] == item]
    #print(row)
    match = []
    match.append(row['fooditem'].values[0])
    match.append( row['e_infredients'].values[0])
    match.append( row['e_Name'].values[0])
    match.append( row['e_Nutrients.Calories'].values[0])
    match.append( row['e_Nutrients.Carbohydrates'].values[0])
    match.append( row['e_Nutrients.Sugar'].values[0])
    match.append( row['e_sugarLevel'].values[0])
    #print(match)
    #fooditem e_infredients e_Name e_Nutrients.Calories e_Nutrients.Carbohydrates e_Nutrients.Sugar e_sugarLevel
    u, i =model.kneighbors([ match ])
    result = result + list(i[0])
    print(list(i[0]))
    print('**********')

print(result)

[15, 67, 35, 75, 122]
**********
[30, 145, 63, 7, 22]
**********
[15, 67, 35, 75, 122, 30, 145, 63, 7, 22]


In [37]:
for k in result:
    
    row = df.loc[df['e_Name'] == k]
    print(row['Name'].values[0] ," : ",  row['Nutrients.Sugar'].values[0])

Adorable Heart Shaped Brownies  :  0.06 
Bacon Fat Spice Cookies  :  11.25 
Asian Shrimp Scampi  :  11.38 
Bacon Wrapped Caramelized Sesame Asparagus  :  4.74 
Bell Pepper Chicken Burgers  :  0.83 
Asian Beef and Cabbage Salad  :  0.19 
Blueberry Banana Oatmeal Smoothie  :  11.29 
Bacon Chive Deviled Eggs  :  0.29 
3 ingredient Chia Pudding  :  7.35 
Andouille Sausage Jambalaya with Shrimp  :  5.93 


In [38]:
from sklearn.externals import joblib
joblib.dump(model, 'ZenHealthAppEngine/models/similarity.pkl')

['ZenHealthAppEngine/models/similarity.pkl']

In [39]:
# test model
test_model = joblib.load('ZenHealthAppEngine/models/similarity.pkl') 

In [40]:
user_food_df = pd.read_csv('ZenHealthAppEngine/dataset/user_fooditem.csv')
user_food_df.head()

test = pd.merge(user_food_df, food_df, on='fooditem')
test.head(3)
#test.shape

Unnamed: 0,userid,fooditem,consumptions,Ingredients,Name,Nutrients.Calories,Nutrients.Carbohydrates,Nutrients.Sugar,sugarLevel
0,user1,431,3,1 10 ounce package frozen shelled edamame 1 ta...,Chipotle Lime Edamame,27.16,1.31,0.22,Low
1,user1,432,4,1 cup maize flour makai ka atta 34 cup plain f...,Corn Tortillas,73.95,13.45,0.1,Low
2,user1,433,2,1 10 inch Tortilla wrap 34 Tablespoons of Sout...,Southwest Hummus Wraps,99.2,13.86,0.83,Low


In [41]:
users = {}

distinct_users=np.unique(user_food_df['userid'])
for user in distinct_users:
    data =test[test['userid'] == user]['Name'].values
    #print(data)
    if user in users:
        users[user].append(data[0])
    else:
        users[user] = list(data)
     #already_consumed_fooditems.append(row[''])

print(users)

{'user1': ['Chipotle Lime Edamame', 'Corn Tortillas', 'Southwest Hummus Wraps', 'Oven Roasted Cabbage Steaks', 'Chipotles Cilantro Lime Rice', 'Homemade Chili Paste with Dried Chilies'], 'user10': ['Macedonian Kebabs Kebapchinja', 'Kerala Style Mutton Potato Curry', 'Mutton and Yellow Split Peas Curry', 'Andhra Mutton Fry RecipeHow to make mutton fry andhra style', 'Mutton Curry Punjabi Style', 'Mutton Chaap Recipe Pakistani Mutton Chops Fry'], 'user15': ['Homemade Pumpkin Pie Spice', 'Clean Eating Peanut Butter Fat Bombs', 'Clean Eating Almond Butter Fudge', 'Sugar Free Strawberry Jelly', 'Wonton Wrappers', 'Paleo Tortilla Chips', 'Adorable Heart Shaped Brownies'], 'user2': ['Garlic Roast Beef', 'Shredded Mexican Beef', 'Garlic Bacon Pot Roast', 'Roasted Bone Marrow', 'Garlic Crusted Prime Rib Roast', 'Garlic Beef Stirfry', 'Simple Roast Beef'], 'user20': ['Slow Cooker Turkey Bacon Avocado and Bean Soup', 'SlowCooker Bacon Corn Chowder', 'Egg Drop Soup'], 'user30': ['Easy Lo Mein', 'R

In [42]:

rec = {}
for user, already_consumed_fooditems in users.items():
    result = []
    
    for item in already_consumed_fooditems:
        
        row = df.loc[df['Name'] == item]
        #print(row)
        match = []
        match.append(row['fooditem'].values[0])
        match.append( row['e_infredients'].values[0])
        match.append( row['e_Name'].values[0])
        match.append( row['e_Nutrients.Calories'].values[0])
        match.append( row['e_Nutrients.Carbohydrates'].values[0])
        match.append( row['e_Nutrients.Sugar'].values[0])
        match.append( row['e_sugarLevel'].values[0])
        #print(match)
        #fooditem e_infredients e_Name e_Nutrients.Calories e_Nutrients.Carbohydrates e_Nutrients.Sugar e_sugarLevel
        u, i =model.kneighbors([ match ])
        result = result + list(i[0])
        print(list(i[0]))
        print('**********')
        for k in result:
            
    rec[user] = result

print(rec)

IndentationError: expected an indented block (<ipython-input-42-819069cf80a5>, line 26)

In [43]:
names = []
for user,result in rec.items():
    for k in result:           
        row = df.loc[encoded_decode['DecName'] == k]
        print(row)
        name = print(row['Name'].values[0] ," : ",  row['Nutrients.Sugar'].values[0])
        names.append(name)

NameError: name 'rec' is not defined