In [37]:
#Import Library
from time import time
import matplotlib.pyplot as plt

from sklearn.naive_bayes import GaussianNB
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors.nearest_centroid import NearestCentroid
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier

import pandas as pd
import numpy as np
import scipy as sp

from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

In [38]:
#------------------------ matrix factorization usign SVD approach-----------------------------------

user_food_df = pd.read_csv('ZenHealthAppEngine/dataset/user_fooditem.csv')
user_food_df.head()

Unnamed: 0,userid,fooditem,consumptions
0,user1,431,3
1,user1,432,4
2,user1,433,2
3,user1,434,1
4,user1,435,4


In [39]:
utility_df = user_food_df.pivot(index = 'userid', columns ='fooditem', values = 'consumptions').fillna(0)
utility_df.head()

fooditem,1,2,3,4,5,6,11,12,13,14,...,583,584,585,586,587,588,589,731,732,733
userid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
user1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
user10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
user15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,6.0,3.0,2.0,6.0,7.0,4.0,4.0,0.0,0.0,0.0
user2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
user20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,2.0,4.0


In [40]:

food_df = pd.read_csv('ZenHealthAppEngine/dataset/fooditem.csv')
food_df.head(3)

Unnamed: 0,fooditem,Course,Cuisine,Ingredients,Name,Nutrients.Calories,Nutrients.Carbohydrates,Nutrients.Sugar,allowedAllergy,allowedDiet,allowedIngredient,sugarLevel
0,0,['Main Dishes'],,"['4 cups zucchini, cubed', '1 cup green peas',...",Zucchini and Green Peas Coconut Curry,302.13 kcal,19.55 grams,10.08 grams,Dairy-Free,Pescetarian,,High
1,1,['Main Dishes'],,"['1 tablespoon chile powder', '1 tablespoon pa...",10-Minute Maple-Crusted Salmon,409.81 kcal,13.59 grams,10.83 grams,Dairy-Free,Pescetarian,,High
2,2,['Main Dishes'],['Asian'],"['2 cups dried shiitakes (12-16 mushrooms)', '...",Crispy & Chewy Sesame Shiitake,331.11 kcal,38.23 grams,11.2 grams,Dairy-Free,Pescetarian,,High


In [41]:
utility_matrix = utility_df.as_matrix()
utility_mean = np.mean(utility_matrix , axis = 1)
utility_demeaned = utility_matrix- utility_mean.reshape(-1, 1)
utility_demeaned.shape

(17, 99)

In [42]:
from scipy.sparse.linalg import svds
U, sigma, Vt = svds(utility_demeaned)
sigma = np.diag(sigma)

In [52]:
all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt) + utility_mean.reshape(-1, 1)
preds_df = pd.DataFrame(all_user_predicted_ratings, columns = utility_df.columns)
print(preds_df)
preds_df.shape

fooditem       1         2         3         4         5         6    \
0         0.132364  0.149791  0.158505  0.167218  0.175932  0.149791   
1         0.116368  0.126813  0.132035  0.137257  0.142479  0.126813   
2        -0.645662 -0.392576 -0.266033 -0.139490 -0.012947 -0.392576   
3         0.139529  0.160663  0.171231  0.181798  0.192365  0.160663   
4         0.089306  0.098724  0.103433  0.108141  0.112850  0.098724   
5        -0.729162 -0.462586 -0.329297 -0.196009 -0.062721 -0.462586   
6        -0.317657 -0.160135 -0.081374 -0.002614  0.076147 -0.160135   
7        -0.506221 -0.295488 -0.190121 -0.084754  0.020612 -0.295488   
8         0.044585  0.099893  0.127547  0.155201  0.182855  0.099893   
9        -0.654641 -0.403236 -0.277534 -0.151831 -0.026128 -0.403236   
10        0.071613  0.077375  0.080255  0.083136  0.086016  0.077375   
11        7.578076  5.755455  4.844144  3.932834  3.021524  5.755455   
12        0.105403  0.133065  0.146896  0.160727  0.174559  0.13

(17, 99)

In [50]:
def recommend_food(predictions_df, userID, food_df, original_ratings_df, num_recommendations=5):
    
    # Get and sort the user's predictions
   
    sorted_user_predictions = predictions_df.loc[predictions_df['userid'] == userID].sort_values(ascending=False)
    
    # Get the user's data and merge in the movie information.
    user_data = original_ratings_df[original_ratings_df.UserID == (userID)]
    user_full = (user_data.merge(movies_df, how = 'left', left_on = 'fooditem', right_on = 'fooditem').
                     sort_values(['Rating'], ascending=False)
                 )

    print('User {0} has already rated {1} foods.'.format(userID, user_full.shape[0]))
    print('Recommending the highest {0} predicted ratings food not already rated.'.format(num_recommendations))
    
    # Recommend the highest predicted rating movies that the user hasn't seen yet.
    recommendations = (food_df[~food_df['fooditem'].isin(user_full['fooditem'])].
         merge(pd.DataFrame(sorted_user_predictions).reset_index(), how = 'left',
               left_on = 'fooditem',
               right_on = 'fooditem').
         rename(columns = {user_row_number: 'Predictions'}).
         sort_values('Predictions', ascending = False).
                       iloc[:num_recommendations, :-1]
                      )

    return user_full, recommendations

already_rated, predictions = recommend_food(preds_df, 'user1', food_df, utility_df, 10)

KeyError: 'userid'

In [45]:
# from scikits.crab.models import MatrixPreferenceDataModel
# #Build the model
# model = MatrixPreferenceDataModel(movies.data)

# from scikits.crab.metrics import pearson_correlation
# from scikits.crab.similarities import UserSimilarity
# #Build the similarity
# similarity = UserSimilarity(model, pearson_correlation)

# from crab.recommenders.knn import UserBasedRecommender
# #Build the User based recommender
# recommender = UserBasedRecommender(model, similarity, with_preference=True)
# #Recommend items for the user 5 (Toby)
# recommender.recommend(5)