In [1]:
import json,pandas as pd, requests
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from surprise import Reader, Dataset, SVD
from collections import defaultdict

In [2]:
# Download the rating records that feed the recommender.
# url= "http://flask-test-aramark.azurewebsites.net/admin/api/rating/"
url = "http://markoeats-test.azurewebsites.net/admin/api/rating/"
# A timeout keeps the cell from hanging forever when the server never answers.
response = requests.get(url, timeout=30)
# Stop on 4xx/5xx responses instead of trying to parse an error page as data.
response.raise_for_status()
user_data = response.json()

In [3]:
# Download the meal/recipe catalogue from the meals API.
# NOTE(review): the following cell reassigns `service_recipe` from a local JSON
# file, so on a top-to-bottom run the data fetched here is not what the
# recommender receives - confirm which source is intended.
# url = "http://flask-test-aramark.azurewebsites.net/admin/api/meals/"
url = "http://markoeats-test.azurewebsites.net/admin/api/meals/"
# A timeout keeps the cell from hanging forever when the server never answers.
response = requests.get(url, timeout=30)
# Stop on 4xx/5xx responses instead of trying to parse an error page as data.
response.raise_for_status()
service_recipe = response.json()

In [4]:
# Load the recipe catalogue from a local JSON file. This replaces the
# `service_recipe` value obtained from the meals API in the preceding cell.
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale (JSON text is normally UTF-8).
with open("./Urban eatery data.json", encoding="utf-8") as fp:
    service_recipe = json.load(fp)

In [5]:
def user_recommendations_consolidated_without_mealperiod(service_recipe, user_data, user_id, random_state=None):

    '''
    DESCRIPTION
    ==============

    This is the main function for Hybrid Recommender.
    Gives recommendations for a User Id, grouped by the meal periods (1, 2, 3)
    on which each recommended recipe is served.

    Parameters:
    ===========

    service_recipe : Response Data from Aramark service_recipe API. Must contain a
                     'results' list; each recipe needs 'recipe_id', 'recipe_name',
                     'meal_period_id' (a container of meal period numbers), 'ingredients'
                     (entries with 'ingredient_id' and 'sub_assembly_id'),
                     'marketing_description', 'allergen_attributes',
                     'dietary_attributes', 'new' and 'featured'.

    user_data : Response from Rating API. Must contain a 'results' list; each record
                needs 'id', 'meal_period', 'recipe_name', 'profile_id', 'recipe_id'
                and 'rating'.

    user_id (int) : User ID for whom recommendations should be fetched.

    random_state (int / numpy RandomState / None) : Optional seed handed to both the
                random sampling of the user's dishes and the SVD model, so that a run
                can be repeated. The default (None) keeps both steps unseeded.

    Returns:
    ========

    A defaultdict(list) whose keys are the strings '1', '2' and '3' (meal periods) and
    whose values are lists of recipe dictionaries (see dict_creation). Dishes the user has
    not rated come first, followed by dishes already rated, highest rating first.
    A user without any rating gets an empty result.

    Side effects: prints the user's {recipe id: rating} dictionary.

    About Sub-Functions:
    1) user_dish_list(user_id): Expects the user id of the user for whom the recommendations have to be made as an argument.
                                Returns a list of dishes sampled from those the user rated 4 or above
                                (or from all rated dishes when there is none) and
                                a dictionary of all the dishes rated by the user.

    2) similar_row_positions(recipe_id): Expects a recipe id and returns the row positions of the 19 recipes
                                         whose ingredients are most similar to it.

    3) hybrid(user_id, recipe_id): Expects the user id and recipe id as arguments and returns the recipe id's similar
                                   to that particular recipe id, ordered by the rating the SVD algorithm predicts.

    4) content_recommendations(Recipe_id): Expects recipe id as argument and returns the list of recipe id's
                                           similar to that recipe id based on cosine similarity.

    5) get_hybrid_recommendations(): Getter method, which iterates over the list returned by user_dish_list method and
                                     calls hybrid method with user id and recipe id and returns the list of hybrid recommendations.

    6) get_content_recommendations(): Getter method, which iterates over the list returned by user_dish_list method and
                                      calls content recommendation method with the recipe id
                                      and returns the list of content recommendations.

    7) rating_verification(hybrid_list_temp): Expects the final recommendation list as the argument and compares the
                                              recipe id's with the user's rating history and
                                              sorts them in order (not consumed, consumed in descending order) before
                                              returning the sorted list.

    8) dict_creation(j): Expects recipe dict as argument and creates a json format as requested by back-end team.

    9) Meal_Period_Verification(temp): Expects the final recommendation list and verifies the meal-period
                                       on which they are being served before calling the dict creation method and
                                       returns the final dictionary.

    ================================================================================================================

    '''

    # Iterate over aramark service recipe data and create data frame.

    recipe_ing_list = []
    for recipe in service_recipe['results']:
        # A component is identified by its ingredient id, or by its
        # sub-assembly id when it has no ingredient id.
        component_ids = [
            ing['sub_assembly_id'] if ing['ingredient_id'] is None else ing['ingredient_id']
            for ing in recipe['ingredients']
        ]
        recipe_ing_list.append({
            'Recipe_id': recipe['recipe_id'],
            'Recipe_name': recipe['recipe_name'],
            'meal_period': recipe['meal_period_id'],
            # repr of the id list without the surrounding brackets; this text is what TF-IDF sees.
            'ingredient_data': str(component_ids)[1:-1],
        })

    # One row per (recipe, meal period), ordered by meal period.
    recipe_ing_df = pd.DataFrame(recipe_ing_list).explode('meal_period')
    recipe_ing_df = recipe_ing_df.sort_values(by=['meal_period'])

    # Code to load the User Consumption data coming from Rating API into a dataframe.
    # Ratings recorded for meal period 4 are left out (the reason is not visible here).

    user_ratings = pd.DataFrame(list(user_data['results']))
    user_ratings = (
        user_ratings[user_ratings['meal_period'] != 4]
        .drop(columns='id')
        .rename(columns={'meal_period': 'Meal_period', 'recipe_name': 'Recipe_name', 'profile_id': 'User_ID', 'recipe_id': 'Recipe_id', 'rating': 'Rating'})
    )

    ### Calculating the Average ratings of the dish and Rating counts and creating a dataframe
    ratings_by_recipe = user_ratings.groupby('Recipe_id')['Rating']
    ratings_mean_count = pd.DataFrame(ratings_by_recipe.mean())
    ratings_mean_count['Recipe_id'] = ratings_mean_count.index
    ratings_mean_count['Rating_counts'] = ratings_by_recipe.count()
    ratings_mean_count.index.names = ['index']
    ratings_mean_count = ratings_mean_count.rename(columns={"Rating": "Avg_rating"})

    # Keep a single row per recipe name (the first one after the meal period sort).
    ingredient_data_df = recipe_ing_df.drop_duplicates(subset='Recipe_name', keep='first')

    # Content Based: Vectoring on Ingredient ID data using TFIDF
    # min_df=1 keeps every term, exactly like the former min_df=0, but is also accepted by
    # scikit-learn releases that validate integer min_df to be >= 1.

    tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 1), min_df=1, stop_words='english')
    tfidf_matrix = tf.fit_transform(ingredient_data_df['ingredient_data'])
    cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

    # content_df row positions line up with the rows/columns of cosine_sim.
    content_df = ingredient_data_df.reset_index(drop=True)
    recipes = content_df['Recipe_id']
    indices = pd.Series(content_df.index, index=content_df['Recipe_id'])

    # Collaborative Filtering Algorithm using SVD Algorithm:
    # NOTE(review): Reader() is used with its default rating scale, while the sample output in
    # this notebook contains a rating of 0 - confirm the scale the ratings really use.

    reader = Reader()
    data = Dataset.load_from_df(user_ratings[['User_ID', 'Recipe_id', 'Rating']], reader)
    algo = SVD(random_state=random_state)
    algo.fit(data.build_full_trainset())

    # Get the User consumed/rated dishes

    def user_dish_list(user_id):
        user_rows = user_ratings[user_ratings['User_ID'] == user_id]  # Ratings of the particular user
        if user_rows.empty:
            # Nothing to sample from: sampling an empty frame would raise.
            return [], {}

        liked_rows = user_rows[user_rows['Rating'] >= 4]  # Dishes rated 4 and above by the user
        seed_rows = user_rows if liked_rows.empty else liked_rows

        user_dish_rating_dict = pd.Series(user_rows.Rating.values, index=user_rows.Recipe_id).to_dict()

        # Draw 20 rows with replacement, then drop repeats while keeping the drawn order.
        sampled_ids = seed_rows.sample(20, replace=True, random_state=random_state)['Recipe_id']
        return list(dict.fromkeys(sampled_ids)), user_dish_rating_dict

    def similar_row_positions(recipe_id):
        idx = indices[recipe_id]  # gives the index value of the recipe/dish
        ranked = sorted(enumerate(cosine_sim[idx]), key=lambda pair: pair[1], reverse=True)
        # Position 0 is skipped on the assumption that it is the dish itself; 19 neighbours remain.
        return [row for row, _score in ranked[1:20]]

    # Defining the hybrid recommender based on SVD algorithm which works on Content and Collaborative Algorithms:

    def hybrid(user_id, recipe_id):
        recipe_names = content_df.iloc[similar_row_positions(recipe_id), :]
        # Inner join: only neighbours that have at least one rating survive.
        recipe_names = recipe_names.merge(ratings_mean_count, left_on='Recipe_id', right_index=True, left_index=False)
        recipe_names = recipe_names[['Recipe_id', 'Recipe_name', 'Rating_counts', 'Avg_rating']].copy()
        # Rating the SVD model predicts this user would give each neighbour.
        recipe_names['est'] = [algo.predict(user_id, x).est for x in recipe_names['Recipe_id']]
        recipe_names = recipe_names.sort_values(by=['est', 'Avg_rating'], ascending=False)
        return (recipe_names['Recipe_id'])[:20]  # At most 20 dishes

    def content_recommendations(Recipe_id):  # Defining Content Based
        return recipes.iloc[similar_row_positions(Recipe_id)].tolist()

    current_day_ids = set(ingredient_data_df['Recipe_id'])  # Recipe id's present in the recipe data

    dish_list, user_dish_rating3_dict = user_dish_list(user_id)  # User consumed dishes

    print("user consumed dishes : \n", user_dish_rating3_dict)

    def collect_recommendations(recommend_for_dish):
        # dict keys act as an insertion-ordered set: repeats are dropped, ranking order is kept.
        collected = {}
        for dish in dish_list:
            if dish in current_day_ids:  # Skip consumed dishes that are missing from the recipe data
                collected.update(dict.fromkeys(recommend_for_dish(dish)))
        return list(collected)

    def get_hybrid_recommendations():  # Gives the Hybrid recommendations
        return collect_recommendations(lambda dish: hybrid(user_id, dish))

    def get_content_recommendations():  # Get Content Based Recommendations
        return collect_recommendations(content_recommendations)

    def rating_verification(hybrid_list_temp):  # sort the recommendations based on not-consumed , consumed
        not_consumed = [dish for dish in hybrid_list_temp if dish not in user_dish_rating3_dict]
        consumed = list(dict.fromkeys(dish for dish in hybrid_list_temp if dish in user_dish_rating3_dict))
        consumed.sort(key=lambda dish: user_dish_rating3_dict[dish], reverse=True)
        return not_consumed + consumed

    # Creates json structure with required fields.
    response_fields = ('recipe_id', 'recipe_name', 'marketing_description',
                       'allergen_attributes', 'dietary_attributes', 'new', 'featured')

    def dict_creation(j):
        return {field: j[field] for field in response_fields}

    # Verifies the meal period of the recommended dishes.
    def Meal_Period_Verification(temp):
        # Index the catalogue once so each recommendation is a lookup, not a full scan.
        recipes_by_id = defaultdict(list)
        for recipe in service_recipe['results']:
            recipes_by_id[recipe['recipe_id']].append(recipe)

        final_dict = defaultdict(list)
        for recommended_id in temp:
            for recipe in recipes_by_id.get(recommended_id, ()):
                for period in (1, 2, 3):
                    if period in recipe['meal_period_id']:
                        final_dict[str(period)].append(dict_creation(recipe))
        return final_dict

    # Getting Recommendations: If the hybrid recommendations are less than 5, switching to content output
    recomended_dishes = get_hybrid_recommendations()
    if len(recomended_dishes) < 5:
        recomended_dishes = get_content_recommendations()
    return Meal_Period_Verification(rating_verification(recomended_dishes))

In [8]:
# Recommendations for user 251, showing the entries stored under meal period key '1'.
user_recommendations_consolidated_without_mealperiod(
    service_recipe=service_recipe,
    user_data=user_data,
    user_id=251,
)['1']

user consumed dishes : 
 {'M9577': 2, 'M9519': 5, 'M14857': 5, 'M37420': 4, 'M14861': 3, 'M33750': 4, 'M10583': 4, 'M12296': 0, 'M35663': 4, 'M9906': 4}


[{'recipe_id': 'M10588',
  'recipe_name': 'Pork Sausage Patty',
  'marketing_description': 'Sizzling hot golden brown pork sausage patty',
  'allergen_attributes': "OrderedDict([('allergen_statement_not_available', None), ('contains_shellfish', 'NO'), ('contains_peanut', 'NO'), ('contains_tree_nuts', 'NO'), ('contains_milk', 'NO'), ('contains_wheat', 'NO'), ('contains_soy', 'NO'), ('contains_eggs', 'NO'), ('contains_fish', 'NO'), ('contains_added_msg', 'UNKNOWN'), ('contains_hfcs', 'UNKNOWN'), ('contains_mustard', 'UNKNOWN'), ('contains_celery', 'UNKNOWN'), ('contains_sesame', 'UNKNOWN'), ('contains_red_yellow_blue_dye', 'UNKNOWN'), ('gluten_free_per_fda', 'UNKNOWN'), ('non_gmo_claim', 'UNKNOWN'), ('contains_gluten', 'NO')])",
  'dietary_attributes': "OrderedDict([('vegan', 'NO'), ('vegetarian', 'NO'), ('kosher', 'NO'), ('halal', 'NO')])",
  'new': 0,
  'featured': 0},
 {'recipe_id': 'M9519',
  'recipe_name': 'Bacon',
  'marketing_description': 'Crispy bacon',
  'allergen_attributes': 

In [9]:
# Recommendations for user 251, showing the entries stored under meal period key '2'.
user_recommendations_consolidated_without_mealperiod(
    service_recipe=service_recipe,
    user_data=user_data,
    user_id=251,
)['2']

user consumed dishes : 
 {'M9577': 2, 'M9519': 5, 'M14857': 5, 'M37420': 4, 'M14861': 3, 'M33750': 4, 'M10583': 4, 'M12296': 0, 'M35663': 4, 'M9906': 4}


[{'recipe_id': 'M9869',
  'recipe_name': 'Herb-Roasted Potatoes',
  'marketing_description': 'Oven-roasted cubed potatoes seasoned with lemon-pepper, parsley and paprika',
  'allergen_attributes': "OrderedDict([('allergen_statement_not_available', None), ('contains_shellfish', 'NO'), ('contains_peanut', 'NO'), ('contains_tree_nuts', 'NO'), ('contains_milk', 'NO'), ('contains_wheat', 'NO'), ('contains_soy', 'NO'), ('contains_eggs', 'NO'), ('contains_fish', 'NO'), ('contains_added_msg', 'UNKNOWN'), ('contains_hfcs', 'UNKNOWN'), ('contains_mustard', 'UNKNOWN'), ('contains_celery', 'UNKNOWN'), ('contains_sesame', 'UNKNOWN'), ('contains_red_yellow_blue_dye', 'UNKNOWN'), ('gluten_free_per_fda', 'UNKNOWN'), ('non_gmo_claim', 'UNKNOWN'), ('contains_gluten', 'NO')])",
  'dietary_attributes': "OrderedDict([('vegan', 'YES'), ('vegetarian', 'YES'), ('kosher', 'YES'), ('halal', 'NO')])",
  'new': 0,
  'featured': 0},
 {'recipe_id': 'M20957',
  'recipe_name': 'Vegan Mexican Beef',
  'marketing_descr