In [1]:
# Mounting Drive
# Makes the Google Drive contents available under /content/drive, which is
# where the dataset CSVs are read from later in this notebook.
# The google.colab package is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [8]:
# Importing Libraries
import pandas as pd
import numpy as np
from scipy.sparse import coo_matrix
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the Data...
# `recipes` is read from RAW_recipes.csv (recipe metadata) and `data` from
# RAW_interactions.csv (user/recipe ratings), both located on the mounted Drive.
recipes = pd.read_csv('/content/drive/MyDrive/Semester-2/Recommendation System/Assignment_2/RAW_recipes.csv')
data = pd.read_csv('/content/drive/MyDrive/Semester-2/Recommendation System/Assignment_2/RAW_interactions.csv')
# Preview the first rows of the interactions table.
data.head()

Unnamed: 0,user_id,recipe_id,date,rating,review
0,38094,40893,2003-02-17,4,Great with a salad. Cooked on top of stove for...
1,1293707,40893,2011-12-21,5,"So simple, so delicious! Great for chilly fall..."
2,8937,44394,2002-12-01,4,This worked very well and is EASY. I used not...
3,126440,85009,2010-02-27,5,I made the Mexican topping and took it to bunk...
4,57222,85009,2011-10-01,5,"Made the cheddar bacon topping, adding a sprin..."


In [13]:
recipes.head()

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6
2,all in the kitchen chili,112140,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",13
3,alouette potatoes,59389,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8


In [3]:
# Convert user_id and recipe_id columns to integers
data['user_id'] = data['user_id'].astype(int)
data['recipe_id'] = data['recipe_id'].astype(int)

# Keep only the columns the recommender needs and draw a 10% random sample.
# random_state pins the sample so that a fresh "Restart & Run All" reproduces
# the same subset, and therefore the same biases, similarities and
# recommendations in every later cell.
data = data[['user_id', 'recipe_id', 'rating']].sample(frac=0.1, random_state=42)

In [4]:
# Restrict the interactions to the 10,000 users with the most ratings.
# value_counts() is sorted by count (descending), so the first 10,000 entries
# are the most active users.
user_counts = data['user_id'].value_counts()
top_users = user_counts.head(10000).index
data = data.loc[data['user_id'].isin(top_users)]

In [5]:
# Restrict the interactions to the 10,000 recipes with the most ratings.
# value_counts() is sorted by count (descending), so the first 10,000 entries
# are the most frequently rated recipes among the remaining interactions.
item_counts = data['recipe_id'].value_counts()
top_items = item_counts.head(10000).index
data = data.loc[data['recipe_id'].isin(top_items)]

In [6]:
# Baseline components: the global mean rating, plus how far each user's and
# each recipe's own mean rating deviates from that global mean.
mean_rating = data['rating'].mean()
user_bias = data.groupby('user_id')['rating'].mean().sub(mean_rating)
item_bias = data.groupby('recipe_id')['rating'].mean().sub(mean_rating)

In [9]:
# Sparse user-item rating matrix in COO format: rows are user ids, columns are
# recipe ids, values are ratings. The raw ids are used directly as indices and
# no shape is passed, so the dimensions are inferred from the largest ids.
ratings_matrix = coo_matrix(
    (
        data['rating'].to_numpy(),
        (data['user_id'].to_numpy(), data['recipe_id'].to_numpy()),
    )
)

In [10]:
# Item-item cosine similarity: transposing puts recipes on the rows, so each
# recipe's vector of user ratings is compared with every other recipe's.
item_sim = cosine_similarity(ratings_matrix.transpose())

In [17]:
# Create a dictionary to map item IDs to item names.
# Built directly from the two columns: iterrows() constructs a Series for every
# row, which is slow on a large frame, and leaves its loop variables behind in
# the notebook namespace. If an id occurs more than once the last row wins,
# exactly as with the previous row-by-row assignment.
id_to_item = dict(zip(recipes['id'], recipes['name']))

In [18]:
# Create a dictionary to map item IDs to ingredients.
# Built directly from the two columns: iterrows() constructs a Series for every
# row, which is slow on a large frame, and leaves its loop variables behind in
# the notebook namespace. If an id occurs more than once the last row wins,
# exactly as with the previous row-by-row assignment.
id_to_ingredients = dict(zip(recipes['id'], recipes['ingredients']))

In [11]:
# Function to get the top k similar items for a given item
def get_similar_items(item_id, k):
    """Return the indices of the ``k`` highest entries in ``item_sim[item_id]``.

    Indices are ordered from most to least similar. The row of ``item_sim``
    is ranked as-is, so the queried item itself can appear in the result.
    """
    ascending = item_sim[item_id].argsort()
    descending = ascending[::-1]
    return descending[:k]

In [12]:
# Function to predict the rating for a user and item using baseline estimation and item-item collaborative filtering
def predict_rating(user_id, item_id):
    """Predict the rating ``user_id`` would give to ``item_id``.

    The estimate starts from the baseline
    ``mean_rating + user_bias[user_id] + item_bias[item_id]`` and adds a
    similarity-weighted average of the user's *residuals* (actual rating minus
    that rating's own baseline) over the items the user has rated. Using
    residuals keeps the result on the rating scale; adding a weighted average
    of raw ratings on top of the baseline would count the rating level twice.

    Parameters
    ----------
    user_id : int
        Row index of the user in ``ratings_matrix`` and label in ``user_bias``.
    item_id : int
        Column index of the item in ``ratings_matrix``, row/column index in
        ``item_sim`` and label in ``item_bias``.

    Returns
    -------
    float
        The predicted rating. Falls back to the plain baseline when the user
        has no stored ratings or all relevant similarities sum to zero.

    Raises
    ------
    KeyError
        If ``user_id`` is not a label of ``user_bias`` or ``item_id`` is not a
        label of ``item_bias``.
    """
    user_baseline = mean_rating + user_bias[user_id]
    rating = user_baseline + item_bias[item_id]

    # ratings_matrix is laid out as (user, item). COO matrices cannot be sliced,
    # so the user's entries are selected through the coordinate arrays instead.
    # tocoo() is a no-op for a COO matrix and also accepts CSR/CSC input.
    coo = ratings_matrix.tocoo()
    user_mask = coo.row == user_id
    rated_items = coo.col[user_mask]
    if rated_items.size == 0:
        return rating
    ratings = coo.data[user_mask]

    sim_scores = item_sim[item_id][rated_items]
    sum_of_sim_scores = np.sum(sim_scores)
    if sum_of_sim_scores != 0:
        # Baseline of each already-rated item for this user.
        rated_baselines = user_baseline + item_bias.loc[rated_items].to_numpy()
        residuals = ratings - rated_baselines
        rating += np.sum(sim_scores * residuals) / sum_of_sim_scores
    return rating

In [19]:
# Function to recommend top n recipes and respective ingredients for a given user
def recommend(user_id, n):
    """Return the ``n`` best-scoring recipes for ``user_id``.

    Parameters
    ----------
    user_id : int
        Row index of the user in ``ratings_matrix``.
    n : int
        Maximum number of recipes to return.

    Returns
    -------
    tuple[list, list]
        ``(names, ingredients)``: two parallel lists looked up in
        ``id_to_item`` and ``id_to_ingredients``, ordered by descending
        predicted rating. Both lists are empty when the user has no ratings
        stored in ``ratings_matrix``.

    Raises
    ------
    KeyError
        If a scored recipe id is missing from ``id_to_item`` or
        ``id_to_ingredients``.
    """
    # ratings_matrix is laid out as (user, recipe), so the recipes a user has
    # rated are the column indices of the entries in that user's row. Reading
    # column `user_id` instead would return the users who rated the recipe
    # whose id happens to equal `user_id`.
    coo = ratings_matrix.tocoo()
    item_ids = np.unique(coo.col[coo.row == user_id])

    # NOTE(review): candidates are limited to recipes the user has already
    # rated, as before; confirm whether unseen recipes were intended instead.
    scores = [predict_rating(user_id, item_id) for item_id in item_ids]

    # Keep the n highest predicted scores.
    ranked = sorted(zip(item_ids, scores), key=lambda pair: pair[1], reverse=True)[:n]

    # Get the recipe names and ingredients for the top items
    top_item_names = [id_to_item[item_id] for item_id, _ in ranked]
    top_item_ingredients = [id_to_ingredients[item_id] for item_id, _ in ranked]

    return top_item_names, top_item_ingredients


In [31]:
# Example query: top 5 recipes for one user.
user_id, n = 126440, 5
Items, Ingredients = recommend(user_id, n)

In [32]:
# Show the recommended recipe names followed by their ingredient entries.
# Two empty lists mean recommend() found no recipes to score for this user.
print(Items,Ingredients)

[] []
