In [1]:
# Mounting drive...
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
# Important Libraries...
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Loading Data
data = pd.read_csv('/content/drive/MyDrive/Semester-2/Recommendation System/Assignment_2/RAW_interactions.csv')
data.head()

Unnamed: 0,user_id,recipe_id,date,rating,review
0,38094,40893,2003-02-17,4,Great with a salad. Cooked on top of stove for...
1,1293707,40893,2011-12-21,5,"So simple, so delicious! Great for chilly fall..."
2,8937,44394,2002-12-01,4,This worked very well and is EASY. I used not...
3,126440,85009,2010-02-27,5,I made the Mexican topping and took it to bunk...
4,57222,85009,2011-10-01,5,"Made the cheddar bacon topping, adding a sprin..."


In [3]:
# Selecting therelevant columns...
data = data[['user_id', 'recipe_id', 'rating']]
data.head()

Unnamed: 0,user_id,recipe_id,rating
0,38094,40893,4
1,1293707,40893,5
2,8937,44394,4
3,126440,85009,5
4,57222,85009,5


In [4]:
# Split the dataset into train and test set...
train_data, test_data = train_test_split(data,test_size=0.2)

In [5]:
train_data.shape

(905893, 3)

In [6]:
test_data.shape

(226474, 3)

In [12]:
test_data.head()

Unnamed: 0,user_id,recipe_id,rating
381961,423778,64364,5
642423,172760,30698,5
1055279,2356347,323749,0
465277,134972,95930,4
847234,129201,135664,5


In [7]:
# Create a dictionary to map user and recipe IDs to matrix indices
user_id_to_index = {id: i for i, id in enumerate(train_data['user_id'].unique())}
recipe_id_to_index = {id: i for i, id in enumerate(train_data['recipe_id'].unique())}

# Create the sparse user-item matrix
data = train_data['rating'].values
row_indices = np.array([user_id_to_index[id] for id in train_data['user_id']])
col_indices = np.array([recipe_id_to_index[id] for id in train_data['recipe_id']])
train_matrix = csr_matrix((data, (row_indices, col_indices)), shape=(len(user_id_to_index), len(recipe_id_to_index)))

In [17]:
# Create a dictionary to map recipe index to recipe ID
index_to_recipe_id = {}
for recipe_id, recipe_index in recipe_id_to_index.items():
    index_to_recipe_id[recipe_index] = recipe_id

In [15]:
print(train_matrix)

  (0, 0)	5
  (0, 2)	5
  (0, 73)	5
  (0, 122)	4
  (0, 140)	5
  (0, 228)	5
  (0, 237)	5
  (0, 254)	5
  (0, 260)	5
  (0, 287)	5
  (0, 302)	5
  (0, 593)	4
  (0, 597)	5
  (0, 650)	5
  (0, 677)	5
  (0, 772)	5
  (0, 790)	5
  (0, 816)	5
  (0, 899)	5
  (0, 918)	5
  (0, 993)	5
  (0, 1052)	5
  (0, 1148)	5
  (0, 1154)	5
  (0, 1230)	5
  :	:
  (192705, 159341)	4
  (192706, 93781)	5
  (192707, 86371)	4
  (192708, 211137)	5
  (192709, 10851)	4
  (192710, 521)	1
  (192711, 117602)	5
  (192712, 149684)	4
  (192713, 8657)	4
  (192714, 26568)	5
  (192715, 128480)	5
  (192716, 114506)	5
  (192717, 2717)	5
  (192718, 220)	5
  (192719, 649)	5
  (192720, 50989)	5
  (192721, 6150)	5
  (192722, 3633)	5
  (192723, 14720)	5
  (192724, 34910)	5
  (192725, 8442)	5
  (192726, 41549)	0
  (192727, 51234)	4
  (192728, 9696)	5
  (192729, 211145)	5


In [8]:
# Convert the user-item matrix into a sparse matrix
train_sparse = csr_matrix(train_matrix)

# Compute the cosine similarity matrix incrementally
batch_size = 1000
num_items = len(recipe_id_to_index)
similarity_matrix = np.zeros((num_items, num_items))

for i in range(0, num_items, batch_size):
    start = i
    end = min(i + batch_size, num_items)
    similarity_matrix[start:end, start:end] = cosine_similarity(train_sparse[:, start:end].T)


In [9]:
similarity_matrix.shape

(211146, 211146)

In [10]:
# For Baseline estimation

# Calculate the overall average rating
overall_avg_rating = train_data['rating'].mean()

# Calculate the average rating for each recipe and user
recipe_avg_rating = train_data.groupby('recipe_id')['rating'].mean()
user_avg_rating = train_data.groupby('user_id')['rating'].mean()

# Calculate the baseline rating for each recipe and user
train_data['baseline'] = train_data.apply(lambda x: overall_avg_rating + 
                                  recipe_avg_rating.loc[x['recipe_id']] - 
                                  user_avg_rating.loc[x['user_id']], axis=1)


In [19]:
# For Prediction

# Define a function to make predictions for a single user and recipe
# def predict(user_id, recipe_id, k=10):
#     # Find k most similar recipes
#     similarities = list(enumerate(similarity_matrix[recipe_id]))
#     similarities = sorted(similarities, key=lambda x: x[1], reverse=True)[:k]
#     similar_indices = [i[0] for i in similarities]

#     # Calculate the weighted average of ratings for the k most similar recipes
#     ratings = train_matrix.loc[user_id, similar_indices]
#     sim_scores = similarity_matrix[recipe_id, similar_indices]
#     rating_predictions = np.dot(ratings, sim_scores) / np.sum(sim_scores)

#     # Add baseline rating to the prediction
#     prediction = rating_predictions + overall_avg_rating + \
#                  recipe_avg_rating.loc[recipe_id] - user_avg_rating.loc[user_id]

#     return prediction

def predict(user_id, recipe_id, k=10):
    # Find k most similar recipes
    similarities = list(enumerate(similarity_matrix[recipe_id]))
    similarities = sorted(similarities, key=lambda x: x[1], reverse=True)[:k]
    similar_indices = [i[0] for i in similarities]

    # Calculate the weighted average of ratings for the k most similar recipes
    ratings = train_matrix[user_id, similar_indices].toarray().flatten()
    sim_scores = similarity_matrix[recipe_id, similar_indices]
    rating_predictions = np.dot(ratings, sim_scores) / np.sum(sim_scores)

    return rating_predictions


In [21]:
# Recommendations...
# Define a function to recommend top-K recipes to a user
def recommend(user_id, k=10):
    # Calculate the predicted ratings for all recipes
    predicted_ratings = []
    unrated_recipe_indices = train_matrix[user_id,:].nonzero()[1]
    for recipe_index in unrated_recipe_indices:
       for recipe_index in unrated_recipe_indices:
        # Map the recipe index to the recipe ID
        recipe_id = index_to_recipe_id[recipe_index]
        predicted_rating = predict(user_id, recipe_id)
        predicted_ratings.append((recipe_id, predicted_rating))
        # predicted_ratings = sorted(predicted_ratings, key=lambda x: x[1], reverse=True)[:k]
    # for recipe_id in train_matrix.columns:
    #     # Ignore recipes that the user has already rated
    #     if pd.isna(train_matrix.loc[user_id, recipe_id]):
    #         predicted_rating = predict(user_id, recipe_id)
    #         predicted_ratings.append((recipe_id, predicted_rating))

    # Sort the recipes by predicted rating in descending order
    predicted_ratings = sorted(predicted_ratings, key=lambda x: x[1], reverse=True)

    # Return the top-K recipes
    top_k_recipes = [x[0] for x in predicted_ratings[:k]]

    return top_k_recipes

# Make recommendations for all users in the test set
# test_users = test_data['user_id'].unique()
recommendations = []
# for user_id in test_users:
user_id = 134972
top_k_recipes = recommend(user_id)
recommendations.append((user_id, top_k_recipes))
recommendations

[(134972, [148921])]

In [26]:
import numpy as np
from sklearn.metrics import precision_score, recall_score

actual_ratings = []
for user_id in test_data['user_id']:
    if user_id in test_data['user_id']:
        actual_ratings.append(test_data['rating'].loc[user_id])
    else:
        actual_ratings.append(None)

# Get top-K recommendations for test users
k = 10
recommendations = [recommend(user_id, k) for user_id in test_data['user_id']]
flattened_recommendations = np.array(recommendations).ravel()

# Calculate precision and recall scores for top-K recommendations
precision = precision_score(actual_ratings, flattened_recommendations, average='micro')
recall = recall_score(actual_ratings, flattened_recommendations, average='micro')


IndexError: ignored

In [2]:
import numpy as np
import pandas as pd
from scipy.sparse import lil_matrix
from scipy.sparse import csr_matrix

# Load data
recipes_df = pd.read_csv('/content/drive/MyDrive/Semester-2/Recommendation System/Assignment_2/RAW_recipes.csv')
interactions_df = pd.read_csv('/content/drive/MyDrive/Semester-2/Recommendation System/Assignment_2/RAW_interactions.csv')



In [3]:
# Create a sparse matrix from interactions data
train_data = interactions_df[['user_id', 'recipe_id', 'rating']]

# Create a dictionary to map user and recipe IDs to matrix indices
user_id_to_index = {id: i for i, id in enumerate(train_data['user_id'].unique())}
recipe_id_to_index = {id: i for i, id in enumerate(train_data['recipe_id'].unique())}

# Create the sparse user-item matrix
data = train_data['rating'].values
row_indices = np.array([user_id_to_index[id] for id in train_data['user_id']])
col_indices = np.array([recipe_id_to_index[id] for id in train_data['recipe_id']])
train_matrix = csr_matrix((data, (row_indices, col_indices)), shape=(len(user_id_to_index), len(recipe_id_to_index)))


# Calculate the similarity between recipes
item_sim_matrix = np.dot(train_matrix.T, train_matrix)

# Normalize the similarity matrix
norms = np.array([np.sqrt(np.diagonal(item_sim_matrix, axis1=0, axis2=1))])
item_sim_matrix = item_sim_matrix / norms / norms.T

# Reshape the similarity matrix to have two dimensions
item_sim_matrix = np.reshape(item_sim_matrix, (item_sim_matrix.shape[0], item_sim_matrix.shape[1]))

def recommend(user_id, k):
    # Calculate the predicted ratings for all recipes
    predicted_ratings = []
    unrated_recipe_indices = train_matrix[user_id,:].nonzero()[1]
    for recipe_index in unrated_recipe_indices:
        similarity_scores = item_sim_matrix[recipe_index, train_matrix[user_id,:].nonzero()[1]]
        weighted_ratings = similarity_scores * train_matrix[user_id, train_matrix[user_id,:].nonzero()[1]]
        predicted_ratings.append((recipe_index, weighted_ratings.sum() / similarity_scores.sum()))

    # Sort the predicted ratings and get the top-K recommendations
    predicted_ratings.sort(key=lambda x: x[1], reverse=True)
    top_k_indices = [i[0] for i in predicted_ratings][:k]

    # Print the recommended recipe names with their ingredients
    recommended_recipes = recipes_df[recipes_df['recipe_id'].isin(top_k_indices)]
    for index, row in recommended_recipes.iterrows():
        print(f"{row['recipe_name']} - {row['ingredients']}")

# Example usage:
recommend(5, 10)  # Recommends 10 recipes to user with user_id = 5


ValueError: ignored