<a href="https://colab.research.google.com/github/vipashaaV321/Collaborative-Food-Recipe-Recommendation-System/blob/Vipasha/Item_Item_MF_SVD_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.sparse.linalg import svds
from surprise import Dataset
from surprise import Reader
from surprise import SVD
from surprise import accuracy
from surprise.model_selection import train_test_split

### Item - Item collaborative filtering with Matrix factorization

#### SVD with surprise library

#### Load google drive and define base path

In [None]:
Base_path ='/content/drive/MyDrive/Data/RS2'

In [None]:
!pip install surprise

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


#### Load Filtered data and applying operations using surprise library

In [None]:
# Load dataset
df = pd.read_csv(Base_path+"/short-recipes-20.csv")

In [None]:
# Define Reader
reader = Reader(rating_scale=(1, 5))

In [None]:
# Load data into Surprise dataset
data = Dataset.load_from_df(df[['user_id', 'recipe_id', 'rating']], reader)

In [None]:
# Split dataset into training and testing sets
trainset, testset = train_test_split(data, test_size=0.2)

In [None]:
testset

[(962690, 335810, 5.0),
 (587766, 238889, 4.0),
 (394085, 208118, 4.0),
 (362983, 212706, 5.0),
 (220348, 171450, 5.0),
 (508949, 125282, 5.0),
 (237123, 157676, 5.0),
 (121690, 210693, 5.0),
 (154026, 65909, 4.0),
 (344087, 86593, 5.0),
 (174096, 219072, 5.0),
 (593927, 71846, 5.0),
 (383346, 243431, 5.0),
 (797706, 36359, 5.0),
 (55578, 60402, 5.0),
 (209603, 167693, 5.0),
 (424680, 286053, 5.0),
 (55380, 38261, 4.0),
 (653438, 287499, 4.0),
 (176615, 61591, 5.0),
 (41409, 68408, 5.0),
 (704950, 311778, 5.0),
 (212609, 428645, 5.0),
 (494867, 96865, 5.0),
 (461834, 463701, 5.0),
 (245408, 24476, 5.0),
 (281701, 137135, 5.0),
 (377039, 52366, 4.0),
 (465056, 158420, 4.0),
 (39301, 38673, 5.0),
 (783601, 16371, 4.0),
 (305531, 174291, 5.0),
 (926169, 181223, 5.0),
 (212609, 468451, 5.0),
 (463435, 450133, 5.0),
 (37449, 485107, 5.0),
 (424680, 88521, 5.0),
 (498271, 298097, 5.0),
 (383346, 297379, 5.0),
 (74652, 129615, 5.0),
 (798181, 105136, 5.0),
 (47892, 341569, 5.0),
 (288218, 116

In [None]:
# Create SVD model and fit on training set
model = SVD()
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7fdbc2263610>

In [None]:
# Make predictions on test set
predictions = model.test(testset)

In [None]:
# Calculate RMSE
accuracy.rmse(predictions)

RMSE: 0.7849


0.7848683468811428

##### Recommendations

In [None]:
# Recommend top 10 items for user 1
user_id = 1588
recipes_to_ignore = df[df['user_id'] == user_id]['recipe_id'].tolist()
recipes = df[~df['recipe_id'].isin(recipes_to_ignore)]['recipe_id'].unique()
predictions = [(iid, model.predict(user_id, iid).est) for iid in recipes]
top_recipes = sorted(predictions, key=lambda x: x[1], reverse=True)[:10]
print(top_recipes)


[(55309, 5), (107440, 5), (107059, 5), (295676, 4.9897461448833305), (28026, 4.98394272559119), (57591, 4.978667763447991), (53194, 4.972504815696459), (77497, 4.969154672315901), (186029, 4.966219103177965), (29084, 4.964935436567342)]


In [None]:
type(top_recipes)

list

In [None]:
df2=pd.DataFrame(top_recipes,columns =['recipe_id','avg_rating'])

In [None]:
df2

Unnamed: 0,recipe_id,avg_rating
0,55309,5.0
1,107440,5.0
2,107059,5.0
3,295676,4.989746
4,28026,4.983943
5,57591,4.978668
6,53194,4.972505
7,77497,4.969155
8,186029,4.966219
9,29084,4.964935


In [None]:
# recommendation = df2.join(df, on='recipe_id')

## SVD with Numpy

In [None]:
# Load dataset
df = pd.read_csv(Base_path+"/short-recipes-20.csv")

In [None]:
df

Unnamed: 0.1,Unnamed: 0,name,user_id,recipe_id,rating,minutes
0,21,calm your nerves tonic,65056,39959,5.0,5
1,76,homemade vegetable soup from a can,189616,87098,5.0,12
2,77,homemade vegetable soup from a can,369715,87098,4.0,12
3,170,i stole the idea from mirj sesame noodles,49304,90921,5.0,18
4,171,i stole the idea from mirj sesame noodles,82648,90921,5.0,18
...,...,...,...,...,...,...
122605,698886,zwiebeln salat swiss onion salad,169430,455209,5.0,10
122606,698890,zydeco green beans,283251,185979,0.0,20
122607,698891,zydeco salad,226863,367912,5.0,5
122608,698892,zydeco salad,621626,367912,5.0,5


In [None]:
# Calculate the mean rating for each recipe
mean_ratings = pd.DataFrame(df.groupby('name')['rating'].mean())



In [None]:
# Add a column for the number of ratings for each recipe
mean_ratings['num_ratings'] = df.groupby('name')['rating'].count()



In [None]:
mean_ratings

Unnamed: 0_level_0,rating,num_ratings
name,Unnamed: 1_level_1,Unnamed: 2_level_1
007 martini,5.0,1
1 2 3 black bean salsa dip,5.0,5
1 2 3 rice and chili burritos,4.0,1
1 dish caramel pecan coffee cake round,5.0,1
1 gram fat chocolate banana freezer pops,4.0,1
...,...,...
zwieback,5.0,1
zwiebeln salat swiss onion salad,5.0,1
zydeco green beans,0.0,1
zydeco salad,5.0,2


In [None]:
# Pivot the data to create a recipe-rating matrix
recipe_ratings = df.pivot_table(index='name', columns='user_id', values='rating').fillna(0)



In [None]:
recipe_ratings

user_id,1533,1535,1634,2310,2312,3205,3288,4291,4439,4470,...,2324285,2399085,2549237,2585084,2597942,1800054678,1802849661,2000431901,2000498330,2001102678
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
007 martini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1 2 3 black bean salsa dip,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1 2 3 rice and chili burritos,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
1 dish caramel pecan coffee cake round,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1 gram fat chocolate banana freezer pops,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zwieback,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zwiebeln salat swiss onion salad,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zydeco green beans,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zydeco salad,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Perform SVD on the normalized ratings matrix
## Normalize the ratings by subtracting the mean rating from each rating
#normalized_ratings = recipe_ratings - np.array(mean_ratings['rating'])
# U, sigma, Vt = svds(normalized_ratings, k=50)

from scipy.sparse.linalg import svds

U, sigma, Vt = svds(recipe_ratings, k=50)

sigma = np.diag(sigma)

In [None]:
sigma

array([[ 92.68204202,   0.        ,   0.        , ...,   0.        ,
          0.        ,   0.        ],
       [  0.        ,  93.11030595,   0.        , ...,   0.        ,
          0.        ,   0.        ],
       [  0.        ,   0.        ,  93.76309312, ...,   0.        ,
          0.        ,   0.        ],
       ...,
       [  0.        ,   0.        ,   0.        , ..., 200.21737402,
          0.        ,   0.        ],
       [  0.        ,   0.        ,   0.        , ...,   0.        ,
        206.07067749,   0.        ],
       [  0.        ,   0.        ,   0.        , ...,   0.        ,
          0.        , 266.81389711]])

In [None]:
# Reconstruct the ratings matrix from the SVD components
predicted_ratings = np.dot(np.dot(U, sigma), Vt) + np.array(mean_ratings['rating']).reshape(-1, 1)


In [None]:
predicted_ratings

array([[4.9993916 , 4.99671492, 4.9991719 , ..., 5.00262075, 4.99895158,
        5.00182653],
       [5.00426398, 5.05444877, 5.00883362, ..., 5.01878373, 5.00153308,
        5.01071769],
       [4.00010497, 4.00143942, 4.00008833, ..., 4.00013253, 4.00068193,
        4.00021818],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [5.00021097, 4.97002797, 4.99819336, ..., 5.00242443, 5.00128009,
        4.99007471],
       [5.00018362, 4.99819334, 4.99990741, ..., 5.00130859, 5.00014713,
        4.99830278]])

In [None]:
# Convert the predicted ratings array to a DataFrame
predicted_ratings_df = pd.DataFrame(predicted_ratings, index=recipe_ratings.index, columns=recipe_ratings.columns)


In [None]:

# Define a function to get the top N recommendations for a user
def get_top_n(user_id, n=10):
    # Get the user's ratings
    user_ratings = df[df['user_id'] == user_id][['name', 'rating']]
    # Merge with the predicted ratings
    user_ratings = user_ratings.join(predicted_ratings_df, on='name')
    # Sort by the predicted rating and select the top N
    top_n = user_ratings.sort_values(by=user_id, ascending=False).head(n)
    return top_n


In [None]:

# Test the recommendation function with a sample user
get_top_n(1533, 10)['name']

69715                                    margarita parfaits
69030     mandarin tossed chicken salad with cashew dres...
10406                                   basil garlic butter
18791                              californian apple crunch
29882                           costa rican marinated mango
29883                          costa rican stuffed tortilla
102278                     spicy banana fritters  zitumbuwa
84080                               peggy s cheese savories
58799            indecent breasts with noodles or wild rice
27166                                   cinnamon roll toast
Name: name, dtype: object