
#This Baseline Model is implemented from the paper : 
# **Factorization Meets the Neighborhood: a Multifaceted Collaborative Filtering Model** by Yehuda Koren

Sir told us to implement this paper. Based on this he gave us another paper **Scalable Collaborative Filtering with Jointly Derived Neighborhood Interpolation Weights**

# The given below math has been dervied by ourselves from understanding the paper by which we have implemented our code.

---
* **Baseline Equation**
---
\begin{equation}
b_{ui} = \mu + b_u + b_i
\end{equation}
---
---
* **Optimization Problem**
---
\begin{equation}
\min_{b_u, b_i} \sum_{(u,i) \in R_{train}} (r_{ui} - b_{ui})^2 + \lambda (||b_u||^2 + ||b_i||^2)
\end{equation}
---
---
* **Gradient Descent**
---
\begin{equation}
\frac{\partial}{\partial b_u} = -2 \sum_{i \in I_u} (r_{ui} - \mu - b_u - b_i) + 2 \lambda b_u
\end{equation}

\begin{equation}
\frac{\partial}{\partial b_i} = -2 \sum_{u \in U_i} (r_{ui} - \mu - b_u - b_i) + 2 \lambda b_i
\end{equation}

\begin{equation}
b_{u}^{(k+1)} = b_{u}^{(k)} - \gamma \cdot \frac{\partial}{\partial b_u} J(b_u^{(k)}, b_i^{(k)})
\end{equation}

\begin{equation}
b_{i}^{(k+1)} = b_{i}^{(k)} - \gamma \cdot \frac{\partial}{\partial b_i} J(b_u^{(k)}, b_i^{(k)})
\end{equation}

\begin{aligned}
b_{u}^{(k+1)} &= b_{u}^{(k)} + \gamma \cdot \left( \sum_{i \in I_u} (r_{ui} - \mu - b_u^{(k)} - b_i^{(k)}) - \lambda b_u^{(k)} \right) \\
b_{i}^{(k+1)} &= b_{i}^{(k)} + \gamma \cdot \left( \sum_{u \in U_i} (r_{ui} - \mu - b_u^{(k)} - b_i^{(k)}) - \lambda b_i^{(k)} \right)
\end{aligned}

---
* **Update Rule**
---
\begin{equation}
b_{u}^{(k+1)} = b_{u}^{(k)} + \gamma \cdot \left( e_{ui} - \lambda \cdot b_{u}^{(k)} \right)
\end{equation}


# Importing Libraries

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import os
import pickle
from IPython.display import display

# Importing Data and preprocessing

In [None]:
data = pd.read_csv('/content/drive/MyDrive/RS Data/Assignment 2/short-recipes-20.csv')


In [None]:
df = data.copy()

In [None]:
df = data.pivot_table(index='user_id',columns='recipe_id',values='rating')


## Train Test Split

In [None]:
zero_mask = df == 0
train = pd.DataFrame(columns=df.columns)
test = pd.DataFrame(columns=df.columns)
from sklearn.model_selection import train_test_split

for row_index, row_data in df.iterrows():
  train_data, test_data = train_test_split(row_data, test_size=0.2)
  train_data[zero_mask.loc[row_index]] = 0
  train = train.append(train_data)
  test = test.append(test_data)
train = train.fillna(0)


In [None]:
train = train.fillna(0)

## Initializing mu, bu and bi parameters 

In [None]:
mask = train != 0

# calculate mean of non-zero values
mu = np.mean(train[mask]).mean()

In [None]:
bu= np.random.randn(train.shape[0])
bi= np.random.randn(train.shape[1])

In [None]:
bu

array([-1.22220269, -0.19078965,  0.49580904, ..., -0.01070388,
       -1.85815435,  0.41626176])

In [None]:
bi

array([-0.80995746,  0.30386749, -0.68624227, ..., -0.52889583,
        0.49973738, -0.51527574])

# Baseline Estimate on Ratings

In [None]:
iter = 10
lam =0.1
lr=0.001

for _ in range(iter):
  print(f"iteration {_}")

  del_bu,del_bi =0,0
  delt=0
  for i in range(train.shape[0]):
    print(f'iteration {_}, user {i}')
    for j in (np.where(np.array(train.iloc[i,:]) != 0)[0]):
      b_u = bu[i]
      b_i = bi[j]
      delt = (train.iloc[i,j] - mu - b_u -b_i)
      del_bu = -delt + lam*bu
      del_bi = -delt + lam*bi
      bu[i] = bu[i]-(lr*del_bu)
      bi[j]= bi[j]-(lr*del_bi)



In [None]:
bu

array([-1.09561336, -0.0869398 ,  0.40595384, ..., -0.10250607,
       -1.36919407,  0.34473305])

In [None]:
bi

array([-0.79968182,  0.28890583, -0.66051284, ..., -0.53426238,
        0.49973738, -0.50594707])

took 1 hour 12 miniutes for 10 iterations 

# Predicting Rating Matrix with filled Values

In [None]:
BU=bu.reshape([-1,1])

In [None]:
BI = bi.reshape([-1,1])

In [None]:
R= np.dot(BU,BI.T)

In [None]:
R_ = R+mu

In [None]:
R_

array([[5.55798508, 4.36531392, 5.40550969, ..., 5.26718801, 4.13432405,
        5.23616537],
       [4.75136718, 4.65672559, 4.73926785, ..., 4.72829167, 4.63839593,
        4.72582994],
       [4.3572091 , 4.79912543, 4.41370528, ..., 4.46495714, 4.8847133 ,
        4.47645185],
       ...,
       [4.76381524, 4.6522284 , 4.74954957, ..., 4.73660814, 4.63061689,
        4.73370564],
       [5.7767626 , 4.28627486, 5.58621327, ..., 5.41335189, 3.99760555,
        5.37458272],
       [4.40616625, 4.78143839, 4.4541424 , ..., 4.4976651 , 4.85411899,
        4.50742633]])

In [None]:
np.save("bu.npy",BU)
np.save("bi.npy",BI)

In [None]:
mu

4.6818430002382145

In [None]:
train[mask]

recipe_id,52,92,93,181,205,207,240,245,293,346,...,532487,532736,532740,533125,533130,533250,535230,536044,536060,536729
1533,,,,,,,,,,,...,,,,,,,,,,
1535,,,,,,,,,,,...,,,,,,,,,,
1634,,,,,,,,,,,...,,,,,,,,,,
2310,,,,,,,,,,,...,,,,,,,,,,
2312,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1800054678,,,,,,,,,,,...,,,,,,,,,,
1802849661,,,,,,,,,,,...,,,,,,,,,,
2000431901,,,,,,,,,,,...,,,,,,,,,,
2000498330,,,,,,,,,,,...,,,,,,,,,,


In [None]:
diff = train[mask] - R_

# square the differences
squared_diff = np.square(diff)

# take mean of the squared differences
mean_squared_diff = np.mean(squared_diff)
mean_squared_diff = (np.nansum(mean_squared_diff)/np.count_nonzero(~np.isnan(mean_squared_diff)))
# take square root of the mean
rmse = np.sqrt(mean_squared_diff)


In [None]:
rmse

0.7199813462498669

## Creating Recipe Name Dictionary

In [None]:
ds =pd.read_csv("/content/drive/MyDrive/RS Data/Assignment 2/RAW_recipes.csv")
ds.head()

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6
2,all in the kitchen chili,112140,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",13
3,alouette potatoes,59389,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8


In [None]:
recipe_dict = {recipe_id: ds.loc[ds['id'] == recipe_id, 'name'].iloc[0] for recipe_id in train.keys()}

In [None]:
recipe_dict

In [None]:
with open('recipe_names.pkl', 'wb') as fp:
    pickle.dump(recipe_dict, fp)
    print('dictionary saved successfully to file')

dictionary saved successfully to file


# Run From Here to directly use the Recommendation System

## Loading Parameters

In [None]:
mu= 4.6818430002382145
rating = pd.read_csv("/content/drive/MyDrive/RS Data/Assignment 2/Baseline/Train_data.csv") # Empty Rating Matrix
bi =np.load("/content/drive/MyDrive/RS Data/Assignment 2/Baseline/bi.npy")
bu =np.load("/content/drive/MyDrive/RS Data/Assignment 2/Baseline/bi.npy")


In [None]:
with open('/content/drive/MyDrive/RS Data/Assignment 2/Baseline/recipe_names.pkl', 'rb') as fp:
    names = pickle.load(fp)
    print('Recipe_Name dictionary imported successfully')
    

Recipe_Name dictionary imported successfully


# Recommendation System

In [None]:
class Recommendation_system():
  def __init__(self,train,mu,bi,bu,names):
    self.train = train
    self.mu = mu
    self.bi = bi
    self.bu = bu
    self.R_ = self.mu + np.dot(self.bu,self.bi.T)
    self.dct = {user_id: index for index, user_id in enumerate(self.train.index)}
    self.rec_name = names
  
  def BaseLine(self):

    user_id = int(input("Enter Your User Id:  "))
    os.system('cls')
    print("\n\n")
    
    print(f"Welcome User {user_id}")
    print("\n\n")
    
    uid = self.dct[user_id]
    unrated_items = np.where(self.train.iloc[uid, :] == 0)[0]

    # Sort the predicted ratings for unrated items in descending order
    sorted_ratings = np.argsort(self.R_[uid, unrated_items])[::-1]

    # Recommend the top N items to the user
    N = 5
    recommended_items = unrated_items[sorted_ratings][:N]
    print(f" We have these recommendations for you today: \n ")
    new_dict = {}
    for idx in recommended_items:
      key = list(self.rec_name.keys())[idx]
      value = self.rec_name[key]
      new_dict[key] = value

    output_df = pd.DataFrame.from_dict(new_dict, orient='index', columns=['Recpie_Name'])
    output_df = output_df.rename_axis('Recipe_ID')
    display(output_df)
    return recommended_items

  def get_recommendations(self):
    os.system('cls')
    print("You are using Baseline Estimated Recommendation System")
    self.BaseLine()

In [None]:
RS=Recommendation_system(train,mu,BI,BU,names)
RS.get_recommendations()

You are using Baseline Estimated Recommendation System
Enter Your User Id:  1533



Welcome User 1533



 We have this recommendations for you today: 
 


Unnamed: 0_level_0,Recpie_Name
Recipe_ID,Unnamed: 1_level_1
381956,orange chicken sauce
128158,garlic spaghetti with spinach
484348,mexican style beer marinade
478873,cajun rubbed tilapia
243269,twirly veggie pinwheels
