In [None]:
# Mount Google Drive so the notebook can read and write files under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import pickle
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [None]:

from keras.layers import Input, Embedding, Dot, Reshape, Dense
from keras.models import Model

In [None]:
from keras.models import load_model

In [None]:
from keras.layers import Add, Activation, Lambda

In [None]:
from tensorflow.keras.optimizers import Adam


In [None]:
# Load the pre-merged user/review/restaurant table from Drive.
# NOTE: unpickling can execute arbitrary code, so only load files you trust.
merged_review_restaurants_user=pd.read_pickle('/content/drive/MyDrive/Restaurant_recommender_system/merged_user_review_restaurant')

In [None]:
# List the columns available in the merged table.
merged_review_restaurants_user.columns

Index(['Unnamed: 0', 'user_id', 'user_name', 'user_review_count',
       'yelping_since', 'useful_x', 'funny_x', 'cool_x', 'friends', 'fans',
       'average_stars', 'compliment_hot', 'compliment_more',
       'compliment_profile', 'compliment_cute', 'compliment_list',
       'compliment_note', 'compliment_plain', 'compliment_cool',
       'compliment_funny', 'compliment_writer', 'compliment_photos',
       'review_id', 'business_id', 'user_stars', 'useful_y', 'funny_y',
       'cool_y', 'text', 'date', 'name_y', 'city', 'state', 'latitude',
       'longitude', 'stars', 'business_review_count', 'is_open', 'attributes',
       'categories', 'hours'],
      dtype='object')

*Drop the `Unnamed: 0` column because it is not meaningful.*

In [None]:
# Drop the unnamed leftover column in place. errors='ignore' keeps this cell
# idempotent: re-running it after the column is already gone no longer raises KeyError.
merged_review_restaurants_user.drop(columns=['Unnamed: 0'], inplace=True, errors='ignore')

In [None]:
# Check the table's dimensions as (rows, columns).
merged_review_restaurants_user.shape

(4724464, 40)

In [None]:
# Work on a copy so the label columns added below do not modify the loaded table.
merged_review_restaurants_user_neural=merged_review_restaurants_user.copy()

*Using neural network based collaborative filtering technique*

*The number of distinct user and business IDs is large, so label encoding is used instead of one-hot encoding: https://www.analyticsvidhya.com/blog/2020/03/one-hot-encoding-vs-label-encoding-using-scikit-learn/*

In [None]:
# Encoder used to turn string IDs into integer indices.
# NOTE: the same instance is refit for each ID column below, so only the classes of the last fit stay on it.
encoder=LabelEncoder()

In [None]:
# Integer index for every user_id; these indices feed the user Embedding layer.
merged_review_restaurants_user_neural['user_label']=encoder.fit_transform(merged_review_restaurants_user_neural['user_id'].values)

In [None]:
# Integer index for every business_id; this refits the shared encoder, replacing the user_id fit.
merged_review_restaurants_user_neural['business_label']=encoder.fit_transform(merged_review_restaurants_user_neural['business_id'].values)

In [None]:
# Number of distinct users; used as input_dim of the user Embedding layer.
unique_users=merged_review_restaurants_user_neural['user_label'].nunique()

In [None]:
# Number of distinct restaurants; used as input_dim of the restaurant Embedding layer.
unique_restaurants=merged_review_restaurants_user_neural['business_label'].nunique()

In [None]:
# Display the restaurant count.
unique_restaurants

52268

In [None]:
# Features are the encoded (user, restaurant) index pairs; the target is the star rating given by the user.
X = merged_review_restaurants_user_neural[['user_label', 'business_label']].to_numpy()
y = merged_review_restaurants_user_neural['user_stars'].to_numpy()

# Hold out 20% of the reviews, with a fixed seed so the split is reproducible.
(X_train_neural, X_test_neural,
 y_train_neural, y_test_neural) = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:

# Size of each user / restaurant embedding vector.
n_factors = 50

# Keras takes one array per model input: column 0 holds user labels, column 1 restaurant labels.
X_train_array = [X_train_neural[:, col] for col in (0, 1)]
X_test_array = [X_test_neural[:, col] for col in (0, 1)]

In [None]:
def user_restaurant_embedding_model(n_factors, learning_rate=0.001):
  """Build and compile the user/restaurant embedding model that predicts star ratings.

  Each user label and each restaurant label is looked up in its own embedding
  table. The dot product of the two vectors is passed through a sigmoid and
  rescaled with ``x*4+1`` so that predictions lie in the open interval (1, 5).

  Relies on the notebook-level ``unique_users`` and ``unique_restaurants``
  counts for the sizes of the two embedding tables.

  Args:
    n_factors: Length of every embedding vector.
    learning_rate: Step size for the Adam optimizer (defaults to 0.001).

  Returns:
    A compiled Keras ``Model`` taking ``[user_labels, restaurant_labels]`` and
    trained with mean squared error.
  """
  user_input = Input(shape=(1,))
  restaurants_input = Input(shape=(1,))

  # Keep this creation order (user first, restaurant second) so the
  # auto-generated layer names stay the same as before.
  user_embedding = Embedding(input_dim=unique_users, output_dim=n_factors)(user_input)
  restaurant_embedding = Embedding(input_dim=unique_restaurants, output_dim=n_factors)(restaurants_input)

  # Use the raw dot product. With normalize=True the value is a cosine in
  # [-1, 1], so sigmoid(x)*4+1 could only reach roughly 2.08..3.92 and the
  # model could never predict 1-, 2-, 4- or 5-star ratings.
  dot_product = Dot(normalize=False, axes=2)([user_embedding, restaurant_embedding])

  # (batch, 1, 1) -> (batch, 1)
  merged = Reshape(target_shape=[1])(dot_product)

  # Squash to (0, 1), then stretch onto the 1..5 star scale.
  merged = Activation('sigmoid')(merged)
  ratings_calculated = Lambda(lambda x: x*4+1)(merged)

  model = Model(inputs=[user_input, restaurants_input], outputs=ratings_calculated)

  # `lr` is a deprecated alias; `learning_rate` is the supported keyword.
  opt = Adam(learning_rate=learning_rate)
  model.compile(loss='mean_squared_error', optimizer=opt)

  return model


In [None]:
# Build the compiled model with n_factors-dimensional embeddings.
model=user_restaurant_embedding_model(n_factors)

  super(Adam, self).__init__(name, **kwargs)


In [None]:
# Print the layers, their output shapes and parameter counts.
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_4 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 input_5 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 embedding_2 (Embedding)        (None, 1, 50)        72299200    ['input_4[0][0]']                
                                                                                                  
 embedding_3 (Embedding)        (None, 1, 50)        2613400     ['input_5[0][0]']                
                                                                                            

In [None]:
# Train for 5 epochs in mini-batches of 64.
# NOTE(review): the held-out test split doubles as validation data here, so the
# reported validation loss is not an independent estimate if it guides tuning.
model.fit(x=X_train_array, y=y_train_neural, batch_size=64,\
                          epochs=5, verbose=1, validation_data=(X_test_array, y_test_neural))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fc76a6d36d0>

In [None]:
# Persist the trained model to Drive as an .h5 file.
model.save('/content/drive/MyDrive/Restaurant_recommender_system/my_nn_model.h5')

In [None]:
# Reload the saved model from Drive; the cells below use this reloaded copy.
nn_model = load_model('/content/drive/MyDrive/Restaurant_recommender_system/my_nn_model.h5')

In [None]:
# Predict star ratings for the held-out (user, restaurant) pairs.
predict_neural_network=nn_model.predict(X_test_array)

In [None]:
# Start a side-by-side table with the true ratings of the held-out split.
comparison=pd.DataFrame(y_test_neural,columns=['actual stars'])

In [None]:
# Add the model's predictions next to the actual ratings.
comparison['predicted_stars']=predict_neural_network

In [None]:
# Inspect the first ten actual-vs-predicted pairs.
comparison.head(10)

Unnamed: 0,actual stars,predicted_stars
0,2.0,3.528209
1,5.0,3.624906
2,4.0,3.469417
3,3.0,2.982687
4,5.0,3.069288
5,5.0,3.758043
6,5.0,3.903355
7,5.0,3.161418
8,5.0,3.117983
9,1.0,2.903583


In [None]:
# Locate the restaurant embedding layer by its type and vocabulary size instead
# of the auto-generated name ('embedding_3'), which depends on how many layers
# were created earlier in the session and differs after a kernel restart.
# Assumes the user and restaurant vocabularies have different sizes.
restaurant_embeddings = next(
    layer for layer in nn_model.layers
    if isinstance(layer, Embedding) and layer.input_dim == unique_restaurants
)

In [None]:
# get_weights() returns a list; its first element is used as the embedding matrix.
weights=restaurant_embeddings.get_weights()[0]

*We have 52,268 restaurants, and each one is represented by a 50-dimensional vector.*

In [None]:
# Confirm the matrix is (number of restaurants, embedding size).
weights.shape

(52268, 50)

In [None]:
# Scale every embedding row to unit length so a plain dot product between two rows is their cosine similarity.
row_norms = np.linalg.norm(weights, axis=1, keepdims=True)
weights_normalised = weights / row_norms

In [None]:
# Row i of the embedding matrix belongs to business_label == i, so the ids must
# be listed in business_label order. `.unique()` returns ids in order of first
# appearance, which attached embeddings to the wrong restaurants.
unique_business_id = (
    merged_review_restaurants_user_neural[['business_label', 'business_id']]
    .drop_duplicates()
    .sort_values('business_label')['business_id']
    .to_numpy()
)

In [None]:
# Wrap the normalised embedding matrix in a DataFrame: one row per restaurant, one column per dimension.
recommendation_dataframe=pd.DataFrame(weights_normalised)

In [None]:
# Attach a business_id to every embedding row.
# NOTE(review): this is only correct if unique_business_id is ordered by business_label (the embedding row index) — confirm.
recommendation_dataframe['business_id']=unique_business_id

In [None]:
# Index the embeddings by business_id, the key used for the merge with restaurant names below.
recommendation_dataframe=recommendation_dataframe.set_index('business_id')

In [None]:
# Preview the first few embedding rows.
recommendation_dataframe.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
business_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1FURjeGJi_LBXcJQg8eskw,-0.038985,0.152899,0.073934,0.046184,-0.011672,0.005899,-0.101574,0.117205,0.036775,0.212906,...,0.061225,0.238206,0.203687,-0.127813,-0.068369,0.260656,0.000262,-0.213031,0.257549,-0.091089
mAqgsZBTN-wsShMpkz2o9g,-0.025398,0.213811,0.079798,0.04446,-0.102752,-0.093317,-0.093935,0.162764,0.010133,-0.092914,...,0.145437,0.24034,-0.102908,0.095997,-0.198395,-0.078582,0.227457,-0.106085,0.237445,0.143756
GnKtYBofkhA3ZmUBJPS-7w,0.135286,0.122788,-0.07684,-0.036663,0.148562,-0.23568,-0.002411,-0.08069,-0.032379,-0.143854,...,-0.128163,-0.076309,-0.102794,-0.234711,-0.106439,0.003814,0.062588,0.146957,0.103223,-0.126573
Qw7tz-UkPrpXaVidWuab4Q,0.206953,-0.192971,0.232455,-0.120145,0.061338,0.194372,0.129191,0.10229,-0.15221,0.114257,...,-0.142484,0.000156,-0.054862,-0.004516,-0.224264,-0.099131,0.10386,-0.044503,-0.098962,0.176562
QjQ-gtUnkne7jch0OLsPIg,0.177201,0.150076,0.043408,0.058002,0.000551,-0.088512,0.169321,-0.004292,-0.105609,0.046586,...,-0.169977,-0.009093,-0.08294,0.172254,-0.095489,0.151898,0.156279,-0.21184,0.053217,-0.046154


In [None]:
# One (business_id, name) pair per restaurant, for attaching readable names to the embeddings.
business_names=merged_review_restaurants_user_neural[['business_id','name_y']].drop_duplicates()

In [None]:
# The row count should equal the number of restaurants, i.e. one name per business_id.
business_names.shape

(52268, 2)

In [None]:
# Join names to embeddings on business_id; the result has business_id, name_y, then the embedding columns.
restaurant_recommendation_dataframe=pd.merge(business_names,recommendation_dataframe,on='business_id')

In [None]:
# Preview the named embedding table.
restaurant_recommendation_dataframe.head()

Unnamed: 0,business_id,name_y,0,1,2,3,4,5,6,7,...,40,41,42,43,44,45,46,47,48,49
0,1FURjeGJi_LBXcJQg8eskw,Padaro Beach Grill,-0.038985,0.152899,0.073934,0.046184,-0.011672,0.005899,-0.101574,0.117205,...,0.061225,0.238206,0.203687,-0.127813,-0.068369,0.260656,0.000262,-0.213031,0.257549,-0.091089
1,mAqgsZBTN-wsShMpkz2o9g,Cantwell's Market & Deli,-0.025398,0.213811,0.079798,0.04446,-0.102752,-0.093317,-0.093935,0.162764,...,0.145437,0.24034,-0.102908,0.095997,-0.198395,-0.078582,0.227457,-0.106085,0.237445,0.143756
2,GnKtYBofkhA3ZmUBJPS-7w,Joes Diner,0.135286,0.122788,-0.07684,-0.036663,0.148562,-0.23568,-0.002411,-0.08069,...,-0.128163,-0.076309,-0.102794,-0.234711,-0.106439,0.003814,0.062588,0.146957,0.103223,-0.126573
3,Qw7tz-UkPrpXaVidWuab4Q,Philadelphia Museum of Art,0.206953,-0.192971,0.232455,-0.120145,0.061338,0.194372,0.129191,0.10229,...,-0.142484,0.000156,-0.054862,-0.004516,-0.224264,-0.099131,0.10386,-0.044503,-0.098962,0.176562
4,QjQ-gtUnkne7jch0OLsPIg,Rosie's Café,0.177201,0.150076,0.043408,0.058002,0.000551,-0.088512,0.169321,-0.004292,...,-0.169977,-0.009093,-0.08294,0.172254,-0.095489,0.151898,0.156279,-0.21184,0.053217,-0.046154


In [None]:
def similar_restaurants(name, embeddings=None):
  """Score every restaurant by cosine similarity to the restaurant called ``name``.

  The embedding rows are expected to be unit-length already, so the dot product
  of two rows is their cosine similarity.

  Args:
    name: Value of ``name_y`` identifying the query restaurant. If several
      restaurants share the name, the first matching row is used.
    embeddings: DataFrame with ``business_id`` and ``name_y`` columns plus one
      column per embedding dimension. Defaults to the notebook-level
      ``restaurant_recommendation_dataframe``.

  Returns:
    DataFrame with columns ``similar_rest`` (restaurant name) and ``cosine``
    (similarity to the query), one row per restaurant in the original order.

  Raises:
    ValueError: If no restaurant has the given name.
  """
  if embeddings is None:
    embeddings = restaurant_recommendation_dataframe

  # Use every embedding column. The previous positional slice 2:51 silently
  # dropped the last of the 50 dimensions, so self-similarity came out below 1.
  vectors = embeddings.drop(columns=['business_id', 'name_y']).to_numpy(dtype=float)

  matches = np.flatnonzero((embeddings['name_y'] == name).to_numpy())
  if matches.size == 0:
    raise ValueError(f"No restaurant named {name!r} found")
  query_vector = vectors[matches[0]]

  # A single matrix-vector product replaces the per-row Python loop.
  restaurant_similarity = pd.DataFrame({
      "similar_rest": embeddings['name_y'].to_numpy(),
      "cosine": vectors @ query_vector,
  })

  return restaurant_similarity

In [None]:
# Score every restaurant against 'Joes Diner'.
result = similar_restaurants('Joes Diner')

In [None]:
# Preview the unsorted similarity scores.
result.head()

Unnamed: 0,similar_rest,cosine
0,Padaro Beach Grill,-0.071286
1,Cantwell's Market & Deli,-0.013016
2,Joes Diner,0.983979
3,Philadelphia Museum of Art,0.209489
4,Rosie's Café,0.04656


In [None]:
# Ten highest-scoring restaurants; the query restaurant itself is expected at the top.
result.sort_values('cosine', ascending=False).head(10)

Unnamed: 0,similar_rest,cosine
2,Joes Diner,0.983979
39600,Chinatown,0.602922
25329,Nudy's Cafe - Eagleview Blvd,0.564909
42997,Denny's,0.539956
43202,Pookie's Restaurant,0.493337
31297,Hugo's Deli,0.48514
18304,No 1 Chinese Restaurant,0.479488
13958,Trooper Diner,0.469073
38853,Better Days BBQ,0.462743
48513,Dunkin',0.456616
