### Importing Libraries

In [None]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import sklearn.metrics.pairwise as pw
from sklearn.metrics.pairwise import pairwise_distances

### Making Data Ready for Modelling 

In [62]:
# Replace the string identifiers with dense integer ids: every distinct
# business_id / user_id gets its own group number.
df_merged['restaurantId'] = df_merged.groupby('business_id').ngroup()
df_merged['userId'] = df_merged.groupby('user_id').ngroup()

# Keep only the columns that the recommender needs.
rating_columns = ['userId', 'restaurantId', 'name', 'stars']
df_rating_restaurant = df_merged[rating_columns]

In [77]:
# Preview the first rows of the rating table (userId, restaurantId, name, stars).
df_rating_restaurant.head()

Unnamed: 0,userId,restaurantId,name,stars
0,7388,234,Pine Cone Restaurant,4.0
1,6147,234,Pine Cone Restaurant,4.0
2,4292,234,Pine Cone Restaurant,4.0
3,3525,234,Pine Cone Restaurant,4.0
4,6289,234,Pine Cone Restaurant,4.0


### Removing Duplicates and Building the Restaurant × User Rating Matrix

In [81]:
# Keep a single row per (user, restaurant name, restaurant id) combination.
df_rating_restaurant = df_rating_restaurant.drop_duplicates(
    subset=['userId', 'name', 'restaurantId']
)

# Restaurant x user matrix of star ratings; missing ratings become 0.
# NOTE(review): the pivot is keyed on `name`, so restaurants that share a name
# but have different restaurantId values end up in one row, aggregated by
# pivot_table's default aggfunc — confirm this is intended.
restaurant_features = (
    df_rating_restaurant
    .pivot_table(values='stars', index='name', columns='userId')
    .fillna(0)
)

In [84]:
# Preview the first rows of the restaurant x user rating matrix.
restaurant_features.head()

userId,0,1,2,3,4,5,6,7,8,9,...,7394,7395,7396,7397,7398,7399,7400,7401,7402,7403
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1855 Saloon and Grill,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4&20 Bakery & Cafe,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
43 North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608 Restaurant and Bar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8 Seasons Grille,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Transform the values (restaurant ratings) of the matrix DataFrame into a SciPy sparse matrix for more efficient calculations

In [85]:
# Compressed sparse row copy of the rating matrix (one row per restaurant).
restaurant_features_matrix = csr_matrix(restaurant_features.to_numpy())

### Item-based Collaborative filtering approach using K-NN

In [88]:
# Brute-force nearest-neighbour search using cosine distance between
# restaurant rating vectors.
knn_recomm = NearestNeighbors(algorithm='brute', metric='cosine')

# Fit on the sparse restaurant x user matrix; fit() is left as the last
# expression so the cell displays the fitted estimator.
knn_recomm.fit(restaurant_features_matrix)

NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
                 metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                 radius=1.0)

### Testing the Model

In [89]:
# Number of recommendations to print for the sampled restaurant.
N_RECOMMENDATIONS = 10

# Pick a random restaurant (row position in the feature matrix) to query.
# NOTE: no seed is set here, so every run samples a different restaurant.
randomChoice = np.random.choice(restaurant_features.shape[0])

# Request one extra neighbour because the query restaurant is normally returned
# as its own nearest neighbour; cap at the number of rows so that a small
# dataset does not make kneighbors() raise.
n_query_neighbors = min(N_RECOMMENDATIONS + 1, restaurant_features.shape[0])
distances, indices = knn_recomm.kneighbors(
    restaurant_features.iloc[randomChoice].values.reshape(1, -1),
    n_neighbors=n_query_neighbors,
)

# Remove the query restaurant by identity instead of assuming it sits at
# position 0: restaurants with identical rating vectors tie at distance 0, so
# the query is not guaranteed to come first. Dropping "position 0" blindly
# would then discard a real neighbour and recommend the restaurant to itself.
recommended_positions = [
    pos for pos in indices.flatten() if pos != randomChoice
][:N_RECOMMENDATIONS]

print('Recommendations for Restaurant {0} on priority basis:\n'.format(restaurant_features.index[randomChoice]))
for rank, pos in enumerate(recommended_positions, start=1):
    print('{0}: {1}'.format(rank, restaurant_features.index[pos]))

Recommendations for Restaurant Salad Creations on priority basis:

1: Black Bear Inn
2: Ruby Oriental Restaurant
3: Falbo Bros Pizza
4: Stalzy's Deli
5: Pedro's Mexican Restaurante
6: Burrito Drive
7: People's Bakery
8: Paul's Pel'meni
9: Paul's Club
10: Paul's Neighborhood Bar


### Item Rating Based Cosine Similarity

In [91]:
# Pairwise cosine similarity between every pair of restaurant rating vectors.
recommender = pw.cosine_similarity(restaurant_features_matrix)

# Label both axes with the restaurant names so similarities can be looked up
# by name.
restaurant_names = restaurant_features.index
recommender_df = pd.DataFrame(
    data=recommender,
    index=restaurant_names,
    columns=restaurant_names,
)

In [92]:
# Preview the first rows of the restaurant x restaurant similarity table.
recommender_df.head()

name,1855 Saloon and Grill,4&20 Bakery & Cafe,43 North,608 Restaurant and Bar,8 Seasons Grille,A La Brasa,A Pig In a Fur Coat,A Sakura,A&W Restaurant,A8 China,...,Wildcat Lanes,Willalby's Cafe,Wingstop,World Buffet,Wurst German Bar,Yes Buffet,Yola's Cafe,Zen Sushi,Zoe's Pizzeria,ZuZu Cafe & Market
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1855 Saloon and Grill,1.0,0.029788,0.027618,0.0,0.0,0.0,0.0,0.062869,0.0,0.055728,...,0.0,0.035245,0.0,0.0,0.0,0.0,0.032174,0.0,0.0,0.0
4&20 Bakery & Cafe,0.029788,1.0,0.075688,0.0,0.0,0.073771,0.039792,0.043073,0.0,0.0,...,0.0,0.144884,0.0,0.062348,0.0,0.0,0.06613,0.0,0.071429,0.057143
43 North,0.027618,0.075688,1.0,0.076472,0.024183,0.0,0.086086,0.039936,0.0,0.0,...,0.0,0.067166,0.034199,0.0,0.0,0.0,0.061314,0.0,0.0,0.026491
608 Restaurant and Bar,0.0,0.0,0.076472,1.0,0.0,0.0,0.026803,0.0,0.0,0.0,...,0.0,0.0,0.074536,0.0,0.0,0.0,0.044544,0.0,0.0,0.0
8 Seasons Grille,0.0,0.0,0.024183,0.0,1.0,0.04714,0.050855,0.0,0.0,0.0,...,0.0,0.030861,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [97]:
# Item-rating-based cosine similarity: rank every restaurant by its similarity
# to one chosen restaurant, most similar first.
restaurant_name = '1855 Saloon and Grill'
cosine_df = (
    recommender_df[restaurant_name]
    .sort_values(ascending=False)
    .rename_axis('title')              # restaurant names become the 'title' column
    .reset_index(name='cosine_sim')    # similarity scores become 'cosine_sim'
)

In [98]:
# Show the top of the ranking; the first row is the query restaurant itself
# (similarity 1.0 with itself).
cosine_df.head()

Unnamed: 0,title,cosine_sim
0,1855 Saloon and Grill,1.0
1,The Admiralty,0.147442
2,Bourbon Street Grille,0.136505
3,Subway,0.124832
4,Springers,0.120386
