In [1]:
# import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# load dataset
df = pd.read_csv('data/zomato.csv')
df.head()

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555\r\n+91 9743772233,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari
2,https://www.zomato.com/SanchurroBangalore?cont...,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,+91 9663487993,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari
3,https://www.zomato.com/bangalore/addhuri-udupi...,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,No,No,3.7/5,88,+91 9620009302,Banashankari,Quick Bites,Masala Dosa,"South Indian, North Indian",300,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari
4,https://www.zomato.com/bangalore/grand-village...,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,No,No,3.8/5,166,+91 8026612447\r\n+91 9901210005,Basavanagudi,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,"[('Rated 4.0', 'RATED\n Very good restaurant ...",[],Buffet,Banashankari


In [3]:
# summary of the data
df.describe()

Unnamed: 0,votes
count,51717.0
mean,283.697527
std,803.838853
min,0.0
25%,7.0
50%,41.0
75%,198.0
max,16832.0


Extract features for recommendation

In [4]:
df = df[['name','location','dish_liked','cuisines','reviews_list','rate']]

Data cleaning

In [5]:
# checking for missing values
df.isnull().sum()

name                0
location           21
dish_liked      28078
cuisines           45
reviews_list        0
rate             7775
dtype: int64

In [6]:
# fill missing values with empty string
df['dish_liked'].fillna('',inplace=True)

In [7]:
# remove missing values
df.dropna(inplace=True)

###### Extrcat reviews 

In [8]:
# convert reviews_list column in appropriate format
import ast
df['reviews_list']=df['reviews_list'].apply(ast.literal_eval)

# extract reviews from review_list column
def extract_reviews(reviews):
    if(len(reviews)>0):
        f=0
        for review in reviews:
            if(len(review)>1):
                f=1
                return review[1]
            if(f==1):
                break
    else:
        return "unknown"        
                    
df['reviews'] = df['reviews_list'].apply(lambda x: extract_reviews(x))

In [9]:
# remove reviews_list column
df.drop(['reviews_list'],axis=1,inplace=True)

In [10]:
# clean reviews column
import re
def reviews_clean(data):
    cleaned_review = re.sub(r'^RATED\s+ ','',data)
    return cleaned_review.strip(' ')
df['reviews'] = df['reviews'].apply(lambda x: reviews_clean(x))

In [11]:
# clean rate column
df['rate'] = df['rate'].str.replace('NEW','0')
df['rate'] = df['rate'].str.replace('-','0')
df['rating'] = df['rate'].apply(lambda x : x.split('/')[0].strip(' ')).astype('float')
df.drop(['rate'],axis=1,inplace=True)

In [12]:
# clean cuisines column
def clean_cuisine(entry):
    parts = entry.split(', ') 
    cleaned_parts = [part.strip() for part in parts]
    cuisine = ' '.join(cleaned_parts)
    return cuisine
df['cuisines'] = df['cuisines'].apply(lambda x : clean_cuisine(x))    

In [13]:
df.drop_duplicates(inplace=True)

In [14]:
df.shape

(15620, 6)

In [15]:
df.to_csv('data/recommend_data.csv',index=True)

Model Building

In [17]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [18]:
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(df['cuisines'])
print(tfidf_matrix)

  (0, 28)	0.39898132934679953
  (0, 77)	0.7931491296704164
  (0, 50)	0.31043456098364924
  (0, 80)	0.33964502098898885
  (1, 106)	0.8170691483797171
  (1, 28)	0.37769818449853915
  (1, 50)	0.29387482938383236
  (1, 80)	0.32152709504359755
  (2, 53)	0.4996094832907776
  (2, 71)	0.7287151365153985
  (2, 23)	0.46836376249584727
  (3, 97)	0.5822889891451192
  (3, 50)	0.7132349783094122
  (3, 80)	0.3901735496369887
  (4, 87)	0.9352117277082559
  (4, 50)	0.2388872632407853
  (4, 80)	0.2613654526104526
  (5, 50)	0.6746531493913729
  (5, 80)	0.7381348982511949
  (6, 4)	0.6241265890019283
  (6, 97)	0.413583808985512
  (6, 28)	0.3255443002793827
  (6, 50)	0.5065911334919466
  (6, 80)	0.2771295110030781
  (7, 85)	0.6242563409135657
  :	:
  (15613, 75)	0.9022796488217206
  (15613, 28)	0.43115129052590395
  (15614, 5)	0.6452429508564611
  (15614, 75)	0.6172274652107408
  (15614, 28)	0.2949400648913728
  (15614, 50)	0.22948339389951153
  (15614, 80)	0.25107672254871544
  (15615, 75)	0.65982354992081

In [19]:
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [20]:
def get_recommendations(restaurant_name):
    idx = df[df['name'] == restaurant_name].index[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]  # Exclude the current restaurant
    restaurant_indices = [i[0] for i in sim_scores]
    return list(df['name'].iloc[restaurant_indices])
    

In [21]:
restaurant_name = "Grand Village"
recommendations = get_recommendations(restaurant_name)
print(recommendations)

['Sri Rajasthani Foods', 'Sri Rajasthani Foods', 'Kesar Rajasthani Parotha Point', 'RRoyal Rajasthan', 'Marwari Basa', 'Veg Rasoi', 'Rasovara', 'Rasovara', 'The Daily', 'Veg Rasoi']


Unnamed: 0,name,location,dish_liked,cuisines,reviews,rating
0,Jalsa,Banashankari,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...",North Indian Mughlai Chinese,A beautiful place to dine in.The interiors tak...,4.1
1,Spice Elephant,Banashankari,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...",Chinese North Indian Thai,Had been here for dinner with family. Turned o...,4.1
2,San Churro Cafe,Banashankari,"Churros, Cannelloni, Minestrone Soup, Hot Choc...",Cafe Mexican Italian,Ambience is not that good enough and it's not ...,3.8
3,Addhuri Udupi Bhojana,Banashankari,Masala Dosa,South Indian North Indian,Great food and proper Karnataka style full mea...,3.7
4,Grand Village,Basavanagudi,"Panipuri, Gol Gappe",North Indian Rajasthani,Very good restaurant in neighbourhood. Buffet ...,3.8
...,...,...,...,...,...,...
51620,Fujian Express,"ITPL Main Road, Whitefield","Noodles, Chicken Noodle, Momos, American Chops...",Thai Chinese Momos,This restaurant specially provide Chinese food...,3.8
51628,Nook - Aloft Bengaluru Cessna Business Park,Bellandur,"Chicken Quesadilla, Naan, Breakfast Buffet, Ch...",North Indian Continental Asian,Aloft has been designed uniquely with vibrant ...,4.2
51632,SeeYa Restaurant,KR Puram,,North Indian Kerala Chinese,"Good food, take bit time to get the food. Coz ...",3.3
51655,Amoeba Sports Bar,Whitefield,"Pasta, Fish, Beer, Chicken Wings, Mocktails, G...",Continental Chinese,Amoeba is affordable place for bowling enthusi...,3.9
