### Zomato Spatial Data Analysis

In [1]:
# Import the required libraries
import pandas as pd
import numpy as np
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
# store the data from csv file in a dataframe
df = pd.read_csv('zomato.csv')

# Print the first 5 rows using head()
df.head()

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555\r\n+91 9743772233,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari
2,https://www.zomato.com/SanchurroBangalore?cont...,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,+91 9663487993,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari
3,https://www.zomato.com/bangalore/addhuri-udupi...,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,No,No,3.7/5,88,+91 9620009302,Banashankari,Quick Bites,Masala Dosa,"South Indian, North Indian",300,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari
4,https://www.zomato.com/bangalore/grand-village...,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,No,No,3.8/5,166,+91 8026612447\r\n+91 9901210005,Basavanagudi,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,"[('Rated 4.0', 'RATED\n Very good restaurant ...",[],Buffet,Banashankari


In [3]:
# First step is to find out if there are any missing values in the dataset
df.isna().sum()

url                                0
address                            0
name                               0
online_order                       0
book_table                         0
rate                            7775
votes                              0
phone                           1208
location                          21
rest_type                        227
dish_liked                     28078
cuisines                          45
approx_cost(for two people)      346
reviews_list                       0
menu_item                          0
listed_in(type)                    0
listed_in(city)                    0
dtype: int64

In [4]:
# Drop any missing values from the data
# Here, we wish to drop any missing values of the 'location' column as we want to perform spatial data analysis 
# for the current dataset 

df.dropna(axis='index', subset=['location'],inplace=True)

In [5]:
df.isna().sum()

url                                0
address                            0
name                               0
online_order                       0
book_table                         0
rate                            7754
votes                              0
phone                           1187
location                           0
rest_type                        206
dish_liked                     28057
cuisines                          24
approx_cost(for two people)      325
reviews_list                       0
menu_item                          0
listed_in(type)                    0
listed_in(city)                    0
dtype: int64

In [6]:
# Now we dont have any missing values for the location column 
# Let's find out the unique locations

df['location'].unique()

array(['Banashankari', 'Basavanagudi', 'Mysore Road', 'Jayanagar',
       'Kumaraswamy Layout', 'Rajarajeshwari Nagar', 'Vijay Nagar',
       'Uttarahalli', 'JP Nagar', 'South Bangalore', 'City Market',
       'Nagarbhavi', 'Bannerghatta Road', 'BTM', 'Kanakapura Road',
       'Bommanahalli', 'CV Raman Nagar', 'Electronic City', 'HSR',
       'Marathahalli', 'Sarjapur Road', 'Wilson Garden', 'Shanti Nagar',
       'Koramangala 5th Block', 'Koramangala 8th Block', 'Richmond Road',
       'Koramangala 7th Block', 'Jalahalli', 'Koramangala 4th Block',
       'Bellandur', 'Whitefield', 'East Bangalore', 'Old Airport Road',
       'Indiranagar', 'Koramangala 1st Block', 'Frazer Town', 'RT Nagar',
       'MG Road', 'Brigade Road', 'Lavelle Road', 'Church Street',
       'Ulsoor', 'Residency Road', 'Shivajinagar', 'Infantry Road',
       'St. Marks Road', 'Cunningham Road', 'Race Course Road',
       'Commercial Street', 'Vasanth Nagar', 'HBR Layout', 'Domlur',
       'Ejipura', 'Jeevan Bhima

In [7]:
# How many unique locations are there??

len(df['location'].unique())

93

In [8]:
# There are 93 unique locations. 
# That means, I have to fetch the geographical coordinates for all these 93 locations

# Lets create a new dataframe 'locations' which contains a list of all these new locations

locations = pd.DataFrame()
locations['Name'] = df['location'].unique()

In [9]:
# Check the first few rows of the 'locations' dataframe

locations.head()

Unnamed: 0,Name
0,Banashankari
1,Basavanagudi
2,Mysore Road
3,Jayanagar
4,Kumaraswamy Layout


In [10]:
# To extract latitudes and longitudes, we need to install a Python library 'Geopy' 
# I have already installed it, so lets check the version of geopy
# So, I will be getting a message mentioning that the requirement is already satisfied
!pip3 install geopy



In [11]:
import geopy as gp
gp.__version__

'2.2.0'

In [12]:
from geopy.geocoders import Nominatim

In [13]:
# Nominatim is a tool to evaluate/figure out latitude and longitude for any location
# From the parameters of nominatim, user_agent is important parameter

# user_agent is an http request header that is sent with each request
# user_agent is defined to 'app' to establish connection with http

geolocator = Nominatim(user_agent='app')

In [14]:
# Define new lists for latitude and longitude
lat = []
lon = []

# Write a simple loop to iterate over each location and record the latitude and longitude of the locations
for location in locations['Name']:
    # currently the location is in a string format, so to convert the string format to 
    # geographical latitude and longitude, we use geocode function on geolocator method
    location = geolocator.geocode(location)
    
    # if location is None, then we append 'NaN' to the list
    if location is None:
        lat.append(np.nan)
        lon.append(np.nan)
    else:
        lat.append(location.latitude)
        lon.append(location.longitude)

In [15]:
# Just check the latitudes and longitudes for first 5 locations
lat[:5]

[15.8876779, 12.9417261, 12.3872141, 27.64392675, 12.9081487]

In [16]:
lon[:5]

[75.7046777, 77.5755021, 76.6669626, 83.05280519687284, 77.5553179]

In [17]:
# Now lets add the latitude and longitude to the 'locations' dataframe
locations['Latitude'] = lat
locations['Longitude'] = lon

In [18]:
# Check the first few rows of new dataframe
locations.head()

Unnamed: 0,Name,Latitude,Longitude
0,Banashankari,15.887678,75.704678
1,Basavanagudi,12.941726,77.575502
2,Mysore Road,12.387214,76.666963
3,Jayanagar,27.643927,83.052805
4,Kumaraswamy Layout,12.908149,77.555318


In [19]:
# If we have to convert the above dataframe to a csv file 
locations.to_csv('zomato_locations.csv', index=False)

In [20]:
# If we have to plot heatmaps for different locations, 
# the count of restaurants at a particular location is helpful

df['location'].value_counts()

BTM                      5124
HSR                      2523
Koramangala 5th Block    2504
JP Nagar                 2235
Whitefield               2144
                         ... 
West Bangalore              6
Yelahanka                   6
Jakkur                      3
Rajarajeshwari Nagar        2
Peenya                      1
Name: location, Length: 93, dtype: int64

In [21]:
# In case if we want to convert the above info into a dataframe

df['location'].value_counts().reset_index()

Unnamed: 0,index,location
0,BTM,5124
1,HSR,2523
2,Koramangala 5th Block,2504
3,JP Nagar,2235
4,Whitefield,2144
...,...,...
88,West Bangalore,6
89,Yelahanka,6
90,Jakkur,3
91,Rajarajeshwari Nagar,2


In [22]:
# Lets make some changes in the above dataframe 
# Save it first under a new variable, change the header/column names
rest_locations = df['location'].value_counts().reset_index()
rest_locations

Unnamed: 0,index,location
0,BTM,5124
1,HSR,2523
2,Koramangala 5th Block,2504
3,JP Nagar,2235
4,Whitefield,2144
...,...,...
88,West Bangalore,6
89,Yelahanka,6
90,Jakkur,3
91,Rajarajeshwari Nagar,2


In [23]:
# modify the column names manually
rest_locations.columns = ['Name','Count']
rest_locations

Unnamed: 0,Name,Count
0,BTM,5124
1,HSR,2523
2,Koramangala 5th Block,2504
3,JP Nagar,2235
4,Whitefield,2144
...,...,...
88,West Bangalore,6
89,Yelahanka,6
90,Jakkur,3
91,Rajarajeshwari Nagar,2


In [24]:
# Since we already have the latitude and longitude dataframe as well 
# for each location, we can merge the above dataframe and latitude 
# longitude dataframe based on the Name of the location

# Here, we merge the latitude longitude dataframe TO the restaurant_locations dataframe
all_restaurants_location = rest_locations.merge(locations, on='Name', how='left').dropna()

In [25]:
all_restaurants_location

Unnamed: 0,Name,Count,Latitude,Longitude
0,BTM,5124,45.954851,-112.496595
1,HSR,2523,18.147500,41.538889
2,Koramangala 5th Block,2504,12.934377,77.628415
3,JP Nagar,2235,12.265594,76.646540
4,Whitefield,2144,44.373058,-71.611858
...,...,...,...,...
88,West Bangalore,6,12.984852,77.540063
89,Yelahanka,6,13.100698,77.596345
90,Jakkur,3,13.078474,77.606894
91,Rajarajeshwari Nagar,2,12.927441,77.515522


In [26]:
!pip3 install folium



In [27]:
import folium

In [28]:
# define a base function whenever i need a base map
def generatebasemap(default_location=[12.97,77.59], default_zoom_start=12):
    ''' Returns a base map for the final restaurant locations '''
    # basemap = folium.Map(location=default_location, zoom_start=default_zoom_start)
    basemap = folium.Map(location=default_location, zoom_start=default_zoom_start)
    return basemap

In [29]:
basemap = generatebasemap()
basemap

In [30]:
# First requirement: Generate a heatmap of the restaurants in Bangalore
from folium.plugins import HeatMap

In [31]:
# Extract the columns from the final dataframe (all_reall_restaurants_location)
# for plotting on heatmap 

df_for_heatmap = all_restaurants_location[['Latitude','Longitude','Count']]
df_for_heatmap

Unnamed: 0,Latitude,Longitude,Count
0,45.954851,-112.496595,5124
1,18.147500,41.538889,2523
2,12.934377,77.628415,2504
3,12.265594,76.646540,2235
4,44.373058,-71.611858,2144
...,...,...,...
88,12.984852,77.540063,6
89,13.100698,77.596345,6
90,13.078474,77.606894,3
91,12.927441,77.515522,2


In [32]:
# Pass the dataframe to the heatmap function and add it to the basemap
HeatMap(df_for_heatmap,zoom=20,blur=5).add_to(basemap)

<folium.plugins.heat_map.HeatMap at 0x122e01b10>

In [33]:
# The heatmap object is created. Now we have to call the basemap
basemap

In [34]:
from folium.plugins import FastMarkerCluster

In [35]:
FastMarkerCluster(df_for_heatmap, zoom=20).add_to(basemap)

<folium.plugins.fast_marker_cluster.FastMarkerCluster at 0x122df78d0>

In [36]:
basemap

In [37]:
# Where are the restaurants which higher average rating?
df.head()

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555\r\n+91 9743772233,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari
2,https://www.zomato.com/SanchurroBangalore?cont...,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,+91 9663487993,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari
3,https://www.zomato.com/bangalore/addhuri-udupi...,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,No,No,3.7/5,88,+91 9620009302,Banashankari,Quick Bites,Masala Dosa,"South Indian, North Indian",300,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari
4,https://www.zomato.com/bangalore/grand-village...,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,No,No,3.8/5,166,+91 8026612447\r\n+91 9901210005,Basavanagudi,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,"[('Rated 4.0', 'RATED\n Very good restaurant ...",[],Buffet,Banashankari


In [38]:
df['rate'].unique()

array(['4.1/5', '3.8/5', '3.7/5', '3.6/5', '4.6/5', '4.0/5', '4.2/5',
       '3.9/5', '3.1/5', '3.0/5', '3.2/5', '3.3/5', '2.8/5', '4.4/5',
       '4.3/5', 'NEW', '2.9/5', '3.5/5', nan, '2.6/5', '3.8 /5', '3.4/5',
       '4.5/5', '2.5/5', '2.7/5', '4.7/5', '2.4/5', '2.2/5', '2.3/5',
       '3.4 /5', '-', '3.6 /5', '4.8/5', '3.9 /5', '4.2 /5', '4.0 /5',
       '4.1 /5', '3.7 /5', '3.1 /5', '2.9 /5', '3.3 /5', '2.8 /5',
       '3.5 /5', '2.7 /5', '2.5 /5', '3.2 /5', '2.6 /5', '4.5 /5',
       '4.3 /5', '4.4 /5', '4.9/5', '2.1/5', '2.0/5', '1.8/5', '4.6 /5',
       '4.9 /5', '3.0 /5', '4.8 /5', '2.3 /5', '4.7 /5', '2.4 /5',
       '2.1 /5', '2.2 /5', '2.0 /5', '1.8 /5'], dtype=object)

In [39]:
# We also see some NaN values in the rating, so we are gonna drop it
df.dropna(axis=0, subset=['rate'],inplace=True)

In [40]:
df['rate'].unique()

array(['4.1/5', '3.8/5', '3.7/5', '3.6/5', '4.6/5', '4.0/5', '4.2/5',
       '3.9/5', '3.1/5', '3.0/5', '3.2/5', '3.3/5', '2.8/5', '4.4/5',
       '4.3/5', 'NEW', '2.9/5', '3.5/5', '2.6/5', '3.8 /5', '3.4/5',
       '4.5/5', '2.5/5', '2.7/5', '4.7/5', '2.4/5', '2.2/5', '2.3/5',
       '3.4 /5', '-', '3.6 /5', '4.8/5', '3.9 /5', '4.2 /5', '4.0 /5',
       '4.1 /5', '3.7 /5', '3.1 /5', '2.9 /5', '3.3 /5', '2.8 /5',
       '3.5 /5', '2.7 /5', '2.5 /5', '3.2 /5', '2.6 /5', '4.5 /5',
       '4.3 /5', '4.4 /5', '4.9/5', '2.1/5', '2.0/5', '1.8/5', '4.6 /5',
       '4.9 /5', '3.0 /5', '4.8 /5', '2.3 /5', '4.7 /5', '2.4 /5',
       '2.1 /5', '2.2 /5', '2.0 /5', '1.8 /5'], dtype=object)

In [41]:
df_new = df[df['rate']=='NEW']
df_new

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
72,https://www.zomato.com/bangalore/spicy-tandoor...,"Opposite ICICi Bank, Hanuman Nagar, Banashanka...",Spicy Tandoor,No,No,NEW,0,+91 8050884222,Banashankari,Quick Bites,,North Indian,150,"[('Rated 4.0', 'RATED\n cost for chicken roll...",[],Delivery,Banashankari
75,https://www.zomato.com/bangalore/om-sri-vinaya...,"39, S B M Colony, Near-Sita Circle, 1st Stage,...",Om Sri Vinayaka Chats,No,No,NEW,0,+91 8553206035,Banashankari,"Takeaway, Delivery",,"Street Food, Fast Food",500,[],[],Delivery,Banashankari
110,https://www.zomato.com/bangalore/hari-super-sa...,"594, 24 The Cross BSK, Opposte Oriental Bank, ...",Hari Super Sandwich,No,No,NEW,0,+91 9886722163,Banashankari,"Takeaway, Delivery",,"Sandwich, Pizza, Beverages",200,"[('Rated 1.0', 'RATED\n I have been here many...",[],Delivery,Banashankari
130,https://www.zomato.com/bangalore/roll-magic-fa...,"28, 4th Cross Road, Dwaraka Nagar, Banashankar...",Roll Magic Fast Food,No,No,NEW,0,+91 9731409204,Banashankari,"Takeaway, Delivery",,"Biryani, Rolls, Chinese",200,[],[],Delivery,Banashankari
131,https://www.zomato.com/bangalore/foodlieious-m...,"3/16, 22 Main Road, Magenta Block, PES College...",Foodlieious Multi Cuisine,No,No,NEW,0,+91 9168753522\r\n+91 8217877100,Banashankari,Quick Bites,,North Indian,100,[],[],Delivery,Banashankari
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51585,https://www.zomato.com/bangalore/wow-tiffin-fo...,"Near Rama Temple, Immadihalli Road, Whitefield...",Wow Tiffin Food,No,No,NEW,0,+91 9535041209,Whitefield,Quick Bites,,North Indian,200,[],[],Dine-out,Whitefield
51586,https://www.zomato.com/bangalore/quick-bites-w...,"119/5, Pattandur Agrahara, Near Brigade Tech p...",Quick Bites,Yes,No,NEW,0,+91 9742377087\n+91 7899303299,Whitefield,Quick Bites,,Chinese,200,"[('Rated 5.0', 'RATED\n This is an awesome pl...","['Veg Chowmein + Manchurian', 'Paratha + Chick...",Dine-out,Whitefield
51603,https://www.zomato.com/bangalore/shiv-sagar-1-...,"Shop No 10, Food Court, 2nd Floor, Phoenix Mar...",Shiv sagar,Yes,No,NEW,0,+91 9513300048\n+91 8067266097,Whitefield,"Quick Bites, Food Court",,"South Indian, Chinese, North Indian",250,"[('Rated 3.0', 'RATED\n A highly expensive pl...","['Malai Paneer Tikka', 'Paneer Kofta', 'Mushro...",Dine-out,Whitefield
51604,https://www.zomato.com/bangalore/nawab-di-biry...,"17, Thubarahalli Village, Hobli, Varthur Main ...",Nawab Di Biryani,No,No,NEW,0,+91 7899298400\n+91 9818654628,Whitefield,"Takeaway, Delivery",,"Biryani, Mughlai",400,"[('Rated 3.0', 'RATED\n Biriyani had ordered ...",[],Dine-out,Whitefield


In [42]:
# We see that rating is in string format. If we want only the rating as float number
# lets define a simple function to return the rating
def split(x):
    ''' Splits the rating from string format and returns the rating out of 5'''
    return x.split('/')[0]

In [43]:
df['rating'] = df['rate'].apply(split)
df.head()

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city),rating
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555\r\n+91 9743772233,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari,4.1
1,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari,4.1
2,https://www.zomato.com/SanchurroBangalore?cont...,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,+91 9663487993,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari,3.8
3,https://www.zomato.com/bangalore/addhuri-udupi...,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,No,No,3.7/5,88,+91 9620009302,Banashankari,Quick Bites,Masala Dosa,"South Indian, North Indian",300,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari,3.7
4,https://www.zomato.com/bangalore/grand-village...,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,No,No,3.8/5,166,+91 8026612447\r\n+91 9901210005,Basavanagudi,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,"[('Rated 4.0', 'RATED\n Very good restaurant ...",[],Buffet,Banashankari,3.8


In [44]:
df['rating'].unique()

array(['4.1', '3.8', '3.7', '3.6', '4.6', '4.0', '4.2', '3.9', '3.1',
       '3.0', '3.2', '3.3', '2.8', '4.4', '4.3', 'NEW', '2.9', '3.5',
       '2.6', '3.8 ', '3.4', '4.5', '2.5', '2.7', '4.7', '2.4', '2.2',
       '2.3', '3.4 ', '-', '3.6 ', '4.8', '3.9 ', '4.2 ', '4.0 ', '4.1 ',
       '3.7 ', '3.1 ', '2.9 ', '3.3 ', '2.8 ', '3.5 ', '2.7 ', '2.5 ',
       '3.2 ', '2.6 ', '4.5 ', '4.3 ', '4.4 ', '4.9', '2.1', '2.0', '1.8',
       '4.6 ', '4.9 ', '3.0 ', '4.8 ', '2.3 ', '4.7 ', '2.4 ', '2.1 ',
       '2.2 ', '2.0 ', '1.8 '], dtype=object)

In [45]:
# We still see that there is a 'NEW' and '-' in the ratings, which we have to assign to 0
# since we have no specific rating assigned

df.replace(['NEW','-'],0,inplace=True)

In [46]:
df['rating'].unique()

array(['4.1', '3.8', '3.7', '3.6', '4.6', '4.0', '4.2', '3.9', '3.1',
       '3.0', '3.2', '3.3', '2.8', '4.4', '4.3', 0, '2.9', '3.5', '2.6',
       '3.8 ', '3.4', '4.5', '2.5', '2.7', '4.7', '2.4', '2.2', '2.3',
       '3.4 ', '3.6 ', '4.8', '3.9 ', '4.2 ', '4.0 ', '4.1 ', '3.7 ',
       '3.1 ', '2.9 ', '3.3 ', '2.8 ', '3.5 ', '2.7 ', '2.5 ', '3.2 ',
       '2.6 ', '4.5 ', '4.3 ', '4.4 ', '4.9', '2.1', '2.0', '1.8', '4.6 ',
       '4.9 ', '3.0 ', '4.8 ', '2.3 ', '4.7 ', '2.4 ', '2.1 ', '2.2 ',
       '2.0 ', '1.8 '], dtype=object)

In [47]:
# Lets check the datatypes of the columns
df.dtypes

url                            object
address                        object
name                           object
online_order                   object
book_table                     object
rate                           object
votes                           int64
phone                          object
location                       object
rest_type                      object
dish_liked                     object
cuisines                       object
approx_cost(for two people)    object
reviews_list                   object
menu_item                      object
listed_in(type)                object
listed_in(city)                object
rating                         object
dtype: object

In [48]:
# We need to change the datatype of rating to numerical format
df['rating'] = pd.to_numeric(df['rating'])

In [49]:
# Now lets say we want to have the restaurant rating w.r.t each location
# We use the concept of groupby() for this 

df.groupby('location')['rating'].mean()

location
BTM                  3.296128
Banashankari         3.373292
Banaswadi            3.362926
Bannerghatta Road    3.271677
Basavanagudi         3.478185
                       ...   
West Bangalore       2.020000
Whitefield           3.384170
Wilson Garden        3.257635
Yelahanka            3.640000
Yeshwantpur          3.502679
Name: rating, Length: 92, dtype: float64

In [50]:
# Lets sort the above result in descending order
df.groupby('location')['rating'].mean().sort_values(ascending=False)

location
Lavelle Road             4.042886
St. Marks Road           4.017201
Koramangala 3rd Block    3.978756
Sankey Road              3.965385
Church Street            3.963091
                           ...   
Electronic City          3.041909
Bommanahalli             2.926752
Hebbal                   2.880000
North Bangalore          2.385714
West Bangalore           2.020000
Name: rating, Length: 92, dtype: float64

In [51]:
avg_rating = df.groupby('location')['rating'].mean().sort_values(ascending=False).values
avg_rating

array([4.04288577, 4.01720117, 3.97875648, 3.96538462, 3.96309091,
       3.90151197, 3.90105263, 3.85      , 3.84457237, 3.83965517,
       3.81435185, 3.80740741, 3.79642857, 3.74784206, 3.74055024,
       3.7270073 , 3.72222222, 3.70492958, 3.68801262, 3.68783784,
       3.66823708, 3.66246625, 3.65216942, 3.64      , 3.625     ,
       3.61525029, 3.6075    , 3.60645161, 3.59584871, 3.583174  ,
       3.58095238, 3.56487889, 3.54565217, 3.54139845, 3.52914439,
       3.51111111, 3.50267857, 3.49980952, 3.49891697, 3.48695652,
       3.48406955, 3.47894737, 3.4787234 , 3.47818471, 3.47362637,
       3.47355822, 3.45555556, 3.44615385, 3.44551724, 3.4375    ,
       3.42977099, 3.42238193, 3.41978022, 3.41292591, 3.40053227,
       3.4       , 3.38911917, 3.38554779, 3.38417011, 3.3746988 ,
       3.37329193, 3.36292585, 3.36      , 3.33333333, 3.32358974,
       3.3202381 , 3.3202381 , 3.30983302, 3.29892473, 3.29612767,
       3.2940678 , 3.27927928, 3.278125  , 3.27167674, 3.26393

In [52]:
location_list = df.groupby('location')['rating'].mean().sort_values(ascending=False).index

In [53]:
# Lets define a new dataframe
rating = pd.DataFrame()

In [54]:
# Define new lists for latitude and longitude
lat = []
lon = []

# Write a simple loop to iterate over each location and record the latitude and longitude of the locations
for loc in location_list:
    # currently the location is in a string format, so to convert the string format to 
    # geographical latitude and longitude, we use geocode function on geolocator method
    loc = geolocator.geocode(loc)
    
    # if location is None, then we append 'NaN' to the list
    if loc is None:
        lat.append(np.nan)
        lon.append(np.nan)
    else:
        lat.append(loc.latitude)
        lon.append(loc.longitude)

In [55]:
rating['location'] = location_list
rating['Latitude'] = lat
rating['Longitude'] = lon
rating['Average_Rating']=avg_rating

In [56]:
rating.head()

Unnamed: 0,location,Latitude,Longitude,Average_Rating
0,Lavelle Road,40.765284,-76.373824,4.042886
1,St. Marks Road,33.119391,-84.826086,4.017201
2,Koramangala 3rd Block,12.927187,77.626625,3.978756
3,Sankey Road,38.780108,-121.505644,3.965385
4,Church Street,40.716059,-74.00706,3.963091


In [57]:
# check if there are any missing values
rating.isna().sum()

location          0
Latitude          1
Longitude         1
Average_Rating    0
dtype: int64

In [58]:
# We have 1 missing data, which we can drop
rating.dropna(inplace=True)

In [59]:
# Now we have our rating dataframe cleaned, we can create a heatmap
HeatMap(rating[['Latitude','Longitude','Average_Rating']]).add_to(basemap)

<folium.plugins.heat_map.HeatMap at 0x1236d9cd0>

In [60]:
basemap

In [61]:
df.head()

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city),rating
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555\r\n+91 9743772233,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari,4.1
1,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari,4.1
2,https://www.zomato.com/SanchurroBangalore?cont...,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,+91 9663487993,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari,3.8
3,https://www.zomato.com/bangalore/addhuri-udupi...,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,No,No,3.7/5,88,+91 9620009302,Banashankari,Quick Bites,Masala Dosa,"South Indian, North Indian",300,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari,3.7
4,https://www.zomato.com/bangalore/grand-village...,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,No,No,3.8/5,166,+91 8026612447\r\n+91 9901210005,Basavanagudi,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,"[('Rated 4.0', 'RATED\n Very good restaurant ...",[],Buffet,Banashankari,3.8


In [62]:
# Lets say we want to analyse the data only for restaurants with north indian cuisines
north_cuisine = (df['cuisines']=='North Indian')
df_north_cuisine = df[north_cuisine]
df_north_cuisine.head()

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city),rating
5,https://www.zomato.com/bangalore/timepass-dinn...,"37, 5-1, 4th Floor, Bosco Court, Gandhi Bazaar...",Timepass Dinner,Yes,No,3.8/5,286,+91 9980040002\r\n+91 9980063005,Basavanagudi,Casual Dining,"Onion Rings, Pasta, Kadhai Paneer, Salads, Sal...",North Indian,600,"[('Rated 3.0', 'RATED\n Food 3/5\nAmbience 3/...",[],Buffet,Banashankari,3.8
50,https://www.zomato.com/bangalore/petoo-banasha...,"276, Ground Floor, 100 Feet Outer Ring Road, B...",Petoo,No,No,3.7/5,21,+91 8026893211,Banashankari,Quick Bites,,North Indian,450,"[('Rated 2.0', 'RATED\n This is a neatly made...",[],Delivery,Banashankari,3.7
72,https://www.zomato.com/bangalore/spicy-tandoor...,"Opposite ICICi Bank, Hanuman Nagar, Banashanka...",Spicy Tandoor,No,No,0,0,+91 8050884222,Banashankari,Quick Bites,,North Indian,150,"[('Rated 4.0', 'RATED\n cost for chicken roll...",[],Delivery,Banashankari,0.0
87,https://www.zomato.com/bangalore/krishna-sagar...,"38, 22nd Main, 22nd Cross, Opposite BDA, 2nd S...",Krishna Sagar,No,No,3.5/5,31,+91 8892752997\r\n+91 7204780429,Banashankari,Quick Bites,,North Indian,200,"[('Rated 1.0', 'RATED\n Worst experience with...",[],Delivery,Banashankari,3.5
94,https://www.zomato.com/bangalore/nandhini-delu...,"304, Opposite Apollo Public School, 100 Feet R...",Nandhini Deluxe,No,No,2.6/5,283,080 26890011\r\n080 26890033,Banashankari,Casual Dining,"Biryani, Chicken Guntur, Thali, Buttermilk, Ma...",North Indian,600,"[('Rated 3.0', 'RATED\n Ididnt like much.\n\n...",[],Delivery,Banashankari,2.6


In [63]:
# Lets say we want the dataframe for all the restaurants of a particular location
# for the above dataframe (specific to north indian cuisine)
df_north_indian_restaurant = df_north_cuisine.groupby('location')['url'].count().reset_index()
df_north_indian_restaurant

Unnamed: 0,location,url
0,BTM,274
1,Banashankari,35
2,Banaswadi,9
3,Bannerghatta Road,60
4,Basavanagudi,17
...,...,...
59,"Varthur Main Road, Whitefield",3
60,Vasanth Nagar,12
61,Whitefield,148
62,Wilson Garden,37


In [64]:
df_north_indian_restaurant.columns = ['Name', 'Restaurant Count']
df_north_indian_restaurant

Unnamed: 0,Name,Restaurant Count
0,BTM,274
1,Banashankari,35
2,Banaswadi,9
3,Bannerghatta Road,60
4,Basavanagudi,17
...,...,...
59,"Varthur Main Road, Whitefield",3
60,Vasanth Nagar,12
61,Whitefield,148
62,Wilson Garden,37


In [65]:
# Lets use the initial dataframe for locations to capture the latitude and longitude 
# for the above dataframe
north_india_final_df = df_north_indian_restaurant.merge(locations, on='Name', how = 'left').dropna()
north_india_final_df

Unnamed: 0,Name,Restaurant Count,Latitude,Longitude
0,BTM,274,45.954851,-112.496595
1,Banashankari,35,15.887678,75.704678
2,Banaswadi,9,13.014162,77.651854
3,Bannerghatta Road,60,12.939939,77.602327
4,Basavanagudi,17,12.941726,77.575502
...,...,...,...,...
59,"Varthur Main Road, Whitefield",3,12.941466,77.747094
60,Vasanth Nagar,12,12.988721,77.585169
61,Whitefield,148,44.373058,-71.611858
62,Wilson Garden,37,12.948934,77.596827


In [66]:
HeatMap(north_india_final_df[['Latitude','Longitude','Restaurant Count']], zoom =20,radius=15).add_to(basemap)
basemap

In [67]:
# We can do the same for all other cuisines as well
# We can automate that task instead of doing it for all cuisines

# We can create a dedicated heatmap based on the cuisine we want
# by writing a simple function which takes cuisine as the output
def heatmap_zone(cuisine):
    ''' Creates a heatmap based on the input cuisine '''
    result = (df['cuisines']==cuisine)
    df_cuisine = df[result]
    df_restaurant = df_cuisine.groupby('location')['url'].count().reset_index()
    df_restaurant.columns = ['Name', 'Restaurant Count']
    final_df = df_restaurant.merge(locations, on='Name', how = 'left').dropna()
    HeatMap(final_df[['Latitude','Longitude','Restaurant Count']], zoom =20,radius=15,blur=8).add_to(basemap)
    return basemap

In [68]:
# Lets check the unique cuisines we have
df['cuisines'].unique()

array(['North Indian, Mughlai, Chinese', 'Chinese, North Indian, Thai',
       'Cafe, Mexican, Italian', ..., 'Tibetan, Nepalese',
       'North Indian, Street Food, Biryani',
       'North Indian, Chinese, Arabian, Momos'], dtype=object)

In [69]:
# Lets say we want to see the heatmap for south indian cuisine restaurants
heatmap_zone('South Indian')

In [70]:
heatmap_zone('Biryani')