In [2]:
!pip install geopy

Collecting geopy
  Downloading geopy-2.4.1-py3-none-any.whl.metadata (6.8 kB)
Collecting geographiclib<3,>=1.52 (from geopy)
  Downloading geographiclib-2.0-py3-none-any.whl.metadata (1.4 kB)
Downloading geopy-2.4.1-py3-none-any.whl (125 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/125.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m122.9/125.4 kB[0m [31m3.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m125.4/125.4 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading geographiclib-2.0-py3-none-any.whl (40 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/40.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.3/40.3 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: geographiclib, geopy
Successfully installed geographiclib-2.0 geopy-

#### load necessary libraries

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
from IPython.core.display import display,HTML
import time


In [4]:
import warnings
warnings.filterwarnings("ignore")
sns.set_style("whitegrid")


### Load Data

In [85]:
data1 = pd.read_csv("zomato_data.csv")
data2 = pd.read_csv("Geographical_Coordinates.csv")

In [86]:
data1.shape

(51717, 10)

In [143]:
display(data1.sample(5))
display(data2.sample(5))

Unnamed: 0,online_order,book_table,rate,votes,rest_type,dish_liked,cuisines,approx_costfor_two_people,listed_intype,listed_incity
11976,0,0,3.7,0,Beverage Shop,Not Available,"Beverages, Desserts",150,Delivery,Church Street
27683,0,0,3.6,21,Quick Bites,Not Available,South Indian,200,Delivery,Koramangala 4th Block
33314,0,0,3.0,15,Bar,Not Available,North Indian,1000,Delivery,Koramangala 6th Block
27932,0,0,3.7,0,Quick Bites,Not Available,"Juices, Beverages, Fast Food",250,Delivery,Koramangala 4th Block
18092,0,0,2.9,539,"Dessert Parlor, Cafe","Sandwiches, Brownie, Chicken Sandwich, Pizza, ...","Desserts, Cafe, Bakery",500,Delivery,Indiranagar


Unnamed: 0,listed_incity,Latitude,Longitude
24,New BEL Road,13.039186,77.564284
21,Malleshwaram,13.002735,77.570325
7,Church Street,12.974914,77.605247
22,Marathahalli,12.955257,77.698416
25,Old Airport Road,12.960632,77.6425


## Data Cleaning
### Step 1: Rating Column (rate)
- Replace '-' values with NaN
- Remove /5 and retain only the numeric value
- Convert to float using pd.to_numeric()
- Fill missing values using the median rating


### Step 2: Cost Column (approx_costfor_two_people)
- Remove commas from numeric strings ('1,000' → '1000')
- Convert to numeric
- Fill missing values using the median cost

### Step 3: Categorical Columns
- dish_liked → Replace NaN with "Not Available"
- cuisines → Replace NaN with "Other"
- rest_type → Replace NaN with "Unknown"

### Step 4: Votes Column
- Fill missing values in votes with median


### Step 5: Binary Encoding
##### Convert the following binary fields:

- online_order
Yes → 1, No → 0

- book_table
Yes → 1, No → 0

### Step 6: Data Type Conversion
 Ensure the following conversions:
- rate → float
- votes → integer
- approx_costfor_two_people → integer


In [87]:
data1['rate'] = data1['rate'].replace('-',np.nan)
data1['rate'] = data1['rate'].str.replace('/5', '')
data1['rate'] = pd.to_numeric(data1['rate'], errors='coerce').astype(float)
data1['rate'].fillna(data1['rate'].median(),inplace=True)

In [104]:
print(data1['rate'].isnull().sum())
print(data1['rate'].info())
print(data1['rate'].describe())

0
<class 'pandas.core.series.Series'>
RangeIndex: 51717 entries, 0 to 51716
Series name: rate
Non-Null Count  Dtype  
--------------  -----  
51717 non-null  float64
dtypes: float64(1)
memory usage: 404.2 KB
None
count    51717.000000
mean         3.700362
std          0.395391
min          1.800000
25%          3.500000
50%          3.700000
75%          3.900000
max          4.900000
Name: rate, dtype: float64


In [88]:
data1['approx_costfor_two_people'] = data1['approx_costfor_two_people'].astype(str).str.replace(',','')
data1['approx_costfor_two_people'] = pd.to_numeric(data1['approx_costfor_two_people'],errors='coerce')
data1['approx_costfor_two_people'].fillna(data1['approx_costfor_two_people'].median(),inplace=True)

In [105]:
print(data1['approx_costfor_two_people'].isnull().sum())
print(data1['approx_costfor_two_people'].info())
print(data1['approx_costfor_two_people'].describe())

0
<class 'pandas.core.series.Series'>
RangeIndex: 51717 entries, 0 to 51716
Series name: approx_costfor_two_people
Non-Null Count  Dtype
--------------  -----
51717 non-null  int64
dtypes: int64(1)
memory usage: 404.2 KB
None
count    51717.000000
mean       554.391689
std        437.563723
min         40.000000
25%        300.000000
50%        400.000000
75%        650.000000
max       6000.000000
Name: approx_costfor_two_people, dtype: float64


In [91]:
data1['dish_liked'].fillna("Not Available",inplace=True)
data1['cuisines'].fillna("Other",inplace=True)
data1['rest_type'].fillna("Unknown",inplace=True)

In [106]:
print(data1['dish_liked'].isnull().sum())
print(data1['dish_liked'].info())
print(data1['dish_liked'].describe())

0
<class 'pandas.core.series.Series'>
RangeIndex: 51717 entries, 0 to 51716
Series name: dish_liked
Non-Null Count  Dtype 
--------------  ----- 
51717 non-null  object
dtypes: object(1)
memory usage: 404.2+ KB
None
count             51717
unique             5272
top       Not Available
freq              28078
Name: dish_liked, dtype: object


In [107]:
print(data1['cuisines'].isnull().sum())
print(data1['cuisines'].info())
print(data1['cuisines'].describe())

0
<class 'pandas.core.series.Series'>
RangeIndex: 51717 entries, 0 to 51716
Series name: cuisines
Non-Null Count  Dtype 
--------------  ----- 
51717 non-null  object
dtypes: object(1)
memory usage: 404.2+ KB
None
count            51717
unique            2724
top       North Indian
freq              2913
Name: cuisines, dtype: object


In [108]:
print(data1['rest_type'].isnull().sum())
print(data1['rest_type'].info())
print(data1['rest_type'].describe())

0
<class 'pandas.core.series.Series'>
RangeIndex: 51717 entries, 0 to 51716
Series name: rest_type
Non-Null Count  Dtype 
--------------  ----- 
51717 non-null  object
dtypes: object(1)
memory usage: 404.2+ KB
None
count           51717
unique             94
top       Quick Bites
freq            19132
Name: rest_type, dtype: object


In [92]:
data1['votes'].fillna(data1['votes'].median(),inplace=True)
data1['votes'] = data1['votes'].astype(int)

In [93]:
data1['online_order'] = data1['online_order'].apply(lambda x: 1 if x=="Yes" else 0)
data1['book_table'] = data1['book_table'].apply(lambda x: 1 if x=="Yes" else 0)

In [100]:
data1['approx_costfor_two_people']=data1['approx_costfor_two_people'].astype(np.integer)

  npdtype = np.dtype(dtype)


In [101]:
np.dtype(data1['approx_costfor_two_people'])

dtype('int64')

## 1. What is the shape of the given dataset?


In [182]:
data1.shape

(51717, 11)

# 2. How many restaurants serve North Indian cuisine?


In [113]:
north_indian_count = data1['cuisines'].apply(lambda x: 'North Indian' in str(x)).sum()
print(north_indian_count)

21085


In [115]:
from collections import Counter

# What cuisine is most commonly offered by restaurants in Bangalore?


In [118]:
cusine_list = data1['cuisines'].apply(lambda x: [i.strip() for i in x.split(',')])
item_list = [item for sublist in cusine_list for item in sublist]
cusine_counter = Counter(item_list)


In [131]:
cusine_counter['Desserts']
cusine_counter['North Indian']
cusine_counter['South Indian']
cusine_counter['Bakery']
cusine_counter['Biryani']


print("Desserts count: ",cusine_counter['Desserts'])
print("North Indian count: ",cusine_counter['North Indian'])
print("South Indian count: ",cusine_counter['South Indian'])
print("Bakery count: ",cusine_counter['Bakery'])
print("Biryani count: ",cusine_counter['Biryani'])



Desserts count:  5633
North Indian count:  21085
South Indian count:  8644
Bakery count:  2840
Biryani count:  6492


In [121]:
cusine_counter.most_common(1)

[('North Indian', 21085)]

# Which locality in Bangalore has the highest average cost for dining (for two people)?

In [132]:
avg_cost_for_two_people = data1.groupby('listed_incity')['approx_costfor_two_people'].mean()
highest_avg_costfor_two_people = avg_cost_for_two_people.sort_values(ascending=False).head(1)
highest_avg_costfor_two_people

Unnamed: 0_level_0,approx_costfor_two_people
listed_incity,Unnamed: 1_level_1
Church Street,770.361248


#  Which restaurant type has the top rating with over 1000 votes?


In [133]:
filtered_df = data1[data1['votes']>1000]

avg_rating_by_type = filtered_df.groupby('rest_type')['rate'].mean()

In [134]:
top_rated_restaurent =avg_rating_by_type.sort_values(ascending=False).head(1)
top_rated_restaurent

Unnamed: 0_level_0,rate
rest_type,Unnamed: 1_level_1
Bakery,4.8


# How much does it cost at minimum to eat out in Bangalore?


In [144]:
dine_out_df = data1[data1['listed_intype'].str.contains('Dine-out',case=False)]

In [147]:
dine_out_df.head(3)

Unnamed: 0,online_order,book_table,rate,votes,rest_type,dish_liked,cuisines,approx_costfor_two_people,listed_intype,listed_incity
553,1,1,3.7,0,Casual Dining,Not Available,"Continental, Asian, North Indian, Tea",500,Dine-out,Banashankari
554,1,0,3.9,888,Casual Dining,"Spring Dosa, Coffee, Spring Roll, Masala Papad...","North Indian, South Indian, Chinese, Fast Food...",750,Dine-out,Banashankari
555,1,1,4.0,349,Casual Dining,"Sea Food, Neer Dosa, Anjal Masala Fry, Coconut...","Seafood, Biryani, Beverages, South Indian, Nor...",1200,Dine-out,Banashankari


In [146]:
min_prices_to_eat_out = dine_out_df.groupby('listed_incity')['approx_costfor_two_people'].min()
min_prices_to_eat_out.max()

np.int64(100)

In [154]:

eat_out_keywords = ['Buffet', 'Dine-out', 'Café', 'Dessert']


eat_out_df = data1[data1['listed_intype'].str.contains('|'.join(eat_out_keywords), case=False, na=False)]


min_cost_per_area = eat_out_df.groupby('listed_incity')['approx_costfor_two_people'].min()

min_safe_eat_out_cost = min_cost_per_area.max()

print(f"💰 Minimum budget to eat out anywhere in Bangalore is: ₹{min_safe_eat_out_cost}")

💰 Minimum budget to eat out anywhere in Bangalore is: ₹100


# What percentage of total online orders is received by restaurants in Banashankari?

In [165]:
total_banashankari_order = data1[data1['listed_incity']=='Banashankari'].shape[0]
total_banashankari_order

863

In [166]:
banashankari_online_orders = data1[(data1['online_order']==1) & (data1['listed_incity']=='Banashankari')].shape[0]
banashankari_online_orders

546

In [167]:


online_order_percentage = (banashankari_online_orders/total_banashankari_order)*100
online_order_percentage

63.26767091541136

# Which locality has the most restaurants with over 500 votes and a rating below 3.0?

In [168]:
low_rated_popular = data1[(data1['votes'] > 500) & (data1['rate'] < 3.0)]

In [169]:
locality_counts = low_rated_popular['listed_incity'].value_counts()

In [171]:
top_locality = locality_counts.idxmax()
top_count = locality_counts.max()
top_locality,top_count

('Brookefield', np.int64(8))

# Which locality in Bangalore should Zomato target for expansion based on restaurant type diversity?

In [172]:

diversity = data1.groupby('listed_incity')['rest_type'].nunique()


most_diverse_locality = diversity.sort_values(ascending=False).head(1)

print("📍 Zomato should expand in:", most_diverse_locality.index[0])
print("🔢 Unique restaurant types there:", most_diverse_locality.values[0])


📍 Zomato should expand in: BTM
🔢 Unique restaurant types there: 62


# cWhat's the average cost difference between buffet and delivery restaurants?

In [173]:
buffet_avg = data1[data1['listed_intype'] == 'Buffet']['approx_costfor_two_people'].mean()
delivery_avg = data1[data1['listed_intype'] == 'Delivery']['approx_costfor_two_people'].mean()


cost_diff = abs(buffet_avg - delivery_avg)

print(f"📦 Buffet Avg Cost: ₹{buffet_avg:.2f}")
print(f"🚚 Delivery Avg Cost: ₹{delivery_avg:.2f}")
print(f"💸 Average Cost Difference: ₹{cost_diff:.2f}")

📦 Buffet Avg Cost: ₹1295.35
🚚 Delivery Avg Cost: ₹464.10
💸 Average Cost Difference: ₹831.25


# What is the maximum number of votes received by any restaurant with online ordering?

In [174]:
max_votes_online = data1[data1['online_order'] == 1]['votes'].max()
print(f"💥 Max Votes for a Restaurant with Online Ordering: {max_votes_online}")


💥 Max Votes for a Restaurant with Online Ordering: 16832


# What is the average rating of restaurants that serve both North Indian and Chinese cuisines?

In [175]:
both_cuisines = data1[data1['cuisines'].str.contains('North Indian') & data1['cuisines'].str.contains('Chinese')]
avg_rating = both_cuisines['rate'].mean()
print(f"⭐ Average Rating of Restaurants serving both North Indian & Chinese: {avg_rating:.2f}")


⭐ Average Rating of Restaurants serving both North Indian & Chinese: 3.59


# What is the most profitable area for Zomato based on potential revenue estimation?

In [176]:

data1['estimated_revenue'] = data1['votes'] * data1['approx_costfor_two_people']


revenue_by_area = data1.groupby('listed_incity')['estimated_revenue'].sum().sort_values(ascending=False)

most_profitable_area = revenue_by_area.idxmax()
max_revenue = revenue_by_area.max()

print(f"💰 Most Profitable Area for Zomato: {most_profitable_area} with estimated revenue of ₹{max_revenue:,.0f}")

💰 Most Profitable Area for Zomato: Koramangala 7th Block with estimated revenue of ₹1,006,195,610


#  If Zomato wants to reduce customer complaints, which restaurant type should they focus on?

In [178]:

rest_type_rating = data1.groupby('rest_type').agg({
    'rate': 'mean',
    'votes': 'sum',
    'rest_type': 'count'
}).rename(columns={'rate': 'avg_rating', 'votes': 'total_votes', 'rest_type': 'restaurant_count'})


filtered = rest_type_rating[rest_type_rating['total_votes'] > 500]


focus_types = filtered.sort_values(by='avg_rating', ascending=True)

focus_types.head(5)


Unnamed: 0_level_0,avg_rating,total_votes,restaurant_count
rest_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Quick Bites, Food Court",3.347368,2276,19
Unknown,3.398678,15033,227
"Quick Bites, Cafe",3.402439,2269,41
"Bakery, Quick Bites",3.461592,11970,289
Takeaway,3.4975,2240,120


#  In which area should Zomato invest by considering high rating (rate > 4.2), high number of votes (> 500) and including online orders?

In [179]:
top_restaurants = data1[(data1['rate'] > 4.2) &
                        (data1['votes'] > 500) &
                        (data1['online_order'] == 1)]

In [180]:
investment_area = top_restaurants['listed_incity'].value_counts().reset_index()
investment_area.columns = ['listed_incity', 'top_restaurant_count']

In [181]:
investment_area.head()

Unnamed: 0,listed_incity,top_restaurant_count
0,MG Road,97
1,Koramangala 7th Block,97
2,Church Street,95
3,Koramangala 4th Block,95
4,Brigade Road,94


### merge zomato data and geographical coordinates data for mapping

In [185]:


# Merging the two datasets based on the 'listed_incity' column
merged_df = pd.merge(data1, data2, on='listed_incity', how='left')

# Display the first few rows of the merged dataset to verify
merged_df.head()

Unnamed: 0,online_order,book_table,rate,votes,rest_type,dish_liked,cuisines,approx_costfor_two_people,listed_intype,listed_incity,estimated_revenue,Latitude,Longitude
0,1,1,4.1,775,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,Buffet,Banashankari,620000,12.939333,77.553982
1,1,0,4.1,787,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,Buffet,Banashankari,629600,12.939333,77.553982
2,1,0,3.8,918,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,Buffet,Banashankari,734400,12.939333,77.553982
3,0,0,3.7,88,Quick Bites,Masala Dosa,"South Indian, North Indian",300,Buffet,Banashankari,26400,12.939333,77.553982
4,0,0,3.8,166,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,Buffet,Banashankari,99600,12.939333,77.553982


In [187]:
!pip install folium

Collecting folium
  Downloading folium-0.19.5-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting branca>=0.6.0 (from folium)
  Downloading branca-0.8.1-py3-none-any.whl.metadata (1.5 kB)
Collecting xyzservices (from folium)
  Downloading xyzservices-2025.1.0-py3-none-any.whl.metadata (4.3 kB)
Downloading folium-0.19.5-py2.py3-none-any.whl (110 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.9/110.9 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading branca-0.8.1-py3-none-any.whl (26 kB)
Downloading xyzservices-2025.1.0-py3-none-any.whl (88 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.4/88.4 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: xyzservices, branca, folium
Successfully installed branca-0.8.1 folium-0.19.5 xyzservices-2025.1.0


In [189]:
import folium
from folium.plugins import HeatMap

# Create a base map centered around Bangalore
map_bangalore = folium.Map(location=[12.9716, 77.5946], zoom_start=12)

# Extract latitude and longitude of all restaurants
locations = merged_df[['Latitude', 'Longitude']].dropna()

# Create a heat map layer for restaurant density
HeatMap(locations).add_to(map_bangalore)

# Save the map to an HTML file
map_bangalore.save('bangalore_restaurant_density_map.html')


In [195]:
import folium
from folium.plugins import MarkerCluster

# Create a base map centered around Bangalore
map_bangalore = folium.Map(location=[12.9716, 77.5946], zoom_start=12)

# Initialize marker cluster to group nearby restaurants
marker_cluster = MarkerCluster().add_to(map_bangalore)

# Define a function to assign color markers based on cuisine
def cuisine_marker(cuisine):
    cuisine = cuisine.lower()
    if 'north indian' in cuisine:
        return 'red'
    elif 'south indian' in cuisine:
        return 'blue'
    elif 'biryani' in cuisine:
        return 'green'
    elif 'chinese' in cuisine:
        return 'purple'
    elif 'italian' in cuisine:
        return 'orange'
    else:
        return 'gray'

# Iterate over the merged DataFrame to add markers for each restaurant
for idx, row in merged_df.iterrows():
    if pd.notnull(row['Latitude']) and pd.notnull(row['Longitude']):
        folium.Marker(
            location=[row['Latitude'], row['Longitude']],
            popup=f"<strong>{row['cuisines']}</strong><br>Cuisine: {row['cuisines']}<br>Cost: ₹{row['approx_costfor_two_people']}",
            icon=folium.Icon(color=cuisine_marker(row['cuisines']))
        ).add_to(marker_cluster)

# Save the map to an HTML file
map_bangalore.save('bangalore_cuisine_map.html')
