In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Read the Zomato CSV (path is relative to the notebook's working directory).
DATA_FILE = "Zomato data.csv"
df = pd.read_csv(DATA_FILE)

In [3]:
# Display the loaded DataFrame; the rendered table is truncated in the middle for long frames.
df

Unnamed: 0,name,online_order,book_table,rate,votes,approx_cost(for two people),listed_in(type)
0,Jalsa,Yes,Yes,4.1/5,775,800,Buffet
1,Spice Elephant,Yes,No,4.1/5,787,800,Buffet
2,San Churro Cafe,Yes,No,3.8/5,918,800,Buffet
3,Addhuri Udupi Bhojana,No,No,3.7/5,88,300,Buffet
4,Grand Village,No,No,3.8/5,166,600,Buffet
...,...,...,...,...,...,...,...
143,Melting Melodies,No,No,3.3/5,0,100,Dining
144,New Indraprasta,No,No,3.3/5,0,150,Dining
145,Anna Kuteera,Yes,No,4.0/5,771,450,Dining
146,Darbar,No,No,3.0/5,98,800,Dining


In [4]:
# Preview the first five rows of the DataFrame.
df.head(n=5)

Unnamed: 0,name,online_order,book_table,rate,votes,approx_cost(for two people),listed_in(type)
0,Jalsa,Yes,Yes,4.1/5,775,800,Buffet
1,Spice Elephant,Yes,No,4.1/5,787,800,Buffet
2,San Churro Cafe,Yes,No,3.8/5,918,800,Buffet
3,Addhuri Udupi Bhojana,No,No,3.7/5,88,300,Buffet
4,Grand Village,No,No,3.8/5,166,600,Buffet


In [5]:
# Report the DataFrame dimensions as (rows, columns).
df.shape

(148, 7)

In [6]:
# Print each column's name, non-null count and dtype, plus overall memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 148 entries, 0 to 147
Data columns (total 7 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   name                         148 non-null    object
 1   online_order                 148 non-null    object
 2   book_table                   148 non-null    object
 3   rate                         148 non-null    object
 4   votes                        148 non-null    int64 
 5   approx_cost(for two people)  148 non-null    int64 
 6   listed_in(type)              148 non-null    object
dtypes: int64(2), object(5)
memory usage: 8.2+ KB


In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,votes,approx_cost(for two people)
count,148.0,148.0
mean,264.810811,418.243243
std,653.676951,223.085098
min,0.0,100.0
25%,6.75,200.0
50%,43.5,400.0
75%,221.75,600.0
max,4884.0,950.0


In [8]:
# Materialise the column labels as a plain Python list
list(df.columns)

['name',
 'online_order',
 'book_table',
 'rate',
 'votes',
 'approx_cost(for two people)',
 'listed_in(type)']

In [9]:
# Per-column tally of missing entries (isna is the same check as isnull)
df.isna().sum()

name                           0
online_order                   0
book_table                     0
rate                           0
votes                          0
approx_cost(for two people)    0
listed_in(type)                0
dtype: int64

In [10]:
# Number of distinct values per column; missing values are not counted (dropna=True)
df.nunique(axis=0, dropna=True)

name                           145
online_order                     2
book_table                       2
rate                            20
votes                           90
approx_cost(for two people)     18
listed_in(type)                  4
dtype: int64

In [11]:
# Ratings arrive as text such as '4.1/5': drop the literal '/5' suffix first ...
rate_text = df['rate'].astype(str).str.replace('/5', '', regex=False)
# ... then parse what is left as a number; anything unparseable becomes NaN
# (errors='coerce') rather than raising.
df['rate'] = pd.to_numeric(rate_text, errors='coerce')

In [12]:
# Normalise the cost column to numeric: strip thousands separators (',') from
# the text form, then parse; unparseable entries become NaN (errors='coerce').
cost_text = df['approx_cost(for two people)'].astype(str).str.replace(',', '', regex=False)
df['approx_cost(for two people)'] = pd.to_numeric(cost_text, errors='coerce')


In [13]:
# Shorter, identifier-friendly aliases for the original column names.
# Note: 'order_mode' still carries the Yes/No values of 'online_order'.
column_aliases = {
    'online_order': 'order_mode',
    'listed_in(type)': 'restaurant_type',
    'approx_cost(for two people)': 'cost_for_two',
}
df.rename(columns=column_aliases, inplace=True)

In [14]:
# Q1. What type of restaurant do the majority of customers order from?
# Answered as the most frequent 'restaurant_type' across the rows of df;
# mode() returns every tied value, and the first one is taken.
type_modes = df['restaurant_type'].mode()
q1 = type_modes.iloc[0]
q1


'Dining'

In [15]:
# Q2. How many votes has each type of restaurant received?
# Total of the 'votes' column within each restaurant_type group.
q2 = df.groupby('restaurant_type')['votes'].agg('sum')
q2


restaurant_type
Buffet     3028
Cafes      6434
Dining    20363
other      9367
Name: votes, dtype: int64

In [16]:
# Q3. Most common rating
# mode() returns every tied value; the first one is taken.
rate_modes = df['rate'].mode()
q3 = rate_modes.iloc[0]
q3

np.float64(3.8)

In [17]:
# Q4. Average spending per person
# 'cost_for_two' is the approximate cost for two people, so halve its mean.
mean_cost_for_two = df['cost_for_two'].mean()
q4 = mean_cost_for_two / 2
q4

np.float64(209.1216216216216)

In [18]:
# Q5. Average rating per order mode
# q5 holds the mean rating for each 'order_mode' value ('No' / 'Yes'), not a
# single winner; compare the two means to see which mode is rated higher.
q5 = df.groupby('order_mode')['rate'].agg('mean')
q5

order_mode
No     3.487778
Yes    3.858621
Name: rate, dtype: float64

In [19]:
# Q6. Restaurant type with most offline orders
# Keep only the rows whose order_mode is 'No', then pick the restaurant_type
# that occurs most often among them.
offline_orders = df.loc[df['order_mode'].eq('No')]
offline_type_counts = offline_orders['restaurant_type'].value_counts()
q6 = offline_type_counts.idxmax()
q6

'Dining'

In [20]:
# Numbered recap of the six answers. Each entry is the argument tuple for one
# print() call. print() joins its arguments with a single space, so a Series
# that follows a label ending in '\n' is printed with one leading space on its
# first line.
report_rows = (
    ("1. Most ordered restaurant type:", q1),
    ("\n2. Votes per restaurant type:\n", q2),
    ("\n3. Most common restaurant rating:", q3),
    ("\n4. Average spending per person: ₹{:.2f}".format(q4),),
    ("\n5. Average rating by order mode:\n", q5),
    ("\n6. Restaurant type with most offline orders:", q6),
)
for row in report_rows:
    print(*row)

1. Most ordered restaurant type: Dining

2. Votes per restaurant type:
 restaurant_type
Buffet     3028
Cafes      6434
Dining    20363
other      9367
Name: votes, dtype: int64

3. Most common restaurant rating: 3.8

4. Average spending per person: ₹209.12

5. Average rating by order mode:
 order_mode
No     3.487778
Yes    3.858621
Name: rate, dtype: float64

6. Restaurant type with most offline orders: Dining
