# Step 1 : Importing Libraries. 

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns

# Step 2 : Create the DataFrame.

In [None]:
# Load the Zomato dataset from its CSV file into a DataFrame and display it.
csv_path = "dataset/zomato_data.csv"
df = pd.read_csv(csv_path)
df

In [None]:
# Show the DataFrame's dimensions as a (rows, columns) tuple.
df.shape

In [None]:
# Number of rows in the DataFrame.
len(df)

In [None]:
# Peek at the first row only, to see the column layout.
df.iloc[:1]

In [None]:
# Print a summary of the DataFrame: columns, non-null counts and dtypes.
df.info()

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
# Percentage of missing values per column: missing count / total rows * 100.
null_counts = df.isna().sum()
row_count = df.shape[0]
(null_counts / row_count) * 100

In [None]:
# Drop every row that contains at least one missing value, modifying df in
# place (axis=0 and how="any" are the defaults, spelled out for clarity).
df.dropna(axis=0, how="any", inplace=True)

In [None]:
# Remove the "/5" suffix from the rating strings so that only the numeric
# part remains (assumes values look like "4.1/5" -- confirm against the CSV).
# The suffix is replaced with an empty string rather than a space, so no stray
# whitespace is left behind; .str.strip() also trims any padding that was
# already in the data.
# regex=False makes "/5" a literal substring match, independent of the pandas
# version's default for the `regex` argument.
df["rate"] = df["rate"].str.replace("/5", "", regex=False).str.strip()

In [None]:
# Convert the rating column from text (object dtype) to 64-bit floats so it
# can be aggregated and plotted numerically.
df['rate'] = df['rate'].astype('float64')

In [None]:
# Display the DataFrame again to check the result of the cleaning steps.
df

# 1. What type of restaurant do the majority of customers order from?

In [None]:
# Bar chart of how many restaurants are listed under each type.
sns.countplot(data=df, x='listed_in(type)')
plt.xlabel("type of restaurant")

# Conclusion : Dining restaurants have the most customers.

# 2. How many votes has each type of restaurant received from customers?

In [None]:
# Total the votes for each restaurant type and draw them as a line plot
# with a marker on every point.
grouped_data = df.groupby('listed_in(type)')['votes'].agg('sum')
result = grouped_data.to_frame(name='votes')

line_style = {'color': 'green', 'marker': "o"}
label_style = {'color': 'red', 'size': '20'}

plt.plot(result, **line_style)
plt.xlabel("type of restaurant", **label_style)
plt.ylabel('votes', **label_style)


# Conclusion : Dining restaurants have received the most votes.

# 3. What are the ratings that the majority of restaurants have received?

In [None]:
# Histogram of the restaurant ratings, grouped into three bins.
ratings = df["rate"]
plt.hist(ratings, bins=3)
plt.title('ratings distribution')
plt.show()

# Conclusion : The majority of restaurants have received ratings between 3.25 and 4.00.

# 4. Average order spending by couples.

In [None]:
# Count how many restaurants share each approximate cost for two people.
couple_data = df.loc[:, "approx_cost(for two people)"]
sns.countplot(x=couple_data)

# Conclusion : The majority of couples prefer restaurants with an approximate cost of ₹300.

# 5. Which mode (online/offline) has received the maximum rating?

In [None]:
# Relabel the Yes/No online-order flag as Online/Offline, then total the
# ratings for each mode.
# NOTE(review): a sum grows with the number of restaurants in each group, so
# the larger group tends to win regardless of rating quality -- consider
# comparing the mean rating instead; confirm the intended metric.
mode = dict(No='Offline', Yes='Online')

df['online_order'] = df['online_order'].replace(mode)
max_rating = df.groupby('online_order')['rate'].agg('sum')
max_rating

In [None]:
# Wrap the per-mode rating totals in a single-column DataFrame named 'rate'.
result = max_rating.to_frame(name='rate')
result

In [None]:
# Line plot of the per-mode rating totals held in `result`.
line_style = {'color': 'green', 'marker': 'o'}
plt.plot(result, **line_style)
plt.title('Mode which has maximum ratings.')
plt.xlabel('Online/Offline')
plt.ylabel('Ratings')
plt.show()

# Conclusion : Offline mode has the highest total (summed) rating.

# 6. Which type of restaurant received more offline orders, so that Zomato can give customers some good offers?

In [None]:
# Count the rated restaurants for every (restaurant type, order mode) pair,
# flatten the result into a regular table, and keep only the 'Online' rows.
grouped_data = df.groupby(['listed_in(type)', 'online_order'])['rate'].count()
result = grouped_data.to_frame(name='rate').reset_index()
is_online = result["online_order"] == 'Online'
filtered_data = result.loc[is_online]
filtered_data

In [None]:
# Count the rated restaurants for every (restaurant type, order mode) pair,
# flatten the result into a regular table, and keep only the 'Offline' rows.
grouped_data = df.groupby(['listed_in(type)', 'online_order'])['rate'].count()
result = grouped_data.to_frame(name='rate').reset_index()
is_offline = result["online_order"] == 'Offline'
filtered_data = result.loc[is_offline]
filtered_data

In [None]:
# Build a table of row counts with restaurant types as rows and order modes
# as columns (0 where a combination never occurs), then draw it as an
# annotated heatmap with integer cell labels.
pivot_table = pd.pivot_table(
    df,
    index='listed_in(type)',
    columns='online_order',
    aggfunc='size',
    fill_value=0,
)
sns.heatmap(pivot_table, fmt='d', annot=True)
plt.title("Heatmap")
plt.xlabel("Mode")
plt.ylabel('Listed In(Type)')
plt.show()

# Conclusion : 