In [None]:
# Third-party libraries used throughout the notebook.
import numpy as np
import pandas as pd
# Bind `plt` to the pyplot interface; the bare `matplotlib` package does not
# expose plotting functions such as `plot`/`subplots` directly.
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Read the Zomato CSV into a DataFrame, decoding the file as latin-1.
# NOTE(review): the path points at /content directly rather than the mounted
# Drive folder — confirm the file is expected to be uploaded there.
path = '/content/zomato.csv'
data = pd.read_csv(path, encoding='latin-1')

In [None]:
# (rows, columns) of the freshly loaded frame.
data.shape

(51717, 17)

In [None]:
# Print one raw physical line of the CSV to see how a record is laid out on disk.
# Reuses `path` from the load cell so both cells read the same file regardless of
# the working directory, and streams the file instead of holding every line in memory.
RAW_LINE_INDEX = 919  # 0-based index, i.e. the 920th physical line

with open(path, 'r', encoding='latin-1') as f:
    # Stop reading as soon as the requested line has been reached.
    raw_line = next(
        (line for index, line in enumerate(f) if index == RAW_LINE_INDEX),
        None,
    )

if raw_line is None:
    # Same exception type the list-indexing version raised for a short file.
    raise IndexError(f"file has fewer than {RAW_LINE_INDEX + 1} lines")
print(raw_line)

https://www.zomato.com/bangalore/kadala-tarangaa-banashankari-bangalore?context=eyJzZSI6eyJlIjpbIjU5NDA1IiwiNjEwMzEiLCI1ODYzOSIsIjE4NTgyODk0IiwxODUyMzk0MCwiMTg5MjQ4NjgiLCI1NTk0MiIsIjE4NjM0OTIwIiwiMTgzNzk2NzIiLCIxODcxMDAzNyIsIjUxNDI0IiwiMTg2MTQzMDAiLCIxODU4MTE0MiIsIjUyODk0IiwiNTk5MjAiLCI1NDQxNSIsIjU1OTc4Il0sInQiOiJEaW5lLU91dCBSZXN0YXVyYW50cyBpbiBCYW5hc2hhbmthcmkifX0=,"27, 27th Cross, 1st Floor, Kadamba Complex, 2nd Stage, Banashankari, Bangalore",Kadala Tarangaa,Yes,No,3.6/5,129,"+91 9743491480



In [None]:
# Show the column labels of the loaded frame.
data.columns

Index(['url', 'address', 'name', 'online_order', 'book_table', 'rate', 'votes',
       'phone', 'location', 'rest_type', 'dish_liked', 'cuisines',
       'approx_cost(for two people)', 'reviews_list', 'menu_item',
       'listed_in(type)', 'listed_in(city)'],
      dtype='object')

In [None]:
# Drop the url, phone, dish_liked, reviews_list and menu_item columns.
# errors='ignore' keeps the cell re-runnable once the columns are already gone;
# reassigning (instead of inplace=True) avoids mutating a frame shown by earlier cells.
columns_to_drop = ['url', 'phone', 'dish_liked', 'reviews_list', 'menu_item']
data = data.drop(columns=columns_to_drop, errors='ignore')

In [None]:
# Re-check the column labels after the drop.
data.columns

Index(['address', 'name', 'online_order', 'book_table', 'rate', 'votes',
       'location', 'rest_type', 'cuisines', 'approx_cost(for two people)',
       'listed_in(type)', 'listed_in(city)'],
      dtype='object')

In [None]:
# Ratings are stored as text (e.g. "3.6/5"); pull out the first decimal number
# and convert it to float. Entries with no decimal number become NaN.
rating_text = data['rate'].astype(str)
rating_number = rating_text.str.extract(r'(\d+\.\d+)', expand=False)
data['rate'] = pd.to_numeric(rating_number, errors='coerce')

In [None]:
# Number of missing values per column, largest count first.
missing_per_column = data.isna().sum()
missing_per_column.sort_values(ascending=False)

Unnamed: 0,0
rate,10052
approx_cost(for two people),346
rest_type,227
cuisines,45
location,21
online_order,0
name,0
address,0
book_table,0
votes,0


In [None]:
# Per-column dtypes after the rating column has been parsed.
data.dtypes

Unnamed: 0,0
address,object
name,object
online_order,object
book_table,object
rate,float64
votes,int64
location,object
rest_type,object
cuisines,object
approx_cost(for two people),object


In [None]:
# Frequency of each rating value (missing ratings included), top 20 entries.
rating_counts = data['rate'].value_counts(dropna=False)
rating_counts.head(20)

Unnamed: 0_level_0,count
rate,Unnamed: 1_level_1
,10052
3.9,3972
3.8,3873
3.7,3821
3.6,3316
4.0,3183
4.1,2948
3.5,2784
3.4,2476
3.3,2310


In [None]:
# First 20 distinct raw cost values, to see how the text is formatted.
distinct_costs = data['approx_cost(for two people)'].unique()
distinct_costs[:20]

array(['800', '300', '600', '700', '550', '500', '450', '650', '400',
       '900', '200', '750', '150', '850', '100', '1,200', '350', '250',
       '950', '1,000'], dtype=object)

In [None]:
# Show up to five rows whose cost is missing.
# A fixed random_state makes the displayed sample reproducible across runs, and
# capping n at the number of matching rows avoids a ValueError when fewer than
# five rows are missing a cost.
missing_cost = data[data['approx_cost(for two people)'].isnull()]
missing_cost.sample(n=min(5, len(missing_cost)), random_state=42)

Unnamed: 0,address,name,online_order,book_table,rate,votes,location,rest_type,cuisines,approx_cost(for two people),listed_in(type),listed_in(city)
39171,"127, NR Road, City Market, Bangalore",Al-Madina Restaurant,No,No,3.6,31,City Market,Casual Dining,,,Dine-out,Lavelle Road
32551,"6, Krishna Nagar Industrial Area, Behind Forum...",Punjabi by Nature 2.0,No,No,4.2,3236,BTM,"Casual Dining, Microbrewery",North Indian,,Delivery,Koramangala 6th Block
48022,"Metro Station, MG Road, Bangalore",Chef Baker's,No,No,,0,MG Road,"Bakery, Kiosk","Bakery, Desserts",,Desserts,Residency Road
38829,"2nd Floor, 1 MG Mall, Trinity Circle, MG Road,...",Mighty Small,No,No,4.1,259,MG Road,"Casual Dining, Bar","Continental, Chinese",,Dine-out,Lavelle Road
37204,"29/1, 30th Cross, 4th T Block, Thilknagar, Jay...",Tawa Sagara,No,No,3.8,122,Jayanagar,Casual Dining,"Mangalorean, Seafood",,Dine-out,Koramangala 7th Block


In [None]:
# Remove thousands separators from the cost text (e.g. "1,200" -> "1200").
cost_column = 'approx_cost(for two people)'
cost_as_text = data[cost_column].astype(str)
data[cost_column] = cost_as_text.str.replace(',', '')

In [None]:
# Convert the cost column to numbers; anything unparseable becomes NaN.
cost_column = 'approx_cost(for two people)'
data[cost_column] = pd.to_numeric(data[cost_column], errors='coerce')


In [None]:
# Keep only rows that have a value in every column listed here.
required_columns = [
    'rate',
    'votes',
    'approx_cost(for two people)',
    'location',
    'cuisines',
]
data = data.dropna(subset=required_columns)


In [None]:
# Keep restaurants that have at least one vote and a rating above zero.
has_votes = data['votes'].gt(0)
has_rating = data['rate'].gt(0)
data = data[has_votes & has_rating]

In [None]:
# Map the raw column labels to the presentation names used in the exported file.
column_labels = {
    'name': 'Restaurant_Name',
    'online_order': 'Online_Order',
    'book_table': 'Table_Booking',
    'approx_cost(for two people)': 'Cost_For_Two',
    'listed_in(type)': 'Category',
    'listed_in(city)': 'City',
    'address': 'Address',
    'rate': 'Rating',
    'votes': 'Votes',
    'location': 'Location',
    'cuisines': 'Cuisines',
    'rest_type': 'Restaurant_Type',
}

# Reassign instead of inplace=True: the frame at this point is the result of a
# boolean filter, and building a new object avoids mutating a derived slice.
data = data.rename(columns=column_labels)

In [None]:
# Renumber rows 0..n-1 after filtering and discard the old index values.
# Reassigning (rather than inplace=True) leaves the previous frame object untouched.
data = data.reset_index(drop=True)

In [None]:
# Preview the first rows of the cleaned, renamed frame.
data.head()

Unnamed: 0,Address,Restaurant_Name,Online_Order,Table_Booking,Rating,Votes,Location,Restaurant_Type,Cuisines,Cost_For_Two,Category,City
0,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1,775,Banashankari,Casual Dining,"North Indian, Mughlai, Chinese",800.0,Buffet,Banashankari
1,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1,787,Banashankari,Casual Dining,"Chinese, North Indian, Thai",800.0,Buffet,Banashankari
2,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8,918,Banashankari,"Cafe, Casual Dining","Cafe, Mexican, Italian",800.0,Buffet,Banashankari
3,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,No,No,3.7,88,Banashankari,Quick Bites,"South Indian, North Indian",300.0,Buffet,Banashankari
4,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,No,No,3.8,166,Basavanagudi,Casual Dining,"North Indian, Rajasthani",600.0,Buffet,Banashankari


In [None]:
# Check the final column dtypes before exporting.
data.dtypes

Unnamed: 0,0
Address,object
Restaurant_Name,object
Online_Order,object
Table_Booking,object
Rating,float64
Votes,int64
Location,object
Restaurant_Type,object
Cuisines,object
Cost_For_Two,float64


In [None]:
# Save the cleaned frame as CSV (index omitted), then trigger a Colab download of it.
output_file = 'Cleaned_Zomato.csv'
data.to_csv(output_file, index=False)

from google.colab import files
files.download(output_file)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>