# Data Science Internship

## LEVEL 2

# Task 1: Table Booking and Online Delivery

In [1]:
# lets import all required libraries
import numpy as np
import pandas as pd

In [2]:
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
sns.set_style('whitegrid')

In [3]:
# Load the restaurant dataset from a CSV file given by a relative path
# (resolved against the current working directory).
# NOTE: the file name contains two consecutive spaces before "(1)"; it must
# match the file on disk exactly.
data = pd.read_csv("Dataset  (1).csv")

In [4]:
# Preview the first row to sanity-check that the file was parsed as expected.
data.head(1)

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314


In [5]:
# List the column labels; later cells select columns by these exact strings.
data.columns

Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address',
       'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
       'Average Cost for two', 'Currency', 'Has Table booking',
       'Has Online delivery', 'Is delivering now', 'Switch to order menu',
       'Price range', 'Aggregate rating', 'Rating color', 'Rating text',
       'Votes'],
      dtype='object')

# Determine the percentage of restaurants that offer table booking and online delivery.

In [6]:
# Subset of restaurants whose 'Has Table booking' flag is exactly 'Yes'.
table_booking_resto = data.loc[data['Has Table booking'].eq('Yes')]

In [7]:
# Number of restaurants that offer table booking.
len(table_booking_resto)

1158

In [8]:
# Share of all restaurants that offer table booking, expressed as a percentage.
percentage_of_table_booking = len(table_booking_resto) / data.shape[0] * 100

In [9]:
# Subset of restaurants whose 'Has Online delivery' flag is exactly 'Yes'.
online_delivery_resto = data.loc[data['Has Online delivery'].eq('Yes')]

In [10]:
# Number of restaurants that offer online delivery.
len(online_delivery_resto)

2451

In [11]:
# Share of all restaurants that offer online delivery, expressed as a percentage.
percentage_of_online_delivery = len(online_delivery_resto) / data.shape[0] * 100

In [12]:
# Report both shares computed above, one line per service.
for label, value in (
    ("Percentage of restaurants offering table booking:", percentage_of_table_booking),
    ("Percentage of restaurants offering online delivery:", percentage_of_online_delivery),
):
    print(label, value)

Percentage of restaurants offering table booking: 12.124384881164275
Percentage of restaurants offering online delivery: 25.662234321013504


# Compare the average ratings of restaurants with table booking and those without.

In [13]:
# Mean 'Aggregate rating' across the restaurants that offer table booking.
# NOTE(review): if unrated restaurants are stored with a rating of 0, they are
# included here and pull the mean down — confirm how unrated rows are encoded.
Average_rating_with_table_booking = table_booking_resto['Aggregate rating'].mean()

In [14]:
# Subset of restaurants whose 'Has Table booking' flag is exactly 'No'.
without_table_booking_resto = data.loc[data['Has Table booking'].eq('No')]

In [15]:
# Mean 'Aggregate rating' across the restaurants that do not offer table booking.
Average_rating_without_table_booking = without_table_booking_resto['Aggregate rating'].mean()

In [16]:
# Print the two group means one after the other for comparison.
for label, value in (
    ("Average rating for restaurants with table booking:", Average_rating_with_table_booking),
    ("Average rating for restaurants without table booking:", Average_rating_without_table_booking),
):
    print(label, value)

Average rating for restaurants with table booking: 3.4419689119170984
Average rating for restaurants without table booking: 2.559358989634219


# Analyze the availability of online delivery among restaurants with different price ranges.


In [17]:
# Group the restaurants by their 'Price range' value.
# No aggregation happens in this cell; the groups are iterated in a later cell.

group_resto_by_price_range = data.groupby('Price range')

In [18]:
# Mapping of price range -> percentage of restaurants in that range that offer
# online delivery. Starts empty and is filled in by a later cell.
online_delivery_percentage = dict()


In [19]:
# For every price range, store the share (in percent) of its restaurants
# whose 'Has Online delivery' flag is 'Yes'.
online_delivery_percentage.update({
    price_range: (
        len(group_data[group_data['Has Online delivery'] == 'Yes']) / len(group_data)
    ) * 100
    for price_range, group_data in group_resto_by_price_range
})


In [20]:

# Turn the {price range: percentage} mapping into a one-column table.
# orient='index' makes the dictionary keys (the price ranges) the row labels.
online_delivery_df = pd.DataFrame.from_dict(online_delivery_percentage, orient='index', columns=['Online Delivery Percentage'])

print(online_delivery_df)

   Online Delivery Percentage
1                   15.774077
2                   41.310633
3                   29.190341
4                    9.044369


Based on the output above, the share of restaurants offering online delivery in each price range is:

- Price Range 1: 15.77%
- Price Range 2: 41.31%
- Price Range 3: 29.19%
- Price Range 4: 9.04%

Restaurants in price range 2 are the most likely to offer online delivery, followed by price range 3. Restaurants in price range 4 are the least likely to offer it.

## TASK 2

# Task: Price Range Analysis

# Determine the most common price range among all the restaurants

In [21]:
# Show how many restaurants fall into each price range.
data['Price range'].value_counts()

Price range
1    4444
2    3113
3    1408
4     586
Name: count, dtype: int64

In [22]:
# Count the occurrences of each price range and keep the result for the next step.
price_range_counts = data['Price range'].value_counts()

In [23]:
# idxmax returns the index label (here, the price range) that has the largest count.
most_common_price_range = price_range_counts.idxmax()

In [24]:
# Report the price range that occurs most often.
print("The most common price range among all the restaurants is:", most_common_price_range)


The most common price range among all the restaurants is: 1


# Calculate the average rating for each price range

In [25]:
# Select the 'Aggregate rating' column grouped by 'Price range'.
# NOTE: despite its name, this variable holds the grouped ratings, not an
# average; .mean() is applied to it in later cells.
average_price_range = data.groupby('Price range')['Aggregate rating']

In [26]:
# Print a heading followed by the mean rating of every price range.
print("Average rating for each price range:", average_price_range.mean(), sep="\n")

Average rating for each price range:
Price range
1    1.999887
2    2.941054
3    3.683381
4    3.817918
Name: Aggregate rating, dtype: float64


Identify the color that represents the highest
average rating among different price ranges.


In [27]:
# Mean 'Aggregate rating' per price range.
# NOTE: this name differs from `average_price_range` (the grouped ratings) only
# by the capital "R"; take care not to confuse the two.
average_price_Range = average_price_range.mean()

In [28]:
# Mapping of price range -> rating colour. Starts empty and is populated by
# the loop in the following cell.
highest_rating_color = dict()



In [29]:
# For each price range, find the rating colour whose restaurants have the
# highest *average* aggregate rating.
# The previous version took the colour of the first restaurant holding the
# maximum individual rating in the range, which is a maximum rather than an
# average, and it never used the loop's `avg_rating` value.
# NOTE(review): if 'Rating color' is simply a banding of 'Aggregate rating',
# both approaches give the same colours — confirm by re-running this cell.
mean_rating_by_range_and_color = (
    data.groupby(['Price range', 'Rating color'])['Aggregate rating'].mean()
)
for price_range in average_price_Range.index:
    # .loc[price_range] selects this range's per-colour means;
    # idxmax returns the colour label with the largest mean.
    highest_rating_color[price_range] = mean_rating_by_range_and_color.loc[price_range].idxmax()


In [30]:

# Print a heading followed by the {price range: colour} mapping.
print(
    "Color representing the highest average rating for each price range:",
    highest_rating_color,
    sep="\n",
)

Color representing the highest average rating for each price range:
{1: 'Dark Green', 2: 'Dark Green', 3: 'Dark Green', 4: 'Dark Green'}


## TASK 3
Task: Feature Engineering

Extract additional features from the existing
columns, such as the length of the restaurant
name or address.

In [31]:
# Add a new column to `data` holding the number of characters in each restaurant name.
# This modifies `data` in place, so earlier previews of the frame no longer show every column.
data['Restaurant Name Length'] = data['Restaurant Name'].str.len()


In [32]:

# Add a new column to `data` holding the number of characters in each address.
data['Address Length'] = data['Address'].str.len()


In [33]:

# Preview the first two rows; the two new length columns appear at the right-hand end.
data.head(2)

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes,Restaurant Name Length,Address Length
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,No,No,No,3,4.8,Dark Green,Excellent,314,16,71
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,No,No,No,3,4.5,Dark Green,Excellent,591,16,67


In [34]:
# Confirm that 'Restaurant Name Length' and 'Address Length' were appended to the frame.
data.columns

Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address',
       'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
       'Average Cost for two', 'Currency', 'Has Table booking',
       'Has Online delivery', 'Is delivering now', 'Switch to order menu',
       'Price range', 'Aggregate rating', 'Rating color', 'Rating text',
       'Votes', 'Restaurant Name Length', 'Address Length'],
      dtype='object')

Create new features like "Has Table Booking"
or "Has Online Delivery" by encoding
categorical variables.

In [35]:
# Disabled alternative: one-hot encoding with pd.get_dummies, which would create one
# indicator column per distinct value of each listed column. It is kept commented out
# for reference only; the next cell builds a single 0/1 column per flag instead.
# data = pd.get_dummies(data, columns=['Has Table booking', 'Has Online delivery'])


In [36]:
# Encode the two Yes/No flags as integer indicators: 1 where the value is 'Yes', 0 otherwise.
# This is a single binary column per flag rather than a full one-hot expansion.
data['Has_Table_Booking'] = data['Has Table booking'].eq('Yes').astype(int)
data['Has_Online_Delivery'] = data['Has Online delivery'].eq('Yes').astype(int)



In [37]:
# Preview the first five rows, including the new 0/1 indicator columns.
data.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes,Restaurant Name Length,Address Length,Has_Table_Booking,Has_Online_Delivery
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,No,3,4.8,Dark Green,Excellent,314,16,71,1,0
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,No,3,4.5,Dark Green,Excellent,591,16,67,1,0
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,No,4,4.4,Green,Very Good,270,22,56,1,0
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,No,4,4.9,Dark Green,Excellent,365,4,70,0,0
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,No,4,4.8,Dark Green,Excellent,229,11,64,1,0


In [38]:
# Confirm that the encoded columns now exist alongside the original Yes/No columns.
data.columns

Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address',
       'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
       'Average Cost for two', 'Currency', 'Has Table booking',
       'Has Online delivery', 'Is delivering now', 'Switch to order menu',
       'Price range', 'Aggregate rating', 'Rating color', 'Rating text',
       'Votes', 'Restaurant Name Length', 'Address Length',
       'Has_Table_Booking', 'Has_Online_Delivery'],
      dtype='object')

In [39]:
# Drop the original Yes/No columns now that their 0/1 encodings exist.
# errors='ignore' keeps this cell safe to re-run: without it, a second execution
# raises KeyError because the columns have already been removed from `data`.
data = data.drop(columns=['Has Table booking', 'Has Online delivery'], errors='ignore')


In [40]:
# Verify that the original Yes/No columns are gone and only the encoded versions remain.
data.columns

Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address',
       'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
       'Average Cost for two', 'Currency', 'Is delivering now',
       'Switch to order menu', 'Price range', 'Aggregate rating',
       'Rating color', 'Rating text', 'Votes', 'Restaurant Name Length',
       'Address Length', 'Has_Table_Booking', 'Has_Online_Delivery'],
      dtype='object')