In [2]:
import pandas as pd
import sqlite3
from pathlib import Path


In [4]:
orders_df = pd.read_csv("orders.csv")

orders_df.head()


Unnamed: 0,order_id,user_id,restaurant_id,order_date,total_amount,restaurant_name
0,1,2508,450,18-02-2023,842.97,New Foods Chinese
1,2,2693,309,18-01-2023,546.68,Ruchi Curry House Multicuisine
2,3,2084,107,15-07-2023,163.93,Spice Kitchen Punjabi
3,4,319,224,04-10-2023,1155.97,Darbar Kitchen Non-Veg
4,5,1064,293,25-12-2023,1321.91,Royal Eatery South Indian


In [6]:
users_df = pd.read_json("users.json")

users_df.head()


Unnamed: 0,user_id,name,city,membership
0,1,User_1,Chennai,Regular
1,2,User_2,Pune,Gold
2,3,User_3,Bangalore,Gold
3,4,User_4,Bangalore,Regular
4,5,User_5,Pune,Gold


In [8]:
conn = sqlite3.connect("restaurants.db")

# drop table so reruns donâ€™t fail
conn.execute("DROP TABLE IF EXISTS restaurants")

sql_text = Path("restaurants.sql").read_text(encoding="utf-8")
conn.executescript(sql_text)

restaurants_df = pd.read_sql("SELECT * FROM restaurants", conn)

restaurants_df.head()


Unnamed: 0,restaurant_id,restaurant_name,cuisine,rating
0,1,Restaurant_1,Chinese,4.8
1,2,Restaurant_2,Indian,4.1
2,3,Restaurant_3,Mexican,4.3
3,4,Restaurant_4,Chinese,4.1
4,5,Restaurant_5,Chinese,4.8


In [10]:
print("Orders:", orders_df.shape)
print("Users:", users_df.shape)
print("Restaurants:", restaurants_df.shape)


Orders: (10000, 6)
Users: (3000, 4)
Restaurants: (500, 4)


In [12]:
merged_df = pd.merge(
    orders_df,
    users_df,
    on="user_id",
    how="left"
)

merged_df.head()


Unnamed: 0,order_id,user_id,restaurant_id,order_date,total_amount,restaurant_name,name,city,membership
0,1,2508,450,18-02-2023,842.97,New Foods Chinese,User_2508,Hyderabad,Regular
1,2,2693,309,18-01-2023,546.68,Ruchi Curry House Multicuisine,User_2693,Pune,Regular
2,3,2084,107,15-07-2023,163.93,Spice Kitchen Punjabi,User_2084,Chennai,Gold
3,4,319,224,04-10-2023,1155.97,Darbar Kitchen Non-Veg,User_319,Bangalore,Gold
4,5,1064,293,25-12-2023,1321.91,Royal Eatery South Indian,User_1064,Pune,Regular


In [14]:
merged_df = pd.merge(
    merged_df,
    restaurants_df,
    on="restaurant_id",
    how="left",
    suffixes=("_order", "_restaurant")
)

merged_df.head()


Unnamed: 0,order_id,user_id,restaurant_id,order_date,total_amount,restaurant_name_order,name,city,membership,restaurant_name_restaurant,cuisine_order,rating_order,restaurant_name,cuisine_restaurant,rating_restaurant
0,1,2508,450,18-02-2023,842.97,New Foods Chinese,User_2508,Hyderabad,Regular,Restaurant_450,Mexican,3.2,Restaurant_450,Mexican,3.2
1,2,2693,309,18-01-2023,546.68,Ruchi Curry House Multicuisine,User_2693,Pune,Regular,Restaurant_309,Indian,4.5,Restaurant_309,Indian,4.5
2,3,2084,107,15-07-2023,163.93,Spice Kitchen Punjabi,User_2084,Chennai,Gold,Restaurant_107,Mexican,4.0,Restaurant_107,Mexican,4.0
3,4,319,224,04-10-2023,1155.97,Darbar Kitchen Non-Veg,User_319,Bangalore,Gold,Restaurant_224,Chinese,4.8,Restaurant_224,Chinese,4.8
4,5,1064,293,25-12-2023,1321.91,Royal Eatery South Indian,User_1064,Pune,Regular,Restaurant_293,Italian,3.0,Restaurant_293,Italian,3.0


In [16]:
merged_df.shape


(10000, 15)

In [17]:
merged_df["order_date"] = pd.to_datetime(
    merged_df["order_date"],
    errors="coerce"
)

merged_df["quarter"] = merged_df["order_date"].dt.to_period("Q")

merged_df[["order_date", "quarter"]].head()


  merged_df["order_date"] = pd.to_datetime(


Unnamed: 0,order_date,quarter
0,2023-02-18,2023Q1
1,2023-01-18,2023Q1
2,2023-07-15,2023Q3
3,2023-10-04,2023Q4
4,2023-12-25,2023Q4


In [22]:
merged_df.to_csv(
    "final_food_delivery_dataset.csv",
    index=False
)

print("Final dataset saved")


Final dataset saved


In [23]:
merged_df[merged_df["membership"] == "Gold"] \
    .groupby("city", as_index=False)["total_amount"] \
    .sum() \
    .sort_values("total_amount", ascending=False)


Unnamed: 0,city,total_amount
1,Chennai,1080909.79
3,Pune,1003012.32
0,Bangalore,994702.59
2,Hyderabad,896740.19


In [None]:
merged_df \
    .groupby("cuisine_restaurant", as_index=False)["total_amount"] \
    .mean() \
    .sort_values("total_amount", ascending=False)

KeyError: 'cuisine'

In [27]:
# Inspect merged_df for available columns and sample rows
print("columns:", list(merged_df.columns))
merged_df.head()

columns: ['order_id', 'user_id', 'restaurant_id', 'order_date', 'total_amount', 'restaurant_name_order', 'name', 'city', 'membership', 'restaurant_name_restaurant', 'cuisine_order', 'rating_order', 'restaurant_name', 'cuisine_restaurant', 'rating_restaurant', 'quarter']


Unnamed: 0,order_id,user_id,restaurant_id,order_date,total_amount,restaurant_name_order,name,city,membership,restaurant_name_restaurant,cuisine_order,rating_order,restaurant_name,cuisine_restaurant,rating_restaurant,quarter
0,1,2508,450,2023-02-18,842.97,New Foods Chinese,User_2508,Hyderabad,Regular,Restaurant_450,Mexican,3.2,Restaurant_450,Mexican,3.2,2023Q1
1,2,2693,309,2023-01-18,546.68,Ruchi Curry House Multicuisine,User_2693,Pune,Regular,Restaurant_309,Indian,4.5,Restaurant_309,Indian,4.5,2023Q1
2,3,2084,107,2023-07-15,163.93,Spice Kitchen Punjabi,User_2084,Chennai,Gold,Restaurant_107,Mexican,4.0,Restaurant_107,Mexican,4.0,2023Q3
3,4,319,224,2023-10-04,1155.97,Darbar Kitchen Non-Veg,User_319,Bangalore,Gold,Restaurant_224,Chinese,4.8,Restaurant_224,Chinese,4.8,2023Q4
4,5,1064,293,2023-12-25,1321.91,Royal Eatery South Indian,User_1064,Pune,Regular,Restaurant_293,Italian,3.0,Restaurant_293,Italian,3.0,2023Q4
