In [1]:
import pandas as pd
import numpy as np

In [2]:
analysis_df = pd.read_csv("final_food_delivery_dataset.csv")
analysis_df.head()

Unnamed: 0,order_id,user_id,restaurant_id,order_date,order_amount,name,city,membership,restaurant_name,cuisine,rating,order_month,order_year,order_day,order_value_band,user_order_count,loyalty_group
0,1,2508,450,2023-02-18,842.97,User_2508,Hyderabad,Regular,Restaurant_450,Mexican,3.2,2,2023,Saturday,High Spend,3,Returning User
1,2,2693,309,2023-01-18,546.68,User_2693,Pune,Regular,Restaurant_309,Indian,4.5,1,2023,Wednesday,High Spend,2,New User
2,3,2084,107,2023-07-15,163.93,User_2084,Chennai,Gold,Restaurant_107,Mexican,4.0,7,2023,Saturday,Low Spend,3,Returning User
3,4,319,224,2023-10-04,1155.97,User_319,Bangalore,Gold,Restaurant_224,Chinese,4.8,10,2023,Wednesday,Premium Spend,8,Loyal User
4,5,1064,293,2023-12-25,1321.91,User_1064,Pune,Regular,Restaurant_293,Italian,3.0,12,2023,Monday,Premium Spend,5,Returning User


In [3]:
total_rows = len(analysis_df)
distinct_users = analysis_df["user_id"].nunique()
total_rows, distinct_users

(10000, 2883)

In [4]:
gold_orders = analysis_df[analysis_df["membership"] == "Gold"].shape[0]
gold_orders

4987

In [5]:
gold_order_percentage = round(
    (analysis_df["membership"] == "Gold").mean() * 100
)
gold_order_percentage

50

In [6]:
avg_gold_aov = round(
    analysis_df[analysis_df["membership"] == "Gold"]["order_amount"].mean(), 2
)
avg_gold_aov

797.15

In [7]:
hyderabad_revenue = round(
    analysis_df[analysis_df["city"] == "Hyderabad"]["order_amount"].sum()
)
hyderabad_revenue

1889367

In [8]:
top_gold_city = (
    analysis_df[analysis_df["membership"] == "Gold"]
    .groupby("city")["order_amount"]
    .sum()
    .idxmax()
)
top_gold_city

'Chennai'

In [9]:
gold_city_aov = (
    analysis_df[analysis_df["membership"] == "Gold"]
    .groupby("city")["order_amount"]
    .mean()
    .idxmax()
)

gold_city_aov

'Chennai'

In [10]:
gold_orders_top_city = analysis_df[
    (analysis_df["membership"] == "Gold") &
    (analysis_df["city"] == top_gold_city)
].shape[0]

gold_orders_top_city

1337

In [11]:
top_aov_cuisine = (
    analysis_df.groupby("cuisine")["order_amount"]
    .mean()
    .idxmax()
)

top_aov_cuisine

'Mexican'

In [12]:
cuisine_summary = analysis_df.groupby("cuisine").agg(
    restaurant_count=("restaurant_id", "nunique"),
    total_revenue=("order_amount", "sum")
)

cuisine_summary.sort_values(
    ["restaurant_count", "total_revenue"],
    ascending=[True, False]
)

Unnamed: 0_level_0,restaurant_count,total_revenue
cuisine,Unnamed: 1_level_1,Unnamed: 2_level_1
Chinese,120,1930504.65
Italian,126,2024203.8
Indian,126,1971412.58
Mexican,128,2085503.09


In [13]:
user_spending = analysis_df.groupby("user_id")["order_amount"].sum()

high_value_users_count = user_spending[user_spending > 1000].count()
high_value_users_count

2544

In [14]:
high_rating_orders = analysis_df[analysis_df["rating"] >= 4.5].shape[0]
high_rating_orders

3374

In [15]:
analysis_df["rating_bucket"] = pd.cut(
    analysis_df["rating"],
    bins=[0, 3.5, 4.0, 4.5, 5.0],
    labels=["3.0–3.5", "3.6–4.0", "4.1–4.5", "4.6–5.0"]
)
analysis_df.groupby("rating_bucket")["order_amount"].sum().idxmax()

  analysis_df.groupby("rating_bucket")["order_amount"].sum().idxmax()


'4.6–5.0'

In [16]:
restaurant_stats = analysis_df.groupby("restaurant_name").agg(
    avg_order_value=("order_amount", "mean"),
    total_orders=("order_id", "count")
)
restaurant_stats[
    restaurant_stats["total_orders"] < 20
].sort_values("avg_order_value", ascending=False).head(1)

Unnamed: 0_level_0,avg_order_value,total_orders
restaurant_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Restaurant_294,1040.222308,13


In [17]:
analysis_df.groupby(
    ["membership", "cuisine"]
)["order_amount"].sum().idxmax()

('Regular', 'Mexican')

In [18]:
analysis_df["order_date"] = pd.to_datetime(analysis_df["order_date"])
analysis_df["quarter"] = analysis_df["order_date"].dt.to_period("Q")

analysis_df.groupby("quarter")["order_amount"].sum().idxmax()

Period('2023Q3', 'Q-DEC')

In [19]:
summary = {
    "Total Orders": total_rows,
    "Distinct Users": distinct_users,
    "Gold Orders": gold_orders,
    "Gold Order %": gold_order_percentage,
    "Top Gold City": top_gold_city,
    "Top Cuisine by AOV": top_aov_cuisine
}
summary

{'Total Orders': 10000,
 'Distinct Users': 2883,
 'Gold Orders': 4987,
 'Gold Order %': 50,
 'Top Gold City': 'Chennai',
 'Top Cuisine by AOV': 'Mexican'}

In [20]:
combo_revenue = (
    analysis_df
    .groupby(["membership", "cuisine"])["order_amount"]
    .sum()
    .reset_index(name="total_revenue")
)

In [21]:
top_combo = combo_revenue.sort_values("total_revenue", ascending=False).iloc[0]

top_combo

membership         Regular
cuisine            Mexican
total_revenue    1072943.3
Name: 7, dtype: object

In [22]:
print(
    f"Highest revenue combination: {top_combo['membership']} + "
    f"{top_combo['cuisine']} (₹{int(top_combo['total_revenue'])})"
)


Highest revenue combination: Regular + Mexican (₹1072943)


In [23]:
analysis_df.groupby(["membership","cuisine"])["order_amount"].sum().idxmax()


('Regular', 'Mexican')