In [49]:
import pandas as pd
import numpy as np
import datetime as dt

In [50]:
# Load the 'orders' table straight from the shared CSV link.
orders = pd.read_csv(
    filepath_or_buffer="https://www.dropbox.com/scl/fi/aous0qee7le68ar8pe5ol/JD_order_data.csv?rlkey=7zu1gc23v228yosjl1tpmc2xg&st=b8io8lhi&dl=1",
)

In [51]:
# Preview the loaded orders table (the notebook display elides the middle rows).
orders

Unnamed: 0,order_ID,user_ID,sku_ID,order_date,order_time,quantity,type,promise,original_unit_price,final_unit_price,direct_discount_per_unit,quantity_discount_per_unit,bundle_discount_per_unit,coupon_discount_per_unit,gift_item,dc_ori,dc_des
0,d0cf5cc6db,0abe9ef2ce,581d5b54c1,2018-03-01,2018-03-01 17:14:25.0,1,2,-,89.0,79.0,0.0,10.0,0.0,0.0,0,4,28
1,7444318d01,33a9e56257,067b673f2b,2018-03-01,2018-03-01 11:10:40.0,1,1,2,99.9,53.9,5.0,41.0,0.0,0.0,0,28,28
2,f973b01694,4ea3cf408f,623d0a582a,2018-03-01,2018-03-01 09:13:26.0,1,1,2,78.0,58.5,19.5,0.0,0.0,0.0,0,28,28
3,8c1cec8d4b,b87cb736cb,fc5289b139,2018-03-01,2018-03-01 21:29:50.0,1,1,2,61.0,35.0,0.0,26.0,0.0,0.0,0,4,28
4,d43a33c38a,4829223b6f,623d0a582a,2018-03-01,2018-03-01 19:13:37.0,1,1,1,78.0,53.0,19.0,0.0,0.0,6.0,0,3,16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
549984,3ad06b9fbe,a27b3ed4d4,a9109972d1,2018-03-31,2018-03-31 01:22:47.0,1,2,-,0.0,-1.0,0.0,0.0,0.0,1.0,1,2,2
549985,c9d77a7ed0,18f92434cd,7f53769d3f,2018-03-31,2018-03-31 08:55:57.0,1,1,3,118.0,55.0,63.0,0.0,0.0,0.0,0,59,2
549986,b9ad79338f,b5caf8a580,8dc4a01dec,2018-03-31,2018-03-31 13:31:01.0,1,1,2,78.0,78.0,0.0,0.0,0.0,0.0,0,2,2
549987,be3a9414b1,20ba6655f3,2dd6b818ec,2018-03-31,2018-03-31 12:51:18.0,1,2,-,189.0,78.0,111.0,0.0,0.0,0.0,0,4,28


In [None]:
# Derive one boolean flag per discount type: True when the corresponding
# per-unit discount amount on the row is strictly positive.
orders["has_direct"] = orders["direct_discount_per_unit"].gt(0)
orders["has_quantity"] = orders["quantity_discount_per_unit"].gt(0)
orders["has_bundle"] = orders["bundle_discount_per_unit"].gt(0)
orders["has_coupon"] = orders["coupon_discount_per_unit"].gt(0)

In [None]:
# Tally the discount flags per row: each True counts as 1, so the result is
# the number of distinct discount types applied on that row.
orders["discount_count"] = orders.loc[
    :, ["has_direct", "has_quantity", "has_bundle", "has_coupon"]
].sum(axis="columns")

# Drop stacked rows, i.e. retain only rows carrying zero or one discount type.
# .copy() yields an independent frame, so columns added to it later do not
# write back into `orders`.
orders_non_stacked = orders.loc[orders["discount_count"].le(1)].copy()

In [None]:
# Map a single row to the name of its discount group.
def label_discount(row):
    """Return the discount-group label for one row.

    Parameters
    ----------
    row : mapping-like
        Must support ``row[key]`` for ``"discount_count"`` and, when that
        count is non-zero, for the ``has_*`` flag keys.

    Returns
    -------
    str or None
        ``"No Discount"`` when ``discount_count`` equals 0; otherwise the
        label of the first truthy flag, checked in the order direct,
        quantity, bundle, coupon. ``None`` if the count is non-zero but no
        flag is truthy.
    """
    # Guard clause: nothing applied, no need to look at the flags.
    if row["discount_count"] == 0:
        return "No Discount"

    # Checked in priority order; the first truthy flag wins.
    flag_labels = (
        ("has_direct", "Direct Only"),
        ("has_quantity", "Quantity Only"),
        ("has_bundle", "Bundle Only"),
        ("has_coupon", "Coupon Only"),
    )
    for flag_name, group_label in flag_labels:
        if row[flag_name]:
            return group_label

    # Non-zero count with no flag set: no label.
    return None


# Assign a discount_group label to every non-stacked row.
# Vectorised with np.select instead of a row-wise DataFrame.apply(axis=1),
# which would call a Python function once per row.
# Conditions are evaluated in the same priority order as label_discount
# (direct, quantity, bundle, coupon); rows with no flag set fall through to
# the default "No Discount", matching a discount_count of 0.
orders_non_stacked["discount_group"] = np.select(
    [
        orders_non_stacked["has_direct"],
        orders_non_stacked["has_quantity"],
        orders_non_stacked["has_bundle"],
        orders_non_stacked["has_coupon"],
    ],
    ["Direct Only", "Quantity Only", "Bundle Only", "Coupon Only"],
    default="No Discount",
)

# Show counts by type
print(orders_non_stacked["discount_group"].value_counts())

discount_group
Direct Only      176026
No Discount      159541
Quantity Only     37953
Coupon Only       21783
Bundle Only        5438
Name: count, dtype: int64


In [57]:
# List the columns now present, including the derived flag, count and label columns.
orders_non_stacked.columns

Index(['order_ID', 'user_ID', 'sku_ID', 'order_date', 'order_time', 'quantity',
       'type', 'promise', 'original_unit_price', 'final_unit_price',
       'direct_discount_per_unit', 'quantity_discount_per_unit',
       'bundle_discount_per_unit', 'coupon_discount_per_unit', 'gift_item',
       'dc_ori', 'dc_des', 'has_direct', 'has_quantity', 'has_bundle',
       'has_coupon', 'discount_count', 'discount_group'],
      dtype='object')

In [None]:
# Columns carried into the analysis frame; the has_* flags, discount_count
# and the remaining raw columns are left behind.
columns_to_keep = [
    # identifiers
    "order_ID",
    "user_ID",
    "sku_ID",
    # timestamps
    "order_date",
    "order_time",
    # quantity and prices
    "quantity",
    "original_unit_price",
    "final_unit_price",
    # per-unit discount amounts
    "direct_discount_per_unit",
    "quantity_discount_per_unit",
    "bundle_discount_per_unit",
    "coupon_discount_per_unit",
    # derived label
    "discount_group",
]

# Independent copy restricted to the selected columns, then displayed.
df_analysis = orders_non_stacked.loc[:, columns_to_keep].copy()
df_analysis

Unnamed: 0,order_ID,user_ID,sku_ID,order_date,order_time,quantity,original_unit_price,final_unit_price,direct_discount_per_unit,quantity_discount_per_unit,bundle_discount_per_unit,coupon_discount_per_unit,discount_group
0,d0cf5cc6db,0abe9ef2ce,581d5b54c1,2018-03-01,2018-03-01 17:14:25.0,1,89.0,79.0,0.0,10.0,0.0,0.0,Quantity Only
2,f973b01694,4ea3cf408f,623d0a582a,2018-03-01,2018-03-01 09:13:26.0,1,78.0,58.5,19.5,0.0,0.0,0.0,Direct Only
3,8c1cec8d4b,b87cb736cb,fc5289b139,2018-03-01,2018-03-01 21:29:50.0,1,61.0,35.0,0.0,26.0,0.0,0.0,Quantity Only
6,89286e5fd9,79154d0001,6717b7c979,2018-03-01,2018-03-01 22:18:41.0,1,0.0,0.0,0.0,0.0,0.0,0.0,No Discount
7,72585b87a6,d5e8910932,d829f03a28,2018-03-01,2018-03-01 15:28:49.0,2,79.9,40.9,0.0,39.0,0.0,0.0,Quantity Only
...,...,...,...,...,...,...,...,...,...,...,...,...,...
549984,3ad06b9fbe,a27b3ed4d4,a9109972d1,2018-03-31,2018-03-31 01:22:47.0,1,0.0,-1.0,0.0,0.0,0.0,1.0,Coupon Only
549985,c9d77a7ed0,18f92434cd,7f53769d3f,2018-03-31,2018-03-31 08:55:57.0,1,118.0,55.0,63.0,0.0,0.0,0.0,Direct Only
549986,b9ad79338f,b5caf8a580,8dc4a01dec,2018-03-31,2018-03-31 13:31:01.0,1,78.0,78.0,0.0,0.0,0.0,0.0,No Discount
549987,be3a9414b1,20ba6655f3,2dd6b818ec,2018-03-31,2018-03-31 12:51:18.0,1,189.0,78.0,111.0,0.0,0.0,0.0,Direct Only
