In [24]:
import pandas as pd
import numpy as np

ads_df = pd.read_csv("dataset/Ads_Performance.csv")

ads_df.head()


Unnamed: 0,date,SKU,impressions,clicks,costPerClick,clickThroughRate,cost,spend,currency-code,purchases1d,...,unitsSoldClicks14d,unitsSoldClicks30d,sales1d,sales7d,sales14d,sales30d,acosClicks7d,acosClicks14d,roasClicks7d,roasClicks14d
0,2025-09-01,MN-16,2016,14,1.998,0.694,27.97,27.97,USD,1,...,1,1,18.89,18.89,18.89,18.89,148.067761,148.067761,0.675366,0.675366
1,2025-09-01,MN-15,1810,33,2.062,1.823,68.06,68.06,USD,4,...,6,6,107.45,161.31,161.31,161.31,42.192053,42.192053,2.370115,2.370115
2,2025-09-01,MN-14,613,9,2.85,1.468,25.65,25.65,USD,0,...,1,1,0.0,90.98,90.98,90.98,28.193009,28.193009,3.546979,3.546979
3,2025-09-01,MN-17,1271,3,5.62,0.236,16.86,16.86,USD,2,...,9,9,274.41,274.41,274.41,274.41,6.144091,6.144091,16.275801,16.275801
4,2025-09-01,MN-25,1036,10,4.34,0.965,43.4,43.4,USD,1,...,8,8,218.3,851.24,851.24,851.24,5.098445,5.098445,19.613825,19.613825


In [25]:
ads_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12777 entries, 0 to 12776
Data columns (total 25 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   date                12777 non-null  object 
 1   SKU                 12777 non-null  object 
 2   impressions         12777 non-null  int64  
 3   clicks              12777 non-null  int64  
 4   costPerClick        9069 non-null   float64
 5   clickThroughRate    12715 non-null  float64
 6   cost                12777 non-null  float64
 7   spend               12777 non-null  float64
 8   currency-code       12777 non-null  object 
 9   purchases1d         12777 non-null  int64  
 10  purchases7d         12777 non-null  int64  
 11  purchases14d        12777 non-null  int64  
 12  purchases30d        12777 non-null  int64  
 13  unitsSoldClicks1d   12777 non-null  int64  
 14  unitsSoldClicks7d   12777 non-null  int64  
 15  unitsSoldClicks14d  12777 non-null  int64  
 16  unit

In [26]:
ads_df.columns

Index(['date', 'SKU', 'impressions', 'clicks', 'costPerClick',
       'clickThroughRate', 'cost', 'spend', 'currency-code', 'purchases1d',
       'purchases7d', 'purchases14d', 'purchases30d', 'unitsSoldClicks1d',
       'unitsSoldClicks7d', 'unitsSoldClicks14d', 'unitsSoldClicks30d',
       'sales1d', 'sales7d', 'sales14d', 'sales30d', 'acosClicks7d',
       'acosClicks14d', 'roasClicks7d', 'roasClicks14d'],
      dtype='object')

In [27]:
numeric_cols = [
    "impressions",
    "clicks",
    "spend",
    "sales30d"
]

ads_df[numeric_cols] = ads_df[numeric_cols].fillna(0)


In [28]:
# Click-Through Rate (CTR)
ads_df["CTR"] = (
    ads_df["clicks"] / ads_df["impressions"]
).replace([float("inf")], 0).fillna(0)

In [29]:
#ACOS (use 30-day sales)
ads_df["ACOS"] = (
    ads_df["spend"] / ads_df["sales30d"]
).replace([float("inf")], 0).fillna(0)

In [30]:
#Ads-Based Pricing Guardrail

ads_df["Ads_Pricing_Flag"] = "Neutral"

ads_df.loc[
    (ads_df["impressions"] > ads_df["impressions"].median()) &
    (ads_df["CTR"] < ads_df["CTR"].median()),
    "Ads_Pricing_Flag"
] = "Price / Value Issue"

ads_df.loc[
    (ads_df["impressions"] <= ads_df["impressions"].median()),
    "Ads_Pricing_Flag"
] = "Visibility Issue"


In [31]:
# Aggregate to SKU level  - Ads data is date-level, pricing is SKU-level.

ads_signals = (
    ads_df
    .groupby("SKU")
    .agg({
        "CTR": "mean",
        "ACOS": "mean",
        "Ads_Pricing_Flag": lambda x: x.value_counts().idxmax()
    })
    .reset_index()
)

ads_signals.head()

Unnamed: 0,SKU,CTR,ACOS,Ads_Pricing_Flag
0,MN-01,0.006615,0.159165,Visibility Issue
1,MN-02,0.006336,0.050161,Visibility Issue
2,MN-03,0.008717,0.017866,Visibility Issue
3,MN-04,0.006524,0.03352,Visibility Issue
4,MN-05,0.012886,0.107274,Neutral


In [32]:
ads_signals.to_csv(
    "final_outputs/ads_signals.csv",
    index=False
)

### Interpretation
- Visibility Issue = Don’t cut price
- Price / Value Issue = Price review justified

## Returns_Data: Minimal Quality Safeguard Analysis
- objective: To ensure pricing actions are not applied to poor-quality or mismatch products.


In [33]:
import pandas as pd
import numpy as np

In [34]:
# Load Return Data
returns_df = pd.read_csv("dataset/Returns_Data.csv")

returns_df.head()

Unnamed: 0,SKU,Return Quantity \n(Last 7 days),Return Quantity \n(Last 30 days),Return Quantity \n(Last 60 days),Return Quantity \n(Last 90 days)
0,MN-01,0,9,18,26
1,MN-02,0,4,14,24
2,MN-03,1,4,10,13
3,MN-04,1,4,6,16
4,MN-09,0,2,7,11


In [35]:

returns_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 5 columns):
 #   Column                           Non-Null Count  Dtype 
---  ------                           --------------  ----- 
 0   SKU                              50 non-null     object
 1   Return Quantity 
(Last 7 days)   50 non-null     object
 2   Return Quantity 
(Last 30 days)  50 non-null     int64 
 3   Return Quantity 
(Last 60 days)  50 non-null     int64 
 4   Return Quantity 
(Last 90 days)  50 non-null     int64 
dtypes: int64(3), object(2)
memory usage: 2.1+ KB


In [36]:
returns_df = returns_df.rename(columns={
    "Return Quantity \n(Last 7 days)": "returns_7d",
    "Return Quantity \n(Last 30 days)": "returns_30d",
    "Return Quantity \n(Last 60 days)": "returns_60d",
    "Return Quantity \n(Last 90 days)": "returns_90d"
})


In [37]:
returns_cols = ["returns_7d"]

returns_df[returns_cols] = returns_df[returns_cols].apply(
    pd.to_numeric, errors="coerce"
)

In [38]:
# Create a Return Trend Signal 
returns_df["Return_Trend"] = (
    returns_df["returns_30d"] - returns_df["returns_7d"])

Positive → returns accumulating;
Near zero → stable;
Negative → improving (rare)

In [39]:
# Flag high-return SKUs 
returns_df["High_Return_Risk"] = (
    returns_df["returns_90d"] >
    returns_df["returns_90d"].median()
)

In [40]:
returns_df["High_Return_Risk"].value_counts()

High_Return_Risk
False    27
True     23
Name: count, dtype: int64

In [41]:
## Final returns signal table
returns_signals = returns_df[[
    "SKU",
    "returns_7d",
    "returns_30d",
    "returns_90d",
    "Return_Trend",
    "High_Return_Risk"
]]

returns_signals.head()

Unnamed: 0,SKU,returns_7d,returns_30d,returns_90d,Return_Trend,High_Return_Risk
0,MN-01,0.0,9,26,9.0,False
1,MN-02,0.0,4,24,4.0,False
2,MN-03,1.0,4,13,3.0,False
3,MN-04,1.0,4,16,3.0,False
4,MN-09,0.0,2,11,2.0,False


In [42]:
returns_signals.to_csv(
    "final_outputs/returns_signals.csv",
    index=False
)