## ROAS Analysis

This notebook extends the day-one analytics exploration by focusing on return on ad spend (ROAS). It performs the following:

- Resolve paths relative to the project root and connect to `data/processed/events.parquet` (with a CSV fallback).
- Load the processed event data and add a user-level ROAS calculation where spend is non-zero.
- Aggregate ROAS and related spend/revenue metrics by acquisition channel and by channel-platform pairs to highlight performance pockets.
- Provide utility outputs for further experimentation (e.g., head of the dataset) without altering upstream notebooks.

All computations assume the dataset already contains `CAC` (customer acquisition cost) and `revenue` columns for each user record, enabling ROAS = `revenue / CAC`.

In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

try:
    import seaborn as sns
    sns.set_theme(style='whitegrid')
except Exception:
    try:
        plt.style.use('seaborn-whitegrid')
    except OSError:
        plt.style.use('ggplot')

PROJ_ROOT = Path.cwd()
while PROJ_ROOT != PROJ_ROOT.parent and not (PROJ_ROOT / 'data').exists():
    PROJ_ROOT = PROJ_ROOT.parent

DATA_DIR = PROJ_ROOT / 'data' / 'processed'
PARQUET_PATH = DATA_DIR / 'events.parquet'
CSV_PATH = DATA_DIR / 'clean_data.csv'

REPORTS_DIR = PROJ_ROOT / 'reports'
FIGURES_DIR = REPORTS_DIR / 'figures'
REPORTS_DIR.mkdir(parents=True, exist_ok=True)
FIGURES_DIR.mkdir(parents=True, exist_ok=True)

ROAS_SUMMARY_PATH = REPORTS_DIR / 'roas_summary.csv'
PAYBACK_FIG_PATH = FIGURES_DIR / 'payback_curves.png'
HTML_REPORT_PATH = REPORTS_DIR / 'roas_payback.html'

print(f'Project root: {PROJ_ROOT}')
print(f'Parquet path exists: {PARQUET_PATH.exists()}')
print(f'CSV fallback exists: {CSV_PATH.exists()}')
print(f'Reports directory: {REPORTS_DIR}')
print(f'Figures directory: {FIGURES_DIR}')

Project root: c:\Users\umyana\Documents\mobile_game_analytics_pipeline
Parquet path exists: True
CSV fallback exists: True
Reports directory: c:\Users\umyana\Documents\mobile_game_analytics_pipeline\reports
Figures directory: c:\Users\umyana\Documents\mobile_game_analytics_pipeline\reports\figures


In [5]:
try:
    df = pd.read_parquet(PARQUET_PATH)
    source_used = PARQUET_PATH
except (ImportError, ValueError, FileNotFoundError) as err:
    print(f"Parquet unavailable ({err}); falling back to CSV")
    df = pd.read_csv(CSV_PATH)
    source_used = CSV_PATH

print(f"Loaded {len(df):,} rows from {source_used}")
df.head()

Loaded 90,189 rows from c:\Users\umyana\Documents\mobile_game_analytics_pipeline\data\processed\events.parquet


Unnamed: 0,userid,version,session_count,retention_1,retention_7,acquisition_channel,country,platform,purchase,CAC,revenue,ROI
0,116,gate_30,3,False,False,Facebook,USA,Google Play,0,2.8,0.038024,-0.98642
1,337,gate_30,38,True,False,TikTok,USA,Google Play,0,1.7,0.100486,-0.94089
2,377,gate_40,165,True,False,Facebook,USA,Google Play,0,2.8,0.140215,-0.949923
3,483,gate_40,1,False,False,Facebook,Mexico,Google Play,0,2.8,0.019012,-0.99321
4,488,gate_40,179,True,True,TikTok,USA,App Store,0,1.7,1.23444,-0.273859


In [6]:
df = df.copy()
df["user_roas"] = np.where(df["CAC"] > 0, df["revenue"] / df["CAC"], np.nan)

non_null_roas = df["user_roas"].notna().mean() * 100
print(f"Computed user-level ROAS for {non_null_roas:.2f}% of rows (CAC > 0)")
df[["userid", "acquisition_channel", "platform", "CAC", "revenue", "user_roas"]].head()

Computed user-level ROAS for 100.00% of rows (CAC > 0)


Unnamed: 0,userid,acquisition_channel,platform,CAC,revenue,user_roas
0,116,Facebook,Google Play,2.8,0.038024,0.01358
1,337,TikTok,Google Play,1.7,0.100486,0.05911
2,377,Facebook,Google Play,2.8,0.140215,0.050077
3,483,Facebook,Google Play,2.8,0.019012,0.00679
4,488,TikTok,App Store,1.7,1.23444,0.726141


In [8]:
channel_roas = (
    df.groupby("acquisition_channel")
    .agg(
        users=("userid", "count"),
        unique_users=("userid", "nunique"),
        spend=("CAC", "sum"),
        revenue=("revenue", "sum"),
        avg_cac=("CAC", "mean"),
        avg_revenue=("revenue", "mean"),
        median_roas=("user_roas", "median"),
    )
    .assign(roas=lambda t: np.where(t["spend"] > 0, t["revenue"] / t["spend"], np.nan))
    .sort_values("roas", ascending=False)
)

channel_roas.round({
    "spend": 2,
    "revenue": 2,
    "avg_cac": 2,
    "avg_revenue": 2,
    "median_roas": 2,
    "roas": 2,
})

Unnamed: 0_level_0,users,unique_users,spend,revenue,avg_cac,avg_revenue,median_roas,roas
acquisition_channel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Organic,9153,9153,2745.9,11075.58,0.3,1.21,0.33,4.03
TikTok,17979,17979,30564.3,23171.15,1.7,1.29,0.06,0.76
Instagram,36281,36281,83446.3,42801.46,2.3,1.18,0.04,0.51
Facebook,26776,26776,74972.8,32186.4,2.8,1.2,0.04,0.43


In [9]:
non_zero_spend = df["CAC"] > 0
payback_curve = (
    df.loc[non_zero_spend]
    .sort_values("user_roas", ascending=False)
    .assign(
        cum_spend=lambda t: t["CAC"].cumsum(),
        cum_revenue=lambda t: t["revenue"].cumsum(),
    )
)

total_spend = payback_curve["CAC"].sum()
total_revenue = payback_curve["revenue"].sum()

payback_curve["spend_share"] = np.where(total_spend > 0, payback_curve["cum_spend"] / total_spend, np.nan)
payback_curve["payback_ratio"] = np.where(
    payback_curve["cum_spend"] > 0,
    payback_curve["cum_revenue"] / payback_curve["cum_spend"],
    np.nan,
)

fig, ax = plt.subplots(figsize=(9, 5))
ax.plot(payback_curve["spend_share"], payback_curve["payback_ratio"], label="Cumulative ROAS")
ax.axhline(1.0, color="tomato", linestyle="--", label="Break-even ROAS")

break_even_share = np.nan
if not payback_curve.empty and (payback_curve["payback_ratio"] >= 1).any():
    break_even_share = payback_curve.loc[payback_curve["payback_ratio"] >= 1, "spend_share"].iloc[0]
    ax.axvline(break_even_share, color="gray", linestyle=":", label="Break-even Spend Share")

ax.set_xlabel("Cumulative spend share")
ax.set_ylabel("Cumulative revenue / spend")
ax.set_title("Cumulative ROAS payback curve")
ax.set_xlim(0, 1)
ax.set_ylim(bottom=0)
ax.legend()
fig.tight_layout()
fig.savefig(PAYBACK_FIG_PATH, dpi=120, bbox_inches="tight")
plt.close(fig)

break_even_pct = break_even_share * 100 if not np.isnan(break_even_share) else np.nan
payback_metrics = {
    "total_spend": float(total_spend),
    "total_revenue": float(total_revenue),
    "break_even_share": float(break_even_share) if not np.isnan(break_even_share) else np.nan,
}

print(f"Saved payback curve figure to {PAYBACK_FIG_PATH}")
if np.isnan(break_even_pct):
    print("Break-even spend share not reached within available spend.")
else:
    print(f"Break-even spend share: {break_even_pct:.2f}% of cumulative spend")

payback_curve[["spend_share", "payback_ratio"]].head()

Saved payback curve figure to c:\Users\umyana\Documents\mobile_game_analytics_pipeline\reports\figures\payback_curves.png
Break-even spend share: 0.00% of cumulative spend


Unnamed: 0,spend_share,payback_ratio
36693,2e-06,659.021577
37414,3e-06,553.469325
68659,5e-06,516.263027
3547,6e-06,496.249346
36933,8e-06,483.397155


In [10]:
channel_summary = channel_roas.reset_index().rename(columns={"acquisition_channel": "channel"})
channel_summary.to_csv(ROAS_SUMMARY_PATH, index=False)
channel_summary_display = channel_summary.copy()
for col in ["spend", "revenue", "avg_cac", "avg_revenue", "median_roas", "roas"]:
    channel_summary_display[col] = channel_summary_display[col].round(4)
print(f"Saved ROAS summary to {ROAS_SUMMARY_PATH}")
channel_summary_display

Saved ROAS summary to c:\Users\umyana\Documents\mobile_game_analytics_pipeline\reports\roas_summary.csv


Unnamed: 0,channel,users,unique_users,spend,revenue,avg_cac,avg_revenue,median_roas,roas
0,Organic,9153,9153,2745.9,11075.5847,0.3,1.21,0.335,4.0335
1,TikTok,17979,17979,30564.3,23171.149,1.7,1.2888,0.0587,0.7581
2,Instagram,36281,36281,83446.3,42801.4578,2.3,1.1797,0.0431,0.5129
3,Facebook,26776,26776,74972.8,32186.3972,2.8,1.2021,0.0354,0.4293


In [11]:
plot_rel_path = PAYBACK_FIG_PATH.relative_to(REPORTS_DIR)
if np.isnan(payback_metrics["break_even_share"]):
    break_even_text = "Not reached within observed spend"
else:
    break_even_text = f"{payback_metrics['break_even_share'] * 100:.2f}% of spend"

html_table = channel_summary_display.to_html(index=False)
html_template = f"""<!DOCTYPE html>
<html lang='en'>
<head>
<meta charset='utf-8'/>
<title>ROAS & Payback Report</title>
<style>
body {{ font-family: Arial, sans-serif; margin: 2rem; color: #1a1a1a; }}
h1 {{ font-size: 1.8rem; }}
h2 {{ margin-top: 2rem; }}
table {{ border-collapse: collapse; width: 100%; }}
th, td {{ border: 1px solid #ccc; padding: 0.4rem; text-align: right; }}
th {{ background: #f1f3f5; text-align: left; }}
blockquote {{ margin: 1rem 0; padding: 0.5rem 1rem; background: #f8f9fa; border-left: 4px solid #4c6ef5; }}
</style>
</head>
<body>
<h1>ROAS & Payback Summary</h1>
<p><strong>Dataset source:</strong> {source_used}</p>
<section>
<h2>Key Metrics</h2>
<ul>
<li>Total spend (CAC &gt; 0): ${payback_metrics['total_spend']:,.2f}</li>
<li>Total revenue (CAC &gt; 0): ${payback_metrics['total_revenue']:,.2f}</li>
<li>Break-even spend share: {break_even_text}</li>
</ul>
</section>
<section>
<h2>Channel ROAS</h2>
{html_table}
</section>
<section>
<h2>Payback Curve</h2>
<p><img src='{plot_rel_path.as_posix()}' alt='Cumulative ROAS payback curve' style='max-width: 720px; width: 100%; height: auto;'/></p>
</section>
</body>
</html>
"""

HTML_REPORT_PATH.write_text(html_template, encoding="utf-8")
print(f"HTML report saved to {HTML_REPORT_PATH}")
HTML_REPORT_PATH

HTML report saved to c:\Users\umyana\Documents\mobile_game_analytics_pipeline\reports\3.0-roas_payback.html


WindowsPath('c:/Users/umyana/Documents/mobile_game_analytics_pipeline/reports/3.0-roas_payback.html')

In [12]:
channel_platform_roas = (
    df.groupby(["acquisition_channel", "platform"])
    .agg(
        users=("userid", "count"),
        spend=("CAC", "sum"),
        revenue=("revenue", "sum"),
        avg_cac=("CAC", "mean"),
        avg_revenue=("revenue", "mean"),
        median_roas=("user_roas", "median"),
    )
    .assign(roas=lambda t: np.where(t["spend"] > 0, t["revenue"] / t["spend"], np.nan))
    .sort_values("roas", ascending=False)
)

channel_platform_roas.round({
    "spend": 2,
    "revenue": 2,
    "avg_cac": 2,
    "avg_revenue": 2,
    "median_roas": 2,
    "roas": 2,
})

Unnamed: 0_level_0,Unnamed: 1_level_0,users,spend,revenue,avg_cac,avg_revenue,median_roas,roas
acquisition_channel,platform,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Organic,App Store,2363,708.9,8333.09,0.3,3.53,2.33,11.75
TikTok,App Store,4537,7712.9,17989.81,1.7,3.97,0.41,2.33
Instagram,App Store,8931,20541.3,31030.99,2.3,3.47,0.3,1.51
Organic,Google Play,6790,2037.0,2742.49,0.3,0.4,0.27,1.35
Facebook,App Store,6668,18670.4,23938.44,2.8,3.59,0.25,1.28
TikTok,Google Play,13442,22851.4,5181.34,1.7,0.39,0.05,0.23
Instagram,Google Play,27350,62905.0,11770.47,2.3,0.43,0.04,0.19
Facebook,Google Play,20108,56302.4,8247.95,2.8,0.41,0.03,0.15


Generated artifacts:
- `reports/figures/payback_curves.png`
- `reports/roas_summary.csv`
- `reports/roas_payback.html`