In [None]:
"""
E-commerce Funnel Analysis (GA4-style) — Minimal, Non-Destructive, Reproducible

Canonical decisions (document in submission):
- Funnel staging by GA4 `event_name`: page_view → view_item → add_to_cart → begin_checkout → purchase
- Session grain = (user_pseudo_id, param.ga_session_number)
- Revenue = event_value_in_usd
- Purchase de-duplication = one row per param.transaction_id (keep LAST by event_time)
- Attribution used as-is from traffic_source_* (no normalization in base data)
- param.ecomm_pagetype is used for sanity cross-checks only (not for staging)

Outputs mapped to assignment questions:
- Q1: Overall session funnel (counts + conversion + revenue)
- Q2: Daily session funnel trend
- Q3: Channel session funnel (by traffic_source_medium/source)
- Q4: Engaged vs ALL session funnel (quality lens)
- Q5: Overall user-level funnel (first-touch across period)
- (Optional) Q6: Pagetype sanity check vs event_name
"""

In [None]:
from __future__ import annotations

import numpy as np
from pathlib import Path
from typing import Tuple, Dict, Optional
import pandas as pd

In [10]:
# ------------------------------
# Utility helpers
# ------------------------------

def require_columns(df: pd.DataFrame, cols: list[str]) -> None:
    """Raise a clear error if required columns are missing."""
    missing = [c for c in cols if c not in df.columns]
    if missing:
        raise ValueError(f"Missing required columns: {missing}")

def require_columns(df: pd.DataFrame, cols: list[str]) -> None:
    """Raise a clear error if required columns are missing."""
    missing = [c for c in cols if c not in df.columns]
    if missing:
        raise ValueError(f"Missing required columns: {missing}")

def earliest_ts_by_group(df: pd.DataFrame, key: list[str], mask: pd.Series) -> pd.DataFrame:
    """
    Return earliest event_time per group for rows where `mask` is True.
    Result: DataFrame[key + ['event_time']] with earliest times (NaT if none).
    """
    sub = df.loc[mask, key + ["event_time"]]
    if sub.empty:
        return pd.DataFrame(columns=key + ["event_time"])
    return sub.groupby(key, as_index=False)["event_time"].min()

def first_non_null_in_session(df: pd.DataFrame, key: list[str], col: str) -> pd.DataFrame:
    """
    For each session key, take the first non-null value encountered by event_time for column `col`.
    Result: DataFrame[key + [col]]
    """
    if col not in df.columns:
        return pd.DataFrame(columns=key + [col])
    ordered = df.sort_values(key + ["event_time"])
    take_first = (
        ordered.dropna(subset=[col])
               .drop_duplicates(subset=key, keep="first")[key + [col]]
    )
    return take_first

In [11]:
# ------------------------------
# Core builders
# ------------------------------

def build_deduped_purchases(df: pd.DataFrame,
                            txn_col: str,
                            session_num_col: str) -> pd.DataFrame:
    """
    Return deduplicated purchase events (one row per transaction_id, LAST by event_time),
    with a ready-to-merge session_key for mapping revenue back to session-level results.
    """
    purch = df[df["event_name"].astype(str).str.lower() == "purchase"].dropna(subset=[txn_col]).copy()
    if purch.empty:
        return pd.DataFrame(columns=["user_pseudo_id", session_num_col, txn_col, "event_time", "event_value_in_usd", "session_key"])
    purch["event_time"] = pd.to_datetime(purch["event_time"], errors="coerce")
    purch = purch.sort_values(["event_time", txn_col])
    last_per_txn = purch.groupby(txn_col, as_index=False).tail(1).copy()
    last_per_txn["event_value_in_usd"] = pd.to_numeric(last_per_txn.get("event_value_in_usd"), errors="coerce")
    last_per_txn["session_key"] = list(zip(last_per_txn["user_pseudo_id"], last_per_txn[session_num_col]))
    return last_per_txn

def build_session_stage_table(df: pd.DataFrame,
                              session_num_col: str,
                              dedup_purchases: pd.DataFrame) -> pd.DataFrame:
    """
    Build one row per session with the earliest timestamps for each funnel stage (closed ordering later),
    and attach deduplicated purchase timestamp and revenue mapped by session.
    """
    key = ["user_pseudo_id", session_num_col]
    events = df.copy()
    events["event_time"] = pd.to_datetime(events["event_time"], errors="coerce")
    events = events.sort_values(key + ["event_time"])

    # Base: one row per session with first-event timestamp & session_date
    first_event = events.groupby(key, as_index=False)["event_time"].min().rename(columns={"event_time":"ts_first_event"})
    first_event["session_date"] = first_event["ts_first_event"].dt.date

    # Earliest timestamp per stage
    visit = earliest_ts_by_group(events, key, events["event_name"].isin(["first_visit", "session_start"])).rename(columns={"event_time":"ts_visit"})
    view  = earliest_ts_by_group(events, key, events["event_name"].eq("view_item")).rename(columns={"event_time":"ts_view"})
    add   = earliest_ts_by_group(events, key, events["event_name"].eq("add_to_cart")).rename(columns={"event_time":"ts_add"})
    chk   = earliest_ts_by_group(events, key, events["event_name"].eq("begin_checkout")).rename(columns={"event_time":"ts_checkout"})

    # Merge stages into one session table
    sess = first_event.merge(visit, on=key, how="left") \
                      .merge(view,  on=key, how="left") \
                      .merge(add,   on=key, how="left") \
                      .merge(chk,   on=key, how="left")

    # Purchase timestamp & revenue by session_key from deduped purchases
    if not dedup_purchases.empty:
        purch_by_sess = (dedup_purchases.groupby("session_key")
                         .agg(ts_purchase=("event_time","min"),
                              revenue_usd=("event_value_in_usd","sum"),
                              txn_count=("event_name","count"))
                         .reset_index())
        purch_by_sess[["user_pseudo_id", session_num_col]] = pd.DataFrame(
            purch_by_sess["session_key"].tolist(), index=purch_by_sess.index
        )
        sess = sess.merge(purch_by_sess.drop(columns=["session_key"]), on=key, how="left")
    else:
        sess["ts_purchase"] = pd.NaT
        sess["revenue_usd"] = np.nan
        sess["txn_count"] = 0

    # Closed-funnel flags (strict ordering)
    sess["hit_visit"]    = sess["ts_visit"].notna()
    sess["hit_view"]     = sess["ts_view"].notna() & (sess["ts_view"] > sess["ts_visit"])
    sess["hit_add"]      = sess["ts_add"].notna() & (sess["ts_add"] > sess["ts_view"])
    sess["hit_checkout"] = sess["ts_checkout"].notna() & (sess["ts_checkout"] > sess["ts_add"])
    sess["hit_purchase"] = sess["ts_purchase"].notna() & (sess["ts_purchase"] > sess["ts_checkout"])

    # Optional, more permissive (helps flag instrumentation gap):
    sess["hit_purchase_after_view"] = sess["ts_purchase"].notna() & sess["ts_view"].notna() & (sess["ts_purchase"] > sess["ts_view"])
    return sess

def attach_session_context(df: pd.DataFrame,
                           session_table: pd.DataFrame,
                           session_num_col: str) -> pd.DataFrame:
    """
    Attach session-level context using the first non-null values in the session:
    - platform
    - traffic_source_medium, traffic_source_source, traffic_source_name
    - engaged_any (from param.session_engaged as 0/1)
    """
    key = ["user_pseudo_id", session_num_col]
    df["event_time"] = pd.to_datetime(df["event_time"], errors="coerce")
    ordered = df.sort_values(key + ["event_time"])

    ctx_platform = first_non_null_in_session(ordered, key, "platform")
    ctx_medium   = first_non_null_in_session(ordered, key, "traffic_source_medium")
    ctx_source   = first_non_null_in_session(ordered, key, "traffic_source_source")
    ctx_campaign = first_non_null_in_session(ordered, key, "traffic_source_name")

    # engaged_any: session-level rollup (1 if any param.session_engaged == 1 in the session)
    engaged = ordered.groupby(key)["param.session_engaged"].apply(
        lambda s: pd.Series(s).astype("Int64").fillna(0).max()
    ).reset_index().rename(columns={"param.session_engaged":"engaged_any"})

    out = (session_table
           .merge(ctx_platform, on=key, how="left")
           .merge(ctx_medium,   on=key, how="left")
           .merge(ctx_source,   on=key, how="left")
           .merge(ctx_campaign, on=key, how="left")
           .merge(engaged,      on=key, how="left"))

    out["engaged_any"] = out["engaged_any"].fillna(0).astype(int)
    return out

In [12]:
# ------------------------------
# Aggregators (assignment outputs)
# ------------------------------

def funnel_rollup(session_df: pd.DataFrame) -> pd.Series:
    """
    Aggregate a session-level table with closed-funnel flags into counts, step rates, and monetization.
    Used for Q1 (overall), Q2 (daily), Q3 (channel), Q4 (engaged vs all).
    """
    v  = int(session_df["hit_visit"].sum())
    pv = int(session_df["hit_view"].sum())
    ac = int(session_df["hit_add"].sum())
    bc = int(session_df["hit_checkout"].sum())
    pu = int(session_df["hit_purchase"].sum())
    pu_after_view = int(session_df["hit_purchase_after_view"].sum())
    txns = int(session_df["txn_count"].sum()) if "txn_count" in session_df.columns else 0
    rev  = float(session_df["revenue_usd"].sum()) if "revenue_usd" in session_df.columns else 0.0

    return pd.Series({
        "sessions_visit": v,
        "reach_view": pv,
        "reach_add": ac,
        "reach_checkout": bc,
        "reach_purchase_closed": pu,           # closed-funnel purchase (strict)
        "reach_purchase_after_view": pu_after_view,  # permissive purchase (helps show tagging gap)
        "cr_visit_to_view": pv / v if v else np.nan,
        "cr_view_to_add": ac / pv if pv else np.nan,
        "cr_add_to_checkout": bc / ac if ac else np.nan,
        "cr_checkout_to_purchase": pu / bc if bc else np.nan,
        "cr_visit_to_purchase_closed": pu / v if v else np.nan,
        "cr_visit_to_purchase_after_view": pu_after_view / v if v else np.nan,
        "txn_count": txns,
        "revenue_usd": rev,
        "AOV_usd": (rev / txns) if txns else np.nan,
    })

def compute_overall_session_funnel(session_df: pd.DataFrame) -> pd.DataFrame:
    """Q1: Overall session funnel (closed + after-view) with monetization."""
    base = session_df[session_df["hit_visit"] == True]
    return funnel_rollup(base).to_frame().T

def compute_daily_session_funnel(session_df: pd.DataFrame) -> pd.DataFrame:
    """Q2: Daily session funnel trend using session start date."""
    base = session_df[session_df["hit_visit"] == True]
    return base.groupby("session_date").apply(funnel_rollup).reset_index()

def compute_channel_session_funnel(session_df: pd.DataFrame) -> pd.DataFrame:
    """Q3: Channel session funnel by raw traffic_source_medium/source."""
    base = session_df[session_df["hit_visit"] == True]
    out = (base.groupby(["traffic_source_medium", "traffic_source_source"])
             .apply(funnel_rollup)
             .reset_index()
             .sort_values("sessions_visit", ascending=False))
    return out

def compute_engaged_vs_all(session_df: pd.DataFrame) -> pd.DataFrame:
    """Q4: Engaged vs ALL cohorts to separate traffic quality from UX."""
    base = session_df[session_df["hit_visit"] == True]
    all_row = funnel_rollup(base)
    eng_row = funnel_rollup(base[base["engaged_any"] == 1])
    all_row["cohort"] = "ALL"
    eng_row["cohort"] = "ENGAGED"
    return pd.DataFrame([all_row, eng_row])

def compute_overall_user_funnel(df: pd.DataFrame,
                                dedup_purchases: pd.DataFrame) -> pd.DataFrame:
    """
    Q5: User-level funnel (first-touch across entire period).
    We take each user's earliest timestamp for each stage, and the earliest purchase from deduped purchases.
    """
    events = df.copy()
    events["event_time"] = pd.to_datetime(events["event_time"], errors="coerce")
    events = events.sort_values(["user_pseudo_id", "event_time"])
    g = events.groupby("user_pseudo_id", as_index=False)

    def first_ts(gdf: pd.DataFrame, name: str) -> pd.Timestamp:
        m = gdf["event_name"].eq(name)
        return gdf.loc[m, "event_time"].min() if m.any() else pd.NaT

    user = g.apply(lambda x: pd.Series({
        "ts_visit":    earliest_ts_by_group(x, ["user_pseudo_id"], x["event_name"].isin(["first_visit","session_start"]))["event_time"].min() if (x["event_name"].isin(["first_visit","session_start"]).any()) else pd.NaT,
        "ts_view":     first_ts(x, "view_item"),
        "ts_add":      first_ts(x, "add_to_cart"),
        "ts_checkout": first_ts(x, "begin_checkout"),
    })).reset_index(drop=True)

    # Attach purchase from deduped purchases
    if not dedup_purchases.empty:
        user_purch = dedup_purchases.groupby("user_pseudo_id").agg(
            ts_purchase=("event_time","min"),
            user_revenue_usd=("event_value_in_usd","sum"),
            user_txn_count=("event_name","count")
        ).reset_index()
        user = user.merge(user_purch, on="user_pseudo_id", how="left")
    else:
        user["ts_purchase"] = pd.NaT

    # Closed ordering at user grain
    user["hit_visit"]    = user["ts_visit"].notna()
    user["hit_view"]     = user["ts_view"].notna() & (user["ts_view"] > user["ts_visit"])
    user["hit_add"]      = user["ts_add"].notna() & (user["ts_add"] > user["ts_view"])
    user["hit_checkout"] = user["ts_checkout"].notna() & (user["ts_checkout"] > user["ts_add"])
    user["hit_purchase"] = user["ts_purchase"].notna() & (user["ts_purchase"] > user["ts_checkout"])

    # Aggregate
    v  = int(user["hit_visit"].sum())
    pv = int(user["hit_view"].sum())
    ac = int(user["hit_add"].sum())
    bc = int(user["hit_checkout"].sum())
    pu = int(user["hit_purchase"].sum())

    out = pd.DataFrame([{
        "users_visit": v,
        "reach_view": pv,
        "reach_add": ac,
        "reach_checkout": bc,
        "reach_purchase": pu,
        "cr_visit_to_view": pv / v if v else np.nan,
        "cr_view_to_add": ac / pv if pv else np.nan,
        "cr_add_to_checkout": bc / ac if ac else np.nan,
        "cr_checkout_to_purchase": pu / bc if bc else np.nan,
        "cr_visit_to_purchase": pu / v if v else np.nan,
    }])
    return out

def pagetype_sanity_check(df: pd.DataFrame) -> pd.DataFrame:
    """(Optional) Q6: Compare event_name counts with param.ecomm_pagetype (sparse but good for validation)."""
    if "param.ecomm_pagetype" not in df.columns:
        return pd.DataFrame()
    pv = (df[df["event_name"].isin(["page_view","view_item","add_to_cart","begin_checkout"])]
          .groupby(["event_name","param.ecomm_pagetype"])
          .size().reset_index(name="rows"))
    return pv.sort_values(["event_name","rows"], ascending=[True, False])


In [13]:
# ------------------------------
# Main runner (reproducible)
# ------------------------------

def run_funnel_pipeline(input_csv: str,
                        output_dir: str | Path = ".") -> Dict[str, pd.DataFrame]:
    """
    Load normalized CSV and produce all assignment tables as CSVs in output_dir.
    Returns a dict of DataFrames for interactive use.
    """
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # Load
    df = pd.read_csv(input_csv)
    require_columns(df, ["user_pseudo_id", "event_name", "event_time", "param.ga_session_number"])
    txn_col = "param.transaction_id" if "param.transaction_id" in df.columns else None
    if txn_col is None:
        raise ValueError("param.transaction_id not found; required for purchase de-dup & revenue.")
    
    df["event_time"] = pd.to_datetime(df["event_time"], errors="coerce")
    if "event_date" not in df.columns:
        df["event_date"] = df["event_time"].dt.date

    # Build deduped purchases (idempotent if no duplicates exist)
    dedup_purch = build_deduped_purchases(df, txn_col=txn_col, session_num_col="param.ga_session_number")

    # Build per-session staging table & attach context
    sess_raw = build_session_stage_table(df, session_num_col="param.ga_session_number", dedup_purchases=dedup_purch)
    sess = attach_session_context(df, sess_raw, session_num_col="param.ga_session_number")

    # Q1: overall session funnel
    q1_overall = compute_overall_session_funnel(sess)

    # Q2: daily session funnel trend
    q2_daily = compute_daily_session_funnel(sess)

    # Q3: channel session funnel
    q3_channel = compute_channel_session_funnel(sess).rename(
        columns={"traffic_source_medium":"medium", "traffic_source_source":"source"}
    )

    # Q4: engaged vs all
    q4_engaged = compute_engaged_vs_all(sess)

    # Q5: overall user funnel
    q5_user = compute_overall_user_funnel(df, dedup_purchases=dedup_purch)

    # Q6: optional pagetype sanity
    q6_pagetypes = pagetype_sanity_check(df)

    # Save all
    outputs = {
        "overall_session_funnel.csv": q1_overall,
        "daily_session_funnel.csv": q2_daily,
        "channel_session_funnel.csv": q3_channel,
        "engaged_vs_all_session_funnel.csv": q4_engaged,
        "overall_user_funnel.csv": q5_user,
    }
    if not q6_pagetypes.empty:
        outputs["pagetype_sanity_check.csv"] = q6_pagetypes

    for name, d in outputs.items():
        d.to_csv(out_dir / name, index=False)

    return outputs


In [3]:
# ------------------------------
# # Example usage (uncomment to run in your notebook)
# ------------------------------
results = run_funnel_pipeline("events_normalized_sample.csv", output_dir=".")
results["overall_session_funnel.csv"].head()
results["channel_session_funnel.csv"].head()

Unnamed: 0,medium,source,sessions_visit,reach_view,reach_add,reach_checkout,reach_purchase_closed,reach_purchase_after_view,cr_visit_to_view,cr_view_to_add,cr_add_to_checkout,cr_checkout_to_purchase,cr_visit_to_purchase_closed,cr_visit_to_purchase_after_view,txn_count,revenue_usd,AOV_usd
0,(none),(direct),1145.0,268.0,82.0,55.0,18.0,17.0,0.234061,0.30597,0.670732,0.327273,0.015721,0.014847,26.0,42361.0,1629.269231
68,cpc,google,1134.0,510.0,100.0,76.0,20.0,21.0,0.449735,0.196078,0.76,0.263158,0.017637,0.018519,27.0,38479.0,1425.148148
87,organic,google,739.0,119.0,21.0,14.0,4.0,5.0,0.161028,0.176471,0.666667,0.285714,0.005413,0.006766,7.0,6122.0,874.571429
67,cpc,bing,444.0,194.0,25.0,21.0,7.0,6.0,0.436937,0.128866,0.84,0.333333,0.015766,0.013514,10.0,16110.0,1611.0
49,Affiliates,Source816,224.0,124.0,8.0,4.0,1.0,1.0,0.553571,0.064516,0.5,0.25,0.004464,0.004464,2.0,407.0,203.5


In [11]:
results.keys()

dict_keys(['overall_session_funnel.csv', 'daily_session_funnel.csv', 'channel_session_funnel.csv', 'engaged_vs_all_session_funnel.csv', 'overall_user_funnel.csv', 'pagetype_sanity_check.csv'])

In [12]:
results['overall_session_funnel.csv']

Unnamed: 0,sessions_visit,reach_view,reach_add,reach_checkout,reach_purchase_closed,reach_purchase_after_view,cr_visit_to_view,cr_view_to_add,cr_add_to_checkout,cr_checkout_to_purchase,cr_visit_to_purchase_closed,cr_visit_to_purchase_after_view,txn_count,revenue_usd,AOV_usd
0,5427.0,1946.0,313.0,216.0,64.0,64.0,0.358577,0.160843,0.690096,0.296296,0.011793,0.011793,88.0,124414.0,1413.795455


In [14]:
results['daily_session_funnel.csv']

Unnamed: 0,session_date,sessions_visit,reach_view,reach_add,reach_checkout,reach_purchase_closed,reach_purchase_after_view,cr_visit_to_view,cr_view_to_add,cr_add_to_checkout,cr_checkout_to_purchase,cr_visit_to_purchase_closed,cr_visit_to_purchase_after_view,txn_count,revenue_usd,AOV_usd
0,2023-05-30,318.0,161.0,18.0,14.0,5.0,4.0,0.506289,0.111801,0.777778,0.357143,0.015723,0.012579,5.0,8220.0,1644.0
1,2023-05-31,490.0,216.0,23.0,17.0,7.0,4.0,0.440816,0.106481,0.73913,0.411765,0.014286,0.008163,8.0,13184.0,1648.0
2,2023-06-01,471.0,205.0,28.0,18.0,4.0,4.0,0.435244,0.136585,0.642857,0.222222,0.008493,0.008493,4.0,5220.0,1305.0
3,2023-06-02,440.0,209.0,22.0,17.0,5.0,5.0,0.475,0.105263,0.772727,0.294118,0.011364,0.011364,5.0,4770.0,954.0
4,2023-06-03,389.0,209.0,27.0,23.0,7.0,6.0,0.537275,0.129187,0.851852,0.304348,0.017995,0.015424,9.0,11446.0,1271.777778
5,2023-06-04,426.0,204.0,24.0,20.0,8.0,8.0,0.478873,0.117647,0.833333,0.4,0.018779,0.018779,9.0,19504.0,2167.111111
6,2023-06-05,491.0,229.0,21.0,14.0,1.0,2.0,0.466395,0.091703,0.666667,0.071429,0.002037,0.004073,4.0,5658.0,1414.5
7,2023-06-06,453.0,198.0,28.0,14.0,2.0,5.0,0.437086,0.141414,0.5,0.142857,0.004415,0.011038,6.0,6720.0,1120.0
8,2023-06-07,394.0,176.0,13.0,10.0,1.0,1.0,0.446701,0.073864,0.769231,0.1,0.002538,0.002538,3.0,4347.0,1449.0
9,2023-06-08,333.0,91.0,13.0,12.0,3.0,3.0,0.273273,0.142857,0.923077,0.25,0.009009,0.009009,5.0,8355.0,1671.0


In [15]:
results['channel_session_funnel.csv']

Unnamed: 0,medium,source,sessions_visit,reach_view,reach_add,reach_checkout,reach_purchase_closed,reach_purchase_after_view,cr_visit_to_view,cr_view_to_add,cr_add_to_checkout,cr_checkout_to_purchase,cr_visit_to_purchase_closed,cr_visit_to_purchase_after_view,txn_count,revenue_usd,AOV_usd
0,(none),(direct),1145.0,268.0,82.0,55.0,18.0,17.0,0.234061,0.305970,0.670732,0.327273,0.015721,0.014847,26.0,42361.0,1629.269231
68,cpc,google,1134.0,510.0,100.0,76.0,20.0,21.0,0.449735,0.196078,0.760000,0.263158,0.017637,0.018519,27.0,38479.0,1425.148148
87,organic,google,739.0,119.0,21.0,14.0,4.0,5.0,0.161028,0.176471,0.666667,0.285714,0.005413,0.006766,7.0,6122.0,874.571429
67,cpc,bing,444.0,194.0,25.0,21.0,7.0,6.0,0.436937,0.128866,0.840000,0.333333,0.015766,0.013514,10.0,16110.0,1611.000000
49,Affiliates,Source816,224.0,124.0,8.0,4.0,1.0,1.0,0.553571,0.064516,0.500000,0.250000,0.004464,0.004464,2.0,407.0,203.500000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34,Affiliates,Source544,1.0,0.0,0.0,0.0,0.0,0.0,0.000000,,,,0.000000,0.000000,0.0,0.0,
69,email,Source1382,1.0,0.0,0.0,0.0,0.0,0.0,0.000000,,,,0.000000,0.000000,0.0,0.0,
4,Affiliates,Source1022,1.0,1.0,0.0,0.0,0.0,0.0,1.000000,0.000000,,,0.000000,0.000000,0.0,0.0,
5,Affiliates,Source1044,1.0,0.0,0.0,0.0,0.0,0.0,0.000000,,,,0.000000,0.000000,0.0,0.0,


In [16]:
results['engaged_vs_all_session_funnel.csv']

Unnamed: 0,sessions_visit,reach_view,reach_add,reach_checkout,reach_purchase_closed,reach_purchase_after_view,cr_visit_to_view,cr_view_to_add,cr_add_to_checkout,cr_checkout_to_purchase,cr_visit_to_purchase_closed,cr_visit_to_purchase_after_view,txn_count,revenue_usd,AOV_usd,cohort
0,5427.0,1946.0,313.0,216.0,64.0,64.0,0.358577,0.160843,0.690096,0.296296,0.011793,0.011793,88.0,124414.0,1413.795455,ALL
1,1936.0,976.0,313.0,216.0,64.0,64.0,0.504132,0.320697,0.690096,0.296296,0.033058,0.033058,88.0,124414.0,1413.795455,ENGAGED


In [18]:
results['pagetype_sanity_check.csv']

Unnamed: 0,event_name,param.ecomm_pagetype,rows
0,add_to_cart,cart,857
1,begin_checkout,cart,268
2,view_item,product,5664
