In [1]:
import sys
import polars as pl

# Add the parent directory to the import search path so the project-local
# `src` package resolves; this has to run before the import below.
# NOTE(review): ".." is resolved against the kernel's working directory, so
# this assumes the notebook is run from its own folder — confirm.
sys.path.append("..")
from src.ab_testing import analyze_campaigns

## Data

In [2]:
# Load the campaign observations from the project's parquet file (the path is
# relative to the notebook's working directory).
# NOTE(review): in the preview below the control dates advance by month
# (2019-01-08, 2019-02-08, ...) while the test dates advance by day in August;
# this may indicate a day/month swap when the file was created — verify upstream.
data = pl.read_parquet("../data/campaigns.parquet")

# Bare last expression so the frame is rendered as the cell output.
data

campaign_name,date,spend_usd,impressions_count,reach,website_clicks_count,searches_count,view_content_count,add_to_cart_count,purchase_count
cat,datetime[ns],i16,i32,i32,i32,i32,i32,i32,i32
"""Control Campaign""",2019-01-08 00:00:00,2280,82702,56930,7016,2290,2159,1819,618
"""Control Campaign""",2019-02-08 00:00:00,1757,121040,102513,8110,2033,1841,1219,511
"""Control Campaign""",2019-03-08 00:00:00,2343,131711,110862,6508,1737,1549,1134,372
"""Control Campaign""",2019-04-08 00:00:00,1940,72878,61235,3065,1042,982,1183,340
"""Control Campaign""",2019-05-08 00:00:00,1835,,,,,,,
…,…,…,…,…,…,…,…,…,…
"""Test Campaign""",2019-08-26 00:00:00,2311,80841,61589,3820,2037,1046,346,284
"""Test Campaign""",2019-08-27 00:00:00,2915,111469,92159,6435,2976,2552,992,771
"""Test Campaign""",2019-08-28 00:00:00,2247,54627,41267,8144,2432,1281,1009,721
"""Test Campaign""",2019-08-29 00:00:00,2805,67444,43219,7651,1920,1240,1168,677


In [3]:
# Metrics compared between the two campaigns: every column whose name ends in
# "count", plus "reach".
ab_test_cols = [col for col in data.columns if col.endswith("count")] + ["reach"]
group_col = "campaign_name"

# Build {"control" | "test": {metric name: NumPy array}} for analyze_campaigns.
# Each campaign's rows are selected once and reused for every metric, instead
# of re-running the same filter for each column.
campaign_data = {}
for group_key, campaign_label in (
    ("control", "Control Campaign"),
    ("test", "Test Campaign"),
):
    group_rows = data.filter(pl.col(group_col) == campaign_label)
    # NOTE(review): rows with missing metrics (the preview above shows one for
    # the control campaign) are passed through unchanged; analyze_campaigns is
    # assumed to cope with them — confirm.
    campaign_data[group_key] = {
        col: group_rows[col].to_numpy() for col in ab_test_cols
    }

## Brunner-Munzel Test

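The estimated treatment effect is computed relative to the control group; that is, the test campaign is the group of interest. In the output below, the estimate is on a probability scale: 0.5 corresponds to no effect, and values above 0.5 suggest that the test campaign tends to produce larger values than the control campaign.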

In [4]:
# Run the project's campaign analysis on the raw count metrics and reach; the
# returned table (one row per metric) is displayed as the cell output.
analyze_campaigns(campaign_data)

field,stat,estimated_treatment_effect_test_minus_control,p_value,conclusion
impressions_count,-5.20519286691524,0.1988505747126436,4.169465013020105e-06,Reject null hypothesis of no treatment effect
website_clicks_count,1.518468755559812,0.6126436781609195,0.1344436118189122,Fail to reject null hypothesis of no treatment effect
searches_count,1.2998472693623475,0.6022988505747127,0.2013058306187725,Fail to reject null hypothesis of no treatment effect
view_content_count,-0.1044707908778253,0.4919540229885057,0.917165185042214,Fail to reject null hypothesis of no treatment effect
add_to_cart_count,-4.217082153277794,0.2367816091954023,0.0001068072703705,Reject null hypothesis of no treatment effect
purchase_count,-0.0595465005043734,0.4954022988505747,0.9527321451111131,Fail to reject null hypothesis of no treatment effect
reach,-6.05810251680159,0.1735632183908046,2.0450442618232884e-07,Reject null hypothesis of no treatment effect


## Key Metrics

<center>

| **Metric**                         | **Formula**                                                   | **Description**                                         |
|------------------------------------|---------------------------------------------------------------|---------------------------------------------------------|
| Click-Through Rate (CTR)           | $ \text{CTR} = \frac{\text{Clicks}}{\text{Impressions}} \times 100 $                          | Percentage of impressions that result in a click. The code below reports it as a fraction (without the $\times 100$ factor). |
| Conversion Rate (CR)               | $ \text{CR} = \frac{\text{Purchases}}{\text{Clicks}} \times 100 $                             | Percentage of clicks that result in a purchase. The code below reports it as a fraction (without the $\times 100$ factor). |
| Cost Per Click (CPC)               | $ \text{CPC} = \frac{\text{Amount Spent}}{\text{Clicks}} $                                 | Cost paid per click on an ad.                           |
| Cost Per Thousand Impressions (CPM)| $ \text{CPM} = \frac{\text{Amount Spent}}{\text{Impressions}} \times 1000 $                   | Cost to generate 1,000 impressions.                     |
| Cost Per Acquisition (CPA)         | $ \text{CPA} = \frac{\text{Amount Spent}}{\text{Purchases}} $                              | Cost to acquire a customer.                             |
| Engagement Rate                    | $ \text{Engagement Rate} = \frac{\text{Add to Carts}}{\text{Impressions}} $                 | Proportion of impressions leading to an add-to-cart action. |

</center>

In [5]:
# Derive the per-row key metrics from `data` using Polars' SQL interface; the
# frame is registered under the table name "data" that the query selects from.
# Both operands are cast to FLOAT before dividing, and NULLIF(..., 0) turns a
# zero denominator into NULL so the ratio comes out null rather than dividing
# by zero.
# Note: click_through_rate and conversion_rate are left as plain fractions
# (no x100 scaling), whereas cost_per_thousand_impressions is scaled by 1000.
with pl.SQLContext(data=data, eager=True) as sql_context:
    query = """ 
            SELECT
                campaign_name,
                CAST(website_clicks_count AS FLOAT) / NULLIF(CAST(impressions_count AS FLOAT), 0) AS click_through_rate,
                CAST(purchase_count AS FLOAT) / NULLIF(CAST(website_clicks_count AS FLOAT), 0) AS conversion_rate,
                CAST(spend_usd AS FLOAT) / NULLIF(CAST(website_clicks_count AS FLOAT), 0) AS cost_per_click,
                CAST(spend_usd AS FLOAT) / NULLIF(CAST(impressions_count AS FLOAT), 0) * 1000 AS cost_per_thousand_impressions,
                CAST(spend_usd AS FLOAT) / NULLIF(CAST(purchase_count AS FLOAT), 0) AS cost_per_acquisition,
                CAST(add_to_cart_count AS FLOAT) / NULLIF(CAST(impressions_count AS FLOAT), 0) AS engagement_rate
            FROM
                data;
            """
    # eager=True on the context makes execute() return a materialised frame.
    key_metrics = sql_context.execute(query)

# Bare last expression so the metrics table is rendered as the cell output.
key_metrics

campaign_name,click_through_rate,conversion_rate,cost_per_click,cost_per_thousand_impressions,cost_per_acquisition,engagement_rate
cat,f64,f64,f64,f64,f64,f64
"""Control Campaign""",0.084835,0.088084,0.324971,27.568862,3.68932,0.021995
"""Control Campaign""",0.067003,0.063009,0.216646,14.515863,3.438356,0.010071
"""Control Campaign""",0.049411,0.05716,0.360018,17.788947,6.298387,0.00861
"""Control Campaign""",0.042057,0.11093,0.632953,26.61983,5.705882,0.016233
"""Control Campaign""",,,,,,
…,…,…,…,…,…,…
"""Test Campaign""",0.047253,0.074346,0.604974,28.586979,8.137324,0.00428
"""Test Campaign""",0.057729,0.119814,0.452991,26.150768,3.780804,0.008899
"""Test Campaign""",0.149084,0.088531,0.275909,41.133505,3.116505,0.018471
"""Test Campaign""",0.113442,0.088485,0.366619,41.59006,4.143279,0.017318


In [6]:
# Reshape the derived metrics into {"control" | "test": {metric name: NumPy
# array}} for analyze_campaigns. The metric column list and each campaign's
# rows are computed once, instead of re-running the filter for every column.
metric_cols = [col for col in key_metrics.columns if col != "campaign_name"]

campaign_metrics = {}
for group_key, campaign_label in (
    ("control", "Control Campaign"),
    ("test", "Test Campaign"),
):
    group_rows = key_metrics.filter(pl.col("campaign_name") == campaign_label)
    # NOTE(review): null ratios (from rows with missing inputs) are passed
    # through unchanged; analyze_campaigns is assumed to cope with them.
    campaign_metrics[group_key] = {
        col: group_rows[col].to_numpy() for col in metric_cols
    }

In [7]:
# Run the same campaign analysis on the derived key metrics; the returned
# table (one row per metric) is displayed as the cell output.
analyze_campaigns(campaign_metrics)

field,stat,estimated_treatment_effect_test_minus_control,p_value,conclusion
click_through_rate,4.667064836829477,0.77816091954023,2.04999514255e-05,Reject null hypothesis of no treatment effect
conversion_rate,-1.0208369075899462,0.4218390804597701,0.3121009353891495,Fail to reject null hypothesis of no treatment effect
cost_per_click,-0.2241749490507892,0.4827586206896552,0.8234280774652407,Fail to reject null hypothesis of no treatment effect
cost_per_thousand_impressions,7.795429495593319,0.8689655172413794,7.307343058457828e-10,Reject null hypothesis of no treatment effect
cost_per_acquisition,1.1155673153058383,0.5839080459770114,0.2693156634051968,Fail to reject null hypothesis of no treatment effect
engagement_rate,0.4447575351759807,0.5344827586206896,0.6583602735124021,Fail to reject null hypothesis of no treatment effect
