In [68]:
from scipy.stats import norm
from scipy.stats import binom_test 
import matplotlib.pyplot as plt
import math
import numpy as np
import pandas as pd

In [30]:
data_contr = pd.read_csv('Final Project Results - Control.csv')
data_exp = pd.read_csv('Final Project Results - Experiment.csv')

In [31]:
data_contr.head()

Unnamed: 0,Date,Pageviews,Clicks,Enrollments,Payments
0,"Sat, Oct 11",7723,687,134.0,70.0
1,"Sun, Oct 12",9102,779,147.0,70.0
2,"Mon, Oct 13",10511,909,167.0,95.0
3,"Tue, Oct 14",9871,836,156.0,105.0
4,"Wed, Oct 15",10014,837,163.0,64.0


# Experiment Design

## Choosing Invariant Metric

1. Number of cookies: number of unique cookies to view the course overview page. (dmin=3000)
2. Number of clicks: number of unique cookies to click the "Start free trial" button (which happens before the free trial screener is trigger). (dmin=240)
3. Click-through-probability: number of unique cookies to click the "Start free trial" button divided by number of unique cookies to view the course overview page. (dmin=0.01)

## Choosing Evaluation Metrics

1. Gross conversion: # of user id completed check-out / # of cookies clicked the "start" button (d_min = 0.01)
2. Retention: # of user-ids to remain enrolled past the 14-day boundary (i.e., made at least 1 payment) / # of user-ids to complete checkout (dmin=0.01)
3. Net conversion: # of user-ids to remain enrolled past the 14-day boundary / # of unique cookies to click the "Start free trial" button. (dmin= 0.0075)

## Standard Deviation for Evaluation Metrics (Analytical Estimate)

Assumption on sample size: 5000 cookies visiting the course overview page

Baseline Values:
- Unique cookies to view course overview page per day: 40000
- Unique cookies to click "Start free trial" per day:	3200
- Enrollments per day:	660
- Click-through-probability on "Start free trial":	0.08
- Probability of enrolling, given click:	0.20625
- Probability of payment, given enroll:	0.53 
- Probability of payment, given click	0.1093125

#### Gross conversion: 
Analytical estimate likely underestimates the true value as unit of diversion (user id) and unit of analysis (cookies) are different

In [7]:
sample_size = 5000 * 3200 / 40000
p = 0.20625
std_gross_conversion = math.sqrt(p * (1-p) / sample_size)
print('Analytical std for gross conversion: {:.4f}'.format(std_gross_conversion))

Analytical std for gross conversion: 0.0202


#### Retention: 
Unit of diversion and unit of analysis are both user id, as such emplirical variance is likely be close to analytical estimate

In [8]:
sample_size = 5000 * 660 / 40000
p = 0.53
std_retention = math.sqrt(p * (1-p) / sample_size)
print('Analytical std for retention: {:.4f}'.format(std_retention))

Analytical std for retention: 0.0549


#### Net Conversion: 
Analytical estimate likely underestimates the true value, as unit of diversion (user id) and unit of analysis (cookies) are different

In [9]:
sample_size = 5000 * 3200 / 40000
p = 0.1093126
std_net_conversion = math.sqrt(p * (1-p) / sample_size)
print('Analytical std for net conversion: {:.4f}'.format(std_net_conversion))

Analytical std for net conversion: 0.0156


## Sizing

### Use Bonferroni correction?

No, as the evaluation metrics are highly correlated to each other, Bonferroni correct is too conservative in this case

### Choosing Number of Samples given Power

Using the analytic estimates of variance, how many pageviews total (across both groups) would you need to collect to adequately power the experiment? Use an alpha of 0.05 and a beta of 0.2. Make sure you have enough power for each metric.

In [15]:
# required setups
p = 0.95
alpha = 1-p  
beta = 0.2
d_min_gc = 0.01
d_min_ret = 0.01
d_min_nc = 0.0075
p_gc = 0.20625
p_ret = 0.53
p_nc = 0.1093125

In [13]:
# retrieve value <= probability 
# for 2-tailed test
critical_val_1 = norm.ppf(1-alpha/2)
print(f'Critical_val_1: {critical_val_1}')
critical_val_2 = norm.ppf(beta)
print(f'critical_val_2: {critical_val_2}')

Critical_val_1: 1.959963984540054
critical_val_2: -0.8416212335729142


#### Samples (# of pageviews) needed for Gross conversion

In [23]:
SE_required = d_min_gc/(critical_val_1 - critical_val_2)
print(f'SE_required: {SE_required}')
N_required_cookie = 2 * 2 * (p_gc * (1-p_gc)) / (SE_required ** 2) 
N_required_pageview = N_required_cookie / 3200 * 40000
print(f'N_required_pageview: {N_required_pageview}')

SE_required: 0.0035694077536344176
N_required_cookie to click: 51397.898385401604
642473.7298175201


#### Samples (# of pageviews) needed for Retention

In [25]:
SE_required = d_min_ret/(critical_val_1 - critical_val_2)
print(f'SE_required: {SE_required}')
N_required_cookie = 2 * 2 * (p_ret * (1-p_ret)) / SE_required ** 2 
#print(f'N_required_cookie to enroll: {N_required_cookie}')
N_required_pageview = N_required_cookie / 660 * 40000
print(f'N_required_pageview: {N_required_pageview}')

SE_required: 0.0035694077536344176
N_required_pageview: 4739771.980185109


#### Samples (# of pageviews) needed for Net Conversion

In [24]:
SE_required = d_min_nc/(critical_val_1 - critical_val_2)
print(f'SE_required: {SE_required}')
N_required_cookie = 2 * 2 * (p_nc * (1-p_nc)) / SE_required ** 2 
#print(f'N_required_cookie to enroll: {N_required_cookie}')
N_required_pageview = N_required_cookie / 3200 * 40000
print(f'N_required_pageview: {N_required_pageview}')

SE_required: 0.002677055815225813
N_required_pageview: 679282.3594783728


### Choosing Duration vs. Exposure

In [29]:
print(f'Duration if using Gross Conversion: {642473 / 40000}')
print(f'Duration if using Retention: {4739771.980185109 / 40000}')
print(f'Duration if using Net Conversion: {679282.3594783728 / 40000}')

Duration if using Gross Conversion: 16.061825
Duration if using Retention: 118.49429950462773
Duration if using Net Conversion: 16.98205898695932


Choose Net Convesersion as the evaluation metric, as Retention takes too long to run
The change is not too risky, we can use 100% of the traffic to perform the test, it will take about 17 days

# Experiment Analysis

## Sanity Checks 

- For invariant metrics we expect equal diversion into the experiment and control group.

- For each invariant metric, compute a 95% confidence interval for the value you expect to observe.

#### Number of cookies (on pageview)

In [38]:
total_pageview_exp = sum(data_exp['Pageviews'])
total_pageview_contr = sum(data_contr['Pageviews'])
se_pageview = math.sqrt(0.5 * 0.5 / (total_pageview_exp + total_pageview_contr))
m = 1.96 * se_pageview
ci_min,ci_max = 0.5-m, 0.5+m
print("Confidence Interval for pageviews: [{},{}]".format(round(ci_min,4),round(ci_max,4)))
print("Observed: ",round(total_pageview_contr/(total_pageview_exp+total_pageview_contr),4))

Confidence Interval for pageviews: [0.4988,0.5012]
Observed:  0.5006


#### Number of Clicks (on the "start free trial" button)

In [39]:
total_click_exp = sum(data_exp['Clicks'])
total_click_contr = sum(data_contr['Clicks'])
se_click = math.sqrt(0.5 * 0.5 / (total_click_exp + total_click_contr))
m = 1.96 * se_click
ci_min,ci_max = 0.5-m, 0.5+m
print("Confidence Interval for clicks: [{},{}]".format(round(ci_min,4),round(ci_max,4)))
print("Observed: ",round(total_click_contr/(total_click_exp+total_click_contr),4))

Confidence Interval for clicks: [0.4959,0.5041]
Observed:  0.5005


#### Click-through-probability

In [41]:
ctp_exp = total_click_exp / total_pageview_exp
ctp_contr = total_click_contr / total_pageview_contr
p_pool = (total_click_exp + total_click_contr) / (total_pageview_exp + total_pageview_contr)
se_ctp = math.sqrt(p_pool * (1-p_pool) * (1 / total_pageview_exp + 1 / total_pageview_contr))
m = 1.96 * se_ctp
ci_min,ci_max = -m, +m
print("Confidence Interval for click-thru-prob: [{},{}]".format(round(ci_min,4),round(ci_max,4)))
print("Observed: ",round(ctp_exp - ctp_contr,4))

Confidence Interval for click-thru-prob: [-0.0013,0.0013]
Observed:  0.0001


## Effect Size Tests
Check for Practical and Statistical Significance

#### Gross conversion: # of user id completed check-out / # of cookies clicked the "start" button (d_min = 0.01)

In [60]:
total_enroll_exp = sum(data_exp[data_exp.Payments.notnull()]['Enrollments'])
total_enroll_contr = sum(data_contr[data_contr.Payments.notnull()]['Enrollments'])
total_click_exp = sum(data_exp[data_exp.Payments.notnull()]['Clicks'])
total_click_contr = sum(data_contr[data_contr.Payments.notnull()]['Clicks'])
gc_exp = total_enroll_exp / total_click_exp
gc_contr = total_enroll_contr / total_click_contr
gc_diff = gc_exp - gc_contr
p_pool = (total_enroll_exp + total_enroll_contr) / (total_click_exp + total_click_contr)
se_gc = math.sqrt(p_pool * (1-p_pool) * (1 / total_click_exp + 1 / total_click_contr))
#print(se_gc)
m = 1.96 * se_gc
ci_min,ci_max = gc_diff-m, gc_diff+m
print("Confidence Interval for gross conversion: [{},{}]".format(round(ci_min,4),round(ci_max,4)))
print("Observed: ",round(gc_exp - gc_contr,4))
print("Both statitically and practically significant")

Confidence Interval for gross conversion: [-0.0291,-0.012]
Observed:  -0.0206
Both statitically and practically significant


In [52]:
data_exp[data_exp.Payments.notnull()] #['Payments']

Unnamed: 0,Date,Pageviews,Clicks,Enrollments,Payments
0,"Sat, Oct 11",7716,686,105.0,34.0
1,"Sun, Oct 12",9288,785,116.0,91.0
2,"Mon, Oct 13",10480,884,145.0,79.0
3,"Tue, Oct 14",9867,827,138.0,92.0
4,"Wed, Oct 15",9793,832,140.0,94.0
5,"Thu, Oct 16",9500,788,129.0,61.0
6,"Fri, Oct 17",9088,780,127.0,44.0
7,"Sat, Oct 18",7664,652,94.0,62.0
8,"Sun, Oct 19",8434,697,120.0,77.0
9,"Mon, Oct 20",10496,860,153.0,98.0


#### Retention: # of user-ids to remain enrolled past the 14-day boundary (i.e., made at least 1 payment) / # of user-ids to complete checkout (dmin=0.01)

In [65]:
ret_exp = total_payment_exp / total_enroll_exp
ret_contr = total_payment_contr / total_enroll_contr
ret_diff = ret_exp - ret_contr
p_pool = (total_payment_exp + total_payment_contr) / (total_enroll_exp + total_enroll_contr)
se_ret = math.sqrt(p_pool * (1-p_pool) * (1 / total_enroll_exp + 1 / total_enroll_contr))
#print(se_nc)
m = 1.96 * se_ret
ci_min,ci_max = ret_diff-m, ret_diff+m
print("Confidence Interval for retention: [{},{}]".format(round(ci_min,4),round(ci_max,4)))
print("Observed: ",round(ret_exp - ret_contr,4))
print("Statitically significant, but not pratically significant")

Confidence Interval for retention: [0.0081,0.0541]
Observed:  0.0311
Statitically significant, but not pratically significant


#### Net conversion: # of user-ids to remain enrolled past the 14-day boundary / # of unique cookies to click the "Start free trial" button. (dmin= 0.0075)

In [67]:
total_payment_exp = sum(data_exp[data_exp.Payments.notnull()]['Payments'])
total_payment_contr = sum(data_contr[data_contr.Payments.notnull()]['Payments'])
total_click_exp = sum(data_exp[data_exp.Payments.notnull()]['Clicks'])
total_click_contr = sum(data_contr[data_contr.Payments.notnull()]['Clicks'])
nc_exp = total_payment_exp / total_click_exp
nc_contr = total_payment_contr / total_click_contr
nc_diff = nc_exp - nc_contr
p_pool = (total_payment_exp + total_payment_contr) / (total_click_exp + total_click_contr)
se_nc = math.sqrt(p_pool * (1-p_pool) * (1 / total_click_exp + 1 / total_click_contr))
#print(se_nc)
m = 1.96 * se_nc
ci_min,ci_max = nc_diff-m, nc_diff+m
print("Confidence Interval for net conversion: [{},{}]".format(round(ci_min,4),round(ci_max,4)))
print("Observed: ",round(nc_exp - nc_contr,4))
print("Neither statitically or practically significant")

Confidence Interval for net conversion: [-0.0116,0.0019]
Observed:  -0.0049
Neither statitically or practically significant


## Sign Tests

In [85]:
"""gross conversion"""
alpha=0.05
beta=0.2

gc_exp=[i/j for i,j in zip(data_exp['Enrollments'], data_exp['Clicks']) if str(i) != 'nan']
gc_cont=[i/j for i,j in zip(data_contr['Enrollments'], data_contr['Clicks']) if str(i) != 'nan']
gc_diff=sum([i>j for i,j in zip(gc_exp,gc_cont)])
days=len(gc_exp)
print(f"% of days experiment group is larger than control group: {gc_diff / days}")

# The prob of gross conversion of experiment group > gross conversion of control group is 0.5
p_value=binom_test(gc_diff, n=days, p=0.5)
print("p-value:",p_value,", Statistically Significant:",p_value<alpha)

% of days experiment group is larger than control group: 0.17391304347826086
p-value: 0.0025994777679443364 , Statistically Significant: True


In [86]:
"""retention"""
rt_exp=[i/j for i,j in zip(data_exp['Payments'], data_exp['Enrollments']) if str(i) != 'nan']
rt_cont=[i/j for i,j in zip(data_contr['Payments'], data_contr['Enrollments']) if str(i) != 'nan']
rt_diff=sum([i>j for i,j in zip(rt_exp,rt_cont)])
days=len(rt_exp)
print(f"% of days experiment group is larger than control group: {rt_diff / days}")

p_value=binom_test(rt_diff, n=days, p=0.5)
print("p-value:",p_value,", Statistically Significant:",p_value<alpha)

% of days experiment group is larger than control group: 0.5652173913043478
p-value: 0.6776394844055175 , Statistically Significant: False


In [87]:
"""net conversion"""
nc_exp=[i/j for i,j in zip(data_exp['Payments'], data_exp['Clicks']) if str(i) != 'nan']
nc_cont=[i/j for i,j in zip(data_contr['Payments'], data_contr['Clicks']) if str(i) != 'nan']
nc_diff=sum([i>j for i,j in zip(nc_exp,nc_cont)])
days=len(nc_exp)
print(f"% of days experiment group is larger than control group: {nc_diff / days}")

p_value=binom_test(nc_diff, n=days, p=0.5)
print("p-value:",p_value,", Statistically Significant:",p_value<alpha)

% of days experiment group is larger than control group: 0.43478260869565216
p-value: 0.6776394844055175 , Statistically Significant: False


In [90]:
p_value = binom_test(2, 3, 0.5)
p_value

1.0