In [49]:
import statsmodels.stats.power as power
import numpy as np
import pandas as pd

# 1. Power Analysis to Estimate Treatment Effects on Binary Outcomes

## Determine Sample Size Necessary to Detect Treatment Effects: Define our own function

In [19]:
def get_nobs_each_group(control_success_rate,pct_impact):
    treated_success_rate = control_success_rate * (1+pct_impact)
    nobs_each_group = (control_success_rate * (1-control_success_rate) + treated_success_rate*(1-treated_success_rate)) \
                  * ((2.8/(treated_success_rate-control_success_rate))**2)
    return nobs_each_group

In [29]:
control_success_rate = 0.60
pct_impact = 0.1

print("Sample Size per Group: {:.0f}".format(get_nobs_each_group(control_success_rate,pct_impact)))

Sample Size per Group: 1011


## Compare to Statsmodels Package: Requires Pooled Standard Errors

In [34]:
treated_success_rate = control_success_rate * (1+pct_impact)
pooled_success_rate = (control_success_rate + treated_success_rate)/2
pooled_se = np.sqrt(pooled_success_rate*(1-pooled_success_rate))
effect_size = (treated_success_rate-control_success_rate)/pooled_se

print("Sample Size per Group: {:.0f}".format(power.tt_ind_solve_power(effect_size=effect_size, 
       nobs1=None,alpha=.05,power=.80,ratio=1.0,alternative='two-sided')))

Sample Size per Group: 1017


## Required Sample Size is Sensitive to Both Control Success Rate and Treatment Effect

In [59]:
results = pd.DataFrame(columns=['control_success_rate','pct_impact','nobs_each_group'])
idx = 0
for csr in np.arange(0.5,0.71,0.05):
    for impact in np.arange(0.08,.13,.02):
        nobs1=get_nobs_each_group(csr,impact)
        tmp = pd.DataFrame({'control_success_rate':csr,'pct_impact':impact,'nobs_each_group':nobs1},index=[idx])
        results=results.append(tmp)
        idx += 1
print(results.round(2))

    control_success_rate  pct_impact  nobs_each_group
0                   0.50        0.08          2442.16
1                   0.50        0.10          1560.16
2                   0.50        0.12          1081.05
3                   0.55        0.08          1978.89
4                   0.55        0.10          1260.81
5                   0.55        0.12           871.19
6                   0.60        0.08          1592.83
7                   0.60        0.10          1011.36
8                   0.60        0.12           696.31
9                   0.65        0.08          1266.16
10                  0.65        0.10           800.28
11                  0.65        0.12           548.33
12                  0.70        0.08           986.16
13                  0.70        0.10           619.36
14                  0.70        0.12           421.49


## Add Experiment Runtime Based on Average Daily Users

In [65]:
avg_daily_users = 250
results['runtime_days'] = 2*results['nobs_each_group']/avg_daily_users
print(results.round(2))

    control_success_rate  pct_impact  nobs_each_group  runtime_days
0                   0.50        0.08          2442.16         19.54
1                   0.50        0.10          1560.16         12.48
2                   0.50        0.12          1081.05          8.65
3                   0.55        0.08          1978.89         15.83
4                   0.55        0.10          1260.81         10.09
5                   0.55        0.12           871.19          6.97
6                   0.60        0.08          1592.83         12.74
7                   0.60        0.10          1011.36          8.09
8                   0.60        0.12           696.31          5.57
9                   0.65        0.08          1266.16         10.13
10                  0.65        0.10           800.28          6.40
11                  0.65        0.12           548.33          4.39
12                  0.70        0.08           986.16          7.89
13                  0.70        0.10           6

## Calculate Minimum Detectable Treatment Effect for Possible Experiment Runtimes

In [72]:
mde_results = pd.DataFrame(columns=['Runtime','Total_Sample','N_Each_Group','MDE_pp','MDE_pct'])
control_success_rate = 0.6
pct_impact = 0.1
pooled_success_rate = 0.5*(control_success_rate + control_success_rate*(1+pct_impact))
idx = 0
for days in range(2,21,2):
    sample = avg_daily_users * days
    nobs1 = 0.5*sample
    mde_pp = 2.8*np.sqrt(pooled_success_rate*(1-pooled_success_rate)/(0.25*sample))
    mde_pct = mde_pp / control_success_rate
    tmp = pd.DataFrame({'Runtime':days,'Total_Sample':sample,'N_Each_Group':nobs1,
                        'MDE_pp':mde_pp,'MDE_pct':mde_pct},index=[idx])
    mde_results=mde_results.append(tmp)

print(mde_results.round(3))

  Runtime Total_Sample  N_Each_Group  MDE_pp  MDE_pct
0       2          500         250.0   0.121    0.202
0       4         1000         500.0   0.085    0.142
0       6         1500         750.0   0.070    0.116
0       8         2000        1000.0   0.060    0.101
0      10         2500        1250.0   0.054    0.090
0      12         3000        1500.0   0.049    0.082
0      14         3500        1750.0   0.046    0.076
0      16         4000        2000.0   0.043    0.071
0      18         4500        2250.0   0.040    0.067
0      20         5000        2500.0   0.038    0.064
