In [153]:
import pandas as pd
from scipy.stats import ttest_ind
import numpy as np

## Data

In [154]:
data = pd.read_csv('AB_test_data.csv')

In [155]:
data.head()

Unnamed: 0,purchase_TF,Variant,date,id
0,False,A,2019/11/8,0x25b44a
1,False,B,2020/8/27,0x46271e
2,False,A,2020/6/11,0x80b8f1
3,False,B,2020/8/22,0x8d736d
4,False,A,2020/8/5,0x96c9c8


In [156]:
data['Variant'].value_counts()

A    125000
B      5000
Name: Variant, dtype: int64

In [157]:
data['purchase_TF'].value_counts()

False    110415
True      19585
Name: purchase_TF, dtype: int64

In [158]:
data[['purchase_TF','Variant']].value_counts()

purchase_TF  Variant
False        A          106298
True         A           18702
False        B            4117
True         B             883
dtype: int64

In [159]:
data['purchase_TF'] = data['purchase_TF'].apply(lambda x:1 if x == True else 0)


In [160]:
data['purchase_TF'].value_counts()

0    110415
1     19585
Name: purchase_TF, dtype: int64

### 1. Conduct an A/B test to determine whether Alternative B improved conversion rates (siteusers book the property) over alternative A.

In [161]:
mean0 = np.mean(data[data['Variant']=='A']['purchase_TF'])
mean1 = np.mean(data[data['Variant']=='B']['purchase_TF'])
diff = mean1 - mean0
mean_total = (mean0 + mean1)/2

sd0 = data[data['Variant']=='A']['purchase_TF'].std()
sd1 = data[data['Variant']=='B']['purchase_TF'].std()
sd_avg = (sd0 + sd1)/2
n0 = data[data['Variant']=='A']['purchase_TF'].shape[0]
n1 = data[data['Variant']=='B']['purchase_TF'].shape[0]

In [162]:
diff

0.026984000000000008

In [163]:
ttest_ind(data[data['Variant']=='A']['purchase_TF'], data[data['Variant']=='B']['purchase_TF'],equal_var = False)


Ttest_indResult(statistic=-4.917887949732089, pvalue=9.009730353157316e-07)

### 2. Calculate the optimal sample size for a 95% confidence rate and test with 80% power.Conduct the test 10 times using samples of the optimal size. Report results

probability of type II error = 0.2
use this to calculate minimum detectable effect for the whole 


In [164]:
# ATE_STD=np.sqrt(sd1**2/n1+sd0**2/n0)

In [165]:
# effect = (0.2 * ATE_STD)/diff
# effect
#The minimum detectable effect of the dataset is aound 0.04 withnot equal size control and treatment group
#For convinience, the optimal sample size will be calculated with the average of the control group's data and the treatment group's data.
#0.1 will be used as the minimum detectable effect
#And equal size data of different groups will be used in the further testing 

In [166]:
n = 2*(1.96 + 0.84)**2 * (sd_avg/0.03)**2

In [167]:
sample_size = int((1.96*np.sqrt(2*mean_total*(1-mean_total)) + 0.84 *np.sqrt(mean0 * (1-mean0) + mean1 * (1-mean1)))**2 *1/(diff**2))
sample_size


2938

use 2938 as the optimal sample size for convience
for Conduct the test 10 times using samples of the optimal size:
randomly select 2938 samples from both treatment and control group for 10 tims and conduct test

In [168]:
i = 0
while i < 10:
    A_data = data[data['Variant']=='A'].sample(n=sample_size, frac=None, replace=False,  weights=None, random_state=i*10, axis=None)
    B_data = data[data['Variant']=='B'].sample(n=sample_size, frac=None, replace=False,  weights=None, random_state=i*10, axis=None)
    print(ttest_ind(A_data['purchase_TF'],B_data['purchase_TF'],equal_var = False))
    i += 1

Ttest_indResult(statistic=-1.8557253958456634, pvalue=0.0635428556011029)
Ttest_indResult(statistic=-1.9829930856885856, pvalue=0.0474148001195071)
Ttest_indResult(statistic=-3.069044843620316, pvalue=0.002157287907781532)
Ttest_indResult(statistic=-3.7165323768969865, pvalue=0.00020386863045311228)
Ttest_indResult(statistic=-3.6208601566855703, pvalue=0.00029611896564574593)
Ttest_indResult(statistic=-1.3863647469613187, pvalue=0.16568822528439978)
Ttest_indResult(statistic=-3.404530739847119, pvalue=0.0006672445740035767)
Ttest_indResult(statistic=-2.8533584159677545, pvalue=0.004341180243748014)
Ttest_indResult(statistic=-3.0173312942642014, pvalue=0.0025610996572183334)
Ttest_indResult(statistic=-2.1087879646615133, pvalue=0.03500521429655828)


### 3. Conduct a sequential test for the 10 samples. For any of the samples, were you able to stop the test prior to using the full sample? What was the average number of iterations required to stop the test?

In [169]:
import math

In [170]:
ln_a = math.log(1/0.05)
ln_b = math.log(0.2)


In [171]:
# H0: B-A = 0
# Ha: B-A > 0
# if B-A = 1 : ln(mean1/mean0)
# if B-A < 1 : ln((1-mean1)/mean0)

In [145]:
# i = 0
# ln = 0
# while i < 10:
#     ln_lambda = 0
#     j = 0
#     A_data = data[data['Variant']=='A'].sample(n=sample_size, frac=None, replace=False,  weights=None, random_state=i*1000, axis=None)
#     mean0 = np.mean(A_data['purchase_TF'])
#     B_data = data[data['Variant']=='B'].sample(n=sample_size, frac=None, replace=False,  weights=None, random_state=i*1000, axis=None)
#     mean1 = np.mean(B_data['purchase_TF'])
#     while j < 2900:
#         d = int(B_data['purchase_TF'][j:j+1]) - int(A_data['purchase_TF'][j:j+1])
#         if d == 1:
#             ln = math.log(mean1/mean0)
#         elif d < 1:
#             ln = math.log((1-mean1)/(1-mean0))
#         ln_lambda += ln
#         if ln_lambda <= ln_b:
#             print('this is sample ',i,', trail ',j ,' not reject H0')
#             print(ln_lambda)
#             break
#         elif ln_lambda >= ln_a:
#             print('this is sample ',i,', trail ',j, ' reject H0')
#             print(ln_lambda)
#             break
            
#         j += 1
#     i += 1

In [146]:
# i = 0
# ln = 0
# while i < 10:
#     ln_lambda = 0
#     j = 0
#     A_data = data[data['Variant']=='A'].sample(n=sample_size, frac=None, replace=False,  weights=None, random_state=i, axis=None)
#     B_data = data[data['Variant']=='B'].sample(n=sample_size, frac=None, replace=False,  weights=None, random_state=i, axis=None)
#     while j < 2400:
#         print(int(B_data['purchase_TF'][j:j+1]),int(A_data['purchase_TF'][j:j+1]))
#         d = int(B_data['purchase_TF'][j:j+1]) - int(A_data['purchase_TF'][j:j+1])
#         if int(B_data['purchase_TF'][j:j+1]) == 1 and int(A_data['purchase_TF'][j:j+1]) == 1:
#             ln = math.log(mean1/mean0)
#         elif int(B_data['purchase_TF'][j:j+1]) == 0 and int(A_data['purchase_TF'][j:j+1]) == 0:
#             ln = math.log((1-mean1)/(1-mean0))
#         elif int(B_data['purchase_TF'][j:j+1]) == 1 and int(A_data['purchase_TF'][j:j+1]) == 0:
#             ln = math.log((mean1)/(1-mean0))
#         elif int(B_data['purchase_TF'][j:j+1]) == 0 and int(A_data['purchase_TF'][j:j+1]) == 1:
#             ln = math.log((1-mean1)/(mean0))
#         ln_lambda += ln
#         if ln_lambda <= ln_b:
#             print('this is sample ',i,', trail ',j ,' not reject H0')
#             break
#         elif ln_lambda >= ln_a:
#             print('this is sample ',i,', trail ',j, ' reject H0')
#             break
            
#         j += 1
#     i += 1

In [147]:
# i = 0
# ln = 0
# while i < 10:
#     ln_lambda = 0
#     j = 0
#     A_data = data[data['Variant']=='A'].sample(n=sample_size, frac=None, replace=False,  weights=None, random_state=i*10, axis=None)
#     mean0 = np.mean(A_data['purchase_TF'])
#     B_data = data[data['Variant']=='B'].sample(n=sample_size, frac=None, replace=False,  weights=None, random_state=i*10, axis=None)
#     mean1 = np.mean(B_data['purchase_TF'])
#     while j < 2400:
#         d = int(B_data['purchase_TF'][j:j+1]) - int(A_data['purchase_TF'][j:j+1])
#         if d == 1:
#             ln = math.log(mean1/mean0)
#         elif d < 1:
#             ln = math.log((1-mean1)/(1-mean0))
#         ln_lambda += ln
#         if ln_lambda <= ln_b:
#             print('this is sample ',i,', trail ',j ,' not reject H0')
#             break
#         elif ln_lambda >= ln_a:
#             print('this is sample ',i,', trail ',j, ' reject H0')
#             break
            
#         j += 1
#     i += 1

In [148]:
a = (1/0.05)
b = (0.2)
((1-mean1)/(1-mean0))

0.9682684528401286

In [149]:
# i = 0
# ln = 0
# while i < 10:
#     ln_lambda = 1
#     j = 0
#     A_data = data[data['Variant']=='A'].sample(n=sample_size, frac=None, replace=False,  weights=None, random_state=i*10, axis=None)
#     B_data = data[data['Variant']=='B'].sample(n=sample_size, frac=None, replace=False,  weights=None, random_state=i*10, axis=None)
#     while j < 2400:
#         print(int(B_data['purchase_TF'][j:j+1]),int(A_data['purchase_TF'][j:j+1]))
#         d = int(B_data['purchase_TF'][j:j+1]) - int(A_data['purchase_TF'][j:j+1])
#         if int(B_data['purchase_TF'][j:j+1]) == 1 and int(A_data['purchase_TF'][j:j+1]) == 1:
#             ln = (mean1/mean0)
#         elif int(B_data['purchase_TF'][j:j+1]) == 0 and int(A_data['purchase_TF'][j:j+1]) == 0:
#             ln = ((1-mean1)/(1-mean0))
#         elif int(B_data['purchase_TF'][j:j+1]) == 1 and int(A_data['purchase_TF'][j:j+1]) == 0:
#             ln = ((mean1)/(1-mean0))
#         elif int(B_data['purchase_TF'][j:j+1]) == 0 and int(A_data['purchase_TF'][j:j+1]) == 1:
#             ln = ((1-mean1)/(mean0))
#         ln_lambda *= ln
#         print(ln_lambda)
#         if ln_lambda <= b:
#             print('this is sample ',i,', trail ',j ,' not reject H0')
#             break
#         elif ln_lambda >= a:
#             print('this is sample ',i,', trail ',j, ' reject H0')
#             break
            
#         j += 1
#     i += 1

In [150]:
# 如果都是1或者都是0 则h0
# 如果一个B1A0则 mean1/mean0
# 如果b0a1则 

In [151]:
math.log((mean1)/(1-mean0))

-1.5718007240054335

In [None]:
H0: mean(treatment) = mean(control)
H1: >

In [172]:
i = 0
ln = 0
sample_number = 0
while i < 10:
    ln_lambda = 0
    j = 0
    A_data = data[data['Variant']=='A'].sample(n=sample_size, frac=None, replace=False,  weights=None, random_state=i*1000, axis=None)
    mean0 = np.mean(A_data['purchase_TF'])
    B_data = data[data['Variant']=='B'].sample(n=sample_size, frac=None, replace=False,  weights=None, random_state=i*1000, axis=None)
    mean1 = np.mean(B_data['purchase_TF'])
    while j < 2900:
        d = int(B_data['purchase_TF'][j:j+1]) 
        if d == 1:
            ln = math.log(mean1/mean0)
        elif d < 1:
            ln = math.log((1-mean1)/(1-mean0))
        ln_lambda += ln
        if ln_lambda <= ln_b:
            print('this is sample ',i,', trail ',j ,' not reject H0')
            print(ln_lambda)
            sample_number += j
            break
        elif ln_lambda >= ln_a:
            print('this is sample ',i,', trail ',j, ' reject H0')
            print(ln_lambda)
            sample_number += j
            break
            
        j += 1
    i += 1

this is sample  0 , trail  2303  reject H0
3.0466561000371057
this is sample  1 , trail  531  reject H0
3.0062362337396453
this is sample  2 , trail  558  reject H0
3.01908049756464
this is sample  3 , trail  856  reject H0
3.019227094190434
this is sample  4 , trail  2294  reject H0
3.099012531390155
this is sample  5 , trail  256  reject H0
3.106548470963303
this is sample  6 , trail  320  reject H0
3.1424214697156443
this is sample  7 , trail  248  reject H0
3.057703358577012
this is sample  8 , trail  814  reject H0
3.0248527553864113
this is sample  9 , trail  463  reject H0
3.030350183012101


In [173]:
sample_number/10

864.3