In [1]:
from ab_testing.stratification import stratified_ttest, get_stratified_statistics

In [2]:
help(stratified_ttest)

Help on function stratified_ttest in module ab_testing.stratification:

stratified_ttest(base: <built-in function array>, variant: <built-in function array>, strata_base: <built-in function array>, strata_variant: <built-in function array>, weights: Dict, alternative: str = 'two-sided') -> Tuple[float, float]



In [19]:
from scipy.stats import bernoulli
import numpy as np
from tqdm import tqdm
"""

"""

def generate_sample(n, base_rate):
    base = bernoulli(p=base_rate).rvs(n)
    return base

def get_dummy_predictions(conversions, true_positive_rate, false_positive_rate):
    return np.array([
        bernoulli(p=true_positive_rate).rvs(1) if x == 1 else bernoulli(p=false_positive_rate).rvs(1)
            for x in tqdm(conversions)
    ]).reshape(-1)

def generate_lift(conversions, lift):
    base_rate = conversions.mean()
    needed_delta = len(conversions) * base_rate * lift
    bump_rate = needed_delta / (len(conversions) - conversions.sum())
    lifted = np.array([bernoulli(p=bump_rate).rvs(1)[0] if x == 0 else 1 for x in tqdm(conversions)])
    return lifted


def simulate_stratified_test(n, base_rate, lift, tp, fp):
    base = generate_sample(n=n, base_rate=base_rate)
    preds_base = get_dummy_predictions(base, tp, fp)
    variant = generate_sample(n=n, base_rate=base_rate)
    preds_variant = get_dummy_predictions(variant, tp, fp)
    variant = generate_lift(variant, lift)
    weight = preds_base.mean()
    weights = {0: 1 - weight, 1: weight}

    p, _ = stratified_ttest(base, variant, base * 0, variant * 0, {0: 1})

    print(f"p value without stratification: {p}")


    p_strat, _ = stratified_ttest(
        base,
        variant,
        preds_base,
        preds_variant,
        weights = weights
    )

    print(f"p value with stratification: {p_strat}")
    
tp = .99
fp = .01
lift = .1
n=2000
base_rate=.1
simulate_stratified_test(n, base_rate, lift, tp, fp)

100%|█████████████████████████████████████| 2000/2000 [00:00<00:00, 2536.26it/s]
100%|█████████████████████████████████████| 2000/2000 [00:00<00:00, 3036.92it/s]
100%|█████████████████████████████████████| 2000/2000 [00:00<00:00, 3426.63it/s]

p value without stratification: 0.8800920256451636
p value with stratification: 0.0009716210158559058





In [30]:
simulate_stratified_test(5000, .3, .05, .9, .01)

100%|█████████████████████████████████████| 5000/5000 [00:01<00:00, 3039.00it/s]
100%|█████████████████████████████████████| 5000/5000 [00:01<00:00, 3048.28it/s]
100%|█████████████████████████████████████| 5000/5000 [00:01<00:00, 4362.17it/s]


p value without stratification: 0.01345825652235244
p value with stratification: 1.2777179748457712e-05


In [41]:
simulate_stratified_test(10000, .03, .05, .999, .001)

100%|███████████████████████████████████| 10000/10000 [00:03<00:00, 3051.93it/s]
100%|███████████████████████████████████| 10000/10000 [00:03<00:00, 3085.89it/s]
100%|███████████████████████████████████| 10000/10000 [00:03<00:00, 3177.61it/s]

p value without stratification: 0.02314888960123862
p value with stratification: 0.00922523069489678



