# Power and Conversion Funnel

When running experiments on conversion optimization, one can either measure the impact on the overall conversion rate or, alternatively, measure the impact on the click-through rate (CTR) at the individual steps of the funnel. Here, I want to explore how the statistical power differs between these two methods.

In [1]:
import numpy as np

from statsmodels.stats.power import TTestPower

In [2]:
def get_power(mean: float, nobs: int, alternative='two-sided', alpha: float=0.5):
    """
    Power of a t-test (statsmodels ``TTestPower``) for a Bernoulli metric.

    The standardized effect size handed to the power calculation is the
    mean divided by the Bernoulli standard deviation,
    ``sqrt(mean * (1 - mean))``.

    mean: success probability of the Bernoulli distribution
    nobs: number of observations
    alternative: forwarded unchanged to ``TTestPower.power``
    alpha: significance level, forwarded unchanged to ``TTestPower.power``
    """
    # NOTE(review): a default alpha of 0.5 is unusually high for a
    # significance level -- confirm that 0.05 was not intended.
    bernoulli_std = np.sqrt(mean * (1 - mean))
    standardized_effect = mean / bernoulli_std

    power_calculator = TTestPower()
    return power_calculator.power(
        effect_size=standardized_effect,
        nobs=nobs,
        alpha=alpha,
        alternative=alternative,
    )

In [3]:
# Quick check: 10% success rate, 10 observations, default alternative and alpha.
get_power(mean=.1, nobs=10)

0.6859282400085758

In [4]:
from statsmodels.stats.power import tt_ind_solve_power

# Print the solver's documentation to check which keyword arguments it expects.
help(tt_ind_solve_power)

Help on method solve_power in module statsmodels.stats.power:

solve_power(effect_size=None, nobs1=None, alpha=None, power=None, ratio=1.0, alternative='two-sided') method of statsmodels.stats.power.TTestIndPower instance
    solve for any one parameter of the power of a two sample t-test
    
    for t-test the keywords are:
        effect_size, nobs1, alpha, power, ratio
    
    exactly one needs to be ``None``, all others need numeric values
    
    Parameters
    ----------
    effect_size : float
        standardized effect size, difference between the two means divided
        by the standard deviation. `effect_size` has to be positive.
    nobs1 : int or float
        number of observations of sample 1. The number of observations of
        sample two is ratio times the size of sample 1,
        i.e. ``nobs2 = nobs1 * ratio``
    alpha : float in interval (0,1)
        significance level, e.g. 0.05, is the probability of a type I
        error, that is wrong rejections if the 

In [52]:
from typing import List
def analyze_conversion_funnel_experiment(
    nobs: int,
    funnel_ctr: List[float],
    funnel_performance: List[float],
    alpha=.05
):
    """
    Compare the power of two ways to evaluate a funnel experiment.

    The users are split 50/50 into variant 1 (baseline) and variant 2
    (treatment). For every funnel step the power of a two-sample t-test is
    computed twice:

    * on the cumulative conversion rate from the top of the funnel down to
      that step, using all users of each variant as the sample, and
    * on the click-through rate of that step alone, using only the users
      expected to reach the step as the sample.

    nobs: number of total users in test
    funnel_ctr: list of baseline click-through rates, one per funnel step;
        the length implicitly defines the number of steps in the funnel
    funnel_performance: list of relative increase in ctr per funnel step,
        e.g. [.1, .1, .1] means every step has a 10% higher ctr in variant 2
    alpha: significance level passed to the power calculation

    Returns a dict with, per variant, the group size, the expected number
    of users converting at each step, the step ctrs and the cumulative
    conversion rates, plus the two lists of per-step power values
    (rounded to 3 decimals).
    """
    treatment_share = .5

    nobs1 = int(nobs * (1 - treatment_share))
    nobs2 = int(nobs * (treatment_share))

    funnel_ctr1 = funnel_ctr
    # Variant 2 applies the relative uplift of each step to the baseline ctr.
    funnel_ctr2 = []
    for step, baseline_ctr in enumerate(funnel_ctr1):
        funnel_ctr2.append(baseline_ctr * (1 + funnel_performance[step]))

    def cumulative_funnel(group_size, step_ctrs):
        # Running product of the step ctrs = conversion rate from the top of
        # the funnel down to each step.
        topline_rates = []
        running_rate = 1
        for step_ctr in step_ctrs:
            running_rate *= step_ctr
            topline_rates.append(running_rate)
        # Expected (rounded) number of users completing each step.
        expected_users = [
            int(round(rate * group_size, 0)) for rate in topline_rates
        ]
        return topline_rates, expected_users

    def two_sample_power(size1, size2, rate1, rate2):
        # Effect size: difference in rates divided by the square root of
        # the average of the two Bernoulli variances.
        size_ratio = size2 / size1
        mean_variance = .5 * (rate2 * (1 - rate2) + rate1 * (1 - rate1))
        standardized_effect = (rate2 - rate1) / np.sqrt(mean_variance)
        power = tt_ind_solve_power(
            nobs1=size1,
            ratio=size_ratio,
            alpha=alpha,
            effect_size=standardized_effect,
        )
        return round(power, 3)

    conversion_funnel_steps1, users_converted1 = cumulative_funnel(nobs1, funnel_ctr1)
    conversion_funnel_steps2, users_converted2 = cumulative_funnel(nobs2, funnel_ctr2)

    # Topline view: every step is judged on the full group of each variant.
    power_conversion_rate_topline_to_step = []
    for rate1, rate2 in zip(conversion_funnel_steps1, conversion_funnel_steps2):
        power_conversion_rate_topline_to_step.append(
            two_sample_power(nobs1, nobs2, rate1, rate2)
        )

    # Step view: the sample for a step is the users who completed the
    # previous step (the whole group for the first step).
    users_entering1 = [nobs1] + users_converted1[:-1]
    users_entering2 = [nobs2] + users_converted2[:-1]
    power_ctr_by_step = [
        two_sample_power(entering1, entering2, step_ctr1, step_ctr2)
        for entering1, entering2, step_ctr1, step_ctr2 in zip(
            users_entering1, users_entering2, funnel_ctr1, funnel_ctr2
        )
    ]

    result = {}
    result["users1"] = nobs1
    result["users2"] = nobs2
    result["expected_users_per_step1"] = users_converted1
    result["expected_users_per_step2"] = users_converted2
    result["funnel_ctr1"] = funnel_ctr1
    result["funnel_ctr2"] = funnel_ctr2
    result["conversion_rate_topline_to_step1"] = conversion_funnel_steps1
    result["conversion_rate_topline_to_step2"] = conversion_funnel_steps2
    result["power_conversion_rate_topline_to_step"] = power_conversion_rate_topline_to_step
    result["power_ctr_by_step"] = power_ctr_by_step
    return result


    
# Scenario: 40,000 users, three-step funnel, 20% relative ctr increase on the
# last step only.
analyze_conversion_funnel_experiment(
    nobs=40000,
    funnel_ctr=[.5, .1, .1],
    funnel_performance=[.0, .0, .2],
)  # example validated with external power calculator



{'users1': 20000,
 'users2': 20000,
 'expected_users_per_step1': [10000, 1000, 100],
 'expected_users_per_step2': [10000, 1000, 120],
 'funnel_ctr1': [0.5, 0.1, 0.1],
 'funnel_ctr2': [0.5, 0.1, 0.12],
 'conversion_rate_topline_to_step1': [0.5, 0.05, 0.005000000000000001],
 'conversion_rate_topline_to_step2': [0.5, 0.05, 0.006],
 'power_conversion_rate_topline_to_step': [0.05, 0.05, 0.272],
 'power_ctr_by_step': [0.05, 0.05, 0.298]}

In [85]:
# Scenario: 10,000,000 users, 1% ctr on the first step, 2% relative ctr
# increase on the last step only.
analyze_conversion_funnel_experiment(
    nobs=10000000,
    funnel_ctr=[.01, .2, .5],
    funnel_performance=[.0, .0, .02],
)

{'users1': 5000000,
 'users2': 5000000,
 'expected_users_per_step1': [50000, 10000, 5000],
 'expected_users_per_step2': [50000, 10000, 5100],
 'funnel_ctr1': [0.01, 0.2, 0.5],
 'funnel_ctr2': [0.01, 0.2, 0.51],
 'conversion_rate_topline_to_step1': [0.01, 0.002, 0.001],
 'conversion_rate_topline_to_step2': [0.01, 0.002, 0.00102],
 'power_conversion_rate_topline_to_step': [0.05, 0.05, 0.169],
 'power_ctr_by_step': [0.05, 0.05, 0.293]}

In [77]:
# Scenario: 4,000,000 users, 10% relative ctr increase on the middle step only.
analyze_conversion_funnel_experiment(
    nobs=4000000,
    funnel_ctr=[.1, .02, .3],
    funnel_performance=[.0, .1, .0],
)

{'users1': 2000000,
 'users2': 2000000,
 'expected_users_per_step1': [200000, 4000, 1200],
 'expected_users_per_step2': [200000, 4400, 1320],
 'funnel_ctr1': [0.1, 0.02, 0.3],
 'funnel_ctr2': [0.1, 0.022000000000000002, 0.3],
 'conversion_rate_topline_to_step1': [0.1, 0.002, 0.0006],
 'conversion_rate_topline_to_step2': [0.1, 0.0022, 0.00066],
 'power_conversion_rate_topline_to_step': [0.05, 0.992, 0.667],
 'power_ctr_by_step': [0.05, 0.993, 0.05]}