## Frequentist statistics approach

#### Case 1: Target Metric is Conversion (Chi-squared test)

In [33]:
import numpy as np
import scipy.stats as stats

# data
control_conversions = 100
control_users = 1000
variant_conversions = 130
variant_users = 1000

# Create contingency table
observed = np.array([[control_conversions, control_users - control_conversions],
                     [variant_conversions, variant_users - variant_conversions]])

# Calculate expected frequencies
row_totals = np.sum(observed, axis=1)
col_totals = np.sum(observed, axis=0)
total = np.sum(observed)
expected = np.outer(row_totals, col_totals) / total

# Calculate chi-squared statistic
chi2_statistic = np.sum((observed - expected)**2 / expected)

# Calculate degrees of freedom
degrees_of_freedom = (observed.shape[0] - 1) * (observed.shape[1] - 1)

# Calculate p-value
p_value = 1 - stats.chi2.cdf(chi2_statistic, degrees_of_freedom)

# Display key info and check significance
print(f"Contingency table:\n {observed}\n")
print(f"Control conversion: {control_conversions / control_users * 100}%")
print(f"Variant conversion: {variant_conversions / variant_users * 100}%\n")
print(f"chi2-statistic: {round(chi2_statistic,3)}")
print(f"p-value: {round(p_value,3)}\n")
if p_value < 0.05:
    print("✅ Statistically significant difference")
else:
    print("❌ No statistically significant difference")

Contingency table:
 [[100 900]
 [130 870]]

Control conversion: 10.0%
Variant conversion: 13.0%

chi2-statistic: 4.422
p-value: 0.035

✅ Statistically significant difference


#### Case 2: Target Metric is ARPU

In [51]:
import numpy as np
import scipy.stats as stats

# Sample data for control group (revenue per user)
control_revenue_per_user = np.array([10, 20, 30, 15, 25])  # Sample revenue per user for control group
control_users = len(control_revenue_per_user)

# Sample data for variant group (revenue per user)
variant_revenue_per_user = np.array([20, 30, 50, 35, 45])  # Sample revenue per user for variant group
variant_users = len(variant_revenue_per_user)

# Calculate mean and standard deviation for revenue per user
control_mean = np.mean(control_revenue_per_user)
control_std = np.std(control_revenue_per_user, ddof=1)  # Use ddof=1 for sample standard deviation
variant_mean = np.mean(variant_revenue_per_user)
variant_std = np.std(variant_revenue_per_user, ddof=1)  # Use ddof=1 for sample standard deviation

# Perform two-sample t-test
t_statistic, p_value = stats.ttest_ind_from_stats(mean1=control_mean, std1=control_std, nobs1=control_users,
                                                  mean2=variant_mean, std2=variant_std, nobs2=variant_users)

# Display key info and check significance
print(f"Control ARPU: ${round(control_mean,2)}")
print(f"Variant ARPU: ${round(variant_mean,2)}\n")
print(f"Control standard deviation: ${round(control_std,2)}")
print(f"Variant standard deviation: ${round(variant_std,2)}\n")
print(f"t-statistic: {round(t_statistic,3)}")
print(f"p-value: {round(p_value,3)}\n")
if p_value < 0.05:
    print("✅ Statistically significant difference")
else:
    print("❌ No statistically significant difference")


Control ARPU: $20.0
Variant ARPU: $36.0

Control standard deviation: $7.91
Variant standard deviation: $11.94

t-statistic: -2.499
p-value: 0.037

✅ Statistically significant difference


## Bayesian statistics approach

#### Case 1: Target Metric is Conversion¶

In [4]:
import numpy as np
import pymc3 as pm

# Data for conversion rate
control_conversions = 500
control_users = 1000
variant_conversions = 550
variant_users = 1000

# Define Bayesian model for conversion rate
with pm.Model() as conversion_model:
    # Define priors
    control_conversions_prior = pm.Beta('control_conversions_prior', alpha=1, beta=1)
    variant_conversions_prior = pm.Beta('variant_conversions_prior', alpha=1, beta=1)

    # Define likelihood
    control_conversions_observed = pm.Binomial('control_conversions_observed', n=control_users, p=control_conversions_prior, observed=control_conversions)
    variant_conversions_observed = pm.Binomial('variant_conversions_observed', n=variant_users, p=variant_conversions_prior, observed=variant_conversions)

    # Define treatment effect
    treatment_effect_conversion = pm.Deterministic('treatment_effect_conversion', variant_conversions_prior - control_conversions_prior)

    # Sample from posterior distribution
    trace_conversion = pm.sample(1000, tune=1000, cores=1)

# Analyze posterior samples
pm.summary(trace_conversion)

# Calculate probability that variant beats control
conversion_diff = trace_conversion['treatment_effect_conversion']
prob_variant_beats_control = np.mean(conversion_diff > 0)
print("Probability that variant beats control:", prob_variant_beats_control)


  return wrapped_(*args_, **kwargs_)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (2 chains in 1 job)
NUTS: [variant_conversions_prior, control_conversions_prior]


Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 30 seconds.
Got error No model on context stack. trying to find log_likelihood in translation.


Probability that variant beats control: 0.986


#### Case 2: Target Metric is ARPU

In [7]:
import numpy as np
import pymc3 as pm

# Example data for ARPU
control_revenues = [95, 110, 105, 90, 100, 115, 105, 120, 95, 110]
variant_revenues = [105, 100, 115, 120, 110, 100, 105, 110, 115, 100]

# Calculate ARPU
control_arpu = np.mean(control_revenues)
variant_arpu = np.mean(variant_revenues)

# Define Bayesian model for ARPU
with pm.Model() as arpu_model:
    # Define priors
    control_arpu_prior = pm.Normal('control_arpu_prior', mu=control_arpu, sd=20)
    variant_arpu_prior = pm.Normal('variant_arpu_prior', mu=variant_arpu, sd=20)

    # Define likelihood
    control_arpu_observed = pm.Normal('control_arpu_observed', mu=control_arpu_prior, sd=20, observed=control_arpu)
    variant_arpu_observed = pm.Normal('variant_arpu_observed', mu=variant_arpu_prior, sd=20, observed=variant_arpu)

    # Define treatment effect
    treatment_effect_arpu = pm.Deterministic('treatment_effect_arpu', variant_arpu_prior - control_arpu_prior)

    # Sample from posterior distribution
    trace_arpu = pm.sample(1000, tune=1000, cores=1)

# Analyze posterior samples
pm.summary(trace_arpu)

# Calculate probability that variant beats control
arpu_diff = trace_arpu['treatment_effect_arpu']
prob_variant_beats_control = np.mean(arpu_diff > 0)
print("Probability that variant beats control:", prob_variant_beats_control)


  return wrapped_(*args_, **kwargs_)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
  variables = ufunc(*ufunc_args, **ufunc_kwargs)
  variables = ufunc(*ufunc_args, **ufunc_kwargs)
Sequential sampling (2 chains in 1 job)
NUTS: [variant_arpu_prior, control_arpu_prior]


Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 13 seconds.
Got error No model on context stack. trying to find log_likelihood in translation.


Probability that variant beats control: 0.578
