# Experiment Parameters
Calculate the sample size and duration of marketing experiments

In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sms
from statsmodels.stats.power import GofChisquarePower


### Two Sample T Test
Used to compare the means of continuous variables (e.g., average time spent on the page).

In [7]:
from statsmodels.stats.power import TTestIndPower


# Parameters
alpha = 0.05  # Significance level
power = 0.8   # Statistical power

effect_size = 0.05  # Effect size (Cohen's d)

# Calculate sample size
analysis = TTestIndPower()
sample_size = analysis.solve_power(effect_size=effect_size, alpha=alpha, power=power, alternative='two-sided')

# Calculate duration
conversion_rate = (2783/211299)  # Example conversion rate
traffic_per_day = (211299/30.5)  # Example daily traffic

# Total traffic needed per group
total_traffic_per_group = sample_size / conversion_rate

# Duration in days
duration_days = total_traffic_per_group / traffic_per_day

print(f"Required sample size per group: {int(sample_size)}")
print(f"Total traffic needed per group: {int(total_traffic_per_group)}")
print(f"Duration needed (days): {int(np.ceil(duration_days))}")


Required sample size per group: 6280
Total traffic needed per group: 476812
Duration needed (days): 69


### One-Sided Z Test

Used to compare proportions (e.g., conversion rates) and sample sizes are sufficiently large.

In [3]:
import scipy.stats as stats
import numpy as np

def calculate_sample_size_one_sided(baseline_rate, effect_size, alpha, power):
    """
    Calculate the sample size required for a one-sided Z-test in an A/B test.

    Parameters:
    - baseline_rate: The current conversion rate of the control group.
    - effect_size: The minimum detectable effect (difference in proportions between control and test groups).
    - alpha: The significance level (commonly 0.05).
    - power: The desired power of the test (commonly 0.8).

    Returns:
    - Sample size for each group (number of sessions).
    """
    # Calculate pooled proportion
    pooled_rate = baseline_rate + effect_size / 2

    # Calculate standard deviation of the effect size
    effect_size_std = effect_size / np.sqrt(pooled_rate * (1 - pooled_rate))

    # Calculate Z-scores for the desired significance level and power
    z_alpha = stats.norm.ppf(1 - alpha)
    z_beta = stats.norm.ppf(power)

    # Calculate sample size
    sample_size = ((z_alpha + z_beta) ** 2) * (pooled_rate * (1 - pooled_rate)) / (effect_size ** 2)
    return sample_size

def calculate_test_duration(daily_sessions, sample_size_per_group):
    """
    Calculate the duration required for an A/B test based on daily sessions and required sample size.

    Parameters:
    - daily_sessions: The average number of sessions per day.
    - sample_size_per_group: The required sample size for each group.

    Returns:
    - Duration in days.
    """
    total_sample_size = sample_size_per_group * 2  # Considering both control and test groups
    duration = total_sample_size / daily_sessions
    return duration

# Example usage
baseline_rate = (2783/211299)  # Current conversion rate 
effect_size = 0.005    # Minimum detectable effect (not %)
alpha = 0.05          # Significance level
power = 0.8           # Power of the test

# Calculate sample size
sample_size_per_group = calculate_sample_size_one_sided(baseline_rate, effect_size, alpha, power)
print(f"Required sample size (sessions) for each group: {sample_size_per_group:.2f}")

# Example daily sessions
daily_sessions = 1000  # Average daily sessions

# Calculate test duration
duration = calculate_test_duration(daily_sessions, sample_size_per_group)
print(f"Estimated test duration in days: {duration:.2f}")



Required sample size (sessions) for each group: 3814.72
Estimated test duration in days: 7.63


### Two Sided Z Test

To test the difference between two metrics. 

In [7]:
import statsmodels.stats.api as sms
from statsmodels.stats.proportion import proportion_effectsize

# Parameters
alpha = 0.05  # Significance level
power = 0.8  # Power of the test
p1 = 0.0044  # Baseline CTR (e.g., 5%)
p2 = p1*1.1  # Expected CTR after change (e.g., 6%)

# Calculate effect size
effect_size = proportion_effectsize(p1, p2)

# Perform sample size calculation
power_analysis = sms.NormalIndPower()
sample_size = power_analysis.solve_power(effect_size=effect_size, 
                                         power=power, 
                                         alpha=alpha, 
                                         ratio=1)  # Ratio is 1 for 50/50 traffic split

# Calculate test duration 
daily_traffic = (150238/30.5)
duration = (sample_size*2)/daily_traffic

print(f"Required sample size per group: {sample_size:.0f}")
print("Requred test duration: ", duration)


Required sample size per group: 372663
Requred test duration:  151.30961613059867


### Chi Squared Test (Goodness of Fit) - Sample Size and Duration
It’s applied when you want to compare the distribution of categorical data to a theoretical distribution. For example, testing whether a dice is fair by comparing the observed outcomes of rolls to the expected equal probability for each face of the dice.

In [4]:
# Parameters
p1 = 0.145  # Baseline conversion rate
p2 = p1*1.05  # Expected conversion rate
alpha = 0.05  # Significance level
power = 0.80  # Statistical power

# Calculate effect size
effect_size = np.sqrt(((p1 - p2)**2) / p1)

# Calculate required sample size per group
power_analysis = GofChisquarePower()
sample_size_per_group = power_analysis.solve_power(effect_size=effect_size, nobs=None, alpha=alpha, power=power)

# Calculate total sample size (both groups)
total_sample_size = sample_size_per_group * 2

# Duration calculation
average_daily_visitors = (27492/30.5)  # Example average number of visitors per day
duration_days = total_sample_size / average_daily_visitors

print("Effect size: ", effect_size)
print(f"Required sample size per group: {sample_size_per_group:.0f}")
print(f"Total sample size (both groups): {total_sample_size:.0f}")
print(f"Duration required: {duration_days:.2f} days")


Effect size:  0.01903943276465979
Required sample size per group: 21652
Total sample size (both groups): 43304
Duration required: 48.04 days


### Chi Squared Test of Independence - Sample Size and Duration
Used to compare two samples of the same categorical variable to see if there is a significant difference. 

In [8]:
from statsmodels.stats.power import NormalIndPower
from statsmodels.stats.proportion import proportion_effectsize

# Parameters for the A/B test
alpha = 0.05  # Significance level
power = 0.8   # Power of the test
metric_A = 0.44  # CTR for Ad A (control group)
metric_B = metric_A*1.1  # CTR for Ad B (expected improvement)
metric_avg_per_day = ((514+96)/30.5)

# Calculate effect size (Cohen's h for proportions)
effect_size = proportion_effectsize(metric_A, metric_B)

# Create an instance of NormalIndPower
analysis = NormalIndPower()

# Calculate required sample size
sample_size = analysis.solve_power(effect_size, power=power, alpha=alpha, ratio=1)

# Calculate duration
duration = (sample_size*2)/metric_avg_per_day

print(f"Required sample size per group: {round(sample_size)}")
print(f"Total sample size needed: {round(sample_size) * 2}")
print(f"Test duration: ", duration)

Required sample size per group: 2014
Total sample size needed: 4028
Test duration:  201.40418216561673


### Chi Squared Test of Independence - Sample Size and Duration
Use this test when you want to evaluate whether two separate categorical variables are related or not. For example, testing if gender (male/female) is independent of voting preference (party A/party B).

In [7]:
from statsmodels.stats.power import GofChisquarePower

# Define parameters for sample size calculation
effect_size = 0.1  # Small effect size
alpha = 0.05      # Significance level
power = 0.8       # Desired power

# Create a power analysis object
power_analysis = GofChisquarePower()

# Calculate the required sample size
sample_size = power_analysis.solve_power(effect_size=effect_size, alpha=alpha, power=power)
print(f"Required sample size: {sample_size}")

# Define traffic parameters for duration estimation
daily_traffic = (27492/30.5) # Number of visitors per day

# Calculate the estimated duration
duration_days = sample_size / daily_traffic
print(f"Required Sample Size: {sample_size}")
print(f"Estimated duration: {duration_days} days")


Required sample size: 784.8860506688932
Required Sample Size: 784.8860506688932
Estimated duration: 0.870763296428097 days
