# Experiment Parameters
Calculate the sample size and duration of marketing experiments

In [44]:
import numpy as np
import pandas as pd
import statsmodels.api as sms
from statsmodels.stats.power import GofChisquarePower


### Two Sample T Test
Used to compare the means of continuous variables (e.g., average time spent on the page).

In [42]:
# Parameters
alpha = 0.05  # Significance level
power = 0.8   # Statistical power

effect_size = 0.05  # Effect size (Cohen's d)

# Calculate sample size
analysis = sms.TTestIndPower()
sample_size = analysis.solve_power(effect_size=effect_size, alpha=alpha, power=power, alternative='two-sided')

# Calculate duration
conversion_rate = 0.05  # Example conversion rate
traffic_per_day = 3000  # Example daily traffic

# Total traffic needed per group
total_traffic_per_group = sample_size / conversion_rate

# Duration in days
duration_days = total_traffic_per_group / traffic_per_day

print(f"Required sample size per group: {int(sample_size)}")
print(f"Total traffic needed per group: {int(total_traffic_per_group)}")
print(f"Duration needed (days): {int(np.ceil(duration_days))}")


Required sample size per group: 6280
Total traffic needed per group: 125600
Duration needed (days): 42


### One-Sided Z Test

Used to compare proportions (e.g., conversion rates) and sample sizes are sufficiently large.

In [32]:
import scipy.stats as stats
import numpy as np

def calculate_sample_size_one_sided(baseline_rate, effect_size, alpha, power):
    """
    Calculate the sample size required for a one-sided Z-test in an A/B test.

    Parameters:
    - baseline_rate: The current conversion rate of the control group.
    - effect_size: The minimum detectable effect (difference in proportions between control and test groups).
    - alpha: The significance level (commonly 0.05).
    - power: The desired power of the test (commonly 0.8).

    Returns:
    - Sample size for each group (number of sessions).
    """
    # Calculate pooled proportion
    pooled_rate = baseline_rate + effect_size / 2

    # Calculate standard deviation of the effect size
    effect_size_std = effect_size / np.sqrt(pooled_rate * (1 - pooled_rate))

    # Calculate Z-scores for the desired significance level and power
    z_alpha = stats.norm.ppf(1 - alpha)
    z_beta = stats.norm.ppf(power)

    # Calculate sample size
    sample_size = ((z_alpha + z_beta) ** 2) * (pooled_rate * (1 - pooled_rate)) / (effect_size ** 2)
    return sample_size

def calculate_test_duration(daily_sessions, sample_size_per_group):
    """
    Calculate the duration required for an A/B test based on daily sessions and required sample size.

    Parameters:
    - daily_sessions: The average number of sessions per day.
    - sample_size_per_group: The required sample size for each group.

    Returns:
    - Duration in days.
    """
    total_sample_size = sample_size_per_group * 2  # Considering both control and test groups
    duration = total_sample_size / daily_sessions
    return duration

# Example usage
baseline_rate = 0.05  # Current conversion rate 
effect_size = 0.005    # Minimum detectable effect (not %)
alpha = 0.05          # Significance level
power = 0.8           # Power of the test

# Calculate sample size
sample_size_per_group = calculate_sample_size_one_sided(baseline_rate, effect_size, alpha, power)
print(f"Required sample size (sessions) for each group: {sample_size_per_group:.2f}")

# Example daily sessions
daily_sessions = 1000  # Average daily sessions

# Calculate test duration
duration = calculate_test_duration(daily_sessions, sample_size_per_group)
print(f"Estimated test duration in days: {duration:.2f}")



Required sample size (sessions) for each group: 12301.74
Estimated test duration in days: 24.60


### Chi Squared Test
Used to compare categorical data, like comparing conversion rates (success/failure).

In [45]:
# Parameters
p1 = 0.10  # Baseline conversion rate
p2 = 0.12  # Expected conversion rate
alpha = 0.05  # Significance level
power = 0.80  # Statistical power

# Calculate effect size
effect_size = np.sqrt(((p1 - p2)**2) / p1)

# Calculate required sample size per group
power_analysis = GofChisquarePower()
sample_size_per_group = power_analysis.solve_power(effect_size=effect_size, nobs=None, alpha=alpha, power=power)

# Calculate total sample size (both groups)
total_sample_size = sample_size_per_group * 2

# Duration calculation
average_daily_visitors = 3000  # Example average number of visitors per day
duration_days = total_sample_size / average_daily_visitors

print(f"Required sample size per group: {sample_size_per_group:.0f}")
print(f"Total sample size (both groups): {total_sample_size:.0f}")
print(f"Duration required: {duration_days:.2f} days")


Required sample size per group: 1962
Total sample size (both groups): 3924
Duration required: 1.31 days
