[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/wasim/Data-Science/blob/main/data-analyst-roadmap/05_statistics_for_data_analysis/05_ab_testing.ipynb)

# A/B Testing

Make data-driven product decisions.

## What is A/B Testing?
- Compare two versions (A vs B)
- Measure which performs better
- Used in product, marketing, UX
- Statistical decision making

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

# Notebook-wide configuration.
# Seed NumPy's global RNG so every simulated dataset below is reproducible
# on a fresh Restart & Run All.
np.random.seed(42)
# Use seaborn's grid-on-white theme for all figures.
sns.set_style("whitegrid")

## 1. A/B Test Setup

### Steps:
1. Define hypothesis
2. Choose metric
3. Determine sample size
4. Run experiment
5. Analyze results
6. Make decision

## 2. Conversion Rate Test

Compare conversion rates between variants.

In [None]:
# Website button experiment
#   A (control):   blue button
#   B (treatment): red button
visitors_a, conversions_a = 1000, 120
visitors_b, conversions_b = 1000, 145

# Observed conversion rate = conversions / visitors for each arm
rate_a = conversions_a / visitors_a
rate_b = conversions_b / visitors_b

# One report row per variant: (heading, visitors, conversions, rate)
summary_rows = [
    ("Control (A - Blue Button):", visitors_a, conversions_a, rate_a),
    ("\nTreatment (B - Red Button):", visitors_b, conversions_b, rate_b),
]
for heading, n_visitors, n_conversions, rate in summary_rows:
    print(heading)
    print(f"Visitors: {n_visitors}")
    print(f"Conversions: {n_conversions}")
    print(f"Rate: {rate:.2%}")

# Lift is the change in B's rate relative to A's rate
relative_lift = (rate_b - rate_a) / rate_a
print(f"\nLift: {relative_lift:.2%}")

In [None]:
# Bar chart of the observed conversion rate for each variant
data = pd.DataFrame({
    'Variant': ['A (Blue)', 'B (Red)'],
    'Conversion_Rate': [rate_a, rate_b],
})

plt.figure(figsize=(10, 6))
ax = plt.gca()
bars = ax.bar(
    data['Variant'],
    data['Conversion_Rate'],
    color=['blue', 'red'],
    alpha=0.7,
)
ax.set_ylabel('Conversion Rate')
ax.set_title('A/B Test Results')
# 20% headroom above the tallest bar so the value labels are not clipped
ax.set_ylim(0, data['Conversion_Rate'].max() * 1.2)

# Write each bar's rate, formatted as a percentage, just above the bar
for bar in bars:
    height = bar.get_height()
    label_x = bar.get_x() + bar.get_width() / 2.
    ax.text(label_x, height, f'{height:.2%}', ha='center', va='bottom')

plt.show()

In [None]:
# Two-proportion z-test: is the gap between the two observed conversion
# rates larger than sampling noise alone would explain?
from statsmodels.stats.proportion import proportions_ztest

# Successes and trials per arm, in (A, B) order
count = np.array([conversions_a, conversions_b])
nobs = np.array([visitors_a, visitors_b])
z_stat, p_value = proportions_ztest(count, nobs, alternative='two-sided')

print("Two-Proportion Z-Test")
print("H₀: p_A = p_B")
print("H₁: p_A ≠ p_B")
print(f"\nz-statistic: {z_stat:.4f}")
print(f"p-value: {p_value:.4f}")

# Decision rule at the chosen significance level
alpha = 0.05
is_significant = p_value < alpha
if is_significant:
    winner = 'B' if rate_b > rate_a else 'A'
    verdict_lines = [
        f"\nResult: Reject H₀ (p < {alpha})",
        "Conclusion: Significant difference!",
        f"Winner: {winner}",
    ]
else:
    verdict_lines = [
        f"\nResult: Fail to reject H₀ (p ≥ {alpha})",
        "Conclusion: No significant difference",
    ]
print("\n".join(verdict_lines))

## 3. Continuous Metric Test

Compare average values (e.g., revenue).

In [None]:
# Simulated pricing experiment
#   A: $9.99 price point
#   B: $12.99 price point
# Revenue per user is drawn from a normal distribution for each arm
# (A is drawn first, then B, from NumPy's global RNG).
n_per_variant = 500
revenue_a = np.random.normal(loc=100, scale=20, size=n_per_variant)
revenue_b = np.random.normal(loc=110, scale=20, size=n_per_variant)

# Long-format frame: one row per user, labelled with its variant
variant_labels = ['A'] * revenue_a.size + ['B'] * revenue_b.size
df_revenue = pd.DataFrame({
    'Revenue': np.concatenate((revenue_a, revenue_b)),
    'Variant': variant_labels,
})

print("Summary Statistics:")
summary_by_variant = df_revenue.groupby('Variant')['Revenue'].describe()
print(summary_by_variant)

In [None]:
# Compare the two revenue distributions: box plot (left) and
# overlaid histograms (right)
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14, 5))
box_ax, hist_ax = axes

# Left panel: box plot per variant
sns.boxplot(data=df_revenue, x='Variant', y='Revenue', ax=box_ax)
box_ax.set_title('Revenue Distribution')

# Right panel: one semi-transparent histogram per variant
for variant_label, variant_revenue in (('A', revenue_a), ('B', revenue_b)):
    hist_ax.hist(variant_revenue, alpha=0.6, label=variant_label, bins=30)
hist_ax.set_xlabel('Revenue')
hist_ax.set_ylabel('Frequency')
hist_ax.set_title('Revenue Distributions')
hist_ax.legend()

plt.tight_layout()
plt.show()

In [None]:
# Two-sample t-test on revenue per user.
#
# Two deliberate choices:
#   * Welch's test (equal_var=False) is used so the result does not rely on
#     both variants having the same revenue variance.
#   * B is passed first so the t-statistic has the same sign as the reported
#     mean difference (B - A): a positive value means B earned more.
t_stat, p_value = stats.ttest_ind(
    revenue_b, revenue_a, equal_var=False
)
mean_diff = revenue_b.mean() - revenue_a.mean()

print("Two-Sample t-Test (Welch)")
print("H₀: μ_A = μ_B")
print("H₁: μ_A ≠ μ_B")
print(f"\nt-statistic: {t_stat:.4f}")
print(f"p-value: {p_value:.4f}")
print(f"\nMean difference (B - A): ${mean_diff:.2f}")

# Decision rule at the chosen significance level
alpha = 0.05
if p_value < alpha:
    print(f"\nResult: Reject H₀ (p < {alpha})")
    print("Conclusion: Significant difference!")
    # Name the better arm, mirroring the conversion-rate test output
    print(f"Winner: {'B' if mean_diff > 0 else 'A'}")
else:
    print(f"\nResult: Fail to reject H₀ "
          f"(p ≥ {alpha})")
    print("Conclusion: No significant difference")

## 4. Sample Size Calculation

Determine how many users are needed.

In [None]:
from statsmodels.stats.power import (
    zt_ind_solve_power
)

# Design parameters for a two-sided test comparing two conversion rates
baseline_rate = 0.10  # Control conversion rate (10%)
mde = 0.02  # Minimum detectable effect: +2 percentage points (absolute)
alpha = 0.05  # Significance level
power = 0.80  # Statistical power

# Standardised effect size for the z-test.
# The difference is scaled by the average of the two arms' Bernoulli
# variances. Scaling by the baseline variance alone ignores the treatment
# arm's variance and, for rates below 50% with a positive MDE, understates
# the required sample size.
treatment_rate = baseline_rate + mde
avg_variance = (
    baseline_rate * (1 - baseline_rate)
    + treatment_rate * (1 - treatment_rate)
) / 2
effect_size = mde / np.sqrt(avg_variance)

# Solve for the number of users per group (equal allocation)
sample_size = zt_ind_solve_power(
    effect_size=effect_size,
    alpha=alpha,
    power=power,
    ratio=1.0,
    alternative='two-sided'
)

# Round up once per group; the total is exactly two whole groups so the
# two printed numbers can never disagree.
n_per_group = int(np.ceil(sample_size))

print("Sample Size Calculation")
print(f"Baseline rate: {baseline_rate:.1%}")
print(f"Minimum detectable effect: {mde:.1%}")
print(f"Significance level (α): {alpha}")
print(f"Power (1-β): {power}")
print(f"\nSample size per group: "
      f"{n_per_group}")
print(f"Total sample size: "
      f"{2 * n_per_group}")

## 5. Confidence Intervals

Estimate the range of the true effect.

In [None]:
# Confidence interval for the difference in conversion rates (B - A)
from statsmodels.stats.proportion import (
    confint_proportions_2indep
)

ci_alpha = 0.05  # 1 - confidence level

# confint_proportions_2indep returns the interval for
# (first sample - second sample). B is passed first so the interval is for
# B - A, matching the point estimate and the interpretation below.
ci_low, ci_high = confint_proportions_2indep(
    conversions_b, visitors_b,
    conversions_a, visitors_a,
    method='wald',
    compare='diff',
    alpha=ci_alpha
)

# Recompute the estimate from the raw counts rather than from rate_a/rate_b,
# so this cell does not depend on whatever those globals currently hold.
point_estimate = conversions_b / visitors_b - conversions_a / visitors_a

print(f"{1 - ci_alpha:.0%} Confidence Interval")
print("Difference in conversion rates (B - A):")
print(f"Point estimate: "
      f"{point_estimate:.4f}")
print(f"CI: [{ci_low:.4f}, {ci_high:.4f}]")

# An interval that excludes zero means the sign of the effect is clear
if ci_low > 0:
    print("\nInterpretation: B is significantly "
          "better")
elif ci_high < 0:
    print("\nInterpretation: A is significantly "
          "better")
else:
    print("\nInterpretation: No clear winner")

## 6. Sequential Testing

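Monitor test progress over time. Note that re-checking an unadjusted p-value every day, as the simulation below does, inflates the false-positive rate; a real sequential test needs adjusted stopping boundaries (e.g., alpha spending).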

In [None]:
# Simulate a two-week test and re-run the z-test on the cumulative data
# after every day.
# NOTE: the daily p-values are unadjusted. Stopping the first time one dips
# below 0.05 ("peeking") inflates the false-positive rate.
from statsmodels.stats.proportion import proportions_ztest

days = 14
daily_visitors = 100  # visitors per variant per day
true_rate_a = 0.10  # simulated true conversion rate of A
true_rate_b = 0.12  # simulated true conversion rate of B
min_visitors_for_test = 50  # skip the test until each arm has more than this

results = []
cumulative_a_conv = 0
cumulative_a_vis = 0
cumulative_b_conv = 0
cumulative_b_vis = 0

for day in range(1, days + 1):
    # Simulate daily results (A is drawn before B each day)
    a_conv = np.random.binomial(daily_visitors, true_rate_a)
    b_conv = np.random.binomial(daily_visitors, true_rate_b)

    cumulative_a_conv += a_conv
    cumulative_a_vis += daily_visitors
    cumulative_b_conv += b_conv
    cumulative_b_vis += daily_visitors

    # Loop-local names: do not overwrite the notebook-level rate_a / rate_b
    # from the button experiment, which other cells read.
    cum_rate_a = cumulative_a_conv / cumulative_a_vis
    cum_rate_b = cumulative_b_conv / cumulative_b_vis

    # Test significance on everything observed so far
    if cumulative_a_vis > min_visitors_for_test:
        _, p = proportions_ztest(
            [cumulative_a_conv, cumulative_b_conv],
            [cumulative_a_vis, cumulative_b_vis]
        )
    else:
        p = 1.0  # too little data: record as "not significant"

    results.append({
        'Day': day,
        'Rate_A': cum_rate_a,
        'Rate_B': cum_rate_b,
        'p_value': p
    })

df_results = pd.DataFrame(results)
df_results.head(10)

In [None]:
# Two stacked panels: cumulative conversion rates (top) and the running
# p-value against the significance threshold (bottom)
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 10))
rate_ax, pval_ax = axes
day_axis = df_results['Day']

# Top panel: cumulative conversion rate of each variant
for column, marker_style, variant_label in (
    ('Rate_A', 'o-', 'A'),
    ('Rate_B', 's-', 'B'),
):
    rate_ax.plot(day_axis, df_results[column], marker_style, label=variant_label)
rate_ax.set_xlabel('Day')
rate_ax.set_ylabel('Conversion Rate')
rate_ax.set_title('Conversion Rates Over Time')
rate_ax.legend()
rate_ax.grid(True)

# Bottom panel: p-value per day, with the 0.05 threshold as a dashed line
pval_ax.plot(day_axis, df_results['p_value'], 'o-')
pval_ax.axhline(y=0.05, color='r', linestyle='--', label='α = 0.05')
pval_ax.set_xlabel('Day')
pval_ax.set_ylabel('p-value')
pval_ax.set_title('Statistical Significance Over Time')
pval_ax.legend()
pval_ax.grid(True)

plt.tight_layout()
plt.show()

## 7. Common Pitfalls

### ❌ Mistakes to Avoid:
1. **Peeking** - Checking results too early
2. **Small samples** - Not enough data
3. **Multiple testing** - Testing many variants or metrics without correcting for multiple comparisons
4. **Selection bias** - Non-random assignment
5. **Stopping early** - Declaring winner too soon

## Practice Exercises

### Exercise 1
Calculate the required sample size for
detecting a 5% lift in conversion rate.

In [None]:
# Your code here


### Exercise 2
Run an A/B test on email subject lines
and determine the winner.

In [None]:
# Your code here


## Key Takeaways

✅ **Hypothesis** - Define before testing  
✅ **Sample size** - Calculate upfront  
✅ **Randomization** - Ensure fair assignment  
✅ **Significance** - Use p-value < 0.05  
✅ **Confidence intervals** - Estimate effect  
✅ **Avoid peeking** - Wait for full sample  

**Next:** [README](README.md) →