In [1]:
import pandas as pd
import numpy as np

# Seed NumPy's global generator so every run produces the same simulated users
np.random.seed(42)

# Number of users in the simulated experiment
n_users = 10000

# Give each user a variant label, 'A' (old design) or 'B' (new design);
# both labels are equally likely, so the split is roughly (not exactly) 50/50
group = np.random.choice(['A', 'B'], size=n_users)

# True conversion probabilities: 5% for the old design, 6.5% for the new one
conversion_rate_A = 0.05
conversion_rate_B = 0.065

# One uniform draw per user for each variant. The draws for A are taken
# before the draws for B so the random stream is consumed in a fixed order.
draws_A = np.random.rand(n_users)
draws_B = np.random.rand(n_users)

# A user converts when the draw for their own variant falls below that variant's rate
converted_A = (group == 'A') & (draws_A < conversion_rate_A)
converted_B = (group == 'B') & (draws_B < conversion_rate_B)
conversions = np.where(converted_A | converted_B, 1, 0)

# One row per user: assigned variant and 0/1 conversion flag
columns = {'Group': group, 'Converted': conversions}
data = pd.DataFrame(columns)

# Preview the first rows
print(data.head())

  Group  Converted
0     A          0
1     B          0
2     A          0
3     A          0
4     A          0


In [2]:
# Number of users assigned to each variant
group_sizes = data['Group'].value_counts()
print(group_sizes)

# Observed conversion rate per variant: the mean of the 0/1 'Converted' flag
conversion_rates = data['Converted'].groupby(data['Group']).mean()
print(conversion_rates)

Group
A    5013
B    4987
Name: count, dtype: int64
Group
A    0.050070
B    0.063365
Name: Converted, dtype: float64


In [3]:
from statsmodels.stats.proportion import proportions_ztest

# Boolean row masks selecting each variant
in_A = data['Group'] == 'A'
in_B = data['Group'] == 'B'

# Number of converted users in each variant
conversions_A = data.loc[in_A, 'Converted'].sum()
conversions_B = data.loc[in_B, 'Converted'].sum()

# Number of users in each variant
n_A = len(data[in_A])
n_B = len(data[in_B])

# Two-proportion z-test; both arrays are ordered [A, B]
count = np.array([conversions_A, conversions_B])
nobs = np.array([n_A, n_B])
stat, pval = proportions_ztest(count, nobs)

print(f"Z-Statistic: {stat:.4f}")
print(f"P-Value: {pval:.4f}")

Z-Statistic: -2.8743
P-Value: 0.0040


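The test returns a z-statistic and a p-value. If the p-value is less than 0.05, we reject the null hypothesis that the two groups have the same conversion rate. Here the p-value is 0.0040, so the difference in conversion rate between the old and new designs is statistically significant at the 5% level.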

In [4]:
# Show the conversion counts passed to the z-test, ordered [A, B]
count

array([251, 316])

In [5]:
# Show the group sizes passed to the z-test, ordered [A, B]
nobs

array([5013, 4987])