### Two-proportion z-test to compare customer proportions between groups

In [None]:
from statsmodels.stats.proportion import proportions_ztest
import pandas as pd

# Proportions per customer category for the two groups being compared
# (labelled 0.1% and 0.5% in the column names).
data = {
    'Category': ['Never Use Coupon', 'Buy 1-3 Categories', 'Buy 50+ Categories'],
    'Customer Count Proportion 0.1%': [0.484003, 0.013765, 0.083333],
    'Profit Proportion 0.1%': [0.146583, 0.003421, 0.451492],
    'Customer Count Proportion 0.5%': [0.489452, 0.012824, 0.075960],
    'Profit Proportion 0.5%': [0.142310, 0.002219, 0.443786],
}

df = pd.DataFrame(data)

# A proportion alone carries no sample size, yet the z-test needs counts.
# The group size below is an assumption made for demonstration only; the
# resulting statistic and p-value depend on it, so treat them as illustrative.
n_customers = 1000

# Scale the customer-count proportions up to (assumed) head counts per group.
df['Customer Count 0.1%'] = df['Customer Count Proportion 0.1%'].mul(n_customers)
df['Customer Count 0.5%'] = df['Customer Count Proportion 0.5%'].mul(n_customers)

# Pick out the 'Buy 50+ Categories' row and lay its two counts out as a flat
# array: one entry per group.
is_target_category = df['Category'].eq('Buy 50+ Categories')
count = df.loc[is_target_category, ['Customer Count 0.1%', 'Customer Count 0.5%']].to_numpy().ravel()

# Both groups are assumed to contain the same number of observations.
nobs = [n_customers] * 2

# Two-proportion z-test comparing the two groups for that category.
stat, pval = proportions_ztest(count, nobs)

# Bare expression so the notebook displays the result.
stat, pval


In [None]:
# Run the two-proportion z-test once per customer category and collect the
# results in a table. This cell relies on `df` (including its
# 'Customer Count 0.1%' / 'Customer Count 0.5%' columns) and on `nobs`, both
# created in the preceding cell.

# There are three comparisons to make:
# 1. Never Use Coupon
# 2. Buy 1-3 Categories
# 3. Buy 50+ Categories

# Pull the two count columns out once as an (n_categories, 2) array instead of
# re-filtering the whole frame by label on every iteration. Working row by row
# also keeps each test at exactly two counts (matching `nobs`) even if a
# category label were to appear more than once.
count_columns = ['Customer Count 0.1%', 'Customer Count 0.5%']
counts_by_category = df[count_columns].to_numpy()

# Initialize a list to hold the z-test results
ztest_results = []

# Each row supplies the pair of counts (one per group) for one category.
for category, count in zip(df['Category'], counts_by_category):
    # Perform the two-proportion z-test
    stat, pval = proportions_ztest(count, nobs)

    # Append the results to our list
    ztest_results.append((category, stat, pval))

# Convert the z-test results to a DataFrame
output_df = pd.DataFrame(ztest_results, columns=['Category', 'Z-Score', 'P-Value'])

# Bare expression so the notebook displays the table.
output_df
