In [2]:
import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency

# Step 1: Define and Split Audience
def split_audience(file_name):
    # Load user data
    user_data = pd.read_csv(file_name)

    # Randomly split users into Group A (Control) and Group B (Treatment)
    np.random.seed(42)
    user_data['group'] = np.random.choice(['Control', 'Treatment'], size=len(user_data), p=[0.5, 0.5])

    # Save datasets for each group
    group_a = user_data[user_data['group'] == 'Control']
    group_b = user_data[user_data['group'] == 'Treatment']

    # Save the groups as CSV files
    group_a.to_csv("group_a_control.csv", index=False)
    group_b.to_csv("group_b_treatment.csv", index=False)

    print(f"Group A (Control) size: {len(group_a)}")
    print(f"Group B (Treatment) size: {len(group_b)}")


# Step 2: Analyze A/B Test Results
def analyze_results(metrics_file):
    # Load engagement metrics
    metrics = pd.read_csv(metrics_file)

    # Calculate CTR for each group
    metrics['CTR'] = (metrics['clicks'] / metrics['total_emails_sent']) * 100

    # Print CTR for both groups
    print("CTR Metrics:")
    print(metrics)

    # Perform Chi-Square Test
    contingency_table = [
        metrics[metrics['group'] == 'Control'][['clicks', 'total_emails_sent']].values[0],
        metrics[metrics['group'] == 'Treatment'][['clicks', 'total_emails_sent']].values[0]
    ]
    chi2, p, _, _ = chi2_contingency(contingency_table)

    print(f"\nChi-Square Statistic: {chi2}, p-value: {p}")

    # Check statistical significance
    if p < 0.05:
        print("\nThe difference in CTR is statistically significant.")
    else:
        print("\nThe difference in CTR is not statistically significant.")


# Example Usage:
# Step 1: Split Audience
# Replace 'your_dataset.csv' with your actual dataset file
split_audience('email_table_with_opened_clicked.csv')

# Step 2: Analyze A/B Test Results
# Replace 'email_engagement_metrics.csv' with your actual metrics file
analyze_results('email_engagement_metrics.csv')

Group A (Control) size: 49934
Group B (Treatment) size: 50066
CTR Metrics:
       group  total_emails_sent  clicks    CTR
0    Control              10000     227   2.27
1  Treatment               5000     500  10.00

Chi-Square Statistic: 381.4514658532239, p-value: 6.011126757549432e-85

The difference in CTR is statistically significant.


# Results Summary

## Group Sizes:
- **Group A (Control):** 49,934 users  
- **Group B (Treatment):** 50,066 users  

## CTR Metrics:
- **Control Group CTR:** 2.27%  
- **Treatment Group CTR:** 10.00%  

## Statistical Analysis:
- **Chi-Square Statistic:** 381.45  
- **p-value:** 6.01  

## Conclusion:
The difference in CTR between the Control and Treatment groups is statistically significant (**p < 0.05**).  
This indicates that the predictive model used for the Treatment group has a substantial positive impact on the click-through rate (CTR).

---