# Enhanced A/B Test Analysis with Business Insights

## Comprehensive analysis of A/B test results with data visualizations and actionable business recommendations

**Author:** Enhanced Analysis  
**Date:** 2024  
**Objective:** Determine whether the new product feature should be implemented, based on statistical evidence and business impact

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.stats.proportion import proportions_ztest, proportion_confint
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

# Set visualization style.
# Newer matplotlib releases ship the seaborn look under the 'seaborn-v0_8'
# name; older ones only know the unprefixed 'seaborn', so fall back to it.
try:
    plt.style.use('seaborn-v0_8')
except OSError:
    # plt.style.use raises OSError for an unknown style name. Catching only
    # that keeps KeyboardInterrupt and genuine bugs from being swallowed.
    plt.style.use('seaborn')
sns.set_palette("husl")
plt.rcParams['figure.figsize'] = (12, 8)

## 1. Data Loading and Exploration

In [None]:
# Read the raw A/B test export into the working DataFrame
df = pd.read_csv('data/ab_testing.csv')

# Report the size and column names in one call (one item per line), then let
# the notebook render the preview as the cell's result
print(
    f"Dataset Shape: {df.shape}",
    f"Columns: {list(df.columns)}",
    "\nFirst 5 rows:",
    sep="\n",
)
df.head()

In [None]:
# Quick health check: schema, null counts, and the balance of the two
# categorical columns the rest of the analysis relies on
print("Dataset Info:")
df.info()

print("\nMissing Values:")
missing_per_column = df.isnull().sum()
print(missing_per_column)

# Same report for each categorical column of interest
for column in ('Group', 'Conversion'):
    print(f"\n{column} Distribution:")
    print(df[column].value_counts())

## 2. Basic Conversion Metrics

In [None]:
# Encode the Yes/No outcome as 1/0 so it can be summed and averaged
yes_no_to_int = {'Yes': 1, 'No': 0}
df['Conversion_Binary'] = df['Conversion'].map(yes_no_to_int)

# Per-group summary: conversion count / total / rate, plus the centre and
# spread of the two engagement columns
summary_spec = {
    'Conversion_Binary': ['count', 'sum', 'mean'],
    'Page Views': ['mean', 'median', 'std'],
    'Time Spent': ['mean', 'median', 'std'],
}
per_group = df.groupby('Group')
group_stats = per_group.agg(summary_spec).round(4)

print("Group-wise Statistics:")
group_stats

In [None]:
# Pull out the 0/1 conversion outcomes of each arm
is_group_a = df['Group'] == 'A'
is_group_b = df['Group'] == 'B'
group_a = df.loc[is_group_a, 'Conversion_Binary']
group_b = df.loc[is_group_b, 'Conversion_Binary']

# Conversions, sample size and conversion rate per arm
group_a_conversions, group_a_total, group_a_rate = group_a.sum(), group_a.count(), group_a.mean()
group_b_conversions, group_b_total, group_b_rate = group_b.sum(), group_b.count(), group_b.mean()

# Relative lift of B over A, expressed as a percentage of A's rate
rate_delta = group_b_rate - group_a_rate
lift = (rate_delta / group_a_rate) * 100

for label, rate in (('A', group_a_rate), ('B', group_b_rate)):
    print(f"Group {label} Conversion Rate: {rate:.4f} ({rate*100:.2f}%)")
print(f"Relative Lift: {lift:.2f}%")

## 3. Statistical Hypothesis Testing

In [None]:
# Two-proportion z-test of treatment (B) against control (A).
# Single source of truth for the significance level: it drives the test
# decision, the confidence level of the intervals, and the printed labels.
alpha = 0.05

# proportions_ztest tests (first proportion - second proportion), so the
# treatment arm goes first. That gives the z-statistic the same sign as the
# lift and Cohen's h below (positive when B converts better). The two-sided
# p-value does not depend on the order.
count = [group_b_conversions, group_a_conversions]
nobs = [group_b_total, group_a_total]

stat, pval = proportions_ztest(count, nobs)

# Per-arm confidence intervals for the conversion rate at level 1 - alpha
ci_a = proportion_confint(group_a_conversions, group_a_total, alpha=alpha)
ci_b = proportion_confint(group_b_conversions, group_b_total, alpha=alpha)

# Effect size (Cohen's h): difference of arcsine-transformed rates, B minus A
cohens_h = 2 * (np.arcsin(np.sqrt(group_b_rate)) - np.arcsin(np.sqrt(group_a_rate)))

confidence_pct = (1 - alpha) * 100

print(f"Z-statistic: {stat:.4f}")
print(f"P-value: {pval:.6f}")
print(f"Cohen's h (Effect Size): {cohens_h:.4f}")
print(f"Group A {confidence_pct:.0f}% CI: [{ci_a[0]:.4f}, {ci_a[1]:.4f}]")
print(f"Group B {confidence_pct:.0f}% CI: [{ci_b[0]:.4f}, {ci_b[1]:.4f}]")

# Statistical significance
is_significant = pval < alpha
print(f"\nStatistically Significant (α = {alpha}): {'YES' if is_significant else 'NO'}")

## 4. Data Visualizations

In [None]:
# Create comprehensive visualizations: a 2x3 dashboard summarising the test
fig = plt.figure(figsize=(20, 15))

# 1. Conversion Rate Comparison (rates shown in percent)
ax1 = plt.subplot(2, 3, 1)
groups = ['Group A (Control)', 'Group B (Treatment)']
rates = [group_a_rate*100, group_b_rate*100]
colors = ['#FF6B6B', '#4ECDC4']

bars = ax1.bar(groups, rates, color=colors, alpha=0.8, edgecolor='black', linewidth=1)
ax1.set_ylabel('Conversion Rate (%)', fontsize=12)
ax1.set_title('Conversion Rate Comparison', fontsize=14, fontweight='bold')
# Leave 20% headroom above the tallest bar for the value labels
ax1.set_ylim(0, max(rates) * 1.2)

# Add value labels on bars
for bar, rate in zip(bars, rates):
    ax1.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1,
            f'{rate:.2f}%', ha='center', va='bottom', fontweight='bold')

# 2. Sample Size Comparison
ax2 = plt.subplot(2, 3, 2)
sample_sizes = [group_a_total, group_b_total]
bars2 = ax2.bar(groups, sample_sizes, color=colors, alpha=0.8, edgecolor='black', linewidth=1)
ax2.set_ylabel('Sample Size', fontsize=12)
ax2.set_title('Sample Size Distribution', fontsize=14, fontweight='bold')

for bar, size in zip(bars2, sample_sizes):
    ax2.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 10,
            f'{size:,}', ha='center', va='bottom', fontweight='bold')

# 3. Confidence Intervals (converted to percent to match `rates`)
ax3 = plt.subplot(2, 3, 3)
ci_lower = [ci_a[0]*100, ci_b[0]*100]
ci_upper = [ci_a[1]*100, ci_b[1]*100]

# yerr takes distances from the point: first row below, second row above
ax3.errorbar(groups, rates,
            yerr=[np.array(rates) - np.array(ci_lower),
                  np.array(ci_upper) - np.array(rates)],
            fmt='o', capsize=10, capthick=2, markersize=8, color='darkblue')
ax3.set_ylabel('Conversion Rate (%)', fontsize=12)
ax3.set_title('95% Confidence Intervals', fontsize=14, fontweight='bold')
ax3.grid(True, alpha=0.3)

# 4. Page Views Distribution
ax4 = plt.subplot(2, 3, 4)
group_a_data = df[df['Group'] == 'A']['Page Views']
group_b_data = df[df['Group'] == 'B']['Page Views']
ax4.boxplot([group_a_data, group_b_data])
# Label the boxes via the axis rather than boxplot's `labels=` keyword, which
# newer matplotlib releases deprecate; this form works on old and new versions.
ax4.set_xticklabels(['Group A', 'Group B'])
ax4.set_title('Page Views Distribution by Group', fontsize=14, fontweight='bold')
ax4.set_xlabel('Group', fontsize=12)
ax4.set_ylabel('Page Views', fontsize=12)

# 5. Time Spent Distribution
ax5 = plt.subplot(2, 3, 5)
group_a_time = df[df['Group'] == 'A']['Time Spent']
group_b_time = df[df['Group'] == 'B']['Time Spent']
ax5.boxplot([group_a_time, group_b_time])
ax5.set_xticklabels(['Group A', 'Group B'])
ax5.set_title('Time Spent Distribution by Group', fontsize=14, fontweight='bold')
ax5.set_xlabel('Group', fontsize=12)
ax5.set_ylabel('Time Spent (seconds)', fontsize=12)

# 6. Conversion Rate by Device Type
# The mean of the 0/1 outcome per (device, group) is the conversion rate. A
# row-normalised Group x Conversion crosstab would instead show each
# combination's share of a device's users, which is not a conversion rate.
ax6 = plt.subplot(2, 3, 6)
device_conversion = df.groupby(['Device', 'Group'])['Conversion_Binary'].mean().unstack()
device_conversion.plot(kind='bar', ax=ax6, color=colors)
ax6.set_title('Conversion Rate by Device Type', fontsize=14, fontweight='bold')
ax6.set_xlabel('Device Type', fontsize=12)
ax6.set_ylabel('Conversion Rate', fontsize=12)
ax6.legend(title='Group', bbox_to_anchor=(1.05, 1), loc='upper left')
ax6.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

## 5. Detailed Analysis

In [None]:
# Geographic Analysis: conversion rate per location, one column per group
geo_analysis = df.groupby(['Location', 'Group'])['Conversion_Binary'].agg(['count', 'sum', 'mean']).reset_index()
geo_pivot = geo_analysis.pivot(index='Location', columns='Group', values='mean')

# Create the axes first and hand them to DataFrame.plot. Without `ax=`,
# pandas opens its own figure, leaving the pre-sized one blank and ignoring
# the requested figsize.
geo_fig, geo_ax = plt.subplots(figsize=(12, 6))
geo_pivot.plot(kind='bar', ax=geo_ax, color=['#FF6B6B', '#4ECDC4'])
geo_ax.set_title('Conversion Rate by Geographic Location', fontsize=16, fontweight='bold')
geo_ax.set_ylabel('Conversion Rate', fontsize=12)
geo_ax.tick_params(axis='x', rotation=45)
geo_ax.legend(title='Group')
geo_fig.tight_layout()
plt.show()

print("Geographic Performance Analysis:")
for location in geo_pivot.index:
    a_rate = geo_pivot.loc[location, 'A'] * 100
    b_rate = geo_pivot.loc[location, 'B'] * 100
    if a_rate > 0:
        geo_lift = ((b_rate - a_rate) / a_rate) * 100
        lift_text = f"{geo_lift:+.1f}%"
    else:
        # Relative lift is undefined when the control rate is zero or missing
        # (NaN fails the comparison too); say so instead of a misleading +0.0%.
        geo_lift = np.nan
        lift_text = "n/a"
    print(f"• {location}: A={a_rate:.1f}%, B={b_rate:.1f}% (Lift: {lift_text})")

In [None]:
# Device Performance Analysis: conversion rate per device, one column per group
device_analysis = df.groupby(['Device', 'Group'])['Conversion_Binary'].mean().unstack()

# Create the axes first and hand them to DataFrame.plot. Without `ax=`,
# pandas opens its own figure, leaving the pre-sized one blank and ignoring
# the requested figsize.
device_fig, device_ax = plt.subplots(figsize=(10, 6))
device_analysis.plot(kind='bar', ax=device_ax, color=['#FF6B6B', '#4ECDC4'])
device_ax.set_title('Conversion Rate by Device Type', fontsize=16, fontweight='bold')
device_ax.set_ylabel('Conversion Rate', fontsize=12)
device_ax.set_xlabel('Device Type', fontsize=12)
device_ax.tick_params(axis='x', rotation=0)
device_ax.legend(title='Group')
device_fig.tight_layout()
plt.show()

print("Device Performance Analysis:")
for device in device_analysis.index:
    a_rate = device_analysis.loc[device, 'A'] * 100
    b_rate = device_analysis.loc[device, 'B'] * 100
    if a_rate > 0:
        device_lift = ((b_rate - a_rate) / a_rate) * 100
        lift_text = f"{device_lift:+.1f}%"
    else:
        # Relative lift is undefined when the control rate is zero or missing
        # (NaN fails the comparison too); say so instead of a misleading +0.0%.
        device_lift = np.nan
        lift_text = "n/a"
    print(f"• {device}: A={a_rate:.1f}%, B={b_rate:.1f}% (Lift: {lift_text})")

## 6. Business Insights & Recommendations

In [None]:
# Business-facing summary of the test outcome
banner = "=" * 80
print(banner)
print("BUSINESS INSIGHTS & RECOMMENDATIONS")
print(banner)

# Headline numbers: difference in rates (as a fraction) and relative lift (%)
absolute_lift = group_b_rate - group_a_rate
relative_lift = lift

# Wording of the hypothesis-test verdict
if is_significant:
    significance_level = "STATISTICALLY SIGNIFICANT"
else:
    significance_level = "NOT STATISTICALLY SIGNIFICANT"

section_rule = "=" * 50
summary_lines = [
    "📊 EXECUTIVE SUMMARY",
    section_rule,
    f"• Test Duration: Based on {group_a_total + group_b_total:,} total users",
    f"• Control Group (A): {group_a_rate*100:.2f}% conversion rate",
    f"• Treatment Group (B): {group_b_rate*100:.2f}% conversion rate",
    f"• Absolute Lift: {absolute_lift*100:.2f} percentage points",
    f"• Relative Lift: {relative_lift:.2f}%",
    f"• Statistical Significance: {significance_level}",
    f"• P-value: {pval:.6f}",
]
print("\n".join(summary_lines))

print("\n🎯 BUSINESS IMPACT ANALYSIS")
print(section_rule)

# Verdict: inconclusive unless significant; otherwise the sign of the lift decides
if not is_significant:
    impact, recommendation, color = "INCONCLUSIVE", "CONTINUE TESTING", "🟡"
elif relative_lift > 0:
    impact, recommendation, color = "POSITIVE", "IMPLEMENT", "🟢"
else:
    impact, recommendation, color = "NEGATIVE", "DO NOT IMPLEMENT", "🔴"

print(f"{color} Impact Assessment: {impact}")
print(f"{color} Recommendation: {recommendation}")

In [None]:
# Detailed recommendations: explain the verdict and, for a win, size the upside
print(f"\n📈 DETAILED RECOMMENDATIONS")
print(f"{'='*50}")

if is_significant and relative_lift > 0:
    # Describe the effect size by Cohen's conventional benchmarks for h
    # (0.2 small, 0.5 medium, 0.8 large) instead of always calling it
    # meaningful: a significant result can still have a negligible effect.
    h_magnitude = abs(cohens_h)
    if h_magnitude >= 0.8:
        effect_label = "large"
    elif h_magnitude >= 0.5:
        effect_label = "medium"
    elif h_magnitude >= 0.2:
        effect_label = "small"
    else:
        effect_label = "negligible"

    print("✅ RECOMMENDATION: IMPLEMENT THE NEW FEATURE")
    print("\nReasons:")
    print(f"• Statistically significant improvement of {relative_lift:.2f}%")
    print(f"• P-value ({pval:.6f}) is below significance threshold (0.05)")
    print(f"• Effect size (Cohen's h = {cohens_h:.3f}) is {effect_label} by Cohen's benchmarks")

    # Calculate potential business value
    monthly_users = 10000  # Assumption for business calculation, not taken from the data
    # absolute_lift is a fraction, so this is extra conversions per month
    additional_conversions = monthly_users * absolute_lift
    print(f"\n💰 PROJECTED BUSINESS VALUE:")
    print(f"• With {monthly_users:,} monthly users, expect {additional_conversions:.0f} additional conversions")
    print(f"• This represents a {relative_lift:.1f}% improvement in conversion rate")

elif is_significant and relative_lift < 0:
    print("❌ RECOMMENDATION: DO NOT IMPLEMENT THE NEW FEATURE")
    print("\nReasons:")
    print(f"• Statistically significant decrease of {abs(relative_lift):.2f}%")
    print(f"• The new feature is harming conversion rates")
    print(f"• Consider investigating what aspects of the feature are causing the negative impact")

else:
    # Reached when the result is not significant
    print("⚠️ RECOMMENDATION: CONTINUE TESTING OR REDESIGN")
    print("\nReasons:")
    print(f"• Results are not statistically significant (p-value = {pval:.6f})")
    print(f"• Need larger sample size or longer test duration")
    print(f"• Consider improving the feature design before retesting")

## 7. Save Results

In [None]:
# Create comprehensive results summary and persist it as a two-column CSV
from pathlib import Path

# Same three-way verdict as the business-impact cell: a non-significant result
# means "keep testing", not "do not implement".
if not is_significant:
    saved_recommendation = 'Continue Testing'
elif lift > 0:
    saved_recommendation = 'Implement'
else:
    saved_recommendation = 'Do Not Implement'

detailed_results = {
    'Metric': [
        'Group A Sample Size', 'Group B Sample Size',
        'Group A Conversions', 'Group B Conversions',
        'Group A Conversion Rate', 'Group B Conversion Rate',
        'Absolute Lift', 'Relative Lift (%)',
        'Z-Statistic', 'P-Value', 'Cohens h',
        'Group A CI Lower', 'Group A CI Upper',
        'Group B CI Lower', 'Group B CI Upper',
        'Statistical Significance', 'Recommendation'
    ],
    # Values are listed in the same order as the metric names above
    'Value': [
        group_a_total, group_b_total,
        group_a_conversions, group_b_conversions,
        f"{group_a_rate:.4f}", f"{group_b_rate:.4f}",
        f"{group_b_rate - group_a_rate:.4f}",
        f"{lift:.2f}%",
        f"{stat:.4f}", f"{pval:.6f}",
        f"{cohens_h:.4f}",
        f"{ci_a[0]:.4f}", f"{ci_a[1]:.4f}",
        f"{ci_b[0]:.4f}", f"{ci_b[1]:.4f}",
        'Yes' if is_significant else 'No',
        saved_recommendation
    ]
}

results_df = pd.DataFrame(detailed_results)

# to_csv does not create missing directories, so make sure the target exists
output_path = Path('results/comprehensive_ab_results.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
results_df.to_csv(output_path, index=False)

print(f"💾 Results saved to: {output_path.as_posix()}")
results_df

## Conclusion

This comprehensive A/B test analysis provides:

1. **Statistical rigor** with proper hypothesis testing and confidence intervals
2. **Business context** with actionable recommendations and projected impact
3. **Detailed insights** across different user segments (device, geography)
4. **Visual analysis** to communicate findings effectively
5. **Risk assessment** (p-value, confidence intervals, and effect size) to understand the reliability of results

The analysis goes beyond simple statistical significance to provide meaningful business insights that can drive product decisions.