# Geo-Experiment Playground

This notebook provides an interactive environment for experimenting with the geo-experiment evaluation framework.

## Setup

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Import our modules
from data_simulation.generators import SimpleNullGenerator, DataConfig
from assignment.methods import RandomAssignment
from reporting.models import MeanMatchingModel
from evaluation.metrics import EvaluationRunner, EvaluationConfig
from diagnostics.plots import DiagnosticPlotter
from pipeline.runner import ExperimentRunner
from pipeline.config import ExperimentConfig

# Plot appearance: start from matplotlib's 'default' style sheet, then make
# 'husl' the seaborn color palette for the figures drawn below.
plt.style.use("default")
sns.set_palette("husl")

# Reaching this line means every import above succeeded.
print("✅ All modules imported successfully!")

## Quick Start: Single Experiment

Let's start with a simple single experiment to understand the framework.

In [None]:
# Configuration for one small experiment. In this example the pre-period and
# evaluation lengths add up to n_days (40 + 20 = 60).
config = ExperimentConfig(
    seed=42,
    n_geos=20,
    n_days=60,
    pre_period_days=40,
    eval_period_days=20,
)

# Build the runner and execute exactly one experiment, with plots enabled.
runner = ExperimentRunner(config)
results = runner.run_single_experiment(show_plots=True)

# Headline numbers, read from `results` by key.
print("\n📊 Single Experiment Results:")
print(f"iROAS Estimate: {results['iroas_estimate']:.4f}")
ci_lower, ci_upper = results['iroas_ci'][0], results['iroas_ci'][1]
print(f"95% CI: [{ci_lower:.4f}, {ci_upper:.4f}]")
print(f"CI Width: {results['ci_width']:.4f}")
print(f"Significant: {results['significant']}")

## Data Generation Experiments

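Let's compare datasets generated with a low and a high value of `daily_sales_noise`, keeping the other generation parameters (number of geos, number of days, seed) the same.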

In [None]:
# Compare two simulated panels whose configs differ only in daily_sales_noise
# (same n_geos, n_days and seed).
low_noise_config = DataConfig(
    n_geos=30,
    n_days=90,
    daily_sales_noise=100,  # Low noise
    seed=123
)

high_noise_config = DataConfig(
    n_geos=30,
    n_days=90,
    daily_sales_noise=1000,  # High noise
    seed=123
)

# Generate both datasets
low_noise_gen = SimpleNullGenerator(low_noise_config)
high_noise_gen = SimpleNullGenerator(high_noise_config)

panel_low, features_low = low_noise_gen.generate()
panel_high, features_high = high_noise_gen.generate()

# Compare variability: time series on the left, distributions on the right
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# Time series for a single example geo.
# NOTE(review): assumes the generator labels a geo 'geo_000' — if it does not,
# both filtered frames are empty and the left panel is blank; confirm.
example_geo = 'geo_000'
geo_low = panel_low[panel_low['geo'] == example_geo]
geo_high = panel_high[panel_high['geo'] == example_geo]

axes[0].plot(geo_low['date'], geo_low['sales'], label='Low Noise', alpha=0.8)
axes[0].plot(geo_high['date'], geo_high['sales'], label='High Noise', alpha=0.8)
axes[0].set_title(f'Sales Time Series Comparison ({example_geo})')
axes[0].set_xlabel('Date')
axes[0].set_ylabel('Sales')
axes[0].legend()
axes[0].tick_params(axis='x', rotation=45)

# Distributions of all sales values. Both histograms use the SAME 30 bin edges,
# computed over the pooled data; with independent bins=30 each dataset would get
# bins of a different width and the bar heights would not be comparable.
sales_bins = np.histogram_bin_edges(
    np.concatenate([panel_low['sales'].to_numpy(), panel_high['sales'].to_numpy()]),
    bins=30,
)
axes[1].hist(panel_low['sales'], alpha=0.6, label='Low Noise', bins=sales_bins)
axes[1].hist(panel_high['sales'], alpha=0.6, label='High Noise', bins=sales_bins)
axes[1].set_title('Sales Distribution Comparison')
axes[1].set_xlabel('Sales')
axes[1].set_ylabel('Frequency')
axes[1].legend()

plt.tight_layout()
plt.show()

# Overall standard deviation of sales in each panel
print(f"Low noise std: {panel_low['sales'].std():.2f}")
print(f"High noise std: {panel_high['sales'].std():.2f}")

## Assignment Method Testing

Test random assignment at several treatment ratios and compare `base_sales` between the resulting groups to check balance.

In [None]:
# Simulated inputs for the assignment check (50 geos, 60 days)
data_config = DataConfig(n_geos=50, n_days=60, seed=42)
generator = SimpleNullGenerator(data_config)
panel_data, geo_features = generator.generate()

# Random assignment is exercised at three treatment ratios, one panel each
assignment_method = RandomAssignment()
plotter = DiagnosticPlotter()  # created here but not used further in this cell

ratios = [0.3, 0.5, 0.7]
fig, axes = plt.subplots(1, len(ratios), figsize=(15, 4))

for i, ratio in enumerate(ratios):
    # The seed is the same for every ratio; only treatment_ratio changes
    assignment_df = assignment_method.assign(geo_features, treatment_ratio=ratio, seed=42)

    # Join the assignment onto the geo features so base_sales can be grouped by it
    merged = geo_features.merge(assignment_df, on='geo')

    ax = axes[i]
    sns.boxplot(data=merged, x='assignment', y='base_sales', ax=ax)
    ax.set_title(f'Treatment Ratio: {ratio}')

    # Group sizes: rows labelled 'treatment' versus all remaining rows
    is_treatment = assignment_df['assignment'] == 'treatment'
    treatment_count = is_treatment.sum()
    control_count = len(assignment_df) - treatment_count
    print(f"Ratio {ratio}: {treatment_count} treatment, {control_count} control")

plt.tight_layout()
plt.show()

## Model Performance Comparison

Run a small simulation-based evaluation (`n_simulations=20`, `n_bootstrap=100`) to get a quick look at how the methods perform.

In [None]:
# Deliberately small evaluation so the cell finishes quickly
quick_config = ExperimentConfig(
    seed=42,
    n_geos=25,
    n_days=50,
    pre_period_days=35,
    eval_period_days=15,
    n_simulations=20,  # kept low for quick testing
    n_bootstrap=100,
)

# The full evaluation returns two results: detailed first, then summary
runner = ExperimentRunner(quick_config)
evaluation_output = runner.run_full_evaluation(verbose=True)
detailed_results, summary_results = evaluation_output

# Let the runner draw its own figure from the detailed results
fig = runner.plot_results(detailed_results)
plt.show()

## Custom Experiments

This section is for your own experiments and testing new ideas.

In [None]:
# 🧪 Parameter sweep: how does the number of geos affect CI width?

geo_counts = [10, 25, 50, 100]
ci_widths = []

for n_geos in geo_counts:
    # Only n_geos varies between runs; every other setting, seed included, is fixed
    config = ExperimentConfig(
        seed=42,
        n_simulations=10,  # kept low for speed
        n_geos=n_geos,
        n_days=60,
        pre_period_days=40,
        eval_period_days=20,
    )

    runner = ExperimentRunner(config)
    detailed_results, _ = runner.run_full_evaluation(verbose=False)

    # Mean of the 'ci_width' values in this run's detailed results
    avg_ci_width = detailed_results['ci_width'].mean()
    ci_widths.append(avg_ci_width)

    print(f"n_geos={n_geos}: avg CI width = {avg_ci_width:.3f}")

# One point per geo count, joined by a line
ci_fig, ci_ax = plt.subplots(figsize=(8, 5))
ci_ax.plot(geo_counts, ci_widths, 'o-', linewidth=2, markersize=8)
ci_ax.set_xlabel('Number of Geos')
ci_ax.set_ylabel('Average CI Width')
ci_ax.set_title('CI Width vs Number of Geos')
ci_ax.grid(True, alpha=0.3)
plt.show()

In [None]:
# 🎯 Your experiments here!
# This cell is a placeholder: everything below except the final print is a
# comment, so running it as-is only prints the ready message.

# Ideas to try:
# 1. Effect of noise level on false positive rates
#    (noise is set via DataConfig's daily_sales_noise)
# 2. Optimal pre-period vs evaluation period lengths
#    (pre_period_days / eval_period_days on ExperimentConfig)
# 3. Impact of treatment ratio on power
#    (treatment_ratio is an argument of RandomAssignment.assign)
# 4. Bootstrap sample size vs CI stability
#    (n_bootstrap on ExperimentConfig)

# Example template (uncomment and replace each ... with a value):
# config = ExperimentConfig(
#     n_geos=...,
#     n_days=...,
#     # ... other parameters
# )
# runner = ExperimentRunner(config)
# results = runner.run_single_experiment(show_plots=True)
#
# NOTE(review): in this notebook's examples pre_period_days + eval_period_days
# always equals n_days — presumably a requirement; confirm before deviating.

print("🚀 Ready for your experiments!")

## Development Notes

Use this section for notes, debugging, and development work.

In [None]:
# Scratch space for development and debugging

# Tiny dataset (5 geos, 10 days) for a quick look at the generator's output.
# Note: `config` is rebound here to a DataConfig; earlier cells used the same
# name for an ExperimentConfig.
config = DataConfig(n_geos=5, n_days=10, seed=999)
gen = SimpleNullGenerator(config)
panel, features = gen.generate()

# First rows of the panel, then the shapes of both returned frames
print("Panel data sample:")
print(panel.head())
print(f"\nPanel shape: {panel.shape}")
print(f"Features shape: {features.shape}")

# Run the generator's own validation on what it just produced
validation_result = gen.validate_data(panel, features)
print(f"Validation: {validation_result}")