# Task 3: Event Impact Modeling

## Objective
Model how events (policies, product launches, infrastructure investments) affect financial inclusion indicators.

In [None]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import logging
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Add src to path
sys.path.append(str(Path('../src').resolve()))

# Import impact modeling module
from task3_impact_modeling import (
    EventImpactModeler,
    ImpactEstimate,
    ImpactDirection,
    EffectType
)

# Set up paths
data_dir = Path('../data/raw')
processed_dir = Path('../data/processed')
figure_dir = Path('../reports/figures')

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Set plotting style
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
%matplotlib inline

# Initialize impact modeler
modeler = EventImpactModeler(
    data_file=data_dir / 'ethiopia_fi_unified_data.xlsx',
    logger=logger,
    figure_dir=figure_dir
)

logger.info("Impact modeling notebook initialized successfully")

## 1. Understand the Impact Data

In [None]:
# Pull the three tables used throughout the notebook from the modeler
data_df, impact_links_df, events_df = modeler.load_data()

# Row counts for each table
print("=== DATA OVERVIEW ===")
total_records = len(data_df)
print(f"Total records: {total_records}")
total_links = len(impact_links_df)
print(f"Impact links: {total_links}")
total_events = len(events_df)
print(f"Events: {total_events}")

# First ten rows and the column names of the two relationship tables
for section_title, table in (
    ("\n=== IMPACT LINKS STRUCTURE ===", impact_links_df),
    ("\n=== EVENTS STRUCTURE ===", events_df),
):
    print(section_title)
    print(table.head(10))
    print(f"\nColumns: {list(table.columns)}")

In [None]:
# Ask the modeler for the impact links joined with their events
joined_data = modeler.join_impact_with_events()

# Report how many joined rows came back, then preview the first ten
print("=== JOINED DATA ===")
joined_count = len(joined_data)
print(f"Total joined records: {joined_count}")
print("\nSample of joined data:")
joined_preview = joined_data.head(10)
print(joined_preview)

In [None]:
# Build the summary dictionary of the impact links
summary = modeler.summarize_impacts()

# Headline counts
print("=== IMPACT SUMMARY ===")
print(f"Total impact links: {summary['total_links']}")
print(f"Unique events: {summary['unique_events']}")
print(f"Unique indicators: {summary['unique_indicators']}")

# Link counts per category, one section for each grouping stored in the summary
for section_title, group_key in (
    ("\nBy Direction:", 'by_direction'),
    ("\nBy Pillar:", 'by_pillar'),
):
    print(section_title)
    for category, link_count in summary[group_key].items():
        print(f"  {category}: {link_count}")

# First ten event -> indicator relationships with direction, magnitude and lag
print("\nSample Event-Indicator Relationships:")
for link in summary['events_affecting_indicators'][:10]:
    relationship = f"  Event {link['parent_id']} -> {link['related_indicator']}: "
    effect = f"{link['impact_direction']} {link['impact_magnitude']:.2f} "
    lag = f"(lag: {link['lag_months']:.0f} months)"
    print(relationship + effect + lag)

## 2. Build Event-Indicator Association Matrix

In [None]:
# Indicator codes passed to the matrix builder
key_indicators = [
    'ACC_OWNERSHIP',  # Account ownership
    'ACC_MM_ACCOUNT',  # Mobile money account
    'USG_DIGITAL_PAYMENT',  # Digital payment usage
    # Add other key indicators from your dataset
]

# Let the modeler assemble the event-by-indicator association matrix
association_matrix = modeler.build_association_matrix(key_indicators=key_indicators)

# Dimensions: rows are reported as events, columns as indicators
print("=== ASSOCIATION MATRIX ===")
print(f"Shape: {association_matrix.shape}")
event_count = len(association_matrix)
print(f"Events: {event_count}")
indicator_count = len(association_matrix.columns)
print(f"Indicators: {indicator_count}")

# First ten rows
print("\nMatrix Preview:")
matrix_preview = association_matrix.head(10)
print(matrix_preview)

In [None]:
# Visualize association matrix
# NOTE(review): save=True presumably also writes the figure to disk (likely
# under the figure_dir given to the modeler) - confirm in EventImpactModeler.
fig = modeler.visualize_association_matrix(save=True)
# Render the current figure in the notebook output
plt.show()

## 3. Model Event Effects Over Time

In [None]:
# Example: Model different effect types
event_date = datetime(2021, 5, 1)  # Example: Telebirr launch

# The three scenarios share the same event date, magnitude and duration, so
# the resulting curves differ only by effect type (and lag for the delayed one).
shared_effect_params = dict(
    event_date=event_date,
    magnitude=5.0,
    duration_months=12,
)

# Immediate effect
immediate_effect = modeler.model_effect_over_time(
    lag_months=0,
    effect_type="immediate",
    **shared_effect_params
)

# Gradual effect
gradual_effect = modeler.model_effect_over_time(
    lag_months=0,
    effect_type="gradual",
    **shared_effect_params
)

# Delayed effect (3-month lag)
delayed_effect = modeler.model_effect_over_time(
    lag_months=3,
    effect_type="delayed",
    **shared_effect_params
)

# Visualize effect types: one line per scenario plus a marker for the event date
fig, ax = plt.subplots(figsize=(12, 6))
for effect_series, series_label, series_marker in (
    (immediate_effect, 'Immediate', 'o'),
    (gradual_effect, 'Gradual', 's'),
    (delayed_effect, 'Delayed (3mo lag)', '^'),
):
    ax.plot(effect_series.index, effect_series.values, label=series_label, marker=series_marker)
ax.axvline(x=event_date, color='red', linestyle='--', alpha=0.5, label='Event Date')
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Effect Magnitude', fontsize=12)
ax.set_title('Event Effect Types Over Time', fontsize=14, fontweight='bold')
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()

# savefig does not create missing directories; make sure the target exists
# so the cell works on a fresh checkout.
figure_dir.mkdir(parents=True, exist_ok=True)
plt.savefig(figure_dir / 'effect_types.png', dpi=300, bbox_inches='tight')
plt.show()

## 4. Combine Effects from Multiple Events

In [None]:
# Example: Combine effects from multiple events
# Event 1: gradual effect starting May 2021
event1_effect = modeler.model_effect_over_time(
    event_date=datetime(2021, 5, 1),
    magnitude=3.0,
    effect_type="gradual",
    duration_months=24
)

# Event 2: immediate effect starting August 2022
event2_effect = modeler.model_effect_over_time(
    event_date=datetime(2022, 8, 1),
    magnitude=2.0,
    effect_type="immediate",
    duration_months=24
)

# Combine effects using the modeler's "additive" method
combined_additive = modeler.combine_event_effects([event1_effect, event2_effect], method="additive")

# Visualize the individual effects (faded) against the combined curve (bold)
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(event1_effect.index, event1_effect.values, label='Event 1', alpha=0.7)
ax.plot(event2_effect.index, event2_effect.values, label='Event 2', alpha=0.7)
ax.plot(combined_additive.index, combined_additive.values, label='Combined (Additive)', linewidth=2)
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Effect Magnitude', fontsize=12)
ax.set_title('Combining Effects from Multiple Events', fontsize=14, fontweight='bold')
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()

# savefig does not create missing directories; make sure the target exists
# so the cell works on a fresh checkout.
figure_dir.mkdir(parents=True, exist_ok=True)
plt.savefig(figure_dir / 'combined_effects.png', dpi=300, bbox_inches='tight')
plt.show()

## 5. Test Model Against Historical Data

In [None]:
# Validate Telebirr launch (May 2021)
# Mobile money accounts: 4.7% (2021) to 9.45% (2024)
telebirr_validation = modeler.validate_against_historical(
    event_name='Telebirr',
    indicator_code='ACC_MM_ACCOUNT',
    observed_before=4.7,
    observed_after=9.45,
    event_date=datetime(2021, 5, 1)
)

print("=== TELEBIRR VALIDATION ===")
print(f"Event: {telebirr_validation['event_name']}")
print(f"Indicator: {telebirr_validation['indicator_code']}")
print(f"Observed Before: {telebirr_validation['observed_before']:.2f}%")
print(f"Observed After: {telebirr_validation['observed_after']:.2f}%")
print(f"Observed Change: {telebirr_validation['observed_change']:.2f}pp")
print(f"Observed Change %: {telebirr_validation['observed_change_pct']:.2f}%")

# Compare against None explicitly: a predicted impact of exactly 0.0 is a
# valid model output and must not be reported as "N/A". This also matches
# the None check used for 'difference' below.
predicted_impact = telebirr_validation['predicted_impact']
if predicted_impact is not None:
    print(f"Predicted Impact: {predicted_impact:.2f}pp")
else:
    print("Predicted Impact: N/A")

if telebirr_validation['difference'] is not None:
    print(f"Difference: {telebirr_validation['difference']:.2f}pp")
    print(f"Alignment: {telebirr_validation['alignment']}")

# Store validation result on the model so it is counted in the model summary
modeler.model.validation_results['telebirr'] = telebirr_validation

## 6. Refine Impact Estimates

In [None]:
# Refine estimates based on validation
# Example: If Telebirr validation shows different impact than model, refine it

# Find Telebirr event ID: first event whose 'category' text mentions Telebirr.
# NOTE(review): this searches the 'category' column; if the event name lives in
# a different column the lookup will find nothing - confirm against the schema.
telebirr_event_id = None
if 'category' in events_df.columns:
    telebirr_events = events_df[events_df['category'].str.contains('Telebirr', case=False, na=False)]
    if len(telebirr_events) > 0 and 'id' in telebirr_events.columns:
        telebirr_event_id = telebirr_events.iloc[0]['id']

# Test against None rather than truthiness so that a legitimate ID of 0
# is not mistaken for "event not found".
if telebirr_event_id is not None:
    # Refine based on observed change (4.75pp over 3 years ≈ 1.58pp per year)
    years_observed = 3  # span between the "before" and "after" observations
    observed_annual_impact = telebirr_validation['observed_change'] / years_observed  # Approximate annual impact
    modeler.refine_estimates(
        event_id=str(telebirr_event_id),
        indicator_code='ACC_MM_ACCOUNT',
        new_magnitude=observed_annual_impact,
        reason=f"Adjusted based on observed change: {telebirr_validation['observed_change']:.2f}pp over {years_observed} years"
    )

    print(f"Refined Telebirr impact estimate to {observed_annual_impact:.2f}pp per year")
else:
    print("Telebirr event ID not found in events data")

## 7. Review Comparable Country Evidence

For events where Ethiopian pre/post data is insufficient, use documented impacts from similar contexts.

In [None]:
# Illustrative impact estimates taken from comparable countries.
# These values are placeholders - swap in actual research findings.

comparable_evidence = [
    {
        'event_type': 'mobile_money_launch',
        'country': 'Kenya',
        'indicator': 'ACC_MM_ACCOUNT',
        'impact': 15.0,  # Example: 15pp increase
        'timeframe': '2 years',
        'source': 'M-Pesa Kenya case study'
    },
    {
        'event_type': 'digital_payment_policy',
        'country': 'Tanzania',
        'indicator': 'USG_DIGITAL_PAYMENT',
        'impact': 8.0,  # Example: 8pp increase
        'timeframe': '1 year',
        'source': 'Tanzania digital payment policy analysis'
    }
]

# Print one block of details per evidence record
print("=== COMPARABLE COUNTRY EVIDENCE ===")
for record in comparable_evidence:
    detail_lines = [
        f"\n{record['event_type']} ({record['country']}):",
        f"  Indicator: {record['indicator']}",
        f"  Impact: {record['impact']:.1f}pp",
        f"  Timeframe: {record['timeframe']}",
        f"  Source: {record['source']}",
        "  Note: Adjust for Ethiopia context (economic conditions, infrastructure, etc.)",
    ]
    print("\n".join(detail_lines))

## 8. Document Methodology

In [None]:
# Produce and display the methodology text
# NOTE(review): this runs before the assumptions/limitations below are
# registered - confirm whether the text is expected to include them.
methodology = modeler.document_methodology()
print(methodology)

# Register the modeling assumptions
model_assumptions = [
    "Effects are linear within the modeled period",
    "Events are independent (no interaction effects)",
    "Lag periods are fixed as specified in impact_links",
    "Magnitudes are point estimates (uncertainty not modeled)",
    "Comparable country evidence adjusted for Ethiopia context"
]
modeler.add_assumptions(model_assumptions)

# Register the known limitations
model_limitations = [
    "Limited historical data for validation",
    "No interaction effects between events modeled",
    "Uncertainty in impact estimates not quantified",
    "Comparable country evidence may not directly apply",
    "Time-varying effects not fully captured",
    "Market saturation effects not modeled"
]
modeler.add_limitations(model_limitations)

# Echo what the model now holds, as numbered lists
for section_title, attribute_name in (
    ("\n=== ASSUMPTIONS ===", 'assumptions'),
    ("\n=== LIMITATIONS ===", 'limitations'),
):
    print(section_title)
    for number, entry in enumerate(getattr(modeler.model, attribute_name), 1):
        print(f"{number}. {entry}")

## 9. Model Summary

In [None]:
# Get model summary (dictionary of counts describing the current model state)
summary = modeler.get_model_summary()

print("=== IMPACT MODEL SUMMARY ===")
print(f"Association Matrix Shape: {summary['association_matrix_shape']}")
print(f"Total Impact Estimates: {summary['total_impact_estimates']}")
print(f"Validation Results: {summary['validation_results_count']}")
print(f"Assumptions: {summary['assumptions_count']}")
print(f"Limitations: {summary['limitations_count']}")

# Display final association matrix
print("\n=== FINAL ASSOCIATION MATRIX ===")
print(modeler.model.association_matrix)

# Save association matrix to CSV (skipped when no matrix has been built)
if modeler.model.association_matrix is not None:
    # to_csv does not create missing directories; make sure the target exists
    # so the export works on a fresh checkout.
    processed_dir.mkdir(parents=True, exist_ok=True)
    output_file = processed_dir / 'association_matrix.csv'
    modeler.model.association_matrix.to_csv(output_file)
    print(f"\nAssociation matrix saved to {output_file}")