# Task 4: Forecasting Access and Usage

## Objective
Forecast Account Ownership (Access) and Digital Payment Usage for 2025-2027.

In [None]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import logging
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Add src to path
sys.path.append(str(Path('../src').resolve()))

# Import forecasting module
from task4_forecasting import (
    FinancialInclusionForecaster,
    ForecastMethod,
    Scenario
)

# Input/output directories (relative paths, resolved against the current
# working directory)
data_dir, processed_dir, figure_dir = (
    Path(location)
    for location in ('../data/raw', '../data/processed', '../reports/figures')
)

# Configure root logging at INFO level and create this notebook's logger
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Plot appearance: matplotlib style sheet first, then the seaborn palette
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
%matplotlib inline

# The association matrix is optional: pass its path only when the file
# exists on disk, otherwise hand the forecaster None.
association_matrix_file = processed_dir / 'association_matrix.csv'
if not association_matrix_file.exists():
    association_matrix_file = None

# Build the forecaster used by every cell below
forecaster = FinancialInclusionForecaster(
    data_file=data_dir / 'ethiopia_fi_unified_data.xlsx',
    impact_model_file=association_matrix_file,
    logger=logger,
    figure_dir=figure_dir,
)

logger.info("Forecasting notebook initialized successfully")

## 1. Define Targets

- **Account Ownership Rate (Access)**: % of adults with an account at a financial institution or mobile money provider
- **Digital Payment Usage**: % of adults who made or received a digital payment

In [None]:
# Indicators to forecast, keyed by indicator code. Each entry carries a
# display name, the pillar it belongs to, and a human-readable description.
TARGET_INDICATORS = {
    'ACC_OWNERSHIP': dict(
        name='Account Ownership',
        pillar='access',
        description='% of adults with account at financial institution or mobile money',
    ),
    'USG_DIGITAL_PAYMENT': dict(
        name='Digital Payment Usage',
        pillar='usage',
        description='% of adults who made or received digital payment',
    ),
}

# Years for which forecasts are produced
FORECAST_YEARS = [2025, 2026, 2027]

# Echo the configuration so the notebook output records what was forecast
print("=== TARGET INDICATORS ===")
for indicator, details in TARGET_INDICATORS.items():
    print(
        f"\n{indicator} - {details['name']}",
        f"  Pillar: {details['pillar']}",
        f"  Description: {details['description']}",
        sep="\n",
    )

print(f"\nForecast Period: {FORECAST_YEARS}")

## 2. Extract Historical Data

In [None]:
# Load the source data through the forecaster
data_df, observations_df = forecaster.load_data()

# One historical series per target indicator; indicators for which the
# forecaster returns nothing are reported and left out of the mapping.
historical_series = {}

for indicator_code, info in TARGET_INDICATORS.items():
    series = forecaster.extract_indicator_series(
        indicator_code=indicator_code,
        pillar=info['pillar'],
    )

    if len(series) == 0:
        print(f"\nWarning: No data found for {indicator_code}")
        continue

    historical_series[indicator_code] = series
    print(f"\n=== {info['name']} ({indicator_code}) ===")
    print(f"Data points: {len(series)}")
    print(series)

# Keep the combined history on the model; fall back to an empty frame when
# no indicator produced any data.
if historical_series:
    forecaster.model.historical_data = pd.concat(historical_series.values())
else:
    forecaster.model.historical_data = pd.DataFrame()

## 3. Generate Forecasts

### 3.1 Baseline: Trend Continuation

In [None]:
# Generate baseline forecasts using a linear trend.
# Result: indicator code -> forecast object returned by the forecaster.
baseline_forecasts = {}

for indicator_code, info in TARGET_INDICATORS.items():
    # Indicators without historical data cannot be forecast
    if indicator_code not in historical_series:
        continue

    linear_forecast = forecaster.linear_trend_forecast(
        series=historical_series[indicator_code],
        forecast_years=FORECAST_YEARS,
        confidence_level=0.95
    )

    # A falsy result means no forecast was produced for this indicator
    if not linear_forecast:
        continue

    baseline_forecasts[indicator_code] = linear_forecast
    forecaster.model.forecasts.append(linear_forecast)

    print(f"\n=== {info['name']} - Linear Trend Forecast ===")

    # Compare against None explicitly: a metric of exactly 0.0 (e.g. RMSE of a
    # perfect fit) is a real value and must not be reported as "N/A".
    r2 = linear_forecast.r2_score
    rmse = linear_forecast.rmse
    print(f"R² Score: {r2:.3f}" if r2 is not None else "R² Score: N/A")
    print(f"RMSE: {rmse:.3f}" if rmse is not None else "RMSE: N/A")

    print("\nForecasts:")
    # One line per forecast year: point estimate with its interval bounds
    for i, year in enumerate(linear_forecast.forecast_years):
        print(f"  {year}: {linear_forecast.forecast_values[i]:.2f}% "
              f"[{linear_forecast.confidence_intervals_lower[i]:.2f}, "
              f"{linear_forecast.confidence_intervals_upper[i]:.2f}]")

### 3.2 With Events: Event-Augmented Model

In [None]:
# Event-augmented forecasts, keyed by indicator code
event_augmented_forecasts = {}

for indicator_code, info in TARGET_INDICATORS.items():
    # Only indicators with historical data are forecast
    if indicator_code not in historical_series:
        continue

    event_forecast = forecaster.event_augmented_forecast(
        series=historical_series[indicator_code],
        forecast_years=FORECAST_YEARS,
        association_matrix=forecaster.association_matrix,
        confidence_level=0.95,
    )

    # Nothing to record or print when the forecaster returned a falsy result
    if not event_forecast:
        continue

    event_augmented_forecasts[indicator_code] = event_forecast

    print(f"\n=== {info['name']} - Event-Augmented Forecast ===")

    # Event details are printed only when the model reports an event impact
    if 'event_impact' in event_forecast.model_params:
        print(f"Event Impact: {event_forecast.model_params['event_impact']:.2f}pp")
        print(f"Events Count: {event_forecast.model_params['events_count']}")

    print(f"\nForecasts:")
    for position, year in enumerate(event_forecast.forecast_years):
        point = event_forecast.forecast_values[position]
        lower = event_forecast.confidence_intervals_lower[position]
        upper = event_forecast.confidence_intervals_upper[position]
        print(f"  {year}: {point:.2f}% [{lower:.2f}, {upper:.2f}]")

### 3.3 Scenarios: Optimistic, Base, Pessimistic

In [None]:
# Scenario forecasts per indicator: indicator code -> {scenario name: forecast}
scenario_forecasts = {}

for indicator_code, info in TARGET_INDICATORS.items():
    # Prefer the event-augmented forecast as the base; fall back to the
    # baseline forecast, and skip the indicator when neither is available.
    base_forecast = event_augmented_forecasts.get(indicator_code)
    if not base_forecast:
        base_forecast = baseline_forecasts.get(indicator_code)
    if not base_forecast:
        continue

    scenarios = forecaster.scenario_forecast(
        base_forecast=base_forecast,
        optimistic_multiplier=1.15,   # 15% above base
        pessimistic_multiplier=0.85,  # 15% below base
    )
    scenario_forecasts[indicator_code] = scenarios

    print(f"\n=== {info['name']} - Scenario Forecasts ===")
    for scenario_name, forecast in scenarios.items():
        print(f"\n{scenario_name.upper()} Scenario:")
        for position, year in enumerate(forecast.forecast_years):
            value = forecast.forecast_values[position]
            print(f"  {year}: {value:.2f}%")

## 4. Visualize Forecasts

In [None]:
# Plot (and save) the baseline forecast of every indicator that has one
for indicator_code, info in TARGET_INDICATORS.items():
    if indicator_code not in baseline_forecasts:
        continue

    forecast = baseline_forecasts[indicator_code]
    # .get() yields None when no history was extracted for this indicator
    historical = historical_series.get(indicator_code)
    chart_title = f"{info['name']} - Baseline Forecast"

    fig = forecaster.visualize_forecast(
        forecast=forecast,
        historical_data=historical,
        title=chart_title,
        save=True,
    )
    plt.show()

In [None]:
# Plot (and save) the scenario comparison of every indicator that has one
for indicator_code, info in TARGET_INDICATORS.items():
    if indicator_code not in scenario_forecasts:
        continue

    scenarios = scenario_forecasts[indicator_code]
    # .get() yields None when no history was extracted for this indicator
    historical = historical_series.get(indicator_code)
    chart_title = f"{info['name']} - Scenario Forecasts"

    fig = forecaster.visualize_scenarios(
        scenarios=scenarios,
        historical_data=historical,
        title=chart_title,
        save=True,
    )
    plt.show()

## 5. Forecast Table with Confidence Intervals

In [None]:
# Collect every forecast produced so far, in a fixed order:
# baseline, then event-augmented, then each indicator's scenarios.
all_forecasts = [
    *baseline_forecasts.values(),
    *event_augmented_forecasts.values(),
    *(
        forecast
        for scenarios in scenario_forecasts.values()
        for forecast in scenarios.values()
    ),
]

# Build the long-format table through the forecaster (save=True is passed on)
forecast_table = forecaster.create_forecast_table(all_forecasts, save=True)

print("=== FORECAST TABLE ===")
print(forecast_table.to_string(index=False))

# Summary: one row per (Indicator, Year), one column per Scenario.
# Missing combinations are deliberately left as NaN rather than filled with
# 0: a zero would read as a genuine 0% forecast for that scenario.
summary_table = forecast_table.pivot_table(
    index=['Indicator', 'Year'],
    columns='Scenario',
    values='Forecast',
    aggfunc='first'
)

print("\n=== FORECAST SUMMARY TABLE ===")
# Render missing cells as "N/A" so gaps are visible in the printed output
print(summary_table.to_string(na_rep='N/A'))

## 6. Interpret Results

### 6.1 Model Predictions

In [None]:
# Summarise the baseline forecast of each indicator: latest observed value,
# first and last forecast year, and the change from latest to last forecast.
print("=== MODEL PREDICTIONS ===")

for indicator_code, info in TARGET_INDICATORS.items():
    if indicator_code not in baseline_forecasts:
        continue

    forecast = baseline_forecasts[indicator_code]

    # Last row of the historical 'value' column.
    # NOTE(review): assumes the series is ordered oldest-to-newest — confirm.
    latest_value = historical_series[indicator_code]['value'].iloc[-1]

    # Take labels and values from the forecast itself so they stay aligned
    # if the forecast horizon changes (previously fixed at indices 0 and 2).
    first_year, last_year = forecast.forecast_years[0], forecast.forecast_years[-1]
    first_value, last_value = forecast.forecast_values[0], forecast.forecast_values[-1]

    print(f"\n{info['name']} ({indicator_code}):")
    print(f"  Current (latest): {latest_value:.2f}%")
    print(f"  Forecast {first_year}: {first_value:.2f}%")
    print(f"  Forecast {last_year}: {last_value:.2f}%")
    # NOTE(review): 2024 is hard-coded as the year of the latest observation —
    # confirm against the data.
    print(f"  Projected Change (2024-{last_year}): "
          f"{last_value - latest_value:.2f}pp")

### 6.2 Events with Largest Potential Impact

In [None]:
# Rank events by the absolute size of their association with each target
# indicator and print the five largest per indicator.
if forecaster.association_matrix is None:
    print("Association matrix not available. Load impact model to see event impacts.")
else:
    print("=== EVENTS WITH LARGEST POTENTIAL IMPACT ===")

    for indicator_code, info in TARGET_INDICATORS.items():
        # Indicators without a column in the matrix are skipped silently
        if indicator_code not in forecaster.association_matrix.columns:
            continue

        # Drop missing entries first: NaN != 0 evaluates to True, so NaN cells
        # would otherwise survive the non-zero filter and be listed as events.
        impacts = forecaster.association_matrix[indicator_code].dropna()
        impacts = impacts[impacts != 0].sort_values(ascending=False, key=abs)

        print(f"\n{info['name']} ({indicator_code}):")
        if impacts.empty:
            print("  No event impacts found")
            continue

        print("  Top 5 events by impact:")
        for event_id, impact in impacts.head(5).items():
            print(f"    Event {event_id}: {impact:.2f}pp")

### 6.3 Key Uncertainties

In [None]:
# Known limitations of the forecasts; stored on the model and printed below
uncertainties = [
    "Limited historical data (5 Findex points over 13 years) reduces model confidence",
    "Sparse data makes trend estimation uncertain, especially for long-term forecasts",
    "Event impacts are estimated and may not fully capture real-world dynamics",
    "No interaction effects between events modeled",
    "Economic conditions and external shocks not explicitly included",
    "Market saturation effects not captured in trend models",
    "Comparable country evidence may not directly apply to Ethiopia",
    "Confidence intervals are approximate and may underestimate uncertainty",
]

forecaster.model.uncertainties = uncertainties

# Numbered list, starting at 1
print("=== KEY UNCERTAINTIES ===")
print("\n".join(
    f"{number}. {text}" for number, text in enumerate(uncertainties, start=1)
))

# Closing caveat on how the forecasts should be read
print(
    "\n=== ACKNOWLEDGMENT OF LIMITATIONS ===",
    "These forecasts are based on limited historical data and estimated event impacts.",
    "They should be interpreted with caution and updated as new data becomes available.",
    "Scenario analysis provides a range of possible outcomes but does not capture all risks.",
    sep="\n",
)