# Time Series Forecasting - Refactored Architecture

This notebook demonstrates the new modular architecture for time series forecasting experiments.

## Features:
- 🔧 Configuration-driven experiments
- 📊 Unified logging and metrics
- 🎨 Interactive visualizations
- 🔄 Rolling window validation
- ⚡ Parallel model execution

In [1]:
# ============================================================================
# Cell 1: Setup & Data Loading
# ============================================================================

import sys
from pathlib import Path
import logging
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Setup paths
current_dir = Path.cwd()
if "ENEXIS" in str(current_dir):
    while current_dir.name != "ENEXIS" and current_dir.parent != current_dir:
        current_dir = current_dir.parent
    project_root = current_dir
else:
    project_root = current_dir

src_path = project_root / "src"
if str(src_path) not in sys.path:
    sys.path.insert(0, str(src_path))

# Set up logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

# Load training data
from utils.build_training_set import build_training_set

training_data = build_training_set(
    train_start="2025-01-01 00:00:00",
    train_end="2025-03-14 23:00:00",
    run_date="2025-03-15 12:00:00"
)

if training_data is not None:
    # Set target_datetime as index for time series analysis
    training_data = training_data.set_index('target_datetime')
    training_data.index = pd.to_datetime(training_data.index, utc=True)
    
    # Ensure we have all required columns
    required_columns = ['Price', 'Load', 'shortwave_radiation', 'temperature_2m', 
                       'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 
                       'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 
                       'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 
                       'weekday_sin', 'hour_sin', 'weekday_cos']
    
    missing_cols = [col for col in required_columns if col not in training_data.columns]
    if missing_cols:
        print(f"⚠️ Missing required columns: {missing_cols}")
    else:
        print("✅ All required columns present")
    
    print(f"📊 Dataset shape: {training_data.shape}")
    print(f"📅 Date range: {training_data.index.min()} to {training_data.index.max()}")
    print(f"💰 Price range: {training_data['Price'].min():.4f} to {training_data['Price'].max():.4f}")
    print(f"🔍 Data quality: {(1 - training_data['Price'].isna().sum() / len(training_data)) * 100:.1f}% complete")
    
    

    
else:
    raise Exception("❌ Failed to load training data")

logger.info("✅ Data loading complete")

2025-05-28 11:18:06,108 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-28 11:18:06,109 - build_training_set - INFO - 🧠 Actuals van 2025-01-01 00:00:00+00:00 t/m 2025-03-14 23:00:00+00:00
2025-05-28 11:18:06,110 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-15 12:00:00+00:00, target range: 2025-03-15 12:00:00+00:00 → 2025-03-22 11:00:00+00:00
2025-05-28 11:18:06,113 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
2025-05-28 11:18:06,115 - build_training_set - INFO - 📋 Requested columns found: 20/20
2025-05-28 11:18:06,115 - build_training_set - INFO - 📋 Using columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos']
2025-05-28 11:18:06,141 - build_training_set - INFO - ✅ 

✅ All required columns present
📊 Dataset shape: (1752, 19)
📅 Date range: 2025-01-01 00:00:00+00:00 to 2025-03-14 23:00:00+00:00
💰 Price range: -0.0204 to 0.5235
🔍 Data quality: 100.0% complete


In [2]:
# ============================================================================
# Cell 2: Model Configuration
# ============================================================================

from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error

# Define exogenous variables for SARIMAX
EXOG_VARS = [
    'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 
    'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 
    'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 
    'weekday_sin', 'hour_sin', 'weekday_cos'
]

# Model configurations
MODEL_CONFIGS = {
    'Naive': {
        'description': 'Previous day same hour forecast',
        'params': {'lag': 24}  # 24 hours = previous day same hour
    },
    'SARIMA': {
        'description': 'Simple ARIMA model (fallback to moving average if needed)',
        'params': {
            'order': (1, 0, 0),           # Simple AR(1) model
            'seasonal_order': None        # No seasonal component for robustness
        }
    },
    'SARIMAX': {
        'description': 'SARIMA with exogenous variables',
        'params': {
            'order': (1, 1, 1),           # (p, d, q)
            'seasonal_order': (1, 1, 1, 24), # (P, D, Q, s)
            'exog_vars': EXOG_VARS
        }
    }
}

def create_naive_model():
    """Create naive forecasting function"""
    def forecast_naive(train_data, forecast_steps=1):
        # Use same hour from previous day (24 hours ago)
        if len(train_data) >= 24:
            return [train_data.iloc[-24]] * forecast_steps
        else:
            return [train_data.iloc[-1]] * forecast_steps
    return forecast_naive

def create_sarima_model(train_data):
    """Create and fit SARIMA model"""
    try:
        model = ARIMA(
            train_data, 
            order=MODEL_CONFIGS['SARIMA']['params']['order'],
            seasonal_order=MODEL_CONFIGS['SARIMA']['params']['seasonal_order']
        )
        fitted_model = model.fit()
        return fitted_model
    except Exception as e:
        print(f"SARIMA model failed: {e}")
        return None

def create_sarimax_model(train_data, exog_data):
    """Create and fit SARIMAX model with exogenous variables"""
    try:
        model = SARIMAX(
            train_data,
            exog=exog_data,
            order=MODEL_CONFIGS['SARIMAX']['params']['order'],
            seasonal_order=MODEL_CONFIGS['SARIMAX']['params']['seasonal_order']
        )
        fitted_model = model.fit(disp=False)
        return fitted_model
    except Exception as e:
        print(f"SARIMAX model failed: {e}")
        return None

# Validate exogenous variables are available
available_exog = [col for col in EXOG_VARS if col in training_data.columns]
missing_exog = [col for col in EXOG_VARS if col not in training_data.columns]

print("🤖 MODEL CONFIGURATIONS:")
for model_name, config in MODEL_CONFIGS.items():
    print(f"  {model_name}: {config['description']}")
    if 'exog_vars' in config['params']:
        print(f"    Exogenous variables: {len(available_exog)}/{len(EXOG_VARS)} available")

if missing_exog:
    print(f"⚠️ Missing exogenous variables: {missing_exog}")
else:
    print("✅ All exogenous variables available")

print(f"\n📋 Exogenous variables for SARIMAX:")
for i, var in enumerate(available_exog, 1):
    print(f"  {i:2d}. {var}")

logger.info("✅ Model configuration complete")

2025-05-28 11:18:07,123 - __main__ - INFO - ✅ Model configuration complete


🤖 MODEL CONFIGURATIONS:
  Naive: Previous day same hour forecast
  SARIMA: Simple ARIMA model (fallback to moving average if needed)
  SARIMAX: SARIMA with exogenous variables
    Exogenous variables: 18/18 available
✅ All exogenous variables available

📋 Exogenous variables for SARIMAX:
   1. Load
   2. shortwave_radiation
   3. temperature_2m
   4. direct_normal_irradiance
   5. diffuse_radiation
   6. Flow_NO
   7. yearday_cos
   8. Flow_GB
   9. month
  10. is_dst
  11. yearday_sin
  12. is_non_working_day
  13. hour_cos
  14. is_weekend
  15. cloud_cover
  16. weekday_sin
  17. hour_sin
  18. weekday_cos


In [3]:
# ============================================================================
# Cell 3: 30-Day TRUE Rolling Window Validation
# ============================================================================

from datetime import datetime, timedelta
import warnings

# Suppress ALL warnings and logging for cleaner output
warnings.filterwarnings('ignore')
import logging
logging.getLogger('build_training_set').setLevel(logging.ERROR)  # Suppress build_training_set INFO logs

def run_true_rolling_window_validation(n_days=30):
    """
    TRUE rolling window validation - rebuild dataset each day with shifted dates
    Day 1: Jan 1 - Mar 14 (test Mar 15)
    Day 2: Jan 2 - Mar 15 (test Mar 16)
    Day 3: Jan 3 - Mar 16 (test Mar 17)
    etc.
    """
    from utils.build_training_set import build_training_set
    
    results_matrix = []
    
    # Get available exogenous variables (from initial load to check schema)
    temp_data = build_training_set(
        train_start="2025-01-01 00:00:00",
        train_end="2025-03-14 23:00:00", 
        run_date="2025-03-15 12:00:00"
    )
    available_exog = [col for col in EXOG_VARS if col in temp_data.columns]
    
    print(f"🔄 Starting TRUE rolling window validation (30 days)...")
    print(f"🎯 Each day: entire dataset shifts by 1 day")
    print(f"📊 Exogenous variables available: {len(available_exog)}")
    print("⚡ Warnings suppressed for clean output\n")
    
    successful_days = 0
    failed_days = 0
    
    for day in range(n_days):
        # Calculate shifting dates for this iteration
        train_start_date = datetime(2025, 1, 1) + timedelta(days=day)
        train_end_date = datetime(2025, 3, 14) + timedelta(days=day)
        run_date = datetime(2025, 3, 15) + timedelta(days=day)
        
        train_start_str = train_start_date.strftime("%Y-%m-%d %H:%M:%S")
        train_end_str = train_end_date.strftime("%Y-%m-%d %H:%M:%S") 
        run_date_str = run_date.strftime("%Y-%m-%d %H:%M:%S")
        
        print(f"📊 Day {day+1:2d}: {run_date.strftime('%Y-%m-%d')}", end="", flush=True)
        
        # Load shifted dataset for this day
        try:
            daily_data = build_training_set(
                train_start=train_start_str,
                train_end=train_end_str,
                run_date=run_date_str
            )
            
            if daily_data is None or len(daily_data) == 0:
                print(" | DATA:FAIL")
                continue
                
            # Set index for time series
            daily_data = daily_data.set_index('target_datetime')
            daily_data.index = pd.to_datetime(daily_data.index, utc=True)
            
            # Split into train/test (last 24 hours as test)
            split_point = daily_data.index[-24]  # Last 24 hours for testing
            train_data = daily_data[daily_data.index < split_point]['Price'].copy()
            test_data = daily_data[daily_data.index >= split_point]['Price'].copy()
            
            if len(train_data) == 0 or len(test_data) == 0:
                print(" | SPLIT:FAIL")
                continue
                
        except Exception as e:
            print(" | LOAD:FAIL")
            continue
        
        # Start the results line
        print(" | ", end="", flush=True)
        
        day_results = {
            'Day': day + 1,
            'Test_Date': run_date.strftime('%Y-%m-%d'),
            'Train_Start': train_start_date.strftime('%Y-%m-%d'),
            'Train_End': train_end_date.strftime('%Y-%m-%d'),
            'Train_Samples': len(train_data),
            'Test_Samples': len(test_data)
        }
        
        day_success = True
        
        # 1. NAIVE MODEL
        try:
            naive_preds = [train_data.iloc[-24]] * len(test_data) if len(train_data) >= 24 else [train_data.iloc[-1]] * len(test_data)
            naive_rmse = np.sqrt(mean_squared_error(test_data, naive_preds))
            day_results['Naive'] = naive_rmse
            print(f"N:{naive_rmse:.4f}", end=" | ", flush=True)
        except Exception:
            day_results['Naive'] = np.nan
            print("N:FAIL", end=" | ", flush=True)
            day_success = False
        
        # 2. SARIMA MODEL (Simple ARIMA)
        try:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                
                model = ARIMA(train_data, order=(1, 0, 0), enforce_stationarity=False, enforce_invertibility=False)
                fitted_model = model.fit(method='mle', maxiter=50, disp=False)
                sarima_forecast = fitted_model.forecast(steps=len(test_data))
                sarima_rmse = np.sqrt(mean_squared_error(test_data, sarima_forecast))
                day_results['SARIMA'] = sarima_rmse
                print(f"S:{sarima_rmse:.4f}", end=" | ", flush=True)
                
        except Exception:
            # Fallback to moving average
            try:
                window_size = min(24, len(train_data))
                ma_pred = train_data.rolling(window=window_size).mean().iloc[-1]
                ma_forecast = [ma_pred] * len(test_data)
                sarima_rmse = np.sqrt(mean_squared_error(test_data, ma_forecast))
                day_results['SARIMA'] = sarima_rmse
                print(f"S:{sarima_rmse:.4f}*", end=" | ", flush=True)
            except:
                day_results['SARIMA'] = np.nan
                print("S:FAIL", end=" | ", flush=True)
                day_success = False
        
        # 3. SARIMAX MODEL
        try:
            if available_exog:
                # Get exogenous data for this iteration
                train_exog = daily_data[daily_data.index < split_point][available_exog].copy()
                test_exog = daily_data[daily_data.index >= split_point][available_exog].copy()
                
                if len(train_exog) == len(train_data) and len(test_exog) == len(test_data):
                    with warnings.catch_warnings():
                        warnings.simplefilter("ignore")
                        
                        model = SARIMAX(
                            train_data,
                            exog=train_exog,
                            order=(1, 0, 1),
                            seasonal_order=(1, 0, 1, 24),
                            enforce_stationarity=False,
                            enforce_invertibility=False
                        )
                        
                        fitted_model = model.fit(method='lbfgs', maxiter=50, disp=False)
                        sarimax_forecast = fitted_model.forecast(steps=len(test_data), exog=test_exog)
                        sarimax_rmse = np.sqrt(mean_squared_error(test_data, sarimax_forecast))
                        day_results['SARIMAX'] = sarimax_rmse  
                        print(f"X:{sarimax_rmse:.4f}", flush=True)
                else:
                    day_results['SARIMAX'] = np.nan
                    print("X:SIZE", flush=True)
                    day_success = False
            else:
                day_results['SARIMAX'] = np.nan
                print("X:NOEXOG", flush=True)
                day_success = False
                
        except Exception:
            day_results['SARIMAX'] = np.nan
            print("X:FAIL", flush=True)
            day_success = False
        
        if day_success:
            successful_days += 1
        else:
            failed_days += 1
            
        results_matrix.append(day_results)
        
        # Progress update every 5 days
        if (day + 1) % 5 == 0:
            success_rate = successful_days / (successful_days + failed_days) * 100
            print(f"\n📈 Progress: {day+1}/{n_days} days | Success rate: {success_rate:.1f}%\n")
    
    return pd.DataFrame(results_matrix)

# Run TRUE rolling window validation
print("🚀 Starting TRUE ROLLING WINDOW validation...")
print("Legend: N=Naive, S=SARIMA, X=SARIMAX | Values=RMSE | *=Fallback | FAIL=Model failed\n")

import time
start_time = time.time()

results_df = run_true_rolling_window_validation(n_days=30)

end_time = time.time()
elapsed_time = end_time - start_time

if not results_df.empty:
    print(f"\n✅ TRUE rolling window validation complete!")
    print(f"⏱️ Total time: {elapsed_time:.1f} seconds ({elapsed_time/60:.1f} minutes)")
    print(f"📊 Completed {len(results_df)} validation windows")
    print(f"⚡ Average time per day: {elapsed_time/len(results_df):.1f} seconds")
    
    # Show success rates
    model_cols = ['Naive', 'SARIMA', 'SARIMAX']
    print(f"\n📈 SUCCESS RATES:")
    for model in model_cols:
        if model in results_df.columns:
            valid_results = results_df[model].dropna()
            success_rate = len(valid_results) / len(results_df) * 100
            if len(valid_results) > 0:
                avg_rmse = valid_results.mean()
                print(f"  {model}: {len(valid_results)}/{len(results_df)} ({success_rate:.1f}%) | Avg RMSE: {avg_rmse:.6f}")
            else:
                print(f"  {model}: 0/{len(results_df)} (0.0%) | No successful runs")
else:
    print("❌ No validation results generated")

logger.info("✅ TRUE rolling window validation complete")

🚀 Starting TRUE ROLLING WINDOW validation...
Legend: N=Naive, S=SARIMA, X=SARIMAX | Values=RMSE | *=Fallback | FAIL=Model failed

🔄 Starting TRUE rolling window validation (30 days)...
🎯 Each day: entire dataset shifts by 1 day
📊 Exogenous variables available: 18

📊 Day  1: 2025-03-15 | N:0.0328 | S:0.0237* | X:0.0146
📊 Day  2: 2025-03-16 | N:0.0246 | S:0.0189* | X:0.0140
📊 Day  3: 2025-03-17 | N:0.0265 | S:0.0351* | X:0.0170
📊 Day  4: 2025-03-18 | N:0.0528 | S:0.0463* | X:0.0132
📊 Day  5: 2025-03-19 | N:0.0452 | S:0.0504* | X:0.0132

📈 Progress: 5/30 days | Success rate: 100.0%

📊 Day  6: 2025-03-20 | N:0.0544 | S:0.0508* | X:0.0140
📊 Day  7: 2025-03-21 | N:0.0606 | S:0.0615* | X:0.0261
📊 Day  8: 2025-03-22 | N:0.0609 | S:0.0608* | X:0.0210
📊 Day  9: 2025-03-23 | N:0.0549 | S:0.0573* | X:0.0365
📊 Day 10: 2025-03-24 | N:0.0798 | S:0.0584* | X:0.0383

📈 Progress: 10/30 days | Success rate: 100.0%

📊 Day 11: 2025-03-25 | N:0.0809 | S:0.0592* | X:0.0245
📊 Day 12: 2025-03-26 | N:0.0540 | S

2025-05-28 11:36:27,840 - __main__ - INFO - ✅ TRUE rolling window validation complete



📈 Progress: 30/30 days | Success rate: 100.0%


✅ TRUE rolling window validation complete!
⏱️ Total time: 1100.7 seconds (18.3 minutes)
📊 Completed 30 validation windows
⚡ Average time per day: 36.7 seconds

📈 SUCCESS RATES:
  Naive: 30/30 (100.0%) | Avg RMSE: 0.053970
  SARIMA: 30/30 (100.0%) | Avg RMSE: 0.050976
  SARIMAX: 30/30 (100.0%) | Avg RMSE: 0.023518


In [4]:
# ============================================================================
# Cell 4: Results Matrix & Analysis
# ============================================================================

def create_results_matrix(results_df):
    """Create clean results matrix with RMSE values"""
    
    if results_df.empty:
        print("❌ No results to display")
        return None
    
    # Select columns for the matrix
    display_cols = ['Day', 'Test_Date', 'Naive', 'SARIMA', 'SARIMAX']
    available_cols = [col for col in display_cols if col in results_df.columns]
    
    matrix_df = results_df[available_cols].copy()
    
    # Round RMSE values to 6 decimal places
    model_cols = ['Naive', 'SARIMA', 'SARIMAX']
    for col in model_cols:
        if col in matrix_df.columns:
            matrix_df[col] = matrix_df[col].round(6)
    
    return matrix_df

def calculate_summary_stats(results_df):
    """Calculate summary statistics for each model"""
    
    model_cols = ['Naive', 'SARIMA', 'SARIMAX']
    available_models = [col for col in model_cols if col in results_df.columns]
    
    summary_stats = {}
    
    for model in available_models:
        valid_results = results_df[model].dropna()
        
        if len(valid_results) > 0:
            summary_stats[model] = {
                'Mean': valid_results.mean(),
                'Std': valid_results.std(),
                'Min': valid_results.min(),
                'Max': valid_results.max(),
                'Valid_Days': len(valid_results),
                'Total_Days': len(results_df)
            }
        else:
            summary_stats[model] = {
                'Mean': np.nan,
                'Std': np.nan,
                'Min': np.nan,
                'Max': np.nan,
                'Valid_Days': 0,
                'Total_Days': len(results_df)
            }
    
    return summary_stats

# Create and display results matrix
print("📊 RESULTS MATRIX:")
print("=" * 80)

results_matrix = create_results_matrix(results_df)

if results_matrix is not None:
    # Display the matrix
    from IPython.display import display, HTML
    
    # Style the dataframe for better display
    styled_matrix = results_matrix.copy()
    
    # Replace NaN with dash for cleaner display
    model_cols = ['Naive', 'SARIMA', 'SARIMAX']
    for col in model_cols:
        if col in styled_matrix.columns:
            styled_matrix[col] = styled_matrix[col].fillna('-')
    
    print(f"📅 Rolling Window Results ({len(results_matrix)} days):")
    display(styled_matrix)
    
    # Calculate and display summary statistics
    print(f"\n📊 SUMMARY STATISTICS:")
    print("=" * 60)
    
    summary_stats = calculate_summary_stats(results_df)
    
    # Create summary DataFrame
    summary_rows = []
    for model, stats in summary_stats.items():
        summary_rows.append({
            'Model': model,
            'Mean_RMSE': f"{stats['Mean']:.6f}" if not np.isnan(stats['Mean']) else '-',
            'Std_RMSE': f"{stats['Std']:.6f}" if not np.isnan(stats['Std']) else '-',
            'Min_RMSE': f"{stats['Min']:.6f}" if not np.isnan(stats['Min']) else '-',
            'Max_RMSE': f"{stats['Max']:.6f}" if not np.isnan(stats['Max']) else '-',
            'Success_Rate': f"{stats['Valid_Days']}/{stats['Total_Days']} ({100*stats['Valid_Days']/stats['Total_Days']:.1f}%)"
        })
    
    summary_df = pd.DataFrame(summary_rows)
    display(summary_df)
    
    # Determine best model
    print(f"\n🏆 MODEL RANKING (by Mean RMSE):")
    print("-" * 40)
    
    valid_models = []
    for model, stats in summary_stats.items():
        if not np.isnan(stats['Mean']) and stats['Valid_Days'] > 0:
            valid_models.append((model, stats['Mean'], stats['Valid_Days']))
    
    # Sort by mean RMSE (ascending = better)
    valid_models.sort(key=lambda x: x[1])
    
    for i, (model, mean_rmse, valid_days) in enumerate(valid_models, 1):
        status = "🥇" if i == 1 else "🥈" if i == 2 else "🥉" if i == 3 else f"{i}."
        print(f"  {status} {model}: {mean_rmse:.6f} RMSE (based on {valid_days} days)")
    
    if not valid_models:
        print("  ❌ No models produced valid results")
    
    print(f"\n💾 Results saved to 'results_df' variable for further analysis")
    
else:
    print("❌ Unable to create results matrix")

logger.info("✅ Results analysis complete")

📊 RESULTS MATRIX:
📅 Rolling Window Results (30 days):


Unnamed: 0,Day,Test_Date,Naive,SARIMA,SARIMAX
0,1,2025-03-15,0.032763,0.02373,0.014562
1,2,2025-03-16,0.024564,0.018892,0.013972
2,3,2025-03-17,0.02653,0.035072,0.016972
3,4,2025-03-18,0.052788,0.046339,0.013162
4,5,2025-03-19,0.045184,0.050427,0.013158
5,6,2025-03-20,0.054443,0.050817,0.014
6,7,2025-03-21,0.06064,0.061482,0.026083
7,8,2025-03-22,0.060913,0.060756,0.021044
8,9,2025-03-23,0.05495,0.057293,0.03647
9,10,2025-03-24,0.079806,0.058384,0.03833



📊 SUMMARY STATISTICS:


Unnamed: 0,Model,Mean_RMSE,Std_RMSE,Min_RMSE,Max_RMSE,Success_Rate
0,Naive,0.05397,0.019372,0.024564,0.092428,30/30 (100.0%)
1,SARIMA,0.050976,0.01596,0.018892,0.091136,30/30 (100.0%)
2,SARIMAX,0.023518,0.010723,0.007165,0.046331,30/30 (100.0%)


2025-05-28 11:36:27,888 - __main__ - INFO - ✅ Results analysis complete



🏆 MODEL RANKING (by Mean RMSE):
----------------------------------------
  🥇 SARIMAX: 0.023518 RMSE (based on 30 days)
  🥈 SARIMA: 0.050976 RMSE (based on 30 days)
  🥉 Naive: 0.053970 RMSE (based on 30 days)

💾 Results saved to 'results_df' variable for further analysis


In [5]:
# ============================================================================
# Cell 5: Final Recommendations
# ============================================================================

def generate_recommendations(results_df, summary_stats):
    """Generate recommendations based on model performance"""
    
    print("🎯 FINAL RECOMMENDATIONS:")
    print("=" * 60)
    
    # Get valid models with their performance
    valid_models = []
    for model, stats in summary_stats.items():
        if not np.isnan(stats['Mean']) and stats['Valid_Days'] > 0:
            valid_models.append({
                'model': model,
                'mean_rmse': stats['Mean'],
                'std_rmse': stats['Std'],
                'success_rate': stats['Valid_Days'] / stats['Total_Days'],
                'valid_days': stats['Valid_Days']
            })
    
    if not valid_models:
        print("❌ No models produced valid results for recommendation")
        return
    
    # Sort by mean RMSE
    valid_models.sort(key=lambda x: x['mean_rmse'])
    
    best_model = valid_models[0]
    
    print(f"🏆 RECOMMENDED MODEL: {best_model['model']}")
    print(f"   Mean RMSE: {best_model['mean_rmse']:.6f}")
    print(f"   Std RMSE:  {best_model['std_rmse']:.6f}")
    print(f"   Success Rate: {best_model['success_rate']*100:.1f}% ({best_model['valid_days']}/{summary_stats[best_model['model']]['Total_Days']} days)")
    
    # Performance insights
    print(f"\n📈 PERFORMANCE INSIGHTS:")
    print("-" * 30)
    
    # Compare models
    if len(valid_models) > 1:
        naive_model = next((m for m in valid_models if m['model'] == 'Naive'), None)
        best_advanced = valid_models[0] if valid_models[0]['model'] != 'Naive' else valid_models[1]
        
        if naive_model and best_advanced:
            improvement = ((naive_model['mean_rmse'] - best_advanced['mean_rmse']) / naive_model['mean_rmse']) * 100
            print(f"✅ {best_advanced['model']} improves over Naive by {improvement:.1f}%")
        
        # Consistency analysis
        most_consistent = min(valid_models, key=lambda x: x['std_rmse'])
        if most_consistent['model'] != best_model['model']:
            print(f"📊 Most consistent model: {most_consistent['model']} (Std RMSE: {most_consistent['std_rmse']:.6f})")
        
        # Success rate analysis
        most_reliable = max(valid_models, key=lambda x: x['success_rate'])
        if most_reliable['success_rate'] < 1.0:
            print(f"⚠️ Model reliability: {most_reliable['model']} has highest success rate ({most_reliable['success_rate']*100:.1f}%)")
    
    # Model-specific insights
    print(f"\n🔍 MODEL-SPECIFIC INSIGHTS:")
    print("-" * 30)
    
    for model_info in valid_models:
        model_name = model_info['model']
        print(f"  {model_name}:")
        
        if model_name == 'Naive':
            print(f"    • Simple baseline using previous day same hour")
            print(f"    • Always available, no fitting required")
        elif model_name == 'SARIMA':
            print(f"    • Captures seasonal patterns (24-hour cycle)")
            print(f"    • No external variables required")
        elif model_name == 'SARIMAX':
            print(f"    • Uses {len(EXOG_VARS)} exogenous variables")
            print(f"    • Most complex model, requires all input features")
        
        # Performance characteristics
        if model_info['std_rmse'] < 0.01:
            print(f"    • Very consistent performance")
        elif model_info['std_rmse'] < 0.02:
            print(f"    • Reasonably consistent performance")
        else:
            print(f"    • Variable performance across days")
        
        if model_info['success_rate'] < 0.8:
            print(f"    • ⚠️ Some fitting failures ({model_info['success_rate']*100:.1f}% success rate)")
    
    # Production recommendations
    print(f"\n🚀 PRODUCTION RECOMMENDATIONS:")
    print("-" * 30)
    
    if best_model['model'] == 'Naive':
        print("• Consider using Naive model for:")
        print("  - Quick baseline forecasts")
        print("  - Fallback when advanced models fail")
        print("  - Real-time applications requiring minimal computation")
    else:
        print(f"• Consider using {best_model['model']} model for:")
        print("  - Primary forecasting in production")
        print("  - Applications requiring high accuracy")
        
        print("• Recommended backup strategy:")
        print("  - Use Naive model as fallback when advanced model fails")
        print("  - Monitor model performance regularly")
    
    # Data quality insights
    print(f"\n📊 DATA QUALITY INSIGHTS:")
    print("-" * 30)
    
    min_success_rate = min(m['success_rate'] for m in valid_models)
    if min_success_rate < 0.9:
        print(f"⚠️ Some models have success rates below 90%")
        print(f"   Consider investigating data quality issues")
    
    # Overall conclusion
    print(f"\n🎯 CONCLUSION:")
    print("-" * 15)
    print(f"Based on {len(results_df)} days of rolling window validation:")
    print(f"• Best performing model: {best_model['model']} (RMSE: {best_model['mean_rmse']:.6f})")
    print(f"• Model complexity vs. performance trade-off analyzed")
    print(f"• Ready for production deployment with appropriate monitoring")

# Generate recommendations
if 'results_df' in locals() and not results_df.empty:
    summary_stats = calculate_summary_stats(results_df)
    generate_recommendations(results_df, summary_stats)
    
    # Save results for future reference
    timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
    results_filename = f"sarimax_evaluation_results_{timestamp}.csv"
    results_df.to_csv(results_filename, index=False)
    print(f"\n💾 Results saved to: {results_filename}")
    
else:
    print("❌ No results available for recommendations")
    print("Please run the previous cells first")

logger.info("✅ Evaluation complete")

2025-05-28 11:36:27,918 - __main__ - INFO - ✅ Evaluation complete


🎯 FINAL RECOMMENDATIONS:
🏆 RECOMMENDED MODEL: SARIMAX
   Mean RMSE: 0.023518
   Std RMSE:  0.010723
   Success Rate: 100.0% (30/30 days)

📈 PERFORMANCE INSIGHTS:
------------------------------
✅ SARIMAX improves over Naive by 56.4%

🔍 MODEL-SPECIFIC INSIGHTS:
------------------------------
  SARIMAX:
    • Uses 18 exogenous variables
    • Most complex model, requires all input features
    • Reasonably consistent performance
  SARIMA:
    • Captures seasonal patterns (24-hour cycle)
    • No external variables required
    • Reasonably consistent performance
  Naive:
    • Simple baseline using previous day same hour
    • Always available, no fitting required
    • Reasonably consistent performance

🚀 PRODUCTION RECOMMENDATIONS:
------------------------------
• Consider using SARIMAX model for:
  - Primary forecasting in production
  - Applications requiring high accuracy
• Recommended backup strategy:
  - Use Naive model as fallback when advanced model fails
  - Monitor model perfor

In [6]:
# ============================================================================
# Cell 6: Detailed Model Comparison Analysis (FIXED)
# ============================================================================

print("🔍 Creating detailed model comparison analysis...")

# Create dictionary of model predictions from single_run_results
model_predictions = {}
for model_name, result in single_run_results.items():
    if result.success and result.predictions is not None:
        # Use a cleaner display name
        display_name = model_name.replace('_', ' ').title()
        model_predictions[display_name] = result.predictions

print(f"📊 Analyzing {len(model_predictions)} successful models:")
for name in model_predictions.keys():
    print(f"  • {name}")

if len(model_predictions) == 0:
    print("❌ No successful model predictions available for detailed analysis")
else:
    # Get actual values for the forecast period
    y_actual = data_splits.y_test
    
    # Calculate RMSE per day
    print("\n📅 Calculating daily RMSE...")
    day_rmse_data = []
    
    for model_name, preds in model_predictions.items():
        # Ensure preds is a Series and has the right index
        if not isinstance(preds, pd.Series):
            print(f"⚠️ {model_name} predictions not available as a Series, skipping...")
            continue
            
        # Create dataframe with actual and predicted values
        # Align indices to handle any mismatches
        common_idx = y_actual.index.intersection(preds.index)
        if len(common_idx) == 0:
            print(f"⚠️ No common timestamps between actual and {model_name} predictions, skipping...")
            continue
            
        df_day = pd.DataFrame({
            'actual': y_actual.loc[common_idx],
            'pred': preds.loc[common_idx]
        })
        
        # Add date column
        df_day["date"] = df_day.index.date
        
        # Function to calculate RMSE for a group with error handling
        def calc_group_rmse(group):
            try:
                from sklearn.metrics import mean_squared_error
                return np.sqrt(mean_squared_error(group["actual"], group["pred"]))
            except Exception as e:
                print(f"⚠️ Error calculating RMSE for {model_name} on {group.name}: {e}")
                return np.nan
                
        # FIXED: Group by date and calculate RMSE for each day (with include_groups=False)
        daily_rmse = df_day.groupby("date", include_groups=False).apply(calc_group_rmse)
        day_rmse_data.append(daily_rmse.round(3).rename(model_name))
    
    if day_rmse_data:
        # Combine all daily RMSE data
        rmse_day_df = pd.concat(day_rmse_data, axis=1)
        
        from IPython.display import display, HTML
        display(HTML("<h3>📅 RMSE per dag</h3>"))
        display(rmse_day_df)
        
        # Create empty DataFrame for hourly errors
        print("\n🕒 Calculating hourly absolute errors...")
        rmse_full_hourly_df = pd.DataFrame(index=y_actual.index)
        
        # Calculate absolute error at each timestamp
        for model_name, preds in model_predictions.items():
            if not isinstance(preds, pd.Series):
                print(f"⚠️ {model_name} predictions not available as a Series, skipping...")
                continue
                
            # Align indices
            common_idx = y_actual.index.intersection(preds.index)
            if len(common_idx) == 0:
                continue
                
            # Calculate absolute error at each timestamp
            actual_aligned = y_actual.loc[common_idx]
            preds_aligned = preds.loc[common_idx]
            abs_error = np.abs(actual_aligned - preds_aligned)
            rmse_full_hourly_df.loc[common_idx, model_name] = abs_error.round(3)
        
        display(HTML("<h3>🕒 Absolute error per tijdstip (alle uur)</h3>"))
        display(rmse_full_hourly_df.head(20))  # Show first 20 hours to avoid overwhelming output
        print(f"... (showing first 20 of {len(rmse_full_hourly_df)} total hours)")
        
        # Add summary table showing average daily RMSE
        display(HTML("<h3>📊 Gemiddelde RMSE per dag</h3>"))
        avg_day_rmse = rmse_day_df.mean().to_frame("Avg Daily RMSE").round(3)
        
        if not avg_day_rmse.empty:
            best_day = avg_day_rmse["Avg Daily RMSE"].idxmin()
            avg_day_rmse["Rank"] = avg_day_rmse["Avg Daily RMSE"].rank().astype(int)
            avg_day_rmse.loc[best_day, "Note"] = "🏆 Best model"
            
            # Sort by rank for better display
            avg_day_rmse_sorted = avg_day_rmse.sort_values("Rank")
            display(avg_day_rmse_sorted)
        
        # Add summary table showing percentage of days each model is best
        display(HTML("<h3>🥇 Aantal dagen dat model het best presteert</h3>"))
        if not rmse_day_df.empty and rmse_day_df.shape[0] > 0:
            best_days = rmse_day_df.idxmin(axis=1).value_counts().to_frame("Number of days best")
            best_days["Percentage"] = (best_days["Number of days best"] / len(rmse_day_df) * 100).round(1)
            best_days["Percentage_str"] = best_days["Percentage"].astype(str) + '%'
            
            # Sort by number of days best (descending)
            best_days_sorted = best_days.sort_values("Number of days best", ascending=False)
            
            # Add winner emoji
            if len(best_days_sorted) > 0:
                best_model = best_days_sorted.index[0]
                best_days_sorted.loc[best_model, "Note"] = "🥇 Most consistent"
            
            display(best_days_sorted[["Number of days best", "Percentage_str", "Note"]])
        else:
            print("⚠️ Not enough daily data to calculate best performing days")
        
        # Additional insights
        print(f"\n💡 DETAILED INSIGHTS:")
        print("=" * 50)
        
        if not rmse_day_df.empty:
            # Overall best model
            overall_best = avg_day_rmse["Avg Daily RMSE"].idxmin()
            overall_best_rmse = avg_day_rmse.loc[overall_best, "Avg Daily RMSE"]
            print(f"🏆 Best overall model: {overall_best} (Avg RMSE: {overall_best_rmse:.3f})")
            
            # Most consistent model (lowest std deviation of daily RMSE)
            daily_std = rmse_day_df.std().round(3)
            most_consistent = daily_std.idxmin()
            consistency_score = daily_std.loc[most_consistent]
            print(f"📈 Most consistent model: {most_consistent} (RMSE Std: {consistency_score:.3f})")
            
            # Model with best single day performance
            best_single_day_value = rmse_day_df.min().min()
            best_single_day_model = rmse_day_df.min().idxmin()
            print(f"⚡ Best single day performance: {best_single_day_model} (RMSE: {best_single_day_value:.3f})")
            
            # Performance spread analysis
            print(f"\n📊 Performance Spread Analysis:")
            for model in rmse_day_df.columns:
                model_rmse = rmse_day_df[model].dropna()
                if len(model_rmse) > 0:
                    print(f"  {model}:")
                    print(f"    Range: {model_rmse.min():.3f} - {model_rmse.max():.3f}")
                    print(f"    Std Dev: {model_rmse.std():.3f}")
                    print(f"    Days analyzed: {len(model_rmse)}")
        
        else:
            print("⚠️ No daily RMSE data available for detailed insights")
    
    else:
        print("❌ No valid daily RMSE data could be calculated")

print(f"\n🎉 DETAILED COMPARISON ANALYSIS COMPLETE!")

🔍 Creating detailed model comparison analysis...


NameError: name 'single_run_results' is not defined

In [None]:
# ============================================================================
# Cell 6: Rolling Window Validation
# ============================================================================

logger.info("🔄 Starting rolling window validation...")

# Run rolling validation
rolling_results = experiment.run_rolling_validation(
    n_windows=config.rolling_windows,
    parallel=config.parallel_execution
)

if not rolling_results.empty:
    print(f"\n🔄 ROLLING WINDOW VALIDATION RESULTS:")
    print("=" * 60)
    
    # Summary statistics by model
    print("\n📊 Summary by Model:")
    summary_stats = rolling_results.groupby('model_name').agg({
        'rmse': ['mean', 'std', 'min', 'max'],
        'mae': ['mean', 'std'],
        'execution_time': ['mean', 'sum'],
        'status': lambda x: f"{(x == 'completed').sum()}/{len(x)}"
    }).round(6)
    
    print(summary_stats)
    
    # Create rolling validation plot
    print("\n📈 Creating rolling validation plot...")
    rolling_plot = visualizer.create_rolling_validation_plot(rolling_results)
    rolling_plot.show()
    
    # Performance trend analysis
    trends = experiment.validator.analyze_performance_trends(rolling_results)
    
    print(f"\n📈 PERFORMANCE TRENDS:")
    print("-" * 40)
    
    for model_name, trend_info in trends.get('performance_trends', {}).items():
        trend = trend_info.get('trend', 'UNKNOWN')
        degradation = trend_info.get('degradation_percent', 0)
        windows_completed = trend_info.get('windows_completed', 0)
        
        print(f"  {model_name.replace('_', ' ').title()}:")
        print(f"    Trend: {trend} ({degradation:+.1f}%)")
        print(f"    Windows completed: {windows_completed}/{config.rolling_windows}")
        
        if trend in ['SEVERE', 'SIGNIFICANT']:
            print(f"    ⚠️  Performance degradation detected!")
        elif trend == 'IMPROVING':
            print(f"    ✅ Performance improving over time")
    
    # Success rates
    print(f"\n📊 SUCCESS RATES:")
    for model_name, success_info in trends.get('success_rate', {}).items():
        rate = success_info['success_rate']
        count = success_info['success_count']
        total = success_info['total_count']
        print(f"  {model_name.replace('_', ' ').title()}: {rate:.1f}% ({count}/{total})")

else:
    print("\n❌ No rolling validation results available")
    print("This could be due to insufficient data or all models failing")

In [None]:
# ============================================================================
# Cell 7: Full Experiment & Analysis
# ============================================================================

# Run complete experiment with comprehensive logging
experiment_name = f"Complete_Model_Comparison_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}"

print(f"🎯 Running full experiment: {experiment_name}")
full_results = experiment.run_full_experiment(
    experiment_name=experiment_name,
    include_rolling=True
)

# Display experiment summary
summary = full_results.get('summary', {})

print(f"\n🎉 EXPERIMENT SUMMARY:")
print("=" * 60)
print(f"Experiment ID: {full_results['experiment_id']}")
print(f"Name: {full_results['experiment_name']}")
print(f"Status: {full_results.get('status', 'completed')}")

# Single run summary
single_summary = summary.get('single_run_summary', {})
if single_summary:
    print(f"\n📊 Single Run Results:")
    print(f"  Total models: {single_summary.get('total_models', 0)}")
    print(f"  Successful: {single_summary.get('successful_models', 0)}")
    print(f"  Failed: {single_summary.get('failed_models', 0)}")
    print(f"  Best model: {single_summary.get('best_model', 'Unknown')}")
    print(f"  Best RMSE: {single_summary.get('best_rmse', 'N/A')}")

# Rolling validation summary
rolling_summary = summary.get('rolling_validation_summary', {})
if rolling_summary:
    print(f"\n🔄 Rolling Validation Results:")
    print(f"  Total windows: {rolling_summary.get('total_windows', 0)}")
    print(f"  Models tested: {len(rolling_summary.get('models_tested', []))}")
    if 'best_model' in rolling_summary:
        print(f"  Best model (avg): {rolling_summary['best_model']}")
        print(f"  Best avg RMSE: {rolling_summary.get('best_avg_rmse', 'N/A')}")

# Overall recommendation
best_model = summary.get('overall_best_model')
if best_model:
    print(f"\n🏆 OVERALL BEST MODEL: {best_model.replace('_', ' ').title()}")

# Recommendations
recommendations = summary.get('recommendations', [])
if recommendations:
    print(f"\n💡 RECOMMENDATIONS:")
    for rec in recommendations:
        print(f"  {rec}")

print(f"\n📋 Results saved to database with experiment ID: {full_results['experiment_id']}")

In [None]:
# ============================================================================
# Cell 8: Advanced Analysis & Historical Comparison
# ============================================================================

# Advanced model comparison
print("🔍 ADVANCED MODEL ANALYSIS:")
print("=" * 50)

successful_results = {name: result for name, result in single_run_results.items() if result.success}
if len(successful_results) > 1:
    predictions_dict = {name: result.predictions for name, result in successful_results.items()}
    
    comparison = metrics_calc.compare_predictions(data_splits.y_test, predictions_dict)
    
    print(f"\nModels compared: {comparison['models']}")
    
    # Detailed metrics comparison
    print(f"\n📊 DETAILED METRICS COMPARISON:")
    for model_name, metrics in comparison['metrics_comparison'].items():
        print(f"\n  {model_name.replace('_', ' ').title()}:")
        print(f"    RMSE: {metrics.get('rmse', 'N/A'):.6f}")
        print(f"    MAE:  {metrics.get('mae', 'N/A'):.6f}")
        print(f"    MAPE: {metrics.get('mape', 'N/A'):.2f}%")
        print(f"    R²:   {metrics.get('r_squared', 'N/A'):.4f}")
        print(f"    Correlation: {metrics.get('correlation', 'N/A'):.4f}")
    
    # Model ranking
    ranking = comparison.get('ranking', {})
    if ranking:
        print(f"\n🏆 RANKING BY RMSE:")
        for i, entry in enumerate(ranking['by_rmse'], 1):
            model_display = entry['model'].replace('_', ' ').title()
            print(f"  {i}. {model_display}: {entry['rmse']:.6f}")

# Compare with previous experiments
print(f"\n🕒 HISTORICAL COMPARISON:")
comparison = experiment.compare_with_previous_experiments(limit=5)
if 'error' not in comparison:
    print(f"Previous experiments analyzed: {comparison['previous_experiments_count']}")
    
    for rec in comparison.get('recommendations', []):
        print(f"  {rec}")
else:
    print(f"  {comparison['error']}")

# Data quality final assessment
print(f"\n📊 FINAL DATA QUALITY ASSESSMENT:")
print(f"Overall quality score: {quality_report['quality_score']:.1f}%")

target_stats = quality_report.get('target_column_stats', {})
if target_stats:
    print(f"\nTarget column ({config.target_column}) statistics:")
    print(f"  Mean: {target_stats.get('mean', 'N/A'):.4f}")
    print(f"  Std:  {target_stats.get('std', 'N/A'):.4f}")
    print(f"  Range: {target_stats.get('min', 'N/A'):.4f} to {target_stats.get('max', 'N/A'):.4f}")
    print(f"  Missing: {target_stats.get('missing_count', 'N/A')} values")

# Feature importance (if available from models)
print(f"\n🔧 MODEL CONFIGURATION SUMMARY:")
for model_name, result in single_run_results.items():
    if result.success:
        print(f"\n  {model_name.replace('_', ' ').title()}:")
        print(f"    Hyperparameters: {result.hyperparameters}")
        if result.diagnostics:
            print(f"    Diagnostics available: {list(result.diagnostics.keys())}")
        if result.convergence_info:
            converged = result.convergence_info.get('converged', True)
            print(f"    Convergence: {'✅ Yes' if converged else '⚠️ Issues detected'}")

print(f"\n🎉 ANALYSIS COMPLETE!")
print(f"\n💾 All results have been logged to the database.")
print(f"📊 You can query the logs database for detailed historical analysis.")

logger.info("🔬 Advanced analysis complete!")