In [2]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error
import sys
from pathlib import Path
import warnings

# Silence every warning for the rest of the session so the per-day report
# stays readable. NOTE(review): this hides all warnings, including any that
# the model fit would emit.
warnings.filterwarnings("ignore")

# Make build_training_set importable: climb from the working directory to the
# first directory named "ENEXIS" and expose its src/utils folder on sys.path.
# If no ancestor carries that name the climb stops at the filesystem root.
current_dir = Path.cwd()
while current_dir.name != "ENEXIS" and current_dir.parent != current_dir:
    current_dir = current_dir.parent
project_root = current_dir
utils_path = project_root / "src" / "utils"
# Re-running this cell must not keep stacking the same entry onto sys.path.
if str(utils_path) not in sys.path:
    sys.path.append(str(utils_path))
from build_training_set import build_training_set

# Column of the training set that the model predicts.
target = 'Price'

# First walk-forward window: the training period runs from base_start to
# base_end and base_run is the run date handed to build_training_set.
base_start, base_end, base_run = (
    "2025-01-01 00:00:00",
    "2025-03-14 23:00:00",
    "2025-03-15 00:00:00",
)

# SARIMA hyper-parameters (example values, adjust as needed).
order = (1, 1, 1)
seasonal_order = (1, 1, 1, 24)  # seasonal period 24: hourly data, daily cycle

# Collects one result record per evaluated forecast day.
rmse_results = []

# Report banner followed by a 60-character rule.
print("🔍 Testing SARIMA Model - RMSE per forecast day", "=" * 60, sep="\n")

# Walk-forward backtest: every iteration moves the training window and the
# run date one day further, fits a fresh SARIMA model on the history and
# scores its forecast against the prices found after the run date.
skip_hours = 24  # the first 24 forecast steps are excluded from scoring

for i in range(30):
    shift = pd.Timedelta(days=i)
    start = pd.Timestamp(base_start) + shift
    end = pd.Timestamp(base_end) + shift
    run_date = pd.Timestamp(base_run) + shift

    # Deliberately broad handler: a day that fails for any reason is reported
    # and skipped so the remaining days are still evaluated.
    try:
        df = build_training_set(
            train_start=start.strftime("%Y-%m-%d %H:%M:%S"),
            train_end=end.strftime("%Y-%m-%d %H:%M:%S"),
            run_date=run_date.strftime("%Y-%m-%d %H:%M:%S")
        )

        if df is None or df.empty:
            print(f"Day {i+1}: ❌ No training data returned")
            continue

        df['target_datetime'] = pd.to_datetime(df['target_datetime'], utc=True)
        df = df.sort_values('target_datetime')

        run_date_utc = run_date.tz_localize("UTC")

        # Rows up to and including run_date are the history, later rows the
        # evaluation horizon. History rows without timestamp or target are
        # dropped. The explicit copies keep the column assignments below from
        # writing into slices of df.
        # NOTE(review): the row at run_date itself ends up in the history -
        # confirm that this is intended.
        train_data = (
            df[df['target_datetime'] <= run_date_utc]
            .dropna(subset=['target_datetime', target])
            .copy()
        )
        test_data = df[df['target_datetime'] > run_date_utc].copy()

        if test_data.empty or train_data.empty:
            print(f"Day {i+1}: ❌ Not enough data for training or testing")
            continue

        # The series handed to SARIMAX get a timezone-naive datetime index;
        # removing the UTC marker leaves the clock values unchanged.
        train_data['target_datetime'] = train_data['target_datetime'].dt.tz_localize(None)
        test_data['target_datetime'] = test_data['target_datetime'].dt.tz_localize(None)
        y_train = train_data.set_index('target_datetime')[target].astype(float)
        y_test = test_data.set_index('target_datetime')[target].astype(float)

        # Fit SARIMA model
        model = SARIMAX(
            y_train,
            order=order,
            seasonal_order=seasonal_order,
            enforce_stationarity=False,
            enforce_invertibility=False,
        )
        model_fit = model.fit(disp=False)

        # One forecast step per evaluation row; both are compared by position.
        forecast = model_fit.forecast(steps=len(y_test))

        if len(forecast) <= skip_hours:
            print("Niet genoeg testdata na lag van 24 uur.")
            rmse_results.append({
                'iteration': i + 1,
                'run_date': run_date.strftime('%Y-%m-%d'),
                'valid_predictions': 0,
                'rmse': np.nan
            })
            continue

        # Skip the first 24 hours of the horizon.
        y_pred = forecast.values[skip_hours:]
        y_test = y_test.iloc[skip_hours:]
        y_true = y_test.to_numpy()

        # Score only the hours where both the actual price and the forecast
        # are present; a single NaN would otherwise make mean_squared_error
        # raise and the whole day would be lost.
        scorable = ~(np.isnan(y_true) | np.isnan(y_pred))
        n_valid = int(scorable.sum())

        if n_valid > 0:
            rmse = np.sqrt(mean_squared_error(y_true[scorable], y_pred[scorable]))
        else:
            rmse = np.nan

        rmse_results.append({
            'iteration': i + 1,
            'run_date': run_date.strftime('%Y-%m-%d'),
            'valid_predictions': n_valid,
            'rmse': rmse
        })

        if n_valid > 0:
            print(f"Day {i+1}: ✅ {n_valid} test rows, Run: {run_date.strftime('%m-%d')}")
        else:
            print(f"Day {i+1}: ❌ No actual prices available to score the forecast")

    except Exception as e:
        print(f"Day {i+1}: ❌ Error: {e}")

# Final report: per-day table plus aggregate statistics of the RMSE values
# collected in rmse_results.
if not rmse_results:
    print("❌ No runs completed successfully")
else:
    rmse_df = pd.DataFrame(rmse_results)
    rmse_col = rmse_df['rmse']
    report_columns = ['iteration', 'run_date', 'valid_predictions', 'rmse']

    print("\n📊 OVERALL RMSE - SARIMA Model")
    print("=" * 80)
    # Records stored with a NaN RMSE are not counted as successful.
    print(f"Successful runs: {rmse_col.notna().sum()}/30")

    print(rmse_df[report_columns].round(2).to_string(index=False))

    print("\n📈 SUMMARY STATISTICS")
    print("-" * 40)
    print(rmse_col.describe().round(2))

    print("\n📊 AVERAGE OVERALL RMSE")
    print("-" * 40)
    print(f"Mean RMSE: {rmse_col.mean():.4f}")
    print(f"Stddev RMSE: {rmse_col.std():.4f}")

2025-05-30 16:02:22,540 - build_training_set - INFO - 📅 Loading additional historical data until 2025-03-15 00:00:00+00:00 for lagging support
2025-05-30 16:02:22,543 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-30 16:02:22,544 - build_training_set - INFO - 🧠 Actuals van 2025-01-01 00:00:00+00:00 t/m 2025-03-14 23:00:00+00:00 (extended to 2025-03-15 00:00:00+00:00 for lagging)
2025-05-30 16:02:22,547 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-15 00:00:00+00:00, normalized to 2025-03-15 00:00:00+00:00 for DB lookup, target range: 2025-03-15 00:00:00+00:00 → 2025-03-22 00:00:00+00:00
2025-05-30 16:02:22,552 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
2025-05-30 16:02:22,559 - build_training_set - INFO - 📋 Requested columns found: 21/21
2025-05-30 16:02:22,559 - build_training_set - INFO - 📋 Using columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'di

🔍 Testing SARIMA Model - RMSE per forecast day


2025-05-30 16:02:22,735 - build_training_set - INFO - 🔧 Missing columns in predictions: ['Price', 'Load', 'Flow_NO', 'Flow_GB']
2025-05-30 16:02:22,735 - build_training_set - INFO - 📊 Applying 168-hour lag for missing columns (excluding target variables)...
2025-05-30 16:02:22,735 - build_training_set - INFO -    🎯 Column 'Price' is target variable - filled with NaN (not lagged)
2025-05-30 16:02:22,735 - build_training_set - INFO -    🕐 Lagging column 'Load' by 168 hours
2025-05-30 16:02:22,853 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 16:02:22,853 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 16:02:22,955 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:02:22,956 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:02:23,076 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:02:23,078 - build_training_set - I

Day 1: ✅ 144 test rows, Run: 03-15


2025-05-30 16:02:45,769 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 16:02:45,849 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:02:45,853 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:02:45,925 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:02:45,925 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:02:45,930 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:02:45,938 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:02:45,976 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:02:45,978 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:02:45,982 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 2: ✅ 144 test rows, Run: 03-16


2025-05-30 16:03:03,370 - build_training_set - INFO - 📊 Forecast rows available: 169
2025-05-30 16:03:03,379 - build_training_set - INFO - 📋 Common columns for predictions: 17 - ['target_datetime', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'yearday_cos', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10m', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos']
2025-05-30 16:03:03,417 - build_training_set - INFO - ✅ Predictions loaded: 169 rows with 17 columns
2025-05-30 16:03:03,417 - build_training_set - INFO - 🔧 Missing columns in predictions: ['Price', 'Load', 'Flow_NO', 'Flow_GB']
2025-05-30 16:03:03,417 - build_training_set - INFO - 📊 Applying 168-hour lag for missing columns (excluding target variables)...
2025-05-30 16:03:03,425 - build_training_set - INFO -    🎯 Column 'Price' is target variable - filled with NaN (not lagged)
2025-05-30 16:03:03,425 - build_training_set - INFO -    

Day 3: ✅ 144 test rows, Run: 03-17


2025-05-30 16:03:19,530 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 16:03:19,532 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 16:03:19,664 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:03:19,665 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:03:19,747 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:03:19,747 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:03:19,747 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:03:19,753 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:03:19,806 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:03:19,806 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:03:19,806 - buil

Day 4: ✅ 144 test rows, Run: 03-18


2025-05-30 16:03:36,161 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 16:03:36,161 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 16:03:36,303 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:03:36,313 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:03:36,483 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:03:36,496 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:03:36,504 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:03:36,514 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:03:36,550 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:03:36,550 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:03:36,558 - buil

Day 5: ✅ 144 test rows, Run: 03-19


2025-05-30 16:03:51,284 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:03:51,290 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:03:51,400 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:03:51,402 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:03:51,402 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:03:51,407 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:03:51,457 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:03:51,457 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:03:51,466 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-30 16:03:51,473 - build_training_set - INFO - ✅ No overlap between actuals and predictions
202

Day 6: ✅ 144 test rows, Run: 03-20


2025-05-30 16:04:03,585 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 16:04:03,701 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:04:03,701 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:04:03,896 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:04:03,896 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:04:03,902 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:04:03,903 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:04:03,962 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:04:03,970 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:04:03,970 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 7: ✅ 144 test rows, Run: 03-21


2025-05-30 16:04:17,645 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 16:04:17,648 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 16:04:17,739 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:04:17,746 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:04:17,822 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:04:17,831 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:04:17,831 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:04:17,831 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:04:17,879 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:04:17,887 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:04:17,887 - buil

Day 8: ✅ 144 test rows, Run: 03-22


2025-05-30 16:04:34,027 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 16:04:34,055 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 16:04:34,184 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:04:34,184 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:04:34,310 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:04:34,310 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:04:34,310 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:04:34,317 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:04:34,387 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:04:34,392 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:04:34,395 - buil

Day 9: ✅ 144 test rows, Run: 03-23


2025-05-30 16:04:43,370 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 16:04:43,370 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 16:04:43,484 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:04:43,484 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:04:43,593 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:04:43,600 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:04:43,602 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:04:43,610 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:04:43,683 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:04:43,685 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:04:43,685 - buil

Day 10: ✅ 144 test rows, Run: 03-24


2025-05-30 16:04:57,087 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:04:57,087 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:04:57,256 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:04:57,256 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:04:57,256 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:04:57,264 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:04:57,314 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:04:57,314 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:04:57,314 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-30 16:04:57,322 - build_training_set - INFO - ✅ No overlap between actuals and predictions
202

Day 11: ✅ 144 test rows, Run: 03-25


2025-05-30 16:05:12,593 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:05:12,593 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:05:12,667 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:05:12,667 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:05:12,667 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:05:12,676 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:05:12,718 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:05:12,718 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:05:12,727 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-30 16:05:12,727 - build_training_set - INFO - ✅ No overlap between actuals and predictions
202

Day 12: ✅ 144 test rows, Run: 03-26


2025-05-30 16:05:26,241 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:05:26,242 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:05:26,330 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:05:26,339 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:05:26,339 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:05:26,347 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:05:26,395 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:05:26,402 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:05:26,404 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-30 16:05:26,404 - build_training_set - INFO - ✅ No overlap between actuals and predictions
202

Day 13: ✅ 144 test rows, Run: 03-27


2025-05-30 16:05:39,600 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:05:39,600 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:05:39,748 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:05:39,756 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:05:39,756 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:05:39,765 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:05:39,842 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:05:39,842 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:05:39,850 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-30 16:05:39,850 - build_training_set - INFO - ✅ No overlap between actuals and predictions
202

Day 14: ✅ 144 test rows, Run: 03-28


2025-05-30 16:05:52,988 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 16:05:52,994 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 16:05:53,126 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:05:53,126 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:05:53,227 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:05:53,227 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:05:53,227 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:05:53,244 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:05:53,294 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:05:53,294 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:05:53,294 - buil

Day 15: ✅ 144 test rows, Run: 03-29


2025-05-30 16:06:07,654 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:06:07,658 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:06:07,762 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:06:07,772 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:06:07,772 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:06:07,780 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:06:07,839 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:06:07,847 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:06:07,847 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-30 16:06:07,860 - build_training_set - INFO - ✅ No overlap between actuals and predictions
202

Day 16: ✅ 144 test rows, Run: 03-30


2025-05-30 16:06:23,018 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 16:06:23,128 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:06:23,135 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:06:23,231 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:06:23,233 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:06:23,235 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:06:23,238 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:06:23,268 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:06:23,268 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:06:23,277 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 17: ✅ 144 test rows, Run: 03-31


2025-05-30 16:06:34,586 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 16:06:34,586 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 16:06:34,661 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:06:34,661 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:06:34,728 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:06:34,728 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:06:34,728 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:06:34,736 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:06:34,769 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:06:34,769 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:06:34,769 - buil

Day 18: ✅ 144 test rows, Run: 04-01


2025-05-30 16:06:43,117 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 16:06:43,194 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:06:43,194 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:06:43,293 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:06:43,301 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:06:43,303 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:06:43,307 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:06:43,371 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:06:43,371 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:06:43,382 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 19: ✅ 144 test rows, Run: 04-02


2025-05-30 16:06:51,468 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:06:51,468 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:06:51,600 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:06:51,600 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:06:51,600 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:06:51,610 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:06:51,684 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:06:51,684 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:06:51,692 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-30 16:06:51,692 - build_training_set - INFO - ✅ No overlap between actuals and predictions
202

Day 20: ✅ 144 test rows, Run: 04-03


2025-05-30 16:07:07,658 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 16:07:07,658 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 16:07:07,774 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:07:07,774 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:07:07,856 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:07:07,856 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:07:07,856 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:07:07,864 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:07:07,915 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:07:07,923 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:07:07,923 - buil

Day 21: ✅ 144 test rows, Run: 04-04


2025-05-30 16:07:21,216 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 16:07:21,295 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:07:21,301 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:07:21,376 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:07:21,384 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:07:21,384 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:07:21,392 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:07:21,451 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:07:21,451 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:07:21,459 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 22: ✅ 144 test rows, Run: 04-05


2025-05-30 16:07:36,437 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 16:07:36,439 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 16:07:36,518 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:07:36,526 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:07:36,623 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:07:36,631 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:07:36,632 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:07:36,636 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:07:36,699 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:07:36,702 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:07:36,710 - buil

Day 23: ✅ 144 test rows, Run: 04-06


2025-05-30 16:07:50,351 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 16:07:50,488 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:07:50,488 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:07:50,575 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:07:50,575 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:07:50,580 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:07:50,580 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:07:50,631 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:07:50,631 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:07:50,639 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 24: ✅ 144 test rows, Run: 04-07


2025-05-30 16:08:04,446 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 16:08:04,453 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 16:08:04,580 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:08:04,580 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:08:04,697 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:08:04,697 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:08:04,697 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:08:04,705 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:08:04,770 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:08:04,770 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:08:04,778 - buil

Day 25: ✅ 144 test rows, Run: 04-08


2025-05-30 16:08:14,111 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 16:08:14,224 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:08:14,224 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:08:14,342 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:08:14,352 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:08:14,352 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:08:14,356 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:08:14,416 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:08:14,416 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:08:14,425 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 26: ✅ 144 test rows, Run: 04-09


2025-05-30 16:08:29,085 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:08:29,094 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:08:29,159 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:08:29,159 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:08:29,159 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:08:29,168 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:08:29,196 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:08:29,203 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:08:29,203 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-30 16:08:29,212 - build_training_set - INFO - ✅ No overlap between actuals and predictions
202

Day 27: ✅ 144 test rows, Run: 04-10


2025-05-30 16:08:50,853 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:08:50,853 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:08:50,951 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:08:50,951 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:08:50,951 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:08:50,960 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:08:50,993 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:08:50,995 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:08:50,995 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-30 16:08:50,995 - build_training_set - INFO - ✅ No overlap between actuals and predictions
202

Day 28: ✅ 144 test rows, Run: 04-11


2025-05-30 16:09:02,674 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 16:09:02,690 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 16:09:02,809 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:09:02,809 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:09:02,860 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:09:02,868 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:09:02,868 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:09:02,868 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:09:02,918 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:09:02,918 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:09:02,935 - buil

Day 29: ✅ 144 test rows, Run: 04-12


2025-05-30 16:09:19,963 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:09:19,963 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:09:20,035 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:09:20,035 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:09:20,043 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:09:20,045 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:09:20,080 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:09:20,089 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:09:20,089 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-30 16:09:20,089 - build_training_set - INFO - ✅ No overlap between actuals and predictions
202

Day 30: ✅ 144 test rows, Run: 04-13

📊 OVERALL RMSE - SARIMA Model
Successful runs: 30/30
 iteration   run_date  valid_predictions  rmse
         1 2025-03-15                144  0.05
         2 2025-03-16                144  0.04
         3 2025-03-17                144  0.06
         4 2025-03-18                144  0.04
         5 2025-03-19                144  0.04
         6 2025-03-20                144  0.05
         7 2025-03-21                144  0.04
         8 2025-03-22                144  0.12
         9 2025-03-23                144  0.02
        10 2025-03-24                144  0.05
        11 2025-03-25                144  0.04
        12 2025-03-26                144  0.05
        13 2025-03-27                144  0.05
        14 2025-03-28                144  0.04
        15 2025-03-29                144  0.06
        16 2025-03-30                144  0.11
        17 2025-03-31                144  0.04
        18 2025-04-01                144  0.05
        19 2025-0

In [1]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error
import sys
from pathlib import Path
import warnings

# Silence every warning category for the remainder of the session.
warnings.filterwarnings("ignore")

# Make sure build_training_set can be imported: use the nearest directory
# named "ENEXIS" at or above the working directory as the project root. When
# no such directory exists the search ends at the top-most ancestor.
current_dir = Path.cwd()
search_chain = (current_dir, *current_dir.parents)
current_dir = next(
    (folder for folder in search_chain if folder.name == "ENEXIS"),
    search_chain[-1],
)
project_root = current_dir
utils_path = project_root.joinpath("src", "utils")
sys.path.append(str(utils_path))
from build_training_set import build_training_set

# Columns passed to SARIMAX as exogenous regressors (order is preserved when
# the exog frames are built).
FEATURES = [
    'Load',
    'shortwave_radiation',
    'temperature_2m',
    'direct_normal_irradiance',
    'diffuse_radiation',
    'Flow_NO',
    'yearday_cos',
    'Flow_GB',
    'month',
    'is_dst',
    'yearday_sin',
    'wind_speed_10m',
    'is_non_working_day',
    'hour_cos',
    'is_weekend',
    'cloud_cover',
    'weekday_sin',
    'hour_sin',
    'weekday_cos',
]
# Column holding the series that is forecast and scored.
target = 'Price'

# Initial training window (start, end) and the first run date; the evaluation
# loop offsets all three by whole days.
base_start, base_end, base_run = (
    "2025-01-01 00:00:00",
    "2025-03-14 23:00:00",
    "2025-03-15 00:00:00",
)

# One dict per evaluated iteration is appended here.
rmse_results = []

print("🔍 Testing SARIMAX Model - RMSE per forecast day")
print("=" * 60)

# SARIMAX parameters (example values: adjust as needed).
# The seasonal period of 24 models daily seasonality in hourly data.
order, seasonal_order = (1, 1, 1), (1, 1, 1, 24)

# Backtest over 30 consecutive run dates. Each iteration moves the training
# window and the run date forward by one day, refits SARIMAX on the rows up to
# and including the run date, forecasts the rows after it, and records the
# RMSE of that forecast in rmse_results.
for i in range(30):
    start = pd.Timestamp(base_start) + pd.Timedelta(days=i)
    end = pd.Timestamp(base_end) + pd.Timedelta(days=i)
    run_date = pd.Timestamp(base_run) + pd.Timedelta(days=i)

    try:
        df = build_training_set(
            train_start=start.strftime("%Y-%m-%d %H:%M:%S"),
            train_end=end.strftime("%Y-%m-%d %H:%M:%S"),
            run_date=run_date.strftime("%Y-%m-%d %H:%M:%S")
        )

        if df is None or df.empty:
            print(f"Day {i+1}: ❌ No training data returned")
            continue

        df['target_datetime'] = pd.to_datetime(df['target_datetime'], utc=True)
        df = df.sort_values('target_datetime')

        run_date_utc = run_date.tz_localize("UTC")

        # Split into training and testing sets, dropping rows with a missing
        # timestamp, target or feature value.
        required_columns = ['target_datetime', target] + FEATURES
        train_data = df[df['target_datetime'] <= run_date_utc].dropna(subset=required_columns)
        test_data = df[df['target_datetime'] > run_date_utc].dropna(subset=required_columns)

        if test_data.empty or train_data.empty:
            print(f"Day {i+1}: ❌ Not enough data for training or testing")
            continue

        # Prepare data for SARIMAX: strip the timezone so the index is naive.
        # assign() builds new frames instead of writing into objects derived
        # from a filtered slice of df (chained assignment).
        train_data = train_data.assign(
            target_datetime=train_data['target_datetime'].dt.tz_localize(None)
        )
        test_data = test_data.assign(
            target_datetime=test_data['target_datetime'].dt.tz_localize(None)
        )
        train_indexed = train_data.set_index('target_datetime')
        test_indexed = test_data.set_index('target_datetime')
        y_train = train_indexed[target].astype(float)
        y_test = test_indexed[target].astype(float)
        exog_train = train_indexed[FEATURES].astype(float)
        exog_test = test_indexed[FEATURES].astype(float)

        # Fit SARIMAX model with exogenous variables
        model = SARIMAX(
            y_train,
            exog=exog_train,
            order=order,
            seasonal_order=seasonal_order,
            enforce_stationarity=False,
            enforce_invertibility=False
        )
        model_fit = model.fit(disp=False)

        # Forecast one step per remaining test row
        n_test = len(y_test)
        forecast = model_fit.forecast(steps=n_test, exog=exog_test)
        y_pred = forecast.values

        # Skip the first 24 forecast steps. This equals the first 24 hours
        # only when the test rows are gap-free hourly data - TODO confirm.
        if len(y_pred) <= 24:
            print("Niet genoeg testdata na lag van 24 uur.")
            rmse = np.nan
            rmse_results.append({
                'iteration': i + 1,
                'run_date': run_date.strftime('%Y-%m-%d'),
                'valid_predictions': 0,
                'rmse': rmse
            })
            continue

        # The guard above guarantees at least one element remains, so no
        # empty-forecast fallback is needed here.
        y_pred = y_pred[24:]
        y_test = y_test.iloc[24:]
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))

        print(f"RSME is {rmse:.4f} for day {i+1}")

        rmse_results.append({
            'iteration': i + 1,
            'run_date': run_date.strftime('%Y-%m-%d'),
            'valid_predictions': len(y_pred),
            'rmse': rmse
        })

        print(f"Day {i+1}: ✅ {len(y_pred)} test rows, Run: {run_date.strftime('%m-%d')}")

    except Exception as e:
        # Best-effort: report the failure and continue with the next run date.
        # A failed iteration adds no entry to rmse_results.
        print(f"Day {i+1}: ❌ Error: {e}")

# Report the collected per-iteration RMSE values: a table of all recorded
# iterations, describe() statistics, and the mean / standard deviation.
if not rmse_results:
    print("❌ No runs completed successfully")
else:
    rmse_df = pd.DataFrame(rmse_results)
    rmse_values = rmse_df['rmse']
    report_columns = ['iteration', 'run_date', 'valid_predictions', 'rmse']
    section_rule = "-" * 40

    print("\n📊 OVERALL RMSE - SARIMAX Model")
    print("=" * 80)
    # Iterations whose RMSE is NaN are not counted as successful.
    print(f"Successful runs: {rmse_values.notna().sum()}/30")

    print(rmse_df[report_columns].round(2).to_string(index=False))

    print("\n📈 SUMMARY STATISTICS")
    print(section_rule)
    print(rmse_values.describe().round(2))

    print("\n📊 AVERAGE OVERALL RMSE")
    print(section_rule)
    print(f"Mean RMSE: {rmse_values.mean():.4f}")
    print(f"Stddev RMSE: {rmse_values.std():.4f}")

2025-05-30 16:46:13,285 - build_training_set - INFO - 📅 Loading additional historical data until 2025-03-15 00:00:00+00:00 for lagging support
2025-05-30 16:46:13,285 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-30 16:46:13,285 - build_training_set - INFO - 🧠 Actuals van 2025-01-01 00:00:00+00:00 t/m 2025-03-14 23:00:00+00:00 (extended to 2025-03-15 00:00:00+00:00 for lagging)
2025-05-30 16:46:13,285 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-15 00:00:00+00:00, normalized to 2025-03-15 00:00:00+00:00 for DB lookup, target range: 2025-03-15 00:00:00+00:00 → 2025-03-22 00:00:00+00:00
2025-05-30 16:46:13,285 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
2025-05-30 16:46:13,312 - build_training_set - INFO - 📋 Requested columns found: 21/21
2025-05-30 16:46:13,312 - build_training_set - INFO - 📋 Using columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'di

🔍 Testing SARIMAX Model - RMSE per forecast day


2025-05-30 16:46:13,538 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 16:46:13,538 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 16:46:13,613 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:46:13,613 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:46:13,696 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:46:13,713 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:46:13,713 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:46:13,713 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:46:13,786 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:46:13,790 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:46:13,790 - buil

RSME is 0.0759 for day 1
Day 1: ✅ 144 test rows, Run: 03-15


2025-05-30 16:49:21,705 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 16:49:21,705 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 16:49:21,805 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:49:21,805 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:49:21,888 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:49:21,888 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:49:21,888 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:49:21,904 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:49:21,946 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:49:21,946 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:49:21,946 - buil

RSME is 0.0475 for day 2
Day 2: ✅ 144 test rows, Run: 03-16


2025-05-30 16:55:35,537 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 16:55:35,545 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 16:55:35,670 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:55:35,678 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:55:35,807 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:55:35,820 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:55:35,820 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:55:35,824 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:55:35,888 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:55:35,890 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:55:35,895 - buil

RSME is 0.0744 for day 3
Day 3: ✅ 144 test rows, Run: 03-17


2025-05-30 16:57:29,360 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 16:57:29,362 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 16:57:29,568 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:57:29,570 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:57:29,748 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:57:29,751 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:57:29,751 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:57:29,755 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:57:29,816 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:57:29,821 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:57:29,821 - buil

RSME is 0.0374 for day 4
Day 4: ✅ 144 test rows, Run: 03-18


2025-05-30 16:59:16,355 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 16:59:16,355 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 16:59:16,466 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 16:59:16,474 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 16:59:16,587 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 16:59:16,587 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 16:59:16,595 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 16:59:16,603 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 16:59:16,659 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 16:59:16,667 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 16:59:16,667 - buil

RSME is 0.0369 for day 5
Day 5: ✅ 144 test rows, Run: 03-19


2025-05-30 17:01:02,110 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 17:01:02,110 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 17:01:02,224 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 17:01:02,232 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 17:01:02,350 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 17:01:02,350 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 17:01:02,359 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 17:01:02,367 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 17:01:02,419 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 17:01:02,419 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 17:01:02,427 - buil

RSME is 0.0450 for day 6
Day 6: ✅ 144 test rows, Run: 03-20


2025-05-30 17:02:48,973 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 17:02:48,975 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 17:02:49,107 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 17:02:49,107 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 17:02:49,224 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 17:02:49,224 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 17:02:49,224 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 17:02:49,232 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 17:02:49,290 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 17:02:49,290 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 17:02:49,298 - buil

RSME is 0.0466 for day 7
Day 7: ✅ 144 test rows, Run: 03-21


2025-05-30 17:04:36,091 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 17:04:36,091 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 17:04:36,216 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 17:04:36,216 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 17:04:36,341 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 17:04:36,341 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 17:04:36,341 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 17:04:36,349 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 17:04:36,417 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 17:04:36,417 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 17:04:36,425 - buil

RSME is 0.2150 for day 8
Day 8: ✅ 144 test rows, Run: 03-22


2025-05-30 17:06:24,121 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 17:06:24,121 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 17:06:24,235 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 17:06:24,244 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 17:06:24,352 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 17:06:24,361 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 17:06:24,361 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 17:06:24,369 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 17:06:24,438 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 17:06:24,446 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 17:06:24,446 - buil

RSME is 0.0313 for day 9
Day 9: ✅ 144 test rows, Run: 03-23


2025-05-30 17:08:20,510 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 17:08:20,517 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 17:08:20,651 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 17:08:20,651 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 17:08:20,772 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 17:08:20,772 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 17:08:20,780 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 17:08:20,788 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 17:08:20,845 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 17:08:20,845 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 17:08:20,845 - buil

RSME is 0.0749 for day 10
Day 10: ✅ 144 test rows, Run: 03-24


2025-05-30 17:10:19,572 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 17:10:19,572 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 17:10:19,695 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 17:10:19,695 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 17:10:19,813 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 17:10:19,813 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 17:10:19,813 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 17:10:19,821 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 17:10:19,880 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 17:10:19,888 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 17:10:19,888 - buil

RSME is 0.0424 for day 11
Day 11: ✅ 144 test rows, Run: 03-25


2025-05-30 17:17:12,269 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 17:17:12,277 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 17:17:12,396 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 17:17:12,396 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 17:17:12,526 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 17:17:12,526 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 17:17:12,526 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 17:17:12,534 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 17:17:12,595 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 17:17:12,595 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 17:17:12,603 - buil

RSME is 0.0407 for day 12
Day 12: ✅ 144 test rows, Run: 03-26


2025-05-30 17:19:03,504 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 17:19:03,513 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 17:19:03,636 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 17:19:03,636 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 17:19:03,753 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 17:19:03,753 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 17:19:03,753 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 17:19:03,761 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 17:19:03,874 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 17:19:03,885 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 17:19:03,892 - buil

RSME is 0.0609 for day 13
Day 13: ✅ 144 test rows, Run: 03-27


2025-05-30 17:26:15,349 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 17:26:15,360 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 17:26:15,476 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 17:26:15,476 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 17:26:15,590 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 17:26:15,598 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 17:26:15,598 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 17:26:15,606 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 17:26:15,662 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 17:26:15,662 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 17:26:15,670 - buil

RSME is 0.0575 for day 14
Day 14: ✅ 144 test rows, Run: 03-28


2025-05-30 18:11:46,791 - build_training_set - INFO -    🎯 Column 'Price' is target variable - filled with NaN (not lagged)
2025-05-30 18:11:46,794 - build_training_set - INFO -    🕐 Lagging column 'Load' by 168 hours
2025-05-30 18:11:46,949 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 18:11:46,953 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 18:11:47,090 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 18:11:47,091 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 18:11:47,222 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 18:11:47,232 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 18:11:47,234 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 18:11:47,236 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-3

RSME is 0.0804 for day 15
Day 15: ✅ 144 test rows, Run: 03-29


2025-05-30 18:19:33,828 - build_training_set - INFO -    🕐 Lagging column 'Load' by 168 hours
2025-05-30 18:19:33,952 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 18:19:33,954 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 18:19:34,074 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 18:19:34,076 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 18:19:34,196 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 18:19:34,207 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 18:19:34,207 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 18:19:34,216 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 18:19:34,274 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 18:19:34,276 -

RSME is 0.2069 for day 16
Day 16: ✅ 144 test rows, Run: 03-30


2025-05-30 18:27:47,480 - build_training_set - INFO -    🕐 Lagging column 'Load' by 168 hours
2025-05-30 18:27:47,620 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 18:27:47,622 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 18:27:47,746 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 18:27:47,746 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 18:27:47,871 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 18:27:47,871 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 18:27:47,871 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 18:27:47,889 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 18:27:47,970 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 18:27:47,970 -

RSME is 0.0561 for day 17
Day 17: ✅ 144 test rows, Run: 03-31


2025-05-30 18:35:59,355 - build_training_set - INFO - ✅ Actuals loaded: 1752 rows with 21 selected columns
2025-05-30 18:35:59,358 - build_training_set - INFO - 🔍 Loading forecast/prediction data...
2025-05-30 18:35:59,398 - build_training_set - INFO - 📊 Forecast rows available: 169
2025-05-30 18:35:59,404 - build_training_set - INFO - 📋 Common columns for predictions: 17 - ['target_datetime', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'yearday_cos', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10m', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos']
2025-05-30 18:35:59,438 - build_training_set - INFO - ✅ Predictions loaded: 169 rows with 17 columns
2025-05-30 18:35:59,442 - build_training_set - INFO - 🔧 Missing columns in predictions: ['Price', 'Load', 'Flow_NO', 'Flow_GB']
2025-05-30 18:35:59,442 - build_training_set - INFO - 📊 Applying 168-hour lag for missing columns (excluding tar

RSME is 0.0599 for day 18
Day 18: ✅ 144 test rows, Run: 04-01


2025-05-30 18:43:46,020 - build_training_set - INFO - ✅ Predictions loaded: 169 rows with 17 columns
2025-05-30 18:43:46,030 - build_training_set - INFO - 🔧 Missing columns in predictions: ['Price', 'Load', 'Flow_NO', 'Flow_GB']
2025-05-30 18:43:46,032 - build_training_set - INFO - 📊 Applying 168-hour lag for missing columns (excluding target variables)...
2025-05-30 18:43:46,033 - build_training_set - INFO -    🎯 Column 'Price' is target variable - filled with NaN (not lagged)
2025-05-30 18:43:46,037 - build_training_set - INFO -    🕐 Lagging column 'Load' by 168 hours
2025-05-30 18:43:46,179 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 18:43:46,195 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 18:43:46,337 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 18:43:46,337 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 18:43:46,470 - build_training_s

RSME is 0.0455 for day 19
Day 19: ✅ 144 test rows, Run: 04-02


2025-05-30 18:45:35,200 - build_training_set - INFO - ✅ Predictions loaded: 169 rows with 17 columns
2025-05-30 18:45:35,200 - build_training_set - INFO - 🔧 Missing columns in predictions: ['Price', 'Load', 'Flow_NO', 'Flow_GB']
2025-05-30 18:45:35,206 - build_training_set - INFO - 📊 Applying 168-hour lag for missing columns (excluding target variables)...
2025-05-30 18:45:35,211 - build_training_set - INFO -    🎯 Column 'Price' is target variable - filled with NaN (not lagged)
2025-05-30 18:45:35,213 - build_training_set - INFO -    🕐 Lagging column 'Load' by 168 hours
2025-05-30 18:45:35,338 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 18:45:35,338 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 18:45:35,477 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 18:45:35,477 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 18:45:35,653 - build_training_s

RSME is 0.0877 for day 20
Day 20: ✅ 144 test rows, Run: 04-03


2025-05-30 18:46:33,363 - build_training_set - INFO - 🔧 Missing columns in predictions: ['Price', 'Load', 'Flow_NO', 'Flow_GB']
2025-05-30 18:46:33,367 - build_training_set - INFO - 📊 Applying 168-hour lag for missing columns (excluding target variables)...
2025-05-30 18:46:33,370 - build_training_set - INFO -    🎯 Column 'Price' is target variable - filled with NaN (not lagged)
2025-05-30 18:46:33,370 - build_training_set - INFO -    🕐 Lagging column 'Load' by 168 hours
2025-05-30 18:46:33,518 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 18:46:33,518 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 18:46:33,634 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 18:46:33,643 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 18:46:33,768 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 18:46:33,768 - build_training_set - I

RSME is 0.0322 for day 21
Day 21: ✅ 144 test rows, Run: 04-04


2025-05-30 18:47:32,241 - build_training_set - INFO - ✅ Predictions loaded: 169 rows with 17 columns
2025-05-30 18:47:32,247 - build_training_set - INFO - 🔧 Missing columns in predictions: ['Price', 'Load', 'Flow_NO', 'Flow_GB']
2025-05-30 18:47:32,251 - build_training_set - INFO - 📊 Applying 168-hour lag for missing columns (excluding target variables)...
2025-05-30 18:47:32,254 - build_training_set - INFO -    🎯 Column 'Price' is target variable - filled with NaN (not lagged)
2025-05-30 18:47:32,256 - build_training_set - INFO -    🕐 Lagging column 'Load' by 168 hours
2025-05-30 18:47:32,411 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 18:47:32,416 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 18:47:32,553 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 18:47:32,553 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 18:47:32,678 - build_training_s

RSME is 0.0233 for day 22
Day 22: ✅ 144 test rows, Run: 04-05


2025-05-30 18:48:28,527 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 18:48:28,527 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 18:48:28,607 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 18:48:28,610 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 18:48:28,676 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 18:48:28,676 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 18:48:28,676 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 18:48:28,694 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 18:48:28,732 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 18:48:28,732 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 18:48:28,737 - buil

RSME is 0.0873 for day 23
Day 23: ✅ 144 test rows, Run: 04-06


2025-05-30 18:49:15,334 - build_training_set - INFO - ✅ Predictions loaded: 169 rows with 17 columns
2025-05-30 18:49:15,341 - build_training_set - INFO - 🔧 Missing columns in predictions: ['Price', 'Load', 'Flow_NO', 'Flow_GB']
2025-05-30 18:49:15,341 - build_training_set - INFO - 📊 Applying 168-hour lag for missing columns (excluding target variables)...
2025-05-30 18:49:15,350 - build_training_set - INFO -    🎯 Column 'Price' is target variable - filled with NaN (not lagged)
2025-05-30 18:49:15,350 - build_training_set - INFO -    🕐 Lagging column 'Load' by 168 hours
2025-05-30 18:49:15,492 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 18:49:15,492 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 18:49:15,623 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 18:49:15,623 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 18:49:15,750 - build_training_s

RSME is 0.0250 for day 24
Day 24: ✅ 144 test rows, Run: 04-07


2025-05-30 18:50:09,847 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 18:50:09,847 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 18:50:09,982 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 18:50:09,982 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 18:50:10,099 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 18:50:10,116 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 18:50:10,116 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 18:50:10,116 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 18:50:10,183 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 18:50:10,183 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 18:50:10,191 - buil

RSME is 0.0376 for day 25
Day 25: ✅ 144 test rows, Run: 04-08


2025-05-30 18:51:06,024 - build_training_set - INFO - ✅ Predictions loaded: 169 rows with 17 columns
2025-05-30 18:51:06,032 - build_training_set - INFO - 🔧 Missing columns in predictions: ['Price', 'Load', 'Flow_NO', 'Flow_GB']
2025-05-30 18:51:06,032 - build_training_set - INFO - 📊 Applying 168-hour lag for missing columns (excluding target variables)...
2025-05-30 18:51:06,032 - build_training_set - INFO -    🎯 Column 'Price' is target variable - filled with NaN (not lagged)
2025-05-30 18:51:06,037 - build_training_set - INFO -    🕐 Lagging column 'Load' by 168 hours
2025-05-30 18:51:06,258 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 18:51:06,258 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 18:51:06,378 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 18:51:06,378 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 18:51:06,496 - build_training_s

RSME is 0.0285 for day 26
Day 26: ✅ 144 test rows, Run: 04-09


2025-05-30 18:52:03,542 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 18:52:03,542 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 18:52:03,658 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 18:52:03,667 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 18:52:03,789 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 18:52:03,789 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 18:52:03,789 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 18:52:03,798 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 18:52:03,864 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 18:52:03,864 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 18:52:03,873 - buil

RSME is 0.0271 for day 27
Day 27: ✅ 144 test rows, Run: 04-10


2025-05-30 18:52:51,808 - build_training_set - INFO - 🔧 Missing columns in predictions: ['Price', 'Load', 'Flow_NO', 'Flow_GB']
2025-05-30 18:52:51,816 - build_training_set - INFO - 📊 Applying 168-hour lag for missing columns (excluding target variables)...
2025-05-30 18:52:51,816 - build_training_set - INFO -    🎯 Column 'Price' is target variable - filled with NaN (not lagged)
2025-05-30 18:52:51,824 - build_training_set - INFO -    🕐 Lagging column 'Load' by 168 hours
2025-05-30 18:52:51,947 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 18:52:51,955 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 18:52:52,074 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 18:52:52,074 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 18:52:52,183 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 18:52:52,191 - build_training_set - I

RSME is 0.0286 for day 28
Day 28: ✅ 144 test rows, Run: 04-11


2025-05-30 18:53:41,019 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 18:53:41,019 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 18:53:41,142 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 18:53:41,142 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 18:53:41,259 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 18:53:41,268 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 18:53:41,268 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 18:53:41,276 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 18:53:41,341 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 18:53:41,341 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 18:53:41,350 - buil

RSME is 0.0326 for day 29
Day 29: ✅ 144 test rows, Run: 04-12


2025-05-30 18:54:37,961 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 18:54:37,961 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 18:54:38,084 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 18:54:38,084 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 18:54:38,203 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 18:54:38,203 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 18:54:38,203 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 18:54:38,220 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 18:54:38,273 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 18:54:38,273 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 18:54:38,281 - buil

RSME is 0.0641 for day 30
Day 30: ✅ 144 test rows, Run: 04-13

📊 OVERALL RMSE - SARIMAX Model
Successful runs: 30/30
 iteration   run_date  valid_predictions  rmse
         1 2025-03-15                144  0.08
         2 2025-03-16                144  0.05
         3 2025-03-17                144  0.07
         4 2025-03-18                144  0.04
         5 2025-03-19                144  0.04
         6 2025-03-20                144  0.04
         7 2025-03-21                144  0.05
         8 2025-03-22                144  0.22
         9 2025-03-23                144  0.03
        10 2025-03-24                144  0.07
        11 2025-03-25                144  0.04
        12 2025-03-26                144  0.04
        13 2025-03-27                144  0.06
        14 2025-03-28                144  0.06
        15 2025-03-29                144  0.08
        16 2025-03-30                144  0.21
        17 2025-03-31                144  0.06
        18 2025-04-01                