In [1]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error
import sys
from pathlib import Path
import warnings

# Silence all warnings for the whole session. Note that this also hides
# model-fitting and pandas warnings emitted later in the notebook.
warnings.filterwarnings("ignore")

# Make build_training_set importable: walk up from the working directory
# until a directory named "ENEXIS" is reached (or the filesystem root, if no
# such ancestor exists) and put <root>/src/utils on sys.path.
current_dir = Path.cwd()
while current_dir.name != "ENEXIS" and current_dir.parent != current_dir:
    current_dir = current_dir.parent
project_root = current_dir
utils_path = project_root / "src" / "utils"

# Re-running the cell must not keep stacking the same entry onto sys.path.
if str(utils_path) not in sys.path:
    sys.path.append(str(utils_path))

try:
    from build_training_set import build_training_set
except ModuleNotFoundError as exc:
    # Only rewrite the error when the helper module itself is missing; a
    # missing dependency *inside* build_training_set is re-raised untouched.
    if exc.name != "build_training_set":
        raise
    raise ModuleNotFoundError(
        f"Cannot import build_training_set: expected it in {utils_path} "
        f"(project root resolved to {project_root} from {Path.cwd()}). "
        "Run this notebook from inside the 'ENEXIS' project directory.",
        name=exc.name,
    ) from exc

# Rolling-origin backtest of a SARIMA model on the 'Price' column.
# For each run the training window and run date move forward by one day, a
# SARIMAX model is fitted on the rows up to the run date, the remaining rows
# are forecast, and the RMSE of that forecast (minus the first `skip_hours`
# steps) is stored in `rmse_results`. A summary table is printed at the end.

target = 'Price'

# Initial training window; every run shifts all three timestamps by one day.
base_start = "2025-01-01 00:00:00"
base_end = "2025-03-14 23:00:00"
base_run = "2025-03-15 00:00:00"

# Number of consecutive daily runs; used by the loop AND the summary line so
# the two can no longer drift apart.
n_runs = 30
# Number of leading forecast steps excluded from the RMSE.
skip_hours = 24
# Timestamp format of the string arguments handed to build_training_set.
ts_format = "%Y-%m-%d %H:%M:%S"

rmse_results = []

print("🔍 Testing SARIMA Model - RMSE per forecast day")
print("=" * 60)

# SARIMA parameters (example: adjust as needed)
order = (1, 1, 1)
seasonal_order = (1, 1, 1, 24)  # 24 for hourly data with daily seasonality

for i in range(n_runs):
    shift = pd.Timedelta(days=i)
    start = pd.Timestamp(base_start) + shift
    end = pd.Timestamp(base_end) + shift
    run_date = pd.Timestamp(base_run) + shift

    try:
        df = build_training_set(
            train_start=start.strftime(ts_format),
            train_end=end.strftime(ts_format),
            run_date=run_date.strftime(ts_format)
        )

        if df is None or df.empty:
            print(f"Day {i+1}: ❌ No training data returned")
            continue

        df['target_datetime'] = pd.to_datetime(df['target_datetime'], utc=True)
        df = df.sort_values('target_datetime')

        run_date_utc = run_date.tz_localize("UTC")

        # Split into training and testing sets. Explicit copies are taken
        # because both frames get a column reassigned below; assigning into a
        # filtered slice of `df` is chained assignment.
        train_data = df[df['target_datetime'] <= run_date_utc].copy()
        test_data = df[df['target_datetime'] > run_date_utc].copy()

        # Drop any missing data in training.
        # NOTE(review): dropped rows leave gaps in the series, while the
        # forecast below is matched to the test rows by position - confirm
        # the training target is gap-free in practice.
        train_data = train_data.dropna(subset=['target_datetime', target])

        if test_data.empty or train_data.empty:
            print(f"Day {i+1}: ❌ Not enough data for training or testing")
            continue

        # Prepare data for SARIMA: the index must be datetime64[ns] without
        # a timezone.
        train_data['target_datetime'] = pd.to_datetime(train_data['target_datetime']).dt.tz_localize(None)
        test_data['target_datetime'] = pd.to_datetime(test_data['target_datetime']).dt.tz_localize(None)
        y_train = train_data.set_index('target_datetime')[target].astype(float)
        y_test = test_data.set_index('target_datetime')[target].astype(float)

        # Fit SARIMA model
        model = SARIMAX(y_train, order=order, seasonal_order=seasonal_order, enforce_stationarity=False, enforce_invertibility=False)
        model_fit = model.fit(disp=False)

        # Forecast one step per test row, starting right after the last
        # training observation.
        n_test = len(y_test)
        forecast = model_fit.forecast(steps=n_test)
        y_pred = forecast.values

        # Skip the first `skip_hours` forecast steps; without anything left
        # to score, record the run as NaN and move on.
        if len(y_pred) <= skip_hours:
            print("Niet genoeg testdata na lag van 24 uur.")
            rmse = np.nan
            rmse_results.append({
                'iteration': i + 1,
                'run_date': run_date.strftime('%Y-%m-%d'),
                'valid_predictions': 0,
                'rmse': rmse
            })
            continue

        y_pred = y_pred[skip_hours:]
        y_test = y_test.iloc[skip_hours:]

        # Only the training target was cleaned of NaNs; a single missing test
        # price would make mean_squared_error raise and lose the whole run.
        # Score the finite (actual, forecast) pairs only.
        y_true = y_test.to_numpy()
        scored = np.isfinite(y_true) & np.isfinite(y_pred)
        n_scored = int(scored.sum())
        if n_scored:
            rmse = np.sqrt(mean_squared_error(y_true[scored], y_pred[scored]))
        else:
            rmse = np.nan

        rmse_results.append({
            'iteration': i + 1,
            'run_date': run_date.strftime('%Y-%m-%d'),
            'valid_predictions': n_scored,
            'rmse': rmse
        })

        print(f"Day {i+1}: ✅ {n_scored} test rows, Run: {run_date.strftime('%m-%d')}")

    except Exception as e:
        # Per-run boundary: one failing day must not abort the remaining runs.
        print(f"Day {i+1}: ❌ Error: {e}")

# Create results dataframe
if rmse_results:
    rmse_df = pd.DataFrame(rmse_results)

    print("\n📊 OVERALL RMSE - SARIMA Model")
    print("=" * 80)
    print(f"Successful runs: {rmse_df['rmse'].notna().sum()}/{n_runs}")

    print(rmse_df[['iteration', 'run_date', 'valid_predictions', 'rmse']].round(2).to_string(index=False))

    print("\n📈 SUMMARY STATISTICS")
    print("-" * 40)
    print(rmse_df['rmse'].describe().round(2))

    print("\n📊 AVERAGE OVERALL RMSE")
    print("-" * 40)
    print(f"Mean RMSE: {rmse_df['rmse'].mean():.4f}")
    print(f"Stddev RMSE: {rmse_df['rmse'].std():.4f}")

else:
    print("❌ No runs completed successfully")

2025-05-31 20:47:23,631 - build_training_set - INFO - 📅 Loading additional historical data until 2025-03-15 00:00:00+00:00 for lagging support
2025-05-31 20:47:23,631 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-31 20:47:23,632 - build_training_set - INFO - 🧠 Actuals van 2025-01-01 00:00:00+00:00 t/m 2025-03-14 23:00:00+00:00 (extended to 2025-03-15 00:00:00+00:00 for lagging)
2025-05-31 20:47:23,632 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-15 00:00:00+00:00, normalized to 2025-03-15 00:00:00+00:00 for DB lookup, target range: 2025-03-15 00:00:00+00:00 → 2025-03-22 00:00:00+00:00
2025-05-31 20:47:23,632 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
2025-05-31 20:47:23,633 - build_training_set - INFO - 📋 Requested columns found: 21/21
2025-05-31 20:47:23,634 - build_training_set - INFO - 📋 Using columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'di

🔍 Testing SARIMA Model - RMSE per forecast day


2025-05-31 20:47:34,636 - build_training_set - INFO - 📅 Loading additional historical data until 2025-03-16 00:00:00+00:00 for lagging support
2025-05-31 20:47:34,637 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-31 20:47:34,638 - build_training_set - INFO - 🧠 Actuals van 2025-01-02 00:00:00+00:00 t/m 2025-03-15 23:00:00+00:00 (extended to 2025-03-16 00:00:00+00:00 for lagging)
2025-05-31 20:47:34,639 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-16 00:00:00+00:00, normalized to 2025-03-16 00:00:00+00:00 for DB lookup, target range: 2025-03-16 00:00:00+00:00 → 2025-03-23 00:00:00+00:00
2025-05-31 20:47:34,640 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
2025-05-31 20:47:34,642 - build_training_set - INFO - 📋 Requested columns found: 21/21
2025-05-31 20:47:34,643 - build_training_set - INFO - 📋 Using columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'di

Day 1: ✅ 144 test rows, Run: 03-15


2025-05-31 20:47:34,844 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-31 20:47:34,845 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-31 20:47:34,847 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-31 20:47:34,848 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-05-31 20:47:34,850 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-05-31 20:47:34,851 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10m', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos']
2025-05-31 20:47:34,852 - build_training_set - INFO - 📅 Date range: 2025-01-02 00:00:00+00:00 →

Day 2: ✅ 144 test rows, Run: 03-16


2025-05-31 20:47:44,517 - build_training_set - INFO - ✅ Saved as training_set in WARP.db
2025-05-31 20:47:44,520 - build_training_set - INFO - 🔒 Connection closed
2025-05-31 20:47:57,117 - build_training_set - INFO - 📅 Loading additional historical data until 2025-03-18 00:00:00+00:00 for lagging support
2025-05-31 20:47:57,118 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-31 20:47:57,118 - build_training_set - INFO - 🧠 Actuals van 2025-01-04 00:00:00+00:00 t/m 2025-03-17 23:00:00+00:00 (extended to 2025-03-18 00:00:00+00:00 for lagging)
2025-05-31 20:47:57,121 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-18 00:00:00+00:00, normalized to 2025-03-18 00:00:00+00:00 for DB lookup, target range: 2025-03-18 00:00:00+00:00 → 2025-03-25 00:00:00+00:00
2025-05-31 20:47:57,122 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
2025-05-31 20:47:57,124 - build_training_set - INFO - 📋 Requested columns found: 21/21
2025-05-31 20:47:

Day 3: ✅ 144 test rows, Run: 03-17


2025-05-31 20:47:57,323 - build_training_set - INFO - ✅ Saved as training_set in WARP.db
2025-05-31 20:47:57,325 - build_training_set - INFO - 🔒 Connection closed
2025-05-31 20:48:03,893 - build_training_set - INFO - 📅 Loading additional historical data until 2025-03-19 00:00:00+00:00 for lagging support
2025-05-31 20:48:03,893 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-31 20:48:03,894 - build_training_set - INFO - 🧠 Actuals van 2025-01-05 00:00:00+00:00 t/m 2025-03-18 23:00:00+00:00 (extended to 2025-03-19 00:00:00+00:00 for lagging)
2025-05-31 20:48:03,895 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-19 00:00:00+00:00, normalized to 2025-03-19 00:00:00+00:00 for DB lookup, target range: 2025-03-19 00:00:00+00:00 → 2025-03-26 00:00:00+00:00
2025-05-31 20:48:03,897 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
2025-05-31 20:48:03,901 - build_training_set - INFO - 📋 Requested columns found: 21/21
2025-05-31 20:48:

Day 4: ✅ 144 test rows, Run: 03-18


2025-05-31 20:48:04,097 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-31 20:48:04,099 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-31 20:48:04,101 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-05-31 20:48:04,103 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-05-31 20:48:04,104 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10m', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos']
2025-05-31 20:48:04,105 - build_training_set - INFO - 📅 Date range: 2025-01-05 00:00:00+00:00 → 2025-03-26 00:00:00+00:00
2025-05-31 20:48:04,107 - build_training_set - INFO - 💰 Price NaN count: 0/1921 (

Day 5: ✅ 144 test rows, Run: 03-19


2025-05-31 20:48:13,367 - build_training_set - INFO - ✅ Saved as training_set in WARP.db
2025-05-31 20:48:13,369 - build_training_set - INFO - 🔒 Connection closed
2025-05-31 20:48:21,721 - build_training_set - INFO - 📅 Loading additional historical data until 2025-03-21 00:00:00+00:00 for lagging support
2025-05-31 20:48:21,721 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-31 20:48:21,722 - build_training_set - INFO - 🧠 Actuals van 2025-01-07 00:00:00+00:00 t/m 2025-03-20 23:00:00+00:00 (extended to 2025-03-21 00:00:00+00:00 for lagging)
2025-05-31 20:48:21,723 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-21 00:00:00+00:00, normalized to 2025-03-21 00:00:00+00:00 for DB lookup, target range: 2025-03-21 00:00:00+00:00 → 2025-03-28 00:00:00+00:00
2025-05-31 20:48:21,725 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
2025-05-31 20:48:21,727 - build_training_set - INFO - 📋 Requested columns found: 21/21
2025-05-31 20:48:

Day 6: ✅ 144 test rows, Run: 03-20


2025-05-31 20:48:30,398 - build_training_set - INFO - 📅 Loading additional historical data until 2025-03-22 00:00:00+00:00 for lagging support
2025-05-31 20:48:30,399 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-31 20:48:30,400 - build_training_set - INFO - 🧠 Actuals van 2025-01-08 00:00:00+00:00 t/m 2025-03-21 23:00:00+00:00 (extended to 2025-03-22 00:00:00+00:00 for lagging)
2025-05-31 20:48:30,401 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-22 00:00:00+00:00, normalized to 2025-03-22 00:00:00+00:00 for DB lookup, target range: 2025-03-22 00:00:00+00:00 → 2025-03-29 00:00:00+00:00
2025-05-31 20:48:30,403 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
2025-05-31 20:48:30,405 - build_training_set - INFO - 📋 Requested columns found: 21/21
2025-05-31 20:48:30,406 - build_training_set - INFO - 📋 Using columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'di

Day 7: ✅ 144 test rows, Run: 03-21


2025-05-31 20:48:30,598 - build_training_set - INFO - ✅ All columns have good data quality (<20% NaN)
2025-05-31 20:48:30,613 - build_training_set - INFO - ✅ Saved as training_set in WARP.db
2025-05-31 20:48:30,614 - build_training_set - INFO - 🔒 Connection closed
2025-05-31 20:48:40,221 - build_training_set - INFO - 📅 Loading additional historical data until 2025-03-23 00:00:00+00:00 for lagging support
2025-05-31 20:48:40,222 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-31 20:48:40,223 - build_training_set - INFO - 🧠 Actuals van 2025-01-09 00:00:00+00:00 t/m 2025-03-22 23:00:00+00:00 (extended to 2025-03-23 00:00:00+00:00 for lagging)
2025-05-31 20:48:40,224 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-23 00:00:00+00:00, normalized to 2025-03-23 00:00:00+00:00 for DB lookup, target range: 2025-03-23 00:00:00+00:00 → 2025-03-30 00:00:00+00:00
2025-05-31 20:48:40,226 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
20

Day 8: ✅ 144 test rows, Run: 03-22


2025-05-31 20:48:40,423 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-05-31 20:48:40,425 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-05-31 20:48:40,426 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10m', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos']
2025-05-31 20:48:40,427 - build_training_set - INFO - 📅 Date range: 2025-01-09 00:00:00+00:00 → 2025-03-30 00:00:00+00:00
2025-05-31 20:48:40,428 - build_training_set - INFO - 💰 Price NaN count: 0/1921 (0.0%)
2025-05-31 20:48:40,430 - build_training_set - INFO - ✅ All columns have good data quality (<20% NaN)
2025-05-31 20:48:40,449 - build_training_set - INFO - ✅ Saved as training_set in WARP.db
2025-05-3

Day 9: ✅ 144 test rows, Run: 03-23


2025-05-31 20:48:49,295 - build_training_set - INFO - ✅ Saved as training_set in WARP.db
2025-05-31 20:48:49,299 - build_training_set - INFO - 🔒 Connection closed
2025-05-31 20:48:59,470 - build_training_set - INFO - 📅 Loading additional historical data until 2025-03-25 00:00:00+00:00 for lagging support
2025-05-31 20:48:59,470 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-31 20:48:59,471 - build_training_set - INFO - 🧠 Actuals van 2025-01-11 00:00:00+00:00 t/m 2025-03-24 23:00:00+00:00 (extended to 2025-03-25 00:00:00+00:00 for lagging)
2025-05-31 20:48:59,473 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-25 00:00:00+00:00, normalized to 2025-03-25 00:00:00+00:00 for DB lookup, target range: 2025-03-25 00:00:00+00:00 → 2025-04-01 00:00:00+00:00
2025-05-31 20:48:59,474 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
2025-05-31 20:48:59,476 - build_training_set - INFO - 📋 Requested columns found: 21/21
2025-05-31 20:48:

Day 10: ✅ 144 test rows, Run: 03-24


2025-05-31 20:48:59,686 - build_training_set - INFO - ✅ Saved as training_set in WARP.db
2025-05-31 20:48:59,687 - build_training_set - INFO - 🔒 Connection closed
2025-05-31 21:05:07,750 - build_training_set - INFO - 📅 Loading additional historical data until 2025-03-26 00:00:00+00:00 for lagging support
2025-05-31 21:05:07,760 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-31 21:05:07,763 - build_training_set - INFO - 🧠 Actuals van 2025-01-12 00:00:00+00:00 t/m 2025-03-25 23:00:00+00:00 (extended to 2025-03-26 00:00:00+00:00 for lagging)
2025-05-31 21:05:07,765 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-26 00:00:00+00:00, normalized to 2025-03-26 00:00:00+00:00 for DB lookup, target range: 2025-03-26 00:00:00+00:00 → 2025-04-02 00:00:00+00:00
2025-05-31 21:05:07,767 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
2025-05-31 21:05:07,771 - build_training_set - INFO - 📋 Requested columns found: 21/21
2025-05-31 21:05:

Day 11: ✅ 144 test rows, Run: 03-25


2025-05-31 21:05:07,971 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-31 21:05:07,973 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-31 21:05:07,974 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-31 21:05:07,976 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-31 21:05:07,992 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-31 21:05:07,993 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-31 21:05:07,994 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-31 21:05:07,995 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-05-31 21:05:07,996 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-05-31 21:05:07,997 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_dateti

Day 12: ✅ 144 test rows, Run: 03-26


2025-05-31 21:05:25,169 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-31 21:05:25,170 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-31 21:05:25,171 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-31 21:05:25,172 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-05-31 21:05:25,173 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-05-31 21:05:25,173 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10m', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos']
2025-05-31 21:05:25,174 - build_training_set - INFO - 📅 Date range: 2025-01-13 00:00:00+00:00 →

Day 13: ✅ 144 test rows, Run: 03-27


2025-05-31 21:20:39,289 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-31 21:20:39,291 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-31 21:20:39,293 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-05-31 21:20:39,294 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-05-31 21:20:39,295 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10m', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos']
2025-05-31 21:20:39,296 - build_training_set - INFO - 📅 Date range: 2025-01-14 00:00:00+00:00 → 2025-04-04 00:00:00+00:00
2025-05-31 21:20:39,299 - build_training_set - INFO - 💰 Price NaN count: 0/1921 (

Day 14: ✅ 144 test rows, Run: 03-28


2025-05-31 21:53:51,995 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-31 21:53:51,996 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-31 21:53:51,998 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-31 21:53:51,999 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-05-31 21:53:52,001 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-05-31 21:53:52,001 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10m', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos']
2025-05-31 21:53:52,002 - build_training_set - INFO - 📅 Date range: 2025-01-15 00:00:00+00:00 →

Day 15: ✅ 144 test rows, Run: 03-29


2025-05-31 22:27:48,757 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-31 22:27:48,758 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-31 22:27:48,758 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-31 22:27:48,759 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-31 22:27:48,772 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-31 22:27:48,773 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-31 22:27:48,774 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-31 22:27:48,775 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-05-31 22:27:48,776 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-05-31 22:27:48,776 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_dateti

Day 16: ✅ 144 test rows, Run: 03-30


2025-05-31 22:50:00,772 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-31 22:50:00,781 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-31 22:50:00,798 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-31 22:50:00,799 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-05-31 22:50:00,800 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-05-31 22:50:00,800 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10m', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos']
2025-05-31 22:50:00,801 - build_training_set - INFO - 📅 Date range: 2025-01-17 00:00:00+00:00 →

Day 17: ✅ 144 test rows, Run: 03-31


2025-05-31 23:24:36,216 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-31 23:24:36,217 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-31 23:24:36,218 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-31 23:24:36,219 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-05-31 23:24:36,220 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-05-31 23:24:36,220 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10m', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos']
2025-05-31 23:24:36,221 - build_training_set - INFO - 📅 Date range: 2025-01-18 00:00:00+00:00 →

Day 18: ✅ 144 test rows, Run: 04-01


2025-05-31 23:42:02,424 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-31 23:42:02,425 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-31 23:42:02,428 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-31 23:42:02,430 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-05-31 23:42:02,431 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-05-31 23:42:02,431 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10m', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos']
2025-05-31 23:42:02,431 - build_training_set - INFO - 📅 Date range: 2025-01-19 00:00:00+00:00 →

Day 19: ✅ 144 test rows, Run: 04-02


2025-06-01 00:13:38,275 - build_training_set - INFO - 📅 Loading additional historical data until 2025-04-04 00:00:00+00:00 for lagging support
2025-06-01 00:13:38,276 - build_training_set - INFO - 🚀 Start build van trainingset
2025-06-01 00:13:38,277 - build_training_set - INFO - 🧠 Actuals van 2025-01-21 00:00:00+00:00 t/m 2025-04-03 23:00:00+00:00 (extended to 2025-04-04 00:00:00+00:00 for lagging)
2025-06-01 00:13:38,278 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-04 00:00:00+00:00, normalized to 2025-04-04 00:00:00+00:00 for DB lookup, target range: 2025-04-04 00:00:00+00:00 → 2025-04-11 00:00:00+00:00
2025-06-01 00:13:38,279 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
2025-06-01 00:13:38,284 - build_training_set - INFO - 📋 Requested columns found: 21/21
2025-06-01 00:13:38,284 - build_training_set - INFO - 📋 Using columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'di

Day 20: ✅ 144 test rows, Run: 04-03


2025-06-01 00:47:12,462 - build_training_set - INFO - 📅 Loading additional historical data until 2025-04-05 00:00:00+00:00 for lagging support
2025-06-01 00:47:12,475 - build_training_set - INFO - 🚀 Start build van trainingset
2025-06-01 00:47:12,476 - build_training_set - INFO - 🧠 Actuals van 2025-01-22 00:00:00+00:00 t/m 2025-04-04 23:00:00+00:00 (extended to 2025-04-05 00:00:00+00:00 for lagging)
2025-06-01 00:47:12,477 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-05 00:00:00+00:00, normalized to 2025-04-05 00:00:00+00:00 for DB lookup, target range: 2025-04-05 00:00:00+00:00 → 2025-04-12 00:00:00+00:00
2025-06-01 00:47:12,477 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
2025-06-01 00:47:12,479 - build_training_set - INFO - 📋 Requested columns found: 21/21
2025-06-01 00:47:12,480 - build_training_set - INFO - 📋 Using columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'di

Day 21: ✅ 144 test rows, Run: 04-04


2025-06-01 00:47:12,661 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-06-01 00:47:12,664 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-06-01 00:47:12,682 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-06-01 00:47:12,683 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 00:47:12,684 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 00:47:12,686 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 00:47:12,686 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 00:47:12,687 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10

Day 22: ✅ 144 test rows, Run: 04-05


2025-06-01 01:19:33,063 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 01:19:33,063 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 01:19:33,065 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 01:19:33,065 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 01:19:33,066 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10m', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos']
2025-06-01 01:19:33,066 - build_training_set - INFO - 📅 Date range: 2025-01-23 00:00:00+00:00 → 2025-04-13 00:00:00+00:00
2025-06-01 01:19:33,066 - build_training_set - INFO - 💰 Price NaN count: 0/1921 (

Day 23: ✅ 144 test rows, Run: 04-06


2025-06-01 02:09:58,814 - build_training_set - INFO - 📅 Loading additional historical data until 2025-04-08 00:00:00+00:00 for lagging support
2025-06-01 02:09:58,816 - build_training_set - INFO - 🚀 Start build van trainingset
2025-06-01 02:09:58,816 - build_training_set - INFO - 🧠 Actuals van 2025-01-25 00:00:00+00:00 t/m 2025-04-07 23:00:00+00:00 (extended to 2025-04-08 00:00:00+00:00 for lagging)
2025-06-01 02:09:58,817 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-08 00:00:00+00:00, normalized to 2025-04-08 00:00:00+00:00 for DB lookup, target range: 2025-04-08 00:00:00+00:00 → 2025-04-15 00:00:00+00:00
2025-06-01 02:09:58,818 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
2025-06-01 02:09:58,820 - build_training_set - INFO - 📋 Requested columns found: 21/21
2025-06-01 02:09:58,821 - build_training_set - INFO - 📋 Using columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'di

Day 24: ✅ 144 test rows, Run: 04-07


2025-06-01 02:09:59,035 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-06-01 02:09:59,036 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 02:09:59,042 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 02:09:59,044 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 02:09:59,045 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 02:09:59,045 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10m', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos']
2025-06-01 02:09:59,045 - build_training_set - INFO - 📅 Date range: 2025-01-25 00:00:00+00:00 →

Day 25: ✅ 144 test rows, Run: 04-08


2025-06-01 02:26:10,657 - build_training_set - INFO - ✅ Saved as training_set in WARP.db
2025-06-01 02:26:10,659 - build_training_set - INFO - 🔒 Connection closed
2025-06-01 03:01:23,692 - build_training_set - INFO - 📅 Loading additional historical data until 2025-04-10 00:00:00+00:00 for lagging support
2025-06-01 03:01:23,693 - build_training_set - INFO - 🚀 Start build van trainingset
2025-06-01 03:01:23,695 - build_training_set - INFO - 🧠 Actuals van 2025-01-27 00:00:00+00:00 t/m 2025-04-09 23:00:00+00:00 (extended to 2025-04-10 00:00:00+00:00 for lagging)
2025-06-01 03:01:23,698 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-10 00:00:00+00:00, normalized to 2025-04-10 00:00:00+00:00 for DB lookup, target range: 2025-04-10 00:00:00+00:00 → 2025-04-17 00:00:00+00:00
2025-06-01 03:01:23,699 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
2025-06-01 03:01:23,702 - build_training_set - INFO - 📋 Requested columns found: 21/21
2025-06-01 03:01:

Day 26: ✅ 144 test rows, Run: 04-09


2025-06-01 03:01:23,908 - build_training_set - INFO - ✅ Saved as training_set in WARP.db
2025-06-01 03:01:23,909 - build_training_set - INFO - 🔒 Connection closed
2025-06-01 03:33:06,977 - build_training_set - INFO - 📅 Loading additional historical data until 2025-04-11 00:00:00+00:00 for lagging support
2025-06-01 03:33:06,977 - build_training_set - INFO - 🚀 Start build van trainingset
2025-06-01 03:33:06,978 - build_training_set - INFO - 🧠 Actuals van 2025-01-28 00:00:00+00:00 t/m 2025-04-10 23:00:00+00:00 (extended to 2025-04-11 00:00:00+00:00 for lagging)
2025-06-01 03:33:06,979 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-11 00:00:00+00:00, normalized to 2025-04-11 00:00:00+00:00 for DB lookup, target range: 2025-04-11 00:00:00+00:00 → 2025-04-18 00:00:00+00:00
2025-06-01 03:33:06,980 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
2025-06-01 03:33:06,981 - build_training_set - INFO - 📋 Requested columns found: 21/21
2025-06-01 03:33:

Day 27: ✅ 144 test rows, Run: 04-10


2025-06-01 03:33:07,207 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-06-01 03:33:07,208 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-06-01 03:33:07,208 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-06-01 03:33:07,210 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-06-01 03:33:07,222 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-06-01 03:33:07,222 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 03:33:07,223 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 03:33:07,225 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 03:33:07,226 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 03:33:07,226 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_dateti

Day 28: ✅ 144 test rows, Run: 04-11


2025-06-01 03:33:19,150 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-06-01 03:33:19,151 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 03:33:19,152 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 03:33:19,154 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 03:33:19,155 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 03:33:19,155 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10m', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos']
2025-06-01 03:33:19,155 - build_training_set - INFO - 📅 Date range: 2025-01-29 00:00:00+00:00 →

Day 29: ✅ 144 test rows, Run: 04-12
Day 30: ✅ 144 test rows, Run: 04-13

📊 OVERALL RMSE - SARIMA Model
Successful runs: 30/30
 iteration   run_date  valid_predictions  rmse
         1 2025-03-15                144  0.05
         2 2025-03-16                144  0.04
         3 2025-03-17                144  0.06
         4 2025-03-18                144  0.04
         5 2025-03-19                144  0.04
         6 2025-03-20                144  0.05
         7 2025-03-21                144  0.04
         8 2025-03-22                144  0.12
         9 2025-03-23                144  0.02
        10 2025-03-24                144  0.05
        11 2025-03-25                144  0.04
        12 2025-03-26                144  0.05
        13 2025-03-27                144  0.05
        14 2025-03-28                144  0.04
        15 2025-03-29                144  0.06
        16 2025-03-30                144  0.11
        17 2025-03-31                144  0.04
        18 2025-04-01       

In [2]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error
import sys
from pathlib import Path
import warnings

warnings.filterwarnings("ignore")

# Make sure build_training_set can be imported: walk up from the working
# directory until a directory named "ENEXIS" is found and expose its
# src/utils folder on sys.path. If no such directory exists the walk stops at
# the filesystem root, and the import below only succeeds when the module is
# importable some other way.
current_dir = Path.cwd()
while current_dir.name != "ENEXIS" and current_dir.parent != current_dir:
    current_dir = current_dir.parent
project_root = current_dir
utils_path = project_root / "src" / "utils"
# Notebook cells get re-executed; only add the path once so sys.path does not
# accumulate duplicate entries on every run.
if str(utils_path) not in sys.path:
    sys.path.append(str(utils_path))
from build_training_set import build_training_set

# Column holding the value to forecast.
target = 'Price'

# Exogenous regressors passed to SARIMAX. The order is kept exactly as it was
# because it determines the column order of the exog matrices.
FEATURES = [
    'Load',
    'shortwave_radiation',
    'temperature_2m',
    'direct_normal_irradiance',
    'diffuse_radiation',
    'Flow_NO',
    'yearday_cos',
    'Flow_GB',
    'month',
    'is_dst',
    'yearday_sin',
    'wind_speed_10m',
    'is_non_working_day',
    'hour_cos',
    'is_weekend',
    'cloud_cover',
    'weekday_sin',
    'hour_sin',
    'weekday_cos',
]

# First training window and forecast run timestamp; the evaluation loop shifts
# all three forward by whole days.
base_start, base_end, base_run = (
    "2025-01-01 00:00:00",
    "2025-03-14 23:00:00",
    "2025-03-15 00:00:00",
)

# SARIMAX orders (example values: adjust as needed).
order = (1, 1, 1)
# Seasonal period of 24: hourly data with daily seasonality.
seasonal_order = (1, 1, 1, 24)

# One dict per evaluated run is collected here.
rmse_results = []

print("🔍 Testing SARIMAX Model - RMSE per forecast day")
print("=" * 60)

# Rolling evaluation: every iteration shifts the training window and the
# forecast run date forward by one day, refits SARIMAX with the exogenous
# features and scores the forecast against the prices in the test rows.
for i in range(30):
    day_offset = pd.Timedelta(days=i)
    start = pd.Timestamp(base_start) + day_offset
    end = pd.Timestamp(base_end) + day_offset
    run_date = pd.Timestamp(base_run) + day_offset

    try:
        df = build_training_set(
            train_start=start.strftime("%Y-%m-%d %H:%M:%S"),
            train_end=end.strftime("%Y-%m-%d %H:%M:%S"),
            run_date=run_date.strftime("%Y-%m-%d %H:%M:%S")
        )

        if df is None or df.empty:
            print(f"Day {i+1}: ❌ No training data returned")
            continue

        df['target_datetime'] = pd.to_datetime(df['target_datetime'], utc=True)
        df = df.sort_values('target_datetime')

        run_date_utc = run_date.tz_localize("UTC")

        # Rows up to and including the run timestamp are training history;
        # everything after it is the forecast horizon. SARIMAX cannot take
        # NaN in the target or the regressors, so incomplete rows are dropped.
        # NOTE(review): a row dropped from the middle of the horizon would
        # shift the positional alignment between forecast steps and y_test.
        required_cols = ['target_datetime', target] + FEATURES
        train_data = df[df['target_datetime'] <= run_date_utc].dropna(subset=required_cols)
        test_data = df[df['target_datetime'] > run_date_utc].dropna(subset=required_cols)

        if test_data.empty or train_data.empty:
            print(f"Day {i+1}: ❌ Not enough data for training or testing")
            continue

        # Index both sets by tz-naive timestamps. `assign` returns new frames,
        # so nothing is written into the frames derived from slices of `df`.
        train_indexed = train_data.assign(
            target_datetime=train_data['target_datetime'].dt.tz_localize(None)
        ).set_index('target_datetime')
        test_indexed = test_data.assign(
            target_datetime=test_data['target_datetime'].dt.tz_localize(None)
        ).set_index('target_datetime')

        y_train = train_indexed[target].astype(float)
        y_test = test_indexed[target].astype(float)
        exog_train = train_indexed[FEATURES].astype(float)
        exog_test = test_indexed[FEATURES].astype(float)

        # Fit SARIMAX model with exogenous variables
        model = SARIMAX(
            y_train,
            exog=exog_train,
            order=order,
            seasonal_order=seasonal_order,
            enforce_stationarity=False,
            enforce_invertibility=False
        )
        model_fit = model.fit(disp=False)

        # Forecast one step per test row, driven by the test regressors.
        forecast = model_fit.forecast(steps=len(y_test), exog=exog_test)
        y_pred = forecast.values

        # The first 24 forecast steps are excluded from scoring; with 24 or
        # fewer steps nothing is left, so record the run as unscored.
        if len(y_pred) <= 24:
            print("Niet genoeg testdata na lag van 24 uur.")
            rmse_results.append({
                'iteration': i + 1,
                'run_date': run_date.strftime('%Y-%m-%d'),
                'valid_predictions': 0,
                'rmse': np.nan
            })
            continue

        y_pred = y_pred[24:]
        # Positional slice, stated explicitly because y_test is datetime-indexed.
        y_test = y_test.iloc[24:]

        # At least one prediction is guaranteed here, so RMSE is always defined.
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))

        print(f"RMSE is {rmse:.4f} for day {i+1}")

        rmse_results.append({
            'iteration': i + 1,
            'run_date': run_date.strftime('%Y-%m-%d'),
            'valid_predictions': len(y_pred),
            'rmse': rmse
        })

        print(f"Day {i+1}: ✅ {len(y_pred)} test rows, Run: {run_date.strftime('%m-%d')}")

    except Exception as e:
        # A failing day is reported and the loop continues with the next one.
        print(f"Day {i+1}: ❌ Error: {e}")

# Print the evaluation report: a per-run table, descriptive statistics and the
# mean / standard deviation of the RMSE. Without any recorded run only a
# notice is printed.
if not rmse_results:
    print("❌ No runs completed successfully")
else:
    rmse_df = pd.DataFrame(rmse_results)
    rmse_col = rmse_df['rmse']
    # Runs whose RMSE is NaN were recorded but could not be scored.
    n_scored = rmse_col.notna().sum()
    report_cols = ['iteration', 'run_date', 'valid_predictions', 'rmse']

    print("\n📊 OVERALL RMSE - SARIMAX Model")
    print("=" * 80)
    print(f"Successful runs: {n_scored}/30")

    print(rmse_df[report_cols].round(2).to_string(index=False))

    print("\n📈 SUMMARY STATISTICS")
    print("-" * 40)
    print(rmse_col.describe().round(2))

    print("\n📊 AVERAGE OVERALL RMSE")
    print("-" * 40)
    print(f"Mean RMSE: {rmse_col.mean():.4f}")
    print(f"Stddev RMSE: {rmse_col.std():.4f}")

2025-06-01 04:37:54,343 - build_training_set - INFO - 📅 Loading additional historical data until 2025-03-15 00:00:00+00:00 for lagging support
2025-06-01 04:37:54,357 - build_training_set - INFO - 🚀 Start build van trainingset
2025-06-01 04:37:54,359 - build_training_set - INFO - 🧠 Actuals van 2025-01-01 00:00:00+00:00 t/m 2025-03-14 23:00:00+00:00 (extended to 2025-03-15 00:00:00+00:00 for lagging)


🔍 Testing SARIMAX Model - RMSE per forecast day


2025-06-01 04:37:54,373 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-15 00:00:00+00:00, normalized to 2025-03-15 00:00:00+00:00 for DB lookup, target range: 2025-03-15 00:00:00+00:00 → 2025-03-22 00:00:00+00:00
2025-06-01 04:53:57,871 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
2025-06-01 04:53:57,873 - build_training_set - INFO - 📋 Requested columns found: 21/21
2025-06-01 04:53:57,874 - build_training_set - INFO - 📋 Using columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10m', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos']
2025-06-01 04:53:57,889 - build_training_set - INFO - ✅ Actuals loaded: 1752 rows with 21 selected columns
2025-06-01 04:53:57,890 - build_training_set - INFO - 🔍 Loading forecast/prediction 

RSME is 0.0759 for day 1
Day 1: ✅ 144 test rows, Run: 03-15


2025-06-01 08:00:30,003 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-06-01 08:00:30,004 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-06-01 08:00:30,031 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-06-01 08:00:30,032 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-06-01 08:00:30,032 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-06-01 08:00:30,034 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-06-01 08:00:30,047 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-06-01 08:00:30,047 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 08:00:30,049 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 08:00:30,051 - build_training_set - INFO - ✅ No overlap between actuals and predictions
202

RSME is 0.0475 for day 2
Day 2: ✅ 144 test rows, Run: 03-16


2025-06-01 09:13:23,084 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-06-01 09:13:23,085 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-06-01 09:13:23,085 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-06-01 09:13:23,087 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-06-01 09:13:23,098 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-06-01 09:13:23,099 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 09:13:23,100 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 09:13:23,101 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 09:13:23,101 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 09:13:23,101 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_dateti

RSME is 0.0744 for day 3
Day 3: ✅ 144 test rows, Run: 03-17


2025-06-01 09:13:59,499 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 09:13:59,501 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 09:13:59,503 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 09:13:59,506 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 09:13:59,507 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10m', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos']
2025-06-01 09:13:59,509 - build_training_set - INFO - 📅 Date range: 2025-01-04 00:00:00+00:00 → 2025-03-25 00:00:00+00:00
2025-06-01 09:13:59,519 - build_training_set - INFO - 💰 Price NaN count: 0/1921 (

RSME is 0.0374 for day 4
Day 4: ✅ 144 test rows, Run: 03-18


2025-06-01 09:14:59,805 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-06-01 09:14:59,806 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-06-01 09:14:59,806 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-06-01 09:14:59,809 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-06-01 09:14:59,823 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-06-01 09:14:59,824 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 09:14:59,825 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 09:14:59,827 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 09:14:59,828 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 09:14:59,828 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_dateti

RSME is 0.0369 for day 5
Day 5: ✅ 144 test rows, Run: 03-19


2025-06-01 09:16:00,108 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-06-01 09:16:00,109 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 09:16:00,111 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 09:16:00,115 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 09:16:00,116 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 09:16:00,117 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10m', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos']
2025-06-01 09:16:00,117 - build_training_set - INFO - 📅 Date range: 2025-01-06 00:00:00+00:00 →

RSME is 0.0450 for day 6
Day 6: ✅ 144 test rows, Run: 03-20


2025-06-01 09:17:00,425 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-06-01 09:17:00,426 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 09:17:00,428 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 09:17:00,430 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 09:17:00,431 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 09:17:00,432 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10m', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos']
2025-06-01 09:17:00,433 - build_training_set - INFO - 📅 Date range: 2025-01-07 00:00:00+00:00 →

RSME is 0.0466 for day 7
Day 7: ✅ 144 test rows, Run: 03-21


2025-06-01 09:17:59,172 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 09:17:59,173 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 09:17:59,175 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 09:17:59,176 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 09:17:59,176 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10m', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos']
2025-06-01 09:17:59,176 - build_training_set - INFO - 📅 Date range: 2025-01-08 00:00:00+00:00 → 2025-03-29 00:00:00+00:00
2025-06-01 09:17:59,177 - build_training_set - INFO - 💰 Price NaN count: 0/1921 (

RSME is 0.2150 for day 8
Day 8: ✅ 144 test rows, Run: 03-22


2025-06-01 09:18:58,746 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-06-01 09:18:58,750 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-06-01 09:18:58,782 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-06-01 09:18:58,782 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 09:18:58,783 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 09:18:58,784 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 09:18:58,785 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 09:18:58,785 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10

RSME is 0.0313 for day 9
Day 9: ✅ 144 test rows, Run: 03-23


2025-06-01 09:23:24,220 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-06-01 09:23:24,220 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-06-01 09:23:24,220 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-06-01 09:23:24,222 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-06-01 09:23:24,234 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-06-01 09:23:24,235 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 09:23:24,236 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 09:23:24,237 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 09:23:24,237 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 09:23:24,237 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_dateti

RSME is 0.0749 for day 10
Day 10: ✅ 144 test rows, Run: 03-24


2025-06-01 09:24:26,809 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-06-01 09:24:26,810 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-06-01 09:24:26,813 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-06-01 09:24:26,838 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-06-01 09:24:26,841 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 09:24:26,844 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 09:24:26,846 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 09:24:26,847 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 09:24:26,847 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radi

RSME is 0.0424 for day 11
Day 11: ✅ 144 test rows, Run: 03-25


2025-06-01 09:27:53,943 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-06-01 09:27:53,946 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-06-01 09:27:53,968 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-06-01 09:27:53,968 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 09:27:53,969 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 09:27:53,970 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 09:27:53,971 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 09:27:53,971 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10

RSME is 0.0407 for day 12
Day 12: ✅ 144 test rows, Run: 03-26


2025-06-01 09:28:28,887 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-06-01 09:28:28,915 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-06-01 09:28:28,916 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-06-01 09:28:28,916 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-06-01 09:28:28,917 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-06-01 09:28:28,929 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-06-01 09:28:28,929 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 09:28:28,930 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 09:28:28,932 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 09:28:28,932 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 column

RSME is 0.0609 for day 13
Day 13: ✅ 144 test rows, Run: 03-27


2025-06-01 09:32:00,957 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 09:32:00,959 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 09:32:00,961 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 09:32:00,962 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 09:32:00,962 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10m', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos']
2025-06-01 09:32:00,963 - build_training_set - INFO - 📅 Date range: 2025-01-14 00:00:00+00:00 → 2025-04-04 00:00:00+00:00
2025-06-01 09:32:00,963 - build_training_set - INFO - 💰 Price NaN count: 0/1921 (

RSME is 0.0575 for day 14
Day 14: ✅ 144 test rows, Run: 03-28


2025-06-01 09:35:40,357 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-06-01 09:35:40,358 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-06-01 09:35:40,359 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-06-01 09:35:40,366 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-06-01 09:35:40,382 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-06-01 09:35:40,383 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 09:35:40,385 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 09:35:40,386 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 09:35:40,387 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 09:35:40,387 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_dateti

RSME is 0.0804 for day 15
Day 15: ✅ 144 test rows, Run: 03-29


2025-06-01 09:43:23,477 - build_training_set - INFO - 📅 Loading additional historical data until 2025-03-31 00:00:00+00:00 for lagging support
2025-06-01 09:43:23,478 - build_training_set - INFO - 🚀 Start build van trainingset
2025-06-01 09:43:23,479 - build_training_set - INFO - 🧠 Actuals van 2025-01-17 00:00:00+00:00 t/m 2025-03-30 23:00:00+00:00 (extended to 2025-03-31 00:00:00+00:00 for lagging)
2025-06-01 09:43:23,479 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-31 00:00:00+00:00, normalized to 2025-03-31 00:00:00+00:00 for DB lookup, target range: 2025-03-31 00:00:00+00:00 → 2025-04-07 00:00:00+00:00
2025-06-01 09:43:23,480 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
2025-06-01 09:43:23,482 - build_training_set - INFO - 📋 Requested columns found: 21/21
2025-06-01 09:43:23,482 - build_training_set - INFO - 📋 Using columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'di

RSME is 0.2069 for day 16
Day 16: ✅ 144 test rows, Run: 03-30


2025-06-01 09:43:23,690 - build_training_set - INFO - ✅ Saved as training_set in WARP.db
2025-06-01 09:43:23,691 - build_training_set - INFO - 🔒 Connection closed
2025-06-01 09:47:32,740 - build_training_set - INFO - 📅 Loading additional historical data until 2025-04-01 00:00:00+00:00 for lagging support
2025-06-01 09:47:32,752 - build_training_set - INFO - 🚀 Start build van trainingset
2025-06-01 09:47:32,752 - build_training_set - INFO - 🧠 Actuals van 2025-01-18 00:00:00+00:00 t/m 2025-03-31 23:00:00+00:00 (extended to 2025-04-01 00:00:00+00:00 for lagging)
2025-06-01 09:47:32,752 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-01 00:00:00+00:00, normalized to 2025-04-01 00:00:00+00:00 for DB lookup, target range: 2025-04-01 00:00:00+00:00 → 2025-04-08 00:00:00+00:00
2025-06-01 09:47:32,753 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
2025-06-01 09:47:32,755 - build_training_set - INFO - 📋 Requested columns found: 21/21
2025-06-01 09:47:

RSME is 0.0561 for day 17
Day 17: ✅ 144 test rows, Run: 03-31


2025-06-01 09:47:32,959 - build_training_set - INFO - ✅ Saved as training_set in WARP.db
2025-06-01 09:47:32,959 - build_training_set - INFO - 🔒 Connection closed
2025-06-01 09:50:59,826 - build_training_set - INFO - 📅 Loading additional historical data until 2025-04-02 00:00:00+00:00 for lagging support
2025-06-01 09:50:59,827 - build_training_set - INFO - 🚀 Start build van trainingset
2025-06-01 09:50:59,827 - build_training_set - INFO - 🧠 Actuals van 2025-01-19 00:00:00+00:00 t/m 2025-04-01 23:00:00+00:00 (extended to 2025-04-02 00:00:00+00:00 for lagging)
2025-06-01 09:50:59,828 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-02 00:00:00+00:00, normalized to 2025-04-02 00:00:00+00:00 for DB lookup, target range: 2025-04-02 00:00:00+00:00 → 2025-04-09 00:00:00+00:00
2025-06-01 09:50:59,829 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
2025-06-01 09:50:59,831 - build_training_set - INFO - 📋 Requested columns found: 21/21
2025-06-01 09:50:

RSME is 0.0599 for day 18
Day 18: ✅ 144 test rows, Run: 04-01


2025-06-01 09:51:00,027 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-06-01 09:51:00,029 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-06-01 09:51:00,031 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-06-01 09:51:00,049 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-06-01 09:51:00,050 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 09:51:00,050 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 09:51:00,052 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 09:51:00,053 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 09:51:00,053 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radi

RSME is 0.0455 for day 19
Day 19: ✅ 144 test rows, Run: 04-02


2025-06-01 09:51:58,021 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 09:51:58,022 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 09:51:58,024 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 09:51:58,024 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 09:51:58,024 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10m', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos']
2025-06-01 09:51:58,025 - build_training_set - INFO - 📅 Date range: 2025-01-20 00:00:00+00:00 → 2025-04-10 00:00:00+00:00
2025-06-01 09:51:58,025 - build_training_set - INFO - 💰 Price NaN count: 0/1921 (

RSME is 0.0877 for day 20
Day 20: ✅ 144 test rows, Run: 04-03


2025-06-01 09:52:29,904 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-06-01 09:52:29,905 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-06-01 09:52:29,905 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-06-01 09:52:29,906 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-06-01 09:52:29,918 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-06-01 09:52:29,918 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 09:52:29,919 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 09:52:29,920 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 09:52:29,921 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 09:52:29,921 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_dateti

RSME is 0.0322 for day 21
Day 21: ✅ 144 test rows, Run: 04-04


2025-06-01 09:53:02,113 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-06-01 09:53:02,114 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-06-01 09:53:02,116 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-06-01 09:53:02,119 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-06-01 09:53:02,137 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-06-01 09:53:02,138 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 09:53:02,138 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 09:53:02,140 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 09:53:02,140 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 09:53:02,141 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_dateti

RSME is 0.0233 for day 22
Day 22: ✅ 144 test rows, Run: 04-05


2025-06-01 09:53:34,890 - build_training_set - INFO - ✅ All columns have good data quality (<20% NaN)
2025-06-01 09:53:34,901 - build_training_set - INFO - ✅ Saved as training_set in WARP.db
2025-06-01 09:53:34,902 - build_training_set - INFO - 🔒 Connection closed
2025-06-01 09:54:05,877 - build_training_set - INFO - 📅 Loading additional historical data until 2025-04-07 00:00:00+00:00 for lagging support
2025-06-01 09:54:05,877 - build_training_set - INFO - 🚀 Start build van trainingset
2025-06-01 09:54:05,878 - build_training_set - INFO - 🧠 Actuals van 2025-01-24 00:00:00+00:00 t/m 2025-04-06 23:00:00+00:00 (extended to 2025-04-07 00:00:00+00:00 for lagging)
2025-06-01 09:54:05,878 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-07 00:00:00+00:00, normalized to 2025-04-07 00:00:00+00:00 for DB lookup, target range: 2025-04-07 00:00:00+00:00 → 2025-04-14 00:00:00+00:00
2025-06-01 09:54:05,878 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
20

RSME is 0.0873 for day 23
Day 23: ✅ 144 test rows, Run: 04-06


2025-06-01 09:54:06,080 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-06-01 09:54:06,081 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-06-01 09:54:06,081 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-06-01 09:54:06,083 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-06-01 09:54:06,094 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-06-01 09:54:06,095 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 09:54:06,096 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 09:54:06,097 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 09:54:06,098 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 09:54:06,098 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_dateti

RSME is 0.0250 for day 24
Day 24: ✅ 144 test rows, Run: 04-07


2025-06-01 09:54:38,140 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-06-01 09:54:38,141 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 09:54:38,142 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 09:54:38,143 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 09:54:38,144 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 09:54:38,144 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10m', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos']
2025-06-01 09:54:38,145 - build_training_set - INFO - 📅 Date range: 2025-01-25 00:00:00+00:00 →

RSME is 0.0376 for day 25
Day 25: ✅ 144 test rows, Run: 04-08


2025-06-01 09:55:12,544 - build_training_set - INFO - ✅ Saved as training_set in WARP.db
2025-06-01 09:55:12,550 - build_training_set - INFO - 🔒 Connection closed
2025-06-01 09:55:44,760 - build_training_set - INFO - 📅 Loading additional historical data until 2025-04-10 00:00:00+00:00 for lagging support
2025-06-01 09:55:44,761 - build_training_set - INFO - 🚀 Start build van trainingset
2025-06-01 09:55:44,775 - build_training_set - INFO - 🧠 Actuals van 2025-01-27 00:00:00+00:00 t/m 2025-04-09 23:00:00+00:00 (extended to 2025-04-10 00:00:00+00:00 for lagging)
2025-06-01 09:55:44,776 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-10 00:00:00+00:00, normalized to 2025-04-10 00:00:00+00:00 for DB lookup, target range: 2025-04-10 00:00:00+00:00 → 2025-04-17 00:00:00+00:00
2025-06-01 09:55:44,777 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
2025-06-01 09:55:44,779 - build_training_set - INFO - 📋 Requested columns found: 21/21
2025-06-01 09:55:

RSME is 0.0285 for day 26
Day 26: ✅ 144 test rows, Run: 04-09


2025-06-01 09:55:44,973 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-06-01 09:55:44,974 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-06-01 09:55:44,975 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-06-01 09:55:44,980 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-06-01 09:55:45,014 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-06-01 09:55:45,014 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 09:55:45,015 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 09:55:45,017 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 09:55:45,018 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 09:55:45,018 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_dateti

RSME is 0.0271 for day 27
Day 27: ✅ 144 test rows, Run: 04-10


2025-06-01 09:56:18,960 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-06-01 09:56:18,962 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-06-01 09:56:18,962 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-06-01 09:56:18,965 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-06-01 09:56:18,980 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-06-01 09:56:18,980 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-06-01 09:56:18,981 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-06-01 09:56:18,983 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 09:56:18,983 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 09:56:18,984 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_dateti

RSME is 0.0286 for day 28
Day 28: ✅ 144 test rows, Run: 04-11


2025-06-01 09:56:48,290 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-06-01 09:56:48,291 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 columns
2025-06-01 09:56:48,291 - build_training_set - INFO - 🧾 Final columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'wind_speed_10m', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos']
2025-06-01 09:56:48,291 - build_training_set - INFO - 📅 Date range: 2025-01-29 00:00:00+00:00 → 2025-04-19 00:00:00+00:00
2025-06-01 09:56:48,292 - build_training_set - INFO - 💰 Price NaN count: 0/1921 (0.0%)
2025-06-01 09:56:48,293 - build_training_set - INFO - ✅ All columns have good data quality (<20% NaN)
2025-06-01 09:56:48,300 - build_training_set - INFO - ✅ Saved as training_set in WARP.db
2025-06-0

RSME is 0.0326 for day 29
Day 29: ✅ 144 test rows, Run: 04-12


2025-06-01 09:57:21,425 - build_training_set - INFO - 💰 Price NaN count: 0/1921 (0.0%)
2025-06-01 09:57:21,429 - build_training_set - INFO - ✅ All columns have good data quality (<20% NaN)
2025-06-01 09:57:21,440 - build_training_set - INFO - ✅ Saved as training_set in WARP.db
2025-06-01 09:57:21,441 - build_training_set - INFO - 🔒 Connection closed


RSME is 0.0641 for day 30
Day 30: ✅ 144 test rows, Run: 04-13

📊 OVERALL RMSE - SARIMAX Model
Successful runs: 30/30
 iteration   run_date  valid_predictions  rmse
         1 2025-03-15                144  0.08
         2 2025-03-16                144  0.05
         3 2025-03-17                144  0.07
         4 2025-03-18                144  0.04
         5 2025-03-19                144  0.04
         6 2025-03-20                144  0.04
         7 2025-03-21                144  0.05
         8 2025-03-22                144  0.22
         9 2025-03-23                144  0.03
        10 2025-03-24                144  0.07
        11 2025-03-25                144  0.04
        12 2025-03-26                144  0.04
        13 2025-03-27                144  0.06
        14 2025-03-28                144  0.06
        15 2025-03-29                144  0.08
        16 2025-03-30                144  0.21
        17 2025-03-31                144  0.06
        18 2025-04-01                