In [3]:
import pandas as pd
import numpy as np
from prophet import Prophet
from sklearn.metrics import mean_squared_error
import sys
from pathlib import Path

# Make build_training_set importable: walk up from the working directory until
# a folder named "ENEXIS" is reached (or the filesystem root, if there is no
# such ancestor) and expose its src/utils folder on sys.path.
current_dir = Path.cwd()
while current_dir.name != "ENEXIS" and current_dir.parent != current_dir:
    current_dir = current_dir.parent
project_root = current_dir
utils_path = project_root / "src" / "utils"
# Re-running this cell must not keep appending the same entry.
if str(utils_path) not in sys.path:
    sys.path.append(str(utils_path))
try:
    from build_training_set import build_training_set
except ModuleNotFoundError as err:
    # Only rewrap when the helper module itself is missing; a missing
    # dependency *inside* build_training_set should surface unchanged.
    if err.name != "build_training_set":
        raise
    raise ModuleNotFoundError(
        f"Cannot import build_training_set from {utils_path} "
        f"(project root resolved to {project_root}, started from {Path.cwd()}). "
        "Run this from inside the ENEXIS project directory.",
        name=err.name,
    ) from err

# Column of the training set that is renamed to Prophet's `y` further down
target = 'Price'

# First training window (start, end) and its run date; every later run shifts
# all three forward by a whole number of days.
base_start, base_end, base_run = (
    "2025-01-01 00:00:00",
    "2025-03-14 23:00:00",
    "2025-03-15 00:00:00",
)

# One dict per scored run is collected here
rmse_results = []

# Banner: title line followed by a 60-character rule
print("🔍 Testing Prophet Model - RMSE per forecast day", "=" * 60, sep="\n")

# Rolling backtest: for each of 30 consecutive days, shift the training window
# and the run date forward by one day, fit Prophet on the rows up to the run
# date, and score its forecast on the rows after it (RMSE). Results are
# appended to `rmse_results` as dicts with the keys
# 'iteration', 'run_date', 'valid_predictions' and 'rmse'.

# Parse the base window once; each iteration only adds a day offset.
first_start = pd.Timestamp(base_start)
first_end = pd.Timestamp(base_end)
first_run = pd.Timestamp(base_run)
ts_format = "%Y-%m-%d %H:%M:%S"
# Rows within this span after the run date are excluded from scoring.
skip_horizon = pd.Timedelta(hours=24)

for i in range(30):
    shift = pd.Timedelta(days=i)
    start = first_start + shift
    end = first_end + shift
    run_date = first_run + shift

    try:
        df = build_training_set(
            train_start=start.strftime(ts_format),
            train_end=end.strftime(ts_format),
            run_date=run_date.strftime(ts_format)
        )

        if df is None or df.empty:
            print(f"Day {i+1}: ❌ No training data returned")
            continue

        df['target_datetime'] = pd.to_datetime(df['target_datetime'], utc=True)
        df = df.sort_values('target_datetime')

        run_date_utc = run_date.tz_localize("UTC")

        # Split into training (up to and including the run date) and testing
        # (strictly after it). Explicit comparisons keep NaT rows out of both.
        train_data = df[df['target_datetime'] <= run_date_utc]
        test_data = df[df['target_datetime'] > run_date_utc]

        # Prophet cannot be fitted on rows without a timestamp or a target.
        train_data = train_data.dropna(subset=['target_datetime', target])

        if test_data.empty or train_data.empty:
            print(f"Day {i+1}: ❌ Not enough data for training or testing")
            continue

        # Skip the first 24 hours after the run date. The cut is made on the
        # timestamp rather than on a row count so that gaps or a non-hourly
        # resolution cannot shift the evaluation window.
        eval_data = test_data[test_data['target_datetime'] > run_date_utc + skip_horizon]
        # Rows without an actual target cannot be scored; leaving them in
        # would make mean_squared_error raise and discard the whole day.
        eval_data = eval_data.dropna(subset=[target])

        if eval_data.empty:
            # Checked before fitting so no model is trained for nothing.
            print("Niet genoeg testdata na lag van 24 uur.")
            rmse_results.append({
                'iteration': i + 1,
                'run_date': run_date.strftime('%Y-%m-%d'),
                'valid_predictions': 0,
                'rmse': np.nan
            })
            continue

        # Prepare data for Prophet: columns must be named ds / y and the
        # timestamps are made timezone-naive before fitting.
        prophet_train = pd.DataFrame({
            'ds': train_data['target_datetime'].dt.tz_localize(None),
            'y': train_data[target],
        })

        # Train Prophet model
        model = Prophet(daily_seasonality=True, yearly_seasonality=True, weekly_seasonality=True)
        model.fit(prophet_train)

        # Forecast only the timestamps that are actually scored
        future = pd.DataFrame({'ds': eval_data['target_datetime'].dt.tz_localize(None)})
        forecast = model.predict(future)
        y_pred = forecast['yhat'].values
        y_test = eval_data[target].values

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))

        rmse_results.append({
            'iteration': i + 1,
            'run_date': run_date.strftime('%Y-%m-%d'),
            'valid_predictions': len(y_pred),
            'rmse': rmse
        })

        print(f"Day {i+1}: ✅ {len(y_pred)} test rows, Run: {run_date.strftime('%m-%d')}")

    except Exception as e:
        # Per-day boundary: one failing day is reported and must not abort
        # the remaining runs.
        print(f"Day {i+1}: ❌ Error: {e}")

# Report: per-run table, descriptive statistics and mean / stddev of the RMSE
if not rmse_results:
    print("❌ No runs completed successfully")
else:
    rmse_df = pd.DataFrame(rmse_results)
    rmse_scores = rmse_df['rmse']
    thin_rule = "-" * 40

    # Header with the number of runs that produced a (non-NaN) score
    print("\n📊 OVERALL RMSE - Prophet Model")
    print("=" * 80)
    scored_runs = rmse_scores.notna().sum()
    print(f"Successful runs: {scored_runs}/30")

    # Per-run table, rounded to two decimals, without the index column
    report_columns = ['iteration', 'run_date', 'valid_predictions', 'rmse']
    per_run_table = rmse_df[report_columns].round(2)
    print(per_run_table.to_string(index=False))

    print("\n📈 SUMMARY STATISTICS")
    print(thin_rule)
    print(rmse_scores.describe().round(2))

    print("\n📊 AVERAGE OVERALL RMSE")
    print(thin_rule)
    print(f"Mean RMSE: {rmse_scores.mean():.4f}")
    print(f"Stddev RMSE: {rmse_scores.std():.4f}")

2025-05-30 15:54:20,283 - build_training_set - INFO - 📅 Loading additional historical data until 2025-03-15 00:00:00+00:00 for lagging support
2025-05-30 15:54:20,287 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-30 15:54:20,289 - build_training_set - INFO - 🧠 Actuals van 2025-01-01 00:00:00+00:00 t/m 2025-03-14 23:00:00+00:00 (extended to 2025-03-15 00:00:00+00:00 for lagging)
2025-05-30 15:54:20,292 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-15 00:00:00+00:00, normalized to 2025-03-15 00:00:00+00:00 for DB lookup, target range: 2025-03-15 00:00:00+00:00 → 2025-03-22 00:00:00+00:00
2025-05-30 15:54:20,295 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
2025-05-30 15:54:20,300 - build_training_set - INFO - 📋 Requested columns found: 21/21
2025-05-30 15:54:20,301 - build_training_set - INFO - 📋 Using columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'di

🔍 Testing Prophet Model - RMSE per forecast day


2025-05-30 15:54:20,473 - build_training_set - INFO - 🔧 Missing columns in predictions: ['Price', 'Load', 'Flow_NO', 'Flow_GB']
2025-05-30 15:54:20,473 - build_training_set - INFO - 📊 Applying 168-hour lag for missing columns (excluding target variables)...
2025-05-30 15:54:20,473 - build_training_set - INFO -    🎯 Column 'Price' is target variable - filled with NaN (not lagged)
2025-05-30 15:54:20,473 - build_training_set - INFO -    🕐 Lagging column 'Load' by 168 hours
2025-05-30 15:54:20,564 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:54:20,570 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:54:20,651 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:20,657 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:20,814 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:20,814 - build_training_set - I

Day 1: ✅ 144 test rows, Run: 03-15


2025-05-30 15:54:22,394 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:54:22,402 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:54:22,533 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:22,533 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:22,609 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:22,609 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:54:22,609 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:54:22,617 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:54:22,674 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:54:22,683 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:54:22,689 - buil

Day 2: ✅ 144 test rows, Run: 03-16


2025-05-30 15:54:23,917 - build_training_set - INFO -    🎯 Column 'Price' is target variable - filled with NaN (not lagged)
2025-05-30 15:54:23,917 - build_training_set - INFO -    🕐 Lagging column 'Load' by 168 hours
2025-05-30 15:54:24,027 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:54:24,027 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:54:24,143 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:24,143 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:24,234 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:24,234 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:54:24,234 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:54:24,242 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-3

Day 3: ✅ 144 test rows, Run: 03-17


2025-05-30 15:54:25,482 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:54:25,593 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:25,601 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:25,700 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:25,700 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:54:25,700 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:54:25,709 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:54:25,747 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:54:25,755 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:54:25,757 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 4: ✅ 144 test rows, Run: 03-18


2025-05-30 15:54:26,832 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:54:26,908 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:26,909 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:27,010 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:27,010 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:54:27,010 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:54:27,018 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:54:27,049 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:54:27,049 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:54:27,049 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 5: ✅ 144 test rows, Run: 03-19


2025-05-30 15:54:28,138 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:28,220 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:28,220 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:54:28,220 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:54:28,228 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:54:28,271 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:54:28,271 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:54:28,271 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-30 15:54:28,280 - build_training_set - INFO - ✅ No overlap between actuals and predictions
2025-05-30 15:54:28,280 - build_training_set - INFO - 📦 Final combined table: 1921 rows, 21 column

Day 6: ✅ 144 test rows, Run: 03-20


2025-05-30 15:54:29,270 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:54:29,270 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:54:29,380 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:29,388 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:29,498 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:29,498 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:54:29,498 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:54:29,512 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:54:29,572 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:54:29,572 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:54:29,580 - buil

Day 7: ✅ 144 test rows, Run: 03-21


2025-05-30 15:54:30,986 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:54:31,116 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:31,118 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:31,190 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:31,192 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:54:31,193 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:54:31,201 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:54:31,276 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:54:31,279 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:54:31,282 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 8: ✅ 144 test rows, Run: 03-22


2025-05-30 15:54:32,550 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:32,550 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:32,633 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:32,633 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:54:32,641 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:54:32,643 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:54:32,677 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:54:32,677 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:54:32,677 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-30 15:54:32,687 - build_training_set - INFO - ✅ No overlap between actuals and predictions
202

Day 9: ✅ 144 test rows, Run: 03-23


2025-05-30 15:54:33,639 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:54:33,760 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:33,760 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:33,841 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:33,850 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:54:33,850 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:54:33,858 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:54:33,922 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:54:33,922 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:54:33,922 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 10: ✅ 144 test rows, Run: 03-24


2025-05-30 15:54:35,024 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:35,024 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:35,144 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:35,151 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:54:35,151 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:54:35,159 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:54:35,211 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:54:35,211 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:54:35,219 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-30 15:54:35,219 - build_training_set - INFO - ✅ No overlap between actuals and predictions
202

Day 11: ✅ 144 test rows, Run: 03-25


2025-05-30 15:54:36,244 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:36,244 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:36,353 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:36,353 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:54:36,353 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:54:36,361 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:54:36,420 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:54:36,420 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:54:36,428 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-30 15:54:36,437 - build_training_set - INFO - ✅ No overlap between actuals and predictions
202

Day 12: ✅ 144 test rows, Run: 03-26


2025-05-30 15:54:37,761 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:54:37,761 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:54:37,906 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:37,906 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:38,088 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:38,091 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:54:38,091 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:54:38,097 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:54:38,148 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:54:38,148 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:54:38,157 - buil

Day 13: ✅ 144 test rows, Run: 03-27


2025-05-30 15:54:39,465 - build_training_set - INFO - 🔧 Missing columns in predictions: ['Price', 'Load', 'Flow_NO', 'Flow_GB']
2025-05-30 15:54:39,472 - build_training_set - INFO - 📊 Applying 168-hour lag for missing columns (excluding target variables)...
2025-05-30 15:54:39,472 - build_training_set - INFO -    🎯 Column 'Price' is target variable - filled with NaN (not lagged)
2025-05-30 15:54:39,472 - build_training_set - INFO -    🕐 Lagging column 'Load' by 168 hours
2025-05-30 15:54:39,623 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:54:39,623 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:54:39,737 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:39,737 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:39,888 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:39,888 - build_training_set - I

Day 14: ✅ 144 test rows, Run: 03-28


2025-05-30 15:54:41,063 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:54:41,133 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:41,144 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:41,249 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:41,257 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:54:41,257 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:54:41,266 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:54:41,323 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:54:41,323 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:54:41,331 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 15: ✅ 144 test rows, Run: 03-29


2025-05-30 15:54:42,674 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:42,675 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:42,849 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:42,849 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:54:42,849 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:54:42,857 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:54:42,918 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:54:42,918 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:54:42,926 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-30 15:54:42,926 - build_training_set - INFO - ✅ No overlap between actuals and predictions
202

Day 16: ✅ 144 test rows, Run: 03-30


2025-05-30 15:54:44,169 - build_training_set - INFO - 🔧 Missing columns in predictions: ['Price', 'Load', 'Flow_NO', 'Flow_GB']
2025-05-30 15:54:44,169 - build_training_set - INFO - 📊 Applying 168-hour lag for missing columns (excluding target variables)...
2025-05-30 15:54:44,175 - build_training_set - INFO -    🎯 Column 'Price' is target variable - filled with NaN (not lagged)
2025-05-30 15:54:44,179 - build_training_set - INFO -    🕐 Lagging column 'Load' by 168 hours
2025-05-30 15:54:44,287 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:54:44,289 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:54:44,387 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:44,387 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:44,450 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:44,452 - build_training_set - I

Day 17: ✅ 144 test rows, Run: 03-31


2025-05-30 15:54:45,850 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:54:45,850 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:54:45,944 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:45,944 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:46,030 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:46,030 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:54:46,038 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:54:46,039 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:54:46,111 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:54:46,120 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:54:46,120 - buil

Day 18: ✅ 144 test rows, Run: 04-01


2025-05-30 15:54:47,277 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:54:47,425 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:47,425 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:47,552 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:47,552 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:54:47,552 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:54:47,560 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:54:47,610 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:54:47,610 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:54:47,619 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 19: ✅ 144 test rows, Run: 04-02


2025-05-30 15:54:48,688 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:54:48,696 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:54:48,845 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:48,845 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:48,945 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:48,953 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:54:48,953 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:54:48,953 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:54:49,014 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:54:49,014 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:54:49,014 - buil

Day 20: ✅ 144 test rows, Run: 04-03


2025-05-30 15:54:50,286 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:54:50,288 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:54:50,384 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:50,384 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:50,457 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:50,457 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:54:50,457 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:54:50,465 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:54:50,500 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:54:50,503 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:54:50,503 - buil

Day 21: ✅ 144 test rows, Run: 04-04


2025-05-30 15:54:51,755 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:54:51,755 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:54:51,850 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:51,850 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:51,947 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:51,949 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:54:51,949 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:54:51,957 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:54:52,032 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:54:52,040 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:54:52,040 - buil

Day 22: ✅ 144 test rows, Run: 04-05


2025-05-30 15:54:53,124 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:54:53,124 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:54:53,258 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:53,258 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:53,364 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:53,373 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:54:53,373 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:54:53,383 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:54:53,433 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:54:53,442 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:54:53,442 - buil

Day 23: ✅ 144 test rows, Run: 04-06


2025-05-30 15:54:54,553 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:54:54,553 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:54:54,675 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:54,675 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:54,763 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:54,771 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:54:54,771 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:54:54,780 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:54:54,816 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:54:54,816 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:54:54,825 - buil

Day 24: ✅ 144 test rows, Run: 04-07


2025-05-30 15:54:55,825 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:54:55,834 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:54:56,017 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:56,017 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:56,111 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:56,116 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:54:56,119 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:54:56,127 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:54:56,185 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:54:56,188 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:54:56,198 - buil

Day 25: ✅ 144 test rows, Run: 04-08


2025-05-30 15:54:57,293 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:54:57,293 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:54:57,418 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:57,418 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:57,520 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:57,520 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:54:57,520 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:54:57,535 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:54:57,587 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:54:57,587 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:54:57,593 - buil

Day 26: ✅ 144 test rows, Run: 04-09


2025-05-30 15:54:58,718 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:54:58,718 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:54:58,851 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:54:58,852 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:54:58,920 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:54:58,928 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:54:58,928 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:54:58,937 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:54:58,971 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:54:58,979 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:54:58,979 - buil

Day 27: ✅ 144 test rows, Run: 04-10


2025-05-30 15:55:00,077 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:55:00,077 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:55:00,242 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:55:00,242 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:55:00,338 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:55:00,346 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:55:00,346 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:55:00,346 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:55:00,386 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:55:00,386 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:55:00,394 - buil

Day 28: ✅ 144 test rows, Run: 04-11


2025-05-30 15:55:01,325 - build_training_set - INFO -    🕐 Lagging column 'Load' by 168 hours
2025-05-30 15:55:01,458 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:55:01,458 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:55:01,599 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:55:01,607 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:55:01,731 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:55:01,740 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:55:01,740 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:55:01,749 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:55:01,807 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:55:01,815 -

Day 29: ✅ 144 test rows, Run: 04-12


2025-05-30 15:55:02,940 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:55:02,940 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:55:03,075 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:55:03,084 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:55:03,084 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:55:03,092 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:55:03,155 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:55:03,155 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:55:03,163 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-30 15:55:03,165 - build_training_set - INFO - ✅ No overlap between actuals and predictions
202

Day 30: ✅ 144 test rows, Run: 04-13

📊 OVERALL RMSE - Prophet Model
Successful runs: 30/30
 iteration   run_date  valid_predictions  rmse
         1 2025-03-15                144  0.14
         2 2025-03-16                144  0.15
         3 2025-03-17                144  0.10
         4 2025-03-18                144  0.08
         5 2025-03-19                144  0.04
         6 2025-03-20                144  0.04
         7 2025-03-21                144  0.05
         8 2025-03-22                144  0.05
         9 2025-03-23                144  0.05
        10 2025-03-24                144  0.06
        11 2025-03-25                144  0.12
        12 2025-03-26                144  0.13
        13 2025-03-27                144  0.10
        14 2025-03-28                144  0.06
        15 2025-03-29                144  0.06
        16 2025-03-30                144  0.08
        17 2025-03-31                144  0.24
        18 2025-04-01                144  0.07
        19 2025-

In [4]:
import pandas as pd
import numpy as np
from prophet import Prophet
from sklearn.metrics import mean_squared_error
import sys
from pathlib import Path

# Make sure build_training_set can be imported: walk up from the current
# working directory until a folder named "ENEXIS" is reached. The loop also
# stops at the filesystem root (where a path is its own parent), in which case
# the root itself is used as project_root.
current_dir = Path.cwd()
while current_dir.name != "ENEXIS" and current_dir.parent != current_dir:
    current_dir = current_dir.parent
project_root = current_dir
utils_path = project_root / "src" / "utils"
# Notebook cells get re-run; only register the utils folder once so sys.path
# does not accumulate duplicate entries.
if str(utils_path) not in sys.path:
    sys.path.append(str(utils_path))
from build_training_set import build_training_set

# Columns passed to Prophet as extra regressors (order is preserved as-is).
FEATURES = [
    'Load',
    'shortwave_radiation',
    'temperature_2m',
    'direct_normal_irradiance',
    'diffuse_radiation',
    'Flow_NO',
    'yearday_cos',
    'Flow_GB',
    'month',
    'is_dst',
    'yearday_sin',
    'wind_speed_10m',
    'is_non_working_day',
    'hour_cos',
    'is_weekend',
    'cloud_cover',
    'weekday_sin',
    'hour_sin',
    'weekday_cos',
]
# Column that is forecast.
target = 'Price'

# First training window and first forecast run date; the backtest loop shifts
# all three forward by one day per iteration.
base_start, base_end, base_run = (
    "2025-01-01 00:00:00",
    "2025-03-14 23:00:00",
    "2025-03-15 00:00:00",
)

# One dict per evaluated run date is collected here.
rmse_results = []

print("🔍 Testing Prophet Model - RMSE per forecast day", "=" * 60, sep="\n")

# Rolling backtest: for each of 30 consecutive run dates, rebuild the dataset,
# fit Prophet with FEATURES as extra regressors on everything up to the run
# date, forecast the rows after it and store the RMSE in rmse_results.
for i in range(30):
    # Slide the training window and the run date forward by i days.
    shift = pd.Timedelta(days=i)
    start = pd.Timestamp(base_start) + shift
    end = pd.Timestamp(base_end) + shift
    run_date = pd.Timestamp(base_run) + shift

    try:
        df = build_training_set(
            train_start=start.strftime("%Y-%m-%d %H:%M:%S"),
            train_end=end.strftime("%Y-%m-%d %H:%M:%S"),
            run_date=run_date.strftime("%Y-%m-%d %H:%M:%S")
        )

        if df is None or df.empty:
            print(f"Day {i+1}: ❌ No training data returned")
            continue

        df['target_datetime'] = pd.to_datetime(df['target_datetime'], utc=True)
        df = df.sort_values('target_datetime')

        # run_date is built as a naive timestamp; interpret it as UTC so it is
        # comparable with the UTC-aware target_datetime column.
        run_date_utc = run_date.tz_localize("UTC")

        # Split into training (up to and including the run date) and testing sets
        train_data = df[df['target_datetime'] <= run_date_utc]
        test_data = df[df['target_datetime'] > run_date_utc]

        # Drop any missing data in training and test
        required_cols = ['target_datetime', target] + FEATURES
        train_data = train_data.dropna(subset=required_cols)
        test_data = test_data.dropna(subset=required_cols)

        if test_data.empty or train_data.empty:
            print(f"Day {i+1}: ❌ Not enough data for training or testing")
            continue

        # Prepare data for Prophet: 'ds'/'y' column names and timezone-naive
        # timestamps. .assign() returns a new frame, so no value is written
        # into a slice of df (avoids SettingWithCopyWarning).
        prophet_cols = ['ds', 'y'] + FEATURES
        prophet_train = (
            train_data.rename(columns={'target_datetime': 'ds', target: 'y'})[prophet_cols]
            .assign(ds=lambda frame: frame['ds'].dt.tz_localize(None))
        )
        prophet_test = (
            test_data.rename(columns={'target_datetime': 'ds', target: 'y'})[prophet_cols]
            .assign(ds=lambda frame: frame['ds'].dt.tz_localize(None))
        )

        # Train Prophet model with extra regressors
        model = Prophet(daily_seasonality=True, yearly_seasonality=True, weekly_seasonality=True)
        for reg in FEATURES:
            model.add_regressor(reg)
        model.fit(prophet_train)

        # Forecast for the test period
        future = prophet_test[['ds'] + FEATURES]
        forecast = model.predict(future)
        y_pred = forecast['yhat'].to_numpy()
        y_test = prophet_test['y'].to_numpy()

        # Skip the first 24 hours after the run date. The cut is made on the
        # timestamp rather than on row position, so rows removed by dropna()
        # above cannot shift the window and exclude later hours by accident.
        # NOTE(review): assumes forecast rows come back in the same ds order
        # as prophet_test (which is sorted by ds) - same assumption as before.
        scored = (prophet_test['ds'] > run_date + pd.Timedelta(hours=24)).to_numpy()
        if not scored.any():
            print("Niet genoeg testdata na lag van 24 uur.")
            rmse_results.append({
                'iteration': i + 1,
                'run_date': run_date.strftime('%Y-%m-%d'),
                'valid_predictions': 0,
                'rmse': np.nan
            })
            continue

        y_pred = y_pred[scored]
        y_test = y_test[scored]

        # At least one scored row is guaranteed by the guard above.
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))

        rmse_results.append({
            'iteration': i + 1,
            'run_date': run_date.strftime('%Y-%m-%d'),
            'valid_predictions': len(y_pred),
            'rmse': rmse
        })

        print(f"Day {i+1}: ✅ {len(y_pred)} test rows, Run: {run_date.strftime('%m-%d')}")

    except Exception as e:
        # Deliberate best-effort: one failing day is reported and skipped so
        # the remaining run dates of the backtest are still evaluated.
        print(f"Day {i+1}: ❌ Error: {e}")

# Report the backtest: per-run table, descriptive statistics and the
# mean / standard deviation of the RMSE over all recorded runs.
if not rmse_results:
    print("❌ No runs completed successfully")
else:
    rmse_df = pd.DataFrame(rmse_results)
    rmse_values = rmse_df['rmse']
    thin_rule = "-" * 40

    print("\n📊 OVERALL RMSE - Prophet Model")
    print("=" * 80)
    # Runs recorded with a NaN RMSE are not counted as successful.
    successful_runs = rmse_values.count()
    print(f"Successful runs: {successful_runs}/30")

    report_columns = ['iteration', 'run_date', 'valid_predictions', 'rmse']
    report_table = rmse_df[report_columns].round(2)
    print(report_table.to_string(index=False))

    print("\n📈 SUMMARY STATISTICS")
    print(thin_rule)
    print(rmse_values.describe().round(2))

    print("\n📊 AVERAGE OVERALL RMSE")
    print(thin_rule)
    mean_rmse = rmse_values.mean()
    std_rmse = rmse_values.std()
    print(f"Mean RMSE: {mean_rmse:.4f}")
    print(f"Stddev RMSE: {std_rmse:.4f}")

2025-05-30 15:58:18,080 - build_training_set - INFO - 📅 Loading additional historical data until 2025-03-15 00:00:00+00:00 for lagging support
2025-05-30 15:58:18,081 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-30 15:58:18,082 - build_training_set - INFO - 🧠 Actuals van 2025-01-01 00:00:00+00:00 t/m 2025-03-14 23:00:00+00:00 (extended to 2025-03-15 00:00:00+00:00 for lagging)
2025-05-30 15:58:18,083 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-15 00:00:00+00:00, normalized to 2025-03-15 00:00:00+00:00 for DB lookup, target range: 2025-03-15 00:00:00+00:00 → 2025-03-22 00:00:00+00:00
2025-05-30 15:58:18,087 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
2025-05-30 15:58:18,088 - build_training_set - INFO - 📋 Requested columns found: 21/21
2025-05-30 15:58:18,095 - build_training_set - INFO - 📋 Using columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'di

🔍 Testing Prophet Model - RMSE per forecast day


2025-05-30 15:58:18,271 - build_training_set - INFO - 🔧 Missing columns in predictions: ['Price', 'Load', 'Flow_NO', 'Flow_GB']
2025-05-30 15:58:18,275 - build_training_set - INFO - 📊 Applying 168-hour lag for missing columns (excluding target variables)...
2025-05-30 15:58:18,275 - build_training_set - INFO -    🎯 Column 'Price' is target variable - filled with NaN (not lagged)
2025-05-30 15:58:18,275 - build_training_set - INFO -    🕐 Lagging column 'Load' by 168 hours
2025-05-30 15:58:18,371 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:58:18,371 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:58:18,446 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:58:18,446 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:58:18,513 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:58:18,513 - build_training_set - I

Day 1: ✅ 144 test rows, Run: 03-15


2025-05-30 15:58:20,154 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:58:20,276 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:58:20,284 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:58:20,403 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:58:20,405 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:58:20,405 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:58:20,413 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:58:20,476 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:58:20,476 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:58:20,484 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 2: ✅ 144 test rows, Run: 03-16


2025-05-30 15:58:21,923 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:58:21,923 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:58:22,048 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:58:22,057 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:58:22,155 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:58:22,157 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:58:22,157 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:58:22,157 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:58:22,222 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:58:22,230 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:58:22,230 - buil

Day 3: ✅ 144 test rows, Run: 03-17


2025-05-30 15:58:23,834 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:58:23,834 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:58:23,969 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:58:23,973 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:58:24,083 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:58:24,090 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:58:24,090 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:58:24,090 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:58:24,134 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:58:24,141 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:58:24,142 - buil

Day 4: ✅ 144 test rows, Run: 03-18


2025-05-30 15:58:25,842 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:58:25,844 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:58:25,950 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:58:25,950 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:58:26,025 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:58:26,027 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:58:26,027 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:58:26,027 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:58:26,100 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:58:26,100 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:58:26,100 - buil

Day 5: ✅ 144 test rows, Run: 03-19


2025-05-30 15:58:27,635 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:58:27,644 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:58:27,778 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:58:27,778 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:58:27,903 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:58:27,903 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:58:27,912 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:58:27,920 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:58:27,987 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:58:27,987 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:58:27,996 - buil

Day 6: ✅ 144 test rows, Run: 03-20


2025-05-30 15:58:29,546 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:58:29,546 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:58:29,663 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:58:29,671 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:58:29,738 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:58:29,738 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:58:29,738 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:58:29,745 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:58:29,804 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:58:29,804 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:58:29,813 - buil

Day 7: ✅ 144 test rows, Run: 03-21


2025-05-30 15:58:31,113 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:58:31,253 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:58:31,253 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:58:31,371 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:58:31,371 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:58:31,371 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:58:31,380 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:58:31,423 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:58:31,423 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:58:31,430 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 8: ✅ 144 test rows, Run: 03-22


2025-05-30 15:58:32,682 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:58:32,756 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:58:32,756 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:58:32,897 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:58:32,897 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:58:32,905 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:58:32,905 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:58:32,965 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:58:32,965 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:58:32,973 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 9: ✅ 144 test rows, Run: 03-23


2025-05-30 15:58:34,477 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:58:34,479 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:58:34,593 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:58:34,594 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:58:34,680 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:58:34,680 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:58:34,688 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:58:34,688 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:58:34,734 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:58:34,738 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:58:34,738 - buil

Day 10: ✅ 144 test rows, Run: 03-24


2025-05-30 15:58:36,168 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:58:36,177 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:58:36,251 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:58:36,260 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:58:36,401 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:58:36,406 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:58:36,407 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:58:36,417 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:58:36,471 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:58:36,471 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:58:36,471 - buil

Day 11: ✅ 144 test rows, Run: 03-25


2025-05-30 15:58:37,850 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:58:37,929 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:58:37,929 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:58:38,015 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:58:38,015 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:58:38,015 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:58:38,023 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:58:38,085 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:58:38,093 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:58:38,093 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 12: ✅ 144 test rows, Run: 03-26


2025-05-30 15:58:39,289 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:58:39,297 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:58:39,372 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:58:39,381 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:58:39,382 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:58:39,389 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:58:39,461 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:58:39,461 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:58:39,466 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-30 15:58:39,475 - build_training_set - INFO - ✅ No overlap between actuals and predictions
202

Day 13: ✅ 144 test rows, Run: 03-27


2025-05-30 15:58:41,295 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:58:41,295 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:58:41,479 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:58:41,479 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:58:41,556 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:58:41,556 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:58:41,564 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:58:41,572 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:58:41,632 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:58:41,641 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:58:41,641 - buil

Day 14: ✅ 144 test rows, Run: 03-28


2025-05-30 15:58:43,014 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:58:43,098 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:58:43,098 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:58:43,179 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:58:43,179 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:58:43,179 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:58:43,189 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:58:43,234 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:58:43,242 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:58:43,242 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 15: ✅ 144 test rows, Run: 03-29


2025-05-30 15:58:44,620 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:58:44,620 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:58:44,750 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:58:44,750 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:58:44,817 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:58:44,825 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:58:44,825 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:58:44,825 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:58:44,858 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:58:44,866 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:58:44,866 - buil

Day 16: ✅ 144 test rows, Run: 03-30


2025-05-30 15:58:46,279 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:58:46,279 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:58:46,381 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:58:46,381 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:58:46,473 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:58:46,482 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:58:46,482 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:58:46,503 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:58:46,545 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:58:46,545 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:58:46,553 - buil

Day 17: ✅ 144 test rows, Run: 03-31


2025-05-30 15:58:48,126 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:58:48,126 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:58:48,269 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:58:48,269 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:58:48,370 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:58:48,378 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:58:48,378 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:58:48,381 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:58:48,437 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:58:48,437 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:58:48,445 - buil

Day 18: ✅ 144 test rows, Run: 04-01


2025-05-30 15:58:49,627 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:58:49,695 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:58:49,695 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:58:49,779 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:58:49,779 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:58:49,779 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:58:49,787 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:58:49,820 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:58:49,828 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:58:49,828 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 19: ✅ 144 test rows, Run: 04-02


2025-05-30 15:58:51,472 - build_training_set - INFO - 🔧 Missing columns in predictions: ['Price', 'Load', 'Flow_NO', 'Flow_GB']
2025-05-30 15:58:51,472 - build_training_set - INFO - 📊 Applying 168-hour lag for missing columns (excluding target variables)...
2025-05-30 15:58:51,472 - build_training_set - INFO -    🎯 Column 'Price' is target variable - filled with NaN (not lagged)
2025-05-30 15:58:51,472 - build_training_set - INFO -    🕐 Lagging column 'Load' by 168 hours
2025-05-30 15:58:51,579 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:58:51,579 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:58:51,679 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:58:51,681 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:58:51,738 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:58:51,738 - build_training_set - I

Day 20: ✅ 144 test rows, Run: 04-03


2025-05-30 15:58:53,431 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:58:53,516 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:58:53,516 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:58:53,606 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:58:53,606 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:58:53,606 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:58:53,614 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:58:53,651 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:58:53,651 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:58:53,651 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 21: ✅ 144 test rows, Run: 04-04


2025-05-30 15:58:54,949 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:58:55,061 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:58:55,061 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:58:55,199 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:58:55,205 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:58:55,205 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:58:55,216 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:58:55,282 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:58:55,282 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:58:55,290 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 22: ✅ 144 test rows, Run: 04-05


2025-05-30 15:58:56,806 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:58:56,806 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:58:56,908 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:58:56,908 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:58:56,908 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:58:56,920 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:58:56,969 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:58:56,969 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:58:56,978 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-30 15:58:56,978 - build_training_set - INFO - ✅ No overlap between actuals and predictions
202

Day 23: ✅ 144 test rows, Run: 04-06


2025-05-30 15:58:58,268 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:58:58,268 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:58:58,341 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:58:58,349 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:58:58,351 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:58:58,351 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:58:58,392 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:58:58,392 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:58:58,400 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-30 15:58:58,402 - build_training_set - INFO - ✅ No overlap between actuals and predictions
202

Day 24: ✅ 144 test rows, Run: 04-07


2025-05-30 15:58:59,493 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:58:59,553 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:58:59,553 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:58:59,636 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:58:59,636 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:58:59,636 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:58:59,645 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:58:59,704 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:58:59,704 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:58:59,712 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 25: ✅ 144 test rows, Run: 04-08


2025-05-30 15:59:00,957 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:59:00,957 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:59:01,072 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:59:01,072 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:59:01,081 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:59:01,081 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:59:01,121 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:59:01,129 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:59:01,129 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-30 15:59:01,129 - build_training_set - INFO - ✅ No overlap between actuals and predictions
202

Day 26: ✅ 144 test rows, Run: 04-09


2025-05-30 15:59:02,334 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:59:02,334 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:59:02,414 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:59:02,414 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:59:02,414 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:59:02,423 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:59:02,472 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:59:02,472 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:59:02,480 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-30 15:59:02,480 - build_training_set - INFO - ✅ No overlap between actuals and predictions
202

Day 27: ✅ 144 test rows, Run: 04-10


2025-05-30 15:59:03,745 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:59:03,747 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:59:03,860 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:59:03,860 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:59:03,926 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:59:03,934 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:59:03,934 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:59:03,942 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:59:03,990 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:59:03,990 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:59:03,998 - buil

Day 28: ✅ 144 test rows, Run: 04-11


2025-05-30 15:59:05,148 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:59:05,158 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:59:05,291 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:59:05,293 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:59:05,384 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:59:05,384 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:59:05,393 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:59:05,394 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:59:05,466 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:59:05,466 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:59:05,466 - buil

Day 29: ✅ 144 test rows, Run: 04-12


2025-05-30 15:59:06,695 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:59:06,695 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:59:06,781 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:59:06,783 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:59:06,783 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:59:06,791 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:59:06,860 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:59:06,860 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:59:06,860 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-05-30 15:59:06,869 - build_training_set - INFO - ✅ No overlap between actuals and predictions
202

Day 30: ✅ 144 test rows, Run: 04-13

📊 OVERALL RMSE - Prophet Model
Successful runs: 30/30
 iteration   run_date  valid_predictions  rmse
         1 2025-03-15                144  0.06
         2 2025-03-16                144  0.05
         3 2025-03-17                144  0.03
         4 2025-03-18                144  0.03
         5 2025-03-19                144  0.03
         6 2025-03-20                144  0.03
         7 2025-03-21                144  0.05
         8 2025-03-22                144  0.02
         9 2025-03-23                144  0.04
        10 2025-03-24                144  0.05
        11 2025-03-25                144  0.07
        12 2025-03-26                144  0.05
        13 2025-03-27                144  0.04
        14 2025-03-28                144  0.06
        15 2025-03-29                144  0.09
        16 2025-03-30                144  0.16
        17 2025-03-31                144  0.20
        18 2025-04-01                144  0.12
        19 2025-