In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error
import sys
from pathlib import Path

# Make <project root>/src/utils importable so that build_training_set can be
# imported below. The project root is the nearest directory named "ENEXIS",
# starting at the working directory and moving upwards; when no such directory
# exists the search ends at the filesystem root.
current_dir = Path.cwd()
ancestry = (current_dir, *current_dir.parents)
current_dir = next(
    (candidate for candidate in ancestry if candidate.name == "ENEXIS"),
    ancestry[-1],
)
project_root = current_dir
utils_path = project_root.joinpath("src", "utils")
sys.path.append(str(utils_path))
from build_training_set import build_training_set

# Model inputs: one cos/sin column pair per calendar component.
FEATURES = [
    f"{component}_{wave}"
    for component in ("weekday", "hour", "yearday")
    for wave in ("cos", "sin")
]
TRAIN_FEATURES = FEATURES  # alias of the same list object, not a copy
target = "Price"

# Window used by the first iteration; the evaluation loop shifts all three
# timestamps forward by one day per iteration.
base_start, base_end, base_run = (
    "2025-01-01 00:00:00",
    "2025-03-14 23:00:00",
    "2025-03-15 00:00:00",
)

# One result dict per completed iteration is appended here.
rmse_results = []

print("🔍 Testing XGBoost Model - RMSE per forecast day", "=" * 60, sep="\n")

# Walk-forward evaluation: each iteration shifts the training window and the
# forecast run date one day forward, refits the model on the rows up to the run
# date and scores its predictions on the rows after it.
for i in range(30):
    start = pd.Timestamp(base_start) + pd.Timedelta(days=i)
    end = pd.Timestamp(base_end) + pd.Timedelta(days=i)
    run_date = pd.Timestamp(base_run) + pd.Timedelta(days=i)

    # Any failure inside one iteration is reported and the loop moves on, so a
    # single bad day does not abort the whole evaluation.
    try:
        df = build_training_set(
            train_start=start.strftime("%Y-%m-%d %H:%M:%S"),
            train_end=end.strftime("%Y-%m-%d %H:%M:%S"),
            run_date=run_date.strftime("%Y-%m-%d %H:%M:%S")
        )

        if df is None or df.empty:
            print(f"Day {i+1}: ❌ No training data returned")
            continue

        # Index the frame chronologically by its UTC target timestamp.
        df['target_datetime'] = pd.to_datetime(df['target_datetime'], utc=True)
        df = df.sort_values('target_datetime').set_index('target_datetime')

        # run_date is timezone-naive; localize it so it compares against the
        # UTC-aware index.
        run_date_utc = run_date.tz_localize("UTC")

        # Split into training and testing sets.
        # NOTE(review): the row stamped exactly at run_date lands in the
        # training split — confirm this boundary is intended.
        train_data = df[df.index <= run_date_utc]
        test_data = df[df.index > run_date_utc]

        # Drop incomplete training rows. This also raises KeyError when a
        # feature or target column is absent from df, so no separate
        # column-presence check is needed before building the test matrix.
        train_data = train_data.dropna(subset=TRAIN_FEATURES + [target])

        if test_data.empty or train_data.empty:
            print(f"Day {i+1}: ❌ Not enough data for training or testing")
            continue

        # Train model
        X_train = train_data[TRAIN_FEATURES]
        y_train = train_data[target]
        model = xgb.XGBRegressor(n_estimators=100, random_state=42)
        model.fit(X_train, y_train)

        # Skip the first 24 test rows (positional; this equals the first 24
        # hours only if the rows are hourly without gaps — TODO confirm).
        X_test = test_data[TRAIN_FEATURES].iloc[24:]
        y_test = test_data[target].iloc[24:]

        # Rows without an actual target value cannot be scored, and
        # mean_squared_error rejects NaN input; keep only rows with a target.
        has_actual = y_test.notna().to_numpy()
        X_test = X_test[has_actual]
        y_test = y_test[has_actual]

        # Only predict when test rows remain after the skip and the filter.
        if len(X_test) > 0:
            y_pred = model.predict(X_test)
            rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        else:
            print("Niet genoeg testdata na lag van 24 uur.")
            rmse = np.nan

        rmse_results.append({
            'iteration': i + 1,
            'run_date': run_date.strftime('%Y-%m-%d'),
            'valid_predictions': len(X_test),
            'rmse': rmse
        })

        print(f"Day {i+1}: ✅ {len(X_test)} test rows, Run: {run_date.strftime('%m-%d')}")

    except Exception as e:
        print(f"Day {i+1}: ❌ Error: {e}")

# Report: a per-iteration table followed by aggregate RMSE statistics, or a
# single failure line when no iteration produced a result.
if not rmse_results:
    print("❌ No runs completed successfully")
else:
    rmse_df = pd.DataFrame(rmse_results)
    rmse_scores = rmse_df['rmse']
    section_rule = "-" * 40
    report_columns = ['iteration', 'run_date', 'valid_predictions', 'rmse']

    print("\n📊 OVERALL RMSE - XGBoost Model")
    print("=" * 80)
    # An iteration whose RMSE is NaN is not counted here.
    print(f"Successful runs: {rmse_scores.notna().sum()}/30")

    per_run_table = rmse_df[report_columns].round(2)
    print(per_run_table.to_string(index=False))

    print("\n📈 SUMMARY STATISTICS")
    print(section_rule)
    print(rmse_scores.describe().round(2))

    print("\n📊 AVERAGE OVERALL RMSE")
    print(section_rule)
    print(f"Mean RMSE: {rmse_scores.mean():.4f}")
    print(f"Stddev RMSE: {rmse_scores.std():.4f}")


2025-05-30 15:46:19,422 - build_training_set - INFO - 📅 Loading additional historical data until 2025-03-15 00:00:00+00:00 for lagging support
2025-05-30 15:46:19,422 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-30 15:46:19,422 - build_training_set - INFO - 🧠 Actuals van 2025-01-01 00:00:00+00:00 t/m 2025-03-14 23:00:00+00:00 (extended to 2025-03-15 00:00:00+00:00 for lagging)
2025-05-30 15:46:19,431 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-15 00:00:00+00:00, normalized to 2025-03-15 00:00:00+00:00 for DB lookup, target range: 2025-03-15 00:00:00+00:00 → 2025-03-22 00:00:00+00:00
2025-05-30 15:46:19,431 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
2025-05-30 15:46:19,431 - build_training_set - INFO - 📋 Requested columns found: 21/21
2025-05-30 15:46:19,440 - build_training_set - INFO - 📋 Using columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'di

🔍 Testing XGBoost Model - RMSE per forecast day


2025-05-30 15:46:19,599 - build_training_set - INFO - ✅ Predictions loaded: 169 rows with 17 columns
2025-05-30 15:46:19,599 - build_training_set - INFO - 🔧 Missing columns in predictions: ['Price', 'Load', 'Flow_NO', 'Flow_GB']
2025-05-30 15:46:19,599 - build_training_set - INFO - 📊 Applying 168-hour lag for missing columns (excluding target variables)...
2025-05-30 15:46:19,607 - build_training_set - INFO -    🎯 Column 'Price' is target variable - filled with NaN (not lagged)
2025-05-30 15:46:19,608 - build_training_set - INFO -    🕐 Lagging column 'Load' by 168 hours
2025-05-30 15:46:19,698 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:46:19,698 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:19,780 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:19,780 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:19,849 - build_training_s

Day 1: ✅ 144 test rows, Run: 03-15


2025-05-30 15:46:20,436 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:20,507 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:20,507 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:20,584 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:20,592 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:20,592 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:20,598 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:20,636 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:20,636 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:20,645 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 2: ✅ 144 test rows, Run: 03-16


2025-05-30 15:46:21,068 - build_training_set - INFO - ✅ Predictions loaded: 169 rows with 17 columns
2025-05-30 15:46:21,071 - build_training_set - INFO - 🔧 Missing columns in predictions: ['Price', 'Load', 'Flow_NO', 'Flow_GB']
2025-05-30 15:46:21,072 - build_training_set - INFO - 📊 Applying 168-hour lag for missing columns (excluding target variables)...
2025-05-30 15:46:21,073 - build_training_set - INFO -    🎯 Column 'Price' is target variable - filled with NaN (not lagged)
2025-05-30 15:46:21,074 - build_training_set - INFO -    🕐 Lagging column 'Load' by 168 hours
2025-05-30 15:46:21,168 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:46:21,170 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:21,240 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:21,242 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:21,314 - build_training_s

Day 3: ✅ 144 test rows, Run: 03-17


2025-05-30 15:46:21,733 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:21,813 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:21,815 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:21,892 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:21,892 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:21,892 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:21,900 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:21,934 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:21,934 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:21,942 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 4: ✅ 144 test rows, Run: 03-18


2025-05-30 15:46:22,383 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:46:22,383 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:22,523 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:22,532 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:22,607 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:22,607 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:22,616 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:22,624 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:22,674 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:22,674 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:22,674 - buil

Day 5: ✅ 144 test rows, Run: 03-19


2025-05-30 15:46:23,073 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:23,133 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:23,141 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:23,192 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:23,201 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:23,201 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:23,201 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:23,232 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:23,241 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:23,241 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 6: ✅ 144 test rows, Run: 03-20


2025-05-30 15:46:23,650 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:46:23,650 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:23,718 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:23,718 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:23,817 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:23,817 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:23,825 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:23,833 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:23,909 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:23,909 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:23,916 - buil

Day 7: ✅ 144 test rows, Run: 03-21


2025-05-30 15:46:24,293 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:24,367 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:24,367 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:24,434 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:24,435 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:24,435 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:24,443 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:24,476 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:24,476 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:24,476 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 8: ✅ 144 test rows, Run: 03-22


2025-05-30 15:46:24,868 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:24,935 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:24,935 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:25,001 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:25,001 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:25,001 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:25,009 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:25,044 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:25,044 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:25,044 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 9: ✅ 144 test rows, Run: 03-23


2025-05-30 15:46:25,434 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:25,542 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:25,544 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:25,606 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:25,606 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:25,606 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:25,617 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:25,667 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:25,668 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:25,668 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 10: ✅ 144 test rows, Run: 03-24


2025-05-30 15:46:26,072 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:26,160 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:26,182 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:26,284 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:26,285 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:26,285 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:26,285 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:26,341 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:26,348 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:26,351 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 11: ✅ 144 test rows, Run: 03-25


2025-05-30 15:46:26,703 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:26,786 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:26,786 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:26,869 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:26,869 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:26,869 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:26,877 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:26,921 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:26,921 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:26,921 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 12: ✅ 144 test rows, Run: 03-26


2025-05-30 15:46:27,337 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:46:27,337 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:27,419 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:27,419 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:27,503 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:27,503 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:27,503 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:27,511 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:27,578 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:27,578 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:27,588 - buil

Day 13: ✅ 144 test rows, Run: 03-27


2025-05-30 15:46:27,953 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:28,037 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:28,037 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:28,153 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:28,162 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:28,162 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:28,170 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:28,209 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:28,211 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:28,211 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 14: ✅ 144 test rows, Run: 03-28


2025-05-30 15:46:28,625 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:46:28,625 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:28,745 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:28,745 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:28,846 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:28,848 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:28,848 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:28,856 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:28,906 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:28,906 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:28,906 - buil

Day 15: ✅ 144 test rows, Run: 03-29


2025-05-30 15:46:29,326 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:46:29,331 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:29,444 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:29,445 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:29,521 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:29,529 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:29,529 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:29,538 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:29,598 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:29,598 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:29,605 - buil

Day 16: ✅ 144 test rows, Run: 03-30


2025-05-30 15:46:29,972 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:30,097 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:30,097 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:30,173 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:30,173 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:30,173 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:30,173 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:30,213 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:30,213 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:30,222 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 17: ✅ 144 test rows, Run: 03-31


2025-05-30 15:46:30,648 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:30,716 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:30,718 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:30,781 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:30,781 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:30,781 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:30,790 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:30,823 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:30,831 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:30,831 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 18: ✅ 144 test rows, Run: 04-01


2025-05-30 15:46:31,306 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:31,373 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:31,373 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:31,449 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:31,457 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:31,457 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:31,465 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:31,499 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:31,499 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:31,499 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 19: ✅ 144 test rows, Run: 04-02


2025-05-30 15:46:31,874 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:31,974 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:31,974 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:32,041 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:32,041 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:32,041 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:32,049 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:32,082 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:32,082 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:32,091 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 20: ✅ 144 test rows, Run: 04-03


2025-05-30 15:46:32,508 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:46:32,508 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:32,575 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:32,575 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:32,674 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:32,682 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:32,682 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:32,691 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:32,741 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:32,741 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:32,741 - buil

Day 21: ✅ 144 test rows, Run: 04-04


2025-05-30 15:46:33,251 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:46:33,260 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:33,343 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:33,347 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:33,441 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:33,441 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:33,441 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:33,449 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:33,493 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:33,498 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:33,498 - buil

Day 22: ✅ 144 test rows, Run: 04-05


2025-05-30 15:46:33,893 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:46:33,893 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:33,977 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:33,985 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:34,060 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:34,060 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:34,060 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:34,076 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:34,117 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:34,127 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:34,127 - buil

Day 23: ✅ 144 test rows, Run: 04-06


2025-05-30 15:46:34,546 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:34,692 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:34,692 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:34,822 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:34,831 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:34,831 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:34,839 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:34,893 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:34,893 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:34,902 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 24: ✅ 144 test rows, Run: 04-07


2025-05-30 15:46:35,355 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:35,476 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:35,485 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:35,625 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:35,626 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:35,626 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:35,635 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:35,694 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:35,702 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:35,710 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 25: ✅ 144 test rows, Run: 04-08


2025-05-30 15:46:36,127 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:36,261 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:36,261 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:36,372 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:36,377 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:36,379 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:36,386 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:36,432 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:36,433 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:36,435 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 26: ✅ 144 test rows, Run: 04-09


2025-05-30 15:46:36,860 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:36,978 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:36,978 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:37,078 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:37,082 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:37,082 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:37,091 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:37,121 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:37,122 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:37,125 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 27: ✅ 144 test rows, Run: 04-10


2025-05-30 15:46:37,596 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:46:37,596 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:37,695 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:37,697 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:37,770 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:37,770 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:37,770 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:37,783 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:37,828 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:37,828 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:37,836 - buil

Day 28: ✅ 144 test rows, Run: 04-11


2025-05-30 15:46:38,273 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:46:38,279 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:38,421 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:38,421 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:38,540 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:38,546 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:38,546 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:38,554 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:38,622 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:38,622 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:38,630 - buil

Day 29: ✅ 144 test rows, Run: 04-12


2025-05-30 15:46:39,028 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:46:39,171 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:46:39,171 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:46:39,305 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:46:39,314 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:46:39,314 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:46:39,330 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:46:39,396 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:46:39,396 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:46:39,405 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 30: ✅ 144 test rows, Run: 04-13

📊 OVERALL RMSE - XGBoost Model
Successful runs: 30/30
 iteration   run_date  valid_predictions  rmse
         1 2025-03-15                144  0.05
         2 2025-03-16                144  0.05
         3 2025-03-17                144  0.03
         4 2025-03-18                144  0.04
         5 2025-03-19                144  0.04
         6 2025-03-20                144  0.04
         7 2025-03-21                144  0.04
         8 2025-03-22                144  0.06
         9 2025-03-23                144  0.05
        10 2025-03-24                144  0.04
        11 2025-03-25                144  0.05
        12 2025-03-26                144  0.05
        13 2025-03-27                144  0.06
        14 2025-03-28                144  0.05
        15 2025-03-29                144  0.06
        16 2025-03-30                144  0.05
        17 2025-03-31                144  0.06
        18 2025-04-01                144  0.04
        19 2025-

In [2]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error
import sys
from pathlib import Path

# Make build_training_set importable: climb from the working directory
# towards the filesystem root until a folder named "ENEXIS" is reached.
# If no such folder exists, the walk simply ends at the filesystem root.
current_dir = Path.cwd()
for candidate in (current_dir, *current_dir.parents):
    current_dir = candidate
    if candidate.name == "ENEXIS":
        break
project_root = current_dir
utils_path = project_root.joinpath("src", "utils")
sys.path.append(str(utils_path))
from build_training_set import build_training_set

# Rolling backtest of an XGBoost price model.
#
# For each of N_RUNS consecutive days the training window and the run date are
# shifted forward by one day, a fresh model is fitted on the rows up to and
# including the run date, and the RMSE is computed on the rows that lie more
# than SKIP_HOURS after the run date. Per-run results are collected in
# `rmse_results` and summarised at the end.

# Define feature columns and target
FEATURES = ['Load', 'shortwave_radiation', 'temperature_2m',
            'direct_normal_irradiance', 'diffuse_radiation',
            'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst',
            'yearday_sin', 'wind_speed_10m', 'is_non_working_day',
            'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin',
            'hour_sin', 'weekday_cos']
TRAIN_FEATURES = FEATURES
target = 'Price'

# Initial training window (shifted by one day per iteration)
base_start = "2025-01-01 00:00:00"
base_end = "2025-03-14 23:00:00"
base_run = "2025-03-15 00:00:00"

# Number of rolling daily runs; used by both the loop and the summary line
N_RUNS = 30
# Hours directly after the run date that are excluded from scoring
SKIP_HOURS = 24

rmse_results = []

print("🔍 Testing XGBoost Model - RMSE per forecast day")
print("=" * 60)

for i in range(N_RUNS):
    day_offset = pd.Timedelta(days=i)
    start = pd.Timestamp(base_start) + day_offset
    end = pd.Timestamp(base_end) + day_offset
    run_date = pd.Timestamp(base_run) + day_offset

    # Per-iteration boundary: one failing day is reported and must not abort
    # the remaining runs.
    try:
        df = build_training_set(
            train_start=start.strftime("%Y-%m-%d %H:%M:%S"),
            train_end=end.strftime("%Y-%m-%d %H:%M:%S"),
            run_date=run_date.strftime("%Y-%m-%d %H:%M:%S")
        )

        if df is None or df.empty:
            print(f"Day {i+1}: ❌ No training data returned")
            continue

        df['target_datetime'] = pd.to_datetime(df['target_datetime'], utc=True)
        df = df.sort_values('target_datetime').set_index('target_datetime')

        # The index is tz-aware (UTC), so the naive run date must be localized
        # before it can be compared against it.
        run_date_utc = run_date.tz_localize("UTC")

        # Split into training and testing sets
        train_data = df[df.index <= run_date_utc]
        test_data = df[df.index > run_date_utc]

        # Drop any missing data in training
        train_data = train_data.dropna(subset=TRAIN_FEATURES + [target])

        if test_data.empty or train_data.empty:
            print(f"Day {i+1}: ❌ Not enough data for training or testing")
            continue

        # Train model
        X_train = train_data[TRAIN_FEATURES]
        y_train = train_data[target]
        model = xgb.XGBRegressor(n_estimators=100, random_state=42)
        model.fit(X_train, y_train)

        # Skip the first SKIP_HOURS hours after the run date. The cutoff is
        # time-based rather than positional so that gaps in the hourly index
        # cannot shift the evaluation window.
        score_from = run_date_utc + pd.Timedelta(hours=SKIP_HOURS)
        scored = test_data[test_data.index > score_from]

        # Rows without an actual target cannot be scored; dropping them keeps
        # mean_squared_error from raising on NaN and makes 'valid_predictions'
        # count only rows that really contribute to the RMSE.
        scored = scored[scored[target].notna()]

        # reindex guarantees every training feature is present (absent ones
        # become NaN columns) without writing into a slice of `df`.
        X_test = scored.reindex(columns=TRAIN_FEATURES)
        y_test = scored[target]

        # Only predict when there is test data left
        if len(X_test) > 0:
            y_pred = model.predict(X_test)
            rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        else:
            print("Niet genoeg testdata na lag van 24 uur.")
            rmse = np.nan

        rmse_results.append({
            'iteration': i + 1,
            'run_date': run_date.strftime('%Y-%m-%d'),
            'valid_predictions': len(X_test),
            'rmse': rmse
        })

        print(f"Day {i+1}: ✅ {len(X_test)} test rows, Run: {run_date.strftime('%m-%d')}")

    except Exception as e:
        print(f"Day {i+1}: ❌ Error: {e}")

# Create results dataframe
if rmse_results:
    rmse_df = pd.DataFrame(rmse_results)

    print("\n📊 OVERALL RMSE - XGBoost Model")
    print("=" * 80)
    # A run counts as successful when it produced a non-NaN RMSE
    print(f"Successful runs: {rmse_df['rmse'].notna().sum()}/{N_RUNS}")

    print(rmse_df[['iteration', 'run_date', 'valid_predictions', 'rmse']].round(2).to_string(index=False))

    print("\n📈 SUMMARY STATISTICS")
    print("-" * 40)
    print(rmse_df['rmse'].describe().round(2))

    print("\n📊 AVERAGE OVERALL RMSE")
    print("-" * 40)
    print(f"Mean RMSE: {rmse_df['rmse'].mean():.4f}")
    print(f"Stddev RMSE: {rmse_df['rmse'].std():.4f}")

else:
    print("❌ No runs completed successfully")

2025-05-30 15:48:27,620 - build_training_set - INFO - 📅 Loading additional historical data until 2025-03-15 00:00:00+00:00 for lagging support
2025-05-30 15:48:27,629 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-30 15:48:27,631 - build_training_set - INFO - 🧠 Actuals van 2025-01-01 00:00:00+00:00 t/m 2025-03-14 23:00:00+00:00 (extended to 2025-03-15 00:00:00+00:00 for lagging)
2025-05-30 15:48:27,633 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-15 00:00:00+00:00, normalized to 2025-03-15 00:00:00+00:00 for DB lookup, target range: 2025-03-15 00:00:00+00:00 → 2025-03-22 00:00:00+00:00
2025-05-30 15:48:27,634 - build_training_set - INFO - 📥 Loading actuals with selected columns only...
2025-05-30 15:48:27,645 - build_training_set - INFO - 📋 Requested columns found: 21/21
2025-05-30 15:48:27,648 - build_training_set - INFO - 📋 Using columns: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'di

🔍 Testing XGBoost Model - RMSE per forecast day


2025-05-30 15:48:27,812 - build_training_set - INFO - 🔧 Missing columns in predictions: ['Price', 'Load', 'Flow_NO', 'Flow_GB']
2025-05-30 15:48:27,812 - build_training_set - INFO - 📊 Applying 168-hour lag for missing columns (excluding target variables)...
2025-05-30 15:48:27,820 - build_training_set - INFO -    🎯 Column 'Price' is target variable - filled with NaN (not lagged)
2025-05-30 15:48:27,820 - build_training_set - INFO -    🕐 Lagging column 'Load' by 168 hours
2025-05-30 15:48:27,913 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:48:27,913 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:27,997 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:28,005 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:28,113 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:28,121 - build_training_set - I

Day 1: ✅ 144 test rows, Run: 03-15


2025-05-30 15:48:29,003 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:48:29,003 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:29,100 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:29,100 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:29,297 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:29,299 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:29,301 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:29,306 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:29,354 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:29,354 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:29,354 - buil

Day 2: ✅ 144 test rows, Run: 03-16


2025-05-30 15:48:29,822 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:29,947 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:29,947 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:30,099 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:30,107 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:30,107 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:30,115 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:30,189 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:30,197 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:30,201 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 3: ✅ 144 test rows, Run: 03-17


2025-05-30 15:48:30,737 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:48:30,737 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:30,865 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:30,868 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:30,982 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:30,982 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:30,982 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:30,990 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:31,064 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:31,073 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:31,073 - buil

Day 4: ✅ 144 test rows, Run: 03-18


2025-05-30 15:48:31,615 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:31,689 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:31,689 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:31,797 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:31,806 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:31,806 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:31,815 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:31,882 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:31,882 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:31,882 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 5: ✅ 144 test rows, Run: 03-19


2025-05-30 15:48:32,432 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:48:32,432 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:32,540 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:32,550 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:32,656 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:32,656 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:32,663 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:32,668 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:32,732 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:32,735 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:32,735 - buil

Day 6: ✅ 144 test rows, Run: 03-20


2025-05-30 15:48:33,290 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:48:33,298 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:33,433 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:33,441 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:33,508 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:33,516 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:33,516 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:33,524 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:33,584 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:33,592 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:33,592 - buil

Day 7: ✅ 144 test rows, Run: 03-21


2025-05-30 15:48:34,199 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:48:34,208 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:34,326 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:34,326 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:34,383 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:34,383 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:34,392 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:34,393 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:34,460 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:34,468 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:34,468 - buil

Day 8: ✅ 144 test rows, Run: 03-22


2025-05-30 15:48:35,001 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:35,100 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:35,102 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:35,239 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:35,239 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:35,239 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:35,251 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:35,309 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:35,309 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:35,318 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 9: ✅ 144 test rows, Run: 03-23


2025-05-30 15:48:35,895 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:48:35,895 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:36,027 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:36,027 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:36,144 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:36,154 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:36,154 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:36,169 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:36,236 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:36,236 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:36,245 - buil

Day 10: ✅ 144 test rows, Run: 03-24


2025-05-30 15:48:36,764 - build_training_set - INFO - ✅ Predictions loaded: 169 rows with 17 columns
2025-05-30 15:48:36,768 - build_training_set - INFO - 🔧 Missing columns in predictions: ['Price', 'Load', 'Flow_NO', 'Flow_GB']
2025-05-30 15:48:36,769 - build_training_set - INFO - 📊 Applying 168-hour lag for missing columns (excluding target variables)...
2025-05-30 15:48:36,769 - build_training_set - INFO -    🎯 Column 'Price' is target variable - filled with NaN (not lagged)
2025-05-30 15:48:36,769 - build_training_set - INFO -    🕐 Lagging column 'Load' by 168 hours
2025-05-30 15:48:36,875 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:48:36,879 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:36,944 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:36,944 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:37,035 - build_training_s

Day 11: ✅ 144 test rows, Run: 03-25


2025-05-30 15:48:37,686 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:37,820 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:37,820 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:37,971 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:37,971 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:37,971 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:37,986 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:38,036 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:38,044 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:38,044 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 12: ✅ 144 test rows, Run: 03-26


2025-05-30 15:48:38,553 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:38,628 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:38,628 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:38,753 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:38,753 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:38,753 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:38,761 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:38,828 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:38,839 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:38,839 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 13: ✅ 144 test rows, Run: 03-27


2025-05-30 15:48:39,370 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:48:39,370 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:39,489 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:39,489 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:39,586 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:39,587 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:39,587 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:39,596 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:39,681 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:39,687 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:39,688 - buil

Day 14: ✅ 144 test rows, Run: 03-28


2025-05-30 15:48:40,224 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:40,338 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:40,338 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:40,449 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:40,455 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:40,457 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:40,465 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:40,491 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:40,499 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:40,499 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 15: ✅ 144 test rows, Run: 03-29


2025-05-30 15:48:41,023 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:41,176 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:41,184 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:41,276 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:41,276 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:41,276 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:41,284 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:41,323 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:41,331 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:41,331 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 16: ✅ 144 test rows, Run: 03-30


2025-05-30 15:48:41,850 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:48:41,850 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:41,957 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:41,957 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:42,074 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:42,082 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:42,083 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:42,091 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:42,157 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:42,157 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:42,166 - buil

Day 17: ✅ 144 test rows, Run: 03-31


2025-05-30 15:48:42,707 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:42,791 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:42,791 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:42,915 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:42,915 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:42,915 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:42,924 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:42,984 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:42,984 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:42,991 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 18: ✅ 144 test rows, Run: 04-01


2025-05-30 15:48:43,541 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:43,682 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:43,684 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:43,816 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:43,816 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:43,825 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:43,837 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:43,909 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:43,909 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:43,917 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 19: ✅ 144 test rows, Run: 04-02


2025-05-30 15:48:44,451 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:48:44,451 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:44,576 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:44,576 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:44,709 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:44,718 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:44,718 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:44,725 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:44,809 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:44,809 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:44,809 - buil

Day 20: ✅ 144 test rows, Run: 04-03


2025-05-30 15:48:45,360 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:48:45,360 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:45,481 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:45,488 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:45,644 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:45,666 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:45,670 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:45,677 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:45,716 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:45,716 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:45,716 - buil

Day 21: ✅ 144 test rows, Run: 04-04


2025-05-30 15:48:46,295 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:48:46,295 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:46,435 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:46,435 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:46,591 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:46,594 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:46,594 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:46,603 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:46,680 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:46,680 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:46,689 - buil

Day 22: ✅ 144 test rows, Run: 04-05


2025-05-30 15:48:47,260 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:48:47,262 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:47,421 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:47,428 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:47,571 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:47,581 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:47,581 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:47,581 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:47,627 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:47,627 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:47,635 - buil

Day 23: ✅ 144 test rows, Run: 04-06


2025-05-30 15:48:48,196 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:48,316 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:48,332 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:48,453 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:48,453 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:48,461 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:48,463 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:48,547 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:48,549 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:48,549 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 24: ✅ 144 test rows, Run: 04-07


2025-05-30 15:48:49,062 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:49,146 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:49,146 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:49,260 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:49,264 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:49,264 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:49,272 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:49,339 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:49,339 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:49,347 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 25: ✅ 144 test rows, Run: 04-08


2025-05-30 15:48:49,963 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:48:49,965 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:50,032 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:50,032 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:50,148 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:50,148 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:50,148 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:50,156 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:50,214 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:50,214 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:50,222 - buil

Day 26: ✅ 144 test rows, Run: 04-09


2025-05-30 15:48:50,788 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:50,882 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:50,882 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:50,997 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:51,006 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:51,006 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:51,014 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:51,089 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:51,099 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:51,102 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 27: ✅ 144 test rows, Run: 04-10


2025-05-30 15:48:51,632 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:48:51,640 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:51,773 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:51,773 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:51,898 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:51,898 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:51,907 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:51,916 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:51,974 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:51,984 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:51,984 - buil

Day 28: ✅ 144 test rows, Run: 04-11


2025-05-30 15:48:52,550 - build_training_set - INFO -    ✅ Added Load: 169/169 values found
2025-05-30 15:48:52,550 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:52,682 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:52,682 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:52,808 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:52,817 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:52,817 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:52,826 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:52,897 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:52,897 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:52,905 - buil

Day 29: ✅ 144 test rows, Run: 04-12


2025-05-30 15:48:53,453 - build_training_set - INFO -    🕐 Lagging column 'Flow_NO' by 168 hours
2025-05-30 15:48:53,575 - build_training_set - INFO -    ✅ Added Flow_NO: 169/169 values found
2025-05-30 15:48:53,575 - build_training_set - INFO -    🕐 Lagging column 'Flow_GB' by 168 hours
2025-05-30 15:48:53,710 - build_training_set - INFO -    ✅ Added Flow_GB: 169/169 values found
2025-05-30 15:48:53,718 - build_training_set - INFO - 🔄 Combining actuals and predictions...
2025-05-30 15:48:53,718 - build_training_set - INFO - 💰 Retrieving actual prices for forecast period...
2025-05-30 15:48:53,726 - build_training_set - INFO - 📊 Found 169 actual prices for forecast period
2025-05-30 15:48:53,784 - build_training_set - INFO - ✅ Filled 169/169 prediction prices with actual values
2025-05-30 15:48:53,784 - build_training_set - INFO - 💰 Price coverage: 169/169 (100.0%)
2025-05-30 15:48:53,784 - build_training_set - INFO - ✅ Combined dataset: 1921 rows (1752 actuals + 169 predictions)
2025-

Day 30: ✅ 144 test rows, Run: 04-13

📊 OVERALL RMSE - XGBoost Model
Successful runs: 30/30
 iteration   run_date  valid_predictions  rmse
         1 2025-03-15                144  0.04
         2 2025-03-16                144  0.04
         3 2025-03-17                144  0.04
         4 2025-03-18                144  0.04
         5 2025-03-19                144  0.04
         6 2025-03-20                144  0.04
         7 2025-03-21                144  0.04
         8 2025-03-22                144  0.03
         9 2025-03-23                144  0.03
        10 2025-03-24                144  0.04
        11 2025-03-25                144  0.04
        12 2025-03-26                144  0.04
        13 2025-03-27                144  0.05
        14 2025-03-28                144  0.05
        15 2025-03-29                144  0.05
        16 2025-03-30                144  0.04
        17 2025-03-31                144  0.04
        18 2025-04-01                144  0.04
        19 2025-