In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
color_pal = sns.color_palette()
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


In [15]:
import sqlite3

conn = sqlite3.connect('../data/WARP.db')
df = pd.read_sql_query("SELECT * FROM master_warp", conn)
conn.close()
print(df.columns)

Index(['Price', 'target_datetime', 'Load', 'shortwave_radiation',
       'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation',
       'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin',
       'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover',
       'weekday_sin', 'hour_sin', 'weekday_cos'],
      dtype='object')


In [19]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from datetime import timedelta

# Feature and target setup
features = [
    'Flow_NO','is_dst', 'hour_cos', 'hour_sin','month',
    'Load','is_non_working_day','shortwave_radiation', 'temperature_2m' 
] # excluding  'direct_normal_irradiance', 'diffuse_radiation','yearday_cos', 'yearday_sin', 'cloud_cover', 'Flow_GB',  'is_weekend',
target = 'Price'


# Safe datetime handling
if 'target_datetime' not in df.columns:
    print("'target_datetime' column not found in columns. Sorting by index instead.")
    df = df.sort_index()
else:
    df['target_datetime'] = pd.to_datetime(df['target_datetime'])
    df = df.set_index('target_datetime')


# Forecast settings
start_date = pd.Timestamp("2025-03-13 12:00", tz='UTC')
end_date = pd.Timestamp("2025-05-14 12:00", tz='UTC')
lag = timedelta(hours=36)
forecast_horizon = timedelta(hours=144)

# Store RMSEs
rmses = []

current_time = start_date
while current_time <= end_date:
    train_data = df[df.index < current_time]
    test_start = current_time + lag # check current time  
    test_end = test_start + forecast_horizon
    test_data = df[(df.index >= test_start) & (df.index < test_end)]

    if test_data.empty:
        print(f"No test data for forecast starting at {current_time}")
        current_time += timedelta(days=1)
        continue

    X_train = train_data[features]
    y_train = train_data[target]
    X_test = test_data[features]
    y_test = test_data[target]

    # Train and predict
    model = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Evaluate
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    rmses.append(rmse)

    print(f"Forecast origin: {current_time}, Predicting {test_start} to {test_end}, RMSE: {rmse:.3f}")

    current_time += timedelta(days=1)

# Summary
avg_rmse = np.mean(rmses)
print(f"\nAverage RMSE over {len(rmses)} runs: {avg_rmse:.5f}")

# Calculate average RMSE over the first 30 runs
if len(rmses) >= 30:
    avg_rmse_30 = np.mean(rmses[:30])
    print(f"Average RMSE over first 30 runs: {avg_rmse_30:.5f}")
else:
    print("Less than 30 runs available to calculate average RMSE.")

'target_datetime' column not found in columns. Sorting by index instead.
Forecast origin: 2025-03-13 12:00:00+00:00, Predicting 2025-03-15 00:00:00+00:00 to 2025-03-21 00:00:00+00:00, RMSE: 0.021
Forecast origin: 2025-03-14 12:00:00+00:00, Predicting 2025-03-16 00:00:00+00:00 to 2025-03-22 00:00:00+00:00, RMSE: 0.026
Forecast origin: 2025-03-15 12:00:00+00:00, Predicting 2025-03-17 00:00:00+00:00 to 2025-03-23 00:00:00+00:00, RMSE: 0.031
Forecast origin: 2025-03-16 12:00:00+00:00, Predicting 2025-03-18 00:00:00+00:00 to 2025-03-24 00:00:00+00:00, RMSE: 0.031
Forecast origin: 2025-03-17 12:00:00+00:00, Predicting 2025-03-19 00:00:00+00:00 to 2025-03-25 00:00:00+00:00, RMSE: 0.032
Forecast origin: 2025-03-18 12:00:00+00:00, Predicting 2025-03-20 00:00:00+00:00 to 2025-03-26 00:00:00+00:00, RMSE: 0.031
Forecast origin: 2025-03-19 12:00:00+00:00, Predicting 2025-03-21 00:00:00+00:00 to 2025-03-27 00:00:00+00:00, RMSE: 0.030
Forecast origin: 2025-03-20 12:00:00+00:00, Predicting 2025-03-22 

In [17]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error
# Dynamic path setup
import sys
from pathlib import Path

# Find project root dynamically
current_dir = Path.cwd()
while current_dir.name != "ENEXIS" and current_dir.parent != current_dir:
    current_dir = current_dir.parent
project_root = current_dir

# Add utils to path
utils_path = project_root / "src" / "utils"
sys.path.append(str(utils_path))
from build_training_set import build_training_set

# Define feature columns and target
COMMON_FEATURES = ['is_dst', 'hour_cos', 'hour_sin','month','is_non_working_day','shortwave_radiation', 'temperature_2m'
    ]
# list of all available features 'cloud_cover', 'direct_normal_irradiance', 'diffuse_radiation','hour_cos', 'hour_sin', 'is_dst', 'is_non_working_day','is_weekend', 'month', 'shortwave_radiation','temperature_2m', 'yearday_cos', 'yearday_sin'
TRAIN_ONLY_FEATURES = ['Load','Flow_NO']  # <- Only used if known ex post

TRAIN_FEATURES = COMMON_FEATURES + TRAIN_ONLY_FEATURES
TEST_FEATURES = COMMON_FEATURES


target = 'Price'

# Initial training window
base_start = "2025-01-01 00:00:00"
base_end = "2025-03-14 23:00:00"
base_run = "2025-03-15 00:00:00"

rmse_results = []

print("🔍 Testing XGBoost Model - RMSE per forecast day")
print("=" * 60)

for i in range(30):
    start = pd.Timestamp(base_start) + pd.Timedelta(days=i)
    end = pd.Timestamp(base_end) + pd.Timedelta(days=i)
    run_date = pd.Timestamp(base_run) + pd.Timedelta(days=i)

    try:
        df = build_training_set(
            train_start=start.strftime("%Y-%m-%d %H:%M:%S"),
            train_end=end.strftime("%Y-%m-%d %H:%M:%S"),
            run_date=run_date.strftime("%Y-%m-%d %H:%M:%S")
        )

        if df is None or df.empty:
            print(f"Day {i+1}: ❌ No training data returned")
            continue

        df['target_datetime'] = pd.to_datetime(df['target_datetime'], utc=True)
        df = df.sort_values('target_datetime').set_index('target_datetime')

        # Zorg dat run_date ook in UTC is
        run_date_utc = run_date.tz_localize("UTC")

        # Split into training and testing sets
        train_data = df[df.index <= run_date_utc]
        test_data = df[df.index > run_date_utc]

        # Drop any missing data in training
        train_data = train_data.dropna(subset=features + [target])

        if test_data.empty or train_data.empty:
            print(f"Day {i+1}: ❌ Not enough data for training or testing")
            continue

        # Train model
        X_train = train_data[TRAIN_FEATURES]
        y_train = train_data[target]

        # Aanvullen met NaN-kolommen waar nodig
        for col in TRAIN_FEATURES:
            if col not in test_data.columns:
                test_data[col] = np.nan

        X_test = test_data[TRAIN_FEATURES]
        y_test = test_data[target]

        model = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        rmse_results.append({
            'iteration': i + 1,
            'run_date': run_date.strftime('%Y-%m-%d'),
            'valid_predictions': len(test_data),
            'rmse': rmse
        })

        print(f"Day {i+1}: ✅ {len(test_data)} test rows, Run: {run_date.strftime('%m-%d')}")

    except Exception as e:
        print(f"Day {i+1}: ❌ Error: {e}")

# Create results dataframe
if rmse_results:
    rmse_df = pd.DataFrame(rmse_results)

    print(f"\n📊 OVERALL RMSE - XGBoost Model")
    print("=" * 80)
    print(f"Successful runs: {len(rmse_df)}/30")

    print(rmse_df[['iteration', 'run_date', 'valid_predictions', 'rmse']].round(2).to_string(index=False))

    print(f"\n📈 SUMMARY STATISTICS")
    print("-" * 40)
    print(rmse_df['rmse'].describe().round(2))

    print(f"\n📊 AVERAGE OVERALL RMSE")
    print("-" * 40)
    print(f"Mean RMSE: {rmse_df['rmse'].mean():.2f}")
else:
    print("❌ No runs completed successfully")

2025-05-27 11:16:48,628 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:48,629 - build_training_set - INFO - 🧠 Actuals van 2025-01-01 00:00:00+00:00 t/m 2025-03-14 23:00:00+00:00
2025-05-27 11:16:48,629 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-15 00:00:00+00:00, target range: 2025-03-15 00:00:00+00:00 → 2025-03-21 23:00:00+00:00
2025-05-27 11:16:48,680 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen


🔍 Testing XGBoost Model - RMSE per forecast day


2025-05-27 11:16:48,841 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:48,856 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:48,856 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:16:48,857 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:16:48,869 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 1: ✅ 167 test rows, Run: 03-15


2025-05-27 11:16:49,226 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:49,235 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:49,235 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:16:49,235 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:16:49,249 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 2: ✅ 167 test rows, Run: 03-16


2025-05-27 11:16:49,684 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:49,684 - build_training_set - INFO - 🧠 Actuals van 2025-01-04 00:00:00+00:00 t/m 2025-03-17 23:00:00+00:00
2025-05-27 11:16:49,684 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-18 00:00:00+00:00, target range: 2025-03-18 00:00:00+00:00 → 2025-03-24 23:00:00+00:00
2025-05-27 11:16:49,701 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:49,851 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:49,861 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:49,861 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 3: ✅ 167 test rows, Run: 03-17


2025-05-27 11:16:49,997 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:49,997 - build_training_set - INFO - 🧠 Actuals van 2025-01-05 00:00:00+00:00 t/m 2025-03-18 23:00:00+00:00
2025-05-27 11:16:49,997 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-19 00:00:00+00:00, target range: 2025-03-19 00:00:00+00:00 → 2025-03-25 23:00:00+00:00
2025-05-27 11:16:50,016 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:50,161 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:50,170 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:50,170 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 4: ✅ 167 test rows, Run: 03-18


2025-05-27 11:16:50,345 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:50,345 - build_training_set - INFO - 🧠 Actuals van 2025-01-06 00:00:00+00:00 t/m 2025-03-19 23:00:00+00:00
2025-05-27 11:16:50,346 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-20 00:00:00+00:00, target range: 2025-03-20 00:00:00+00:00 → 2025-03-26 23:00:00+00:00
2025-05-27 11:16:50,364 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:50,509 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:50,518 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:50,518 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 5: ✅ 167 test rows, Run: 03-19


2025-05-27 11:16:50,643 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:50,644 - build_training_set - INFO - 🧠 Actuals van 2025-01-07 00:00:00+00:00 t/m 2025-03-20 23:00:00+00:00
2025-05-27 11:16:50,644 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-21 00:00:00+00:00, target range: 2025-03-21 00:00:00+00:00 → 2025-03-27 23:00:00+00:00
2025-05-27 11:16:50,661 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:50,810 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:50,820 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:50,820 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 6: ✅ 167 test rows, Run: 03-20


2025-05-27 11:16:50,951 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:50,951 - build_training_set - INFO - 🧠 Actuals van 2025-01-08 00:00:00+00:00 t/m 2025-03-21 23:00:00+00:00
2025-05-27 11:16:50,951 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-22 00:00:00+00:00, target range: 2025-03-22 00:00:00+00:00 → 2025-03-28 23:00:00+00:00
2025-05-27 11:16:50,969 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:51,112 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:51,121 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:51,121 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 7: ✅ 167 test rows, Run: 03-21


2025-05-27 11:16:51,254 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:51,254 - build_training_set - INFO - 🧠 Actuals van 2025-01-09 00:00:00+00:00 t/m 2025-03-22 23:00:00+00:00
2025-05-27 11:16:51,254 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-23 00:00:00+00:00, target range: 2025-03-23 00:00:00+00:00 → 2025-03-29 23:00:00+00:00
2025-05-27 11:16:51,285 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen


Day 8: ✅ 167 test rows, Run: 03-22


2025-05-27 11:16:51,466 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:51,477 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:51,478 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:16:51,478 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:16:51,491 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 9: ✅ 167 test rows, Run: 03-23


2025-05-27 11:16:51,900 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:51,900 - build_training_set - INFO - 🧠 Actuals van 2025-01-11 00:00:00+00:00 t/m 2025-03-24 23:00:00+00:00
2025-05-27 11:16:51,900 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-25 00:00:00+00:00, target range: 2025-03-25 00:00:00+00:00 → 2025-03-31 23:00:00+00:00
2025-05-27 11:16:51,917 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:52,061 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:52,070 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:52,071 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 10: ✅ 167 test rows, Run: 03-24


2025-05-27 11:16:52,197 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:52,197 - build_training_set - INFO - 🧠 Actuals van 2025-01-12 00:00:00+00:00 t/m 2025-03-25 23:00:00+00:00
2025-05-27 11:16:52,197 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-26 00:00:00+00:00, target range: 2025-03-26 00:00:00+00:00 → 2025-04-01 23:00:00+00:00
2025-05-27 11:16:52,215 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:52,357 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:52,366 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:52,366 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 11: ✅ 167 test rows, Run: 03-25


2025-05-27 11:16:52,538 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:52,539 - build_training_set - INFO - 🧠 Actuals van 2025-01-13 00:00:00+00:00 t/m 2025-03-26 23:00:00+00:00
2025-05-27 11:16:52,539 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-27 00:00:00+00:00, target range: 2025-03-27 00:00:00+00:00 → 2025-04-02 23:00:00+00:00
2025-05-27 11:16:52,555 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:52,696 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:52,704 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:52,704 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 12: ✅ 167 test rows, Run: 03-26


2025-05-27 11:16:52,829 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:52,830 - build_training_set - INFO - 🧠 Actuals van 2025-01-14 00:00:00+00:00 t/m 2025-03-27 23:00:00+00:00
2025-05-27 11:16:52,830 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-28 00:00:00+00:00, target range: 2025-03-28 00:00:00+00:00 → 2025-04-03 23:00:00+00:00
2025-05-27 11:16:52,847 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:52,987 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:52,996 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:52,996 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 13: ✅ 167 test rows, Run: 03-27


2025-05-27 11:16:53,127 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:53,127 - build_training_set - INFO - 🧠 Actuals van 2025-01-15 00:00:00+00:00 t/m 2025-03-28 23:00:00+00:00
2025-05-27 11:16:53,127 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-29 00:00:00+00:00, target range: 2025-03-29 00:00:00+00:00 → 2025-04-04 23:00:00+00:00
2025-05-27 11:16:53,145 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:53,288 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:53,298 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:53,298 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 14: ✅ 167 test rows, Run: 03-28


2025-05-27 11:16:53,425 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:53,426 - build_training_set - INFO - 🧠 Actuals van 2025-01-16 00:00:00+00:00 t/m 2025-03-29 23:00:00+00:00
2025-05-27 11:16:53,426 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-30 00:00:00+00:00, target range: 2025-03-30 00:00:00+00:00 → 2025-04-05 23:00:00+00:00
2025-05-27 11:16:53,444 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:53,620 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows


Day 15: ✅ 167 test rows, Run: 03-29


  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:53,630 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:53,630 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:16:53,630 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:16:53,642 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-27 11:16:53,642 - build_training_set - INFO - 🔒 Verbinding gesloten
2025-05-27 11:16:53,761 - bui

Day 16: ✅ 167 test rows, Run: 03-30


2025-05-27 11:16:54,051 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:54,051 - build_training_set - INFO - 🧠 Actuals van 2025-01-18 00:00:00+00:00 t/m 2025-03-31 23:00:00+00:00
2025-05-27 11:16:54,052 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-01 00:00:00+00:00, target range: 2025-04-01 00:00:00+00:00 → 2025-04-07 23:00:00+00:00
2025-05-27 11:16:54,068 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:54,212 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:54,221 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:54,222 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 17: ✅ 167 test rows, Run: 03-31


2025-05-27 11:16:54,362 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:54,363 - build_training_set - INFO - 🧠 Actuals van 2025-01-19 00:00:00+00:00 t/m 2025-04-01 23:00:00+00:00
2025-05-27 11:16:54,363 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-02 00:00:00+00:00, target range: 2025-04-02 00:00:00+00:00 → 2025-04-08 23:00:00+00:00
2025-05-27 11:16:54,382 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:54,523 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:54,531 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:54,531 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 18: ✅ 167 test rows, Run: 04-01


2025-05-27 11:16:54,713 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:54,713 - build_training_set - INFO - 🧠 Actuals van 2025-01-20 00:00:00+00:00 t/m 2025-04-02 23:00:00+00:00
2025-05-27 11:16:54,713 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-03 00:00:00+00:00, target range: 2025-04-03 00:00:00+00:00 → 2025-04-09 23:00:00+00:00
2025-05-27 11:16:54,731 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:54,908 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows


Day 19: ✅ 167 test rows, Run: 04-02


  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:54,917 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:54,917 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:16:54,917 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:16:54,929 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-27 11:16:54,930 - build_training_set - INFO - 🔒 Verbinding gesloten
2025-05-27 11:16:55,045 - bui

Day 20: ✅ 167 test rows, Run: 04-03


2025-05-27 11:16:55,342 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:55,342 - build_training_set - INFO - 🧠 Actuals van 2025-01-22 00:00:00+00:00 t/m 2025-04-04 23:00:00+00:00
2025-05-27 11:16:55,342 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-05 00:00:00+00:00, target range: 2025-04-05 00:00:00+00:00 → 2025-04-11 23:00:00+00:00
2025-05-27 11:16:55,359 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:55,500 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:55,509 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:55,509 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 21: ✅ 167 test rows, Run: 04-04


2025-05-27 11:16:55,650 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:55,650 - build_training_set - INFO - 🧠 Actuals van 2025-01-23 00:00:00+00:00 t/m 2025-04-05 23:00:00+00:00
2025-05-27 11:16:55,650 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-06 00:00:00+00:00, target range: 2025-04-06 00:00:00+00:00 → 2025-04-12 23:00:00+00:00
2025-05-27 11:16:55,669 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:55,814 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:55,822 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:55,822 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 22: ✅ 167 test rows, Run: 04-05


2025-05-27 11:16:55,952 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:55,953 - build_training_set - INFO - 🧠 Actuals van 2025-01-24 00:00:00+00:00 t/m 2025-04-06 23:00:00+00:00
2025-05-27 11:16:55,953 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-07 00:00:00+00:00, target range: 2025-04-07 00:00:00+00:00 → 2025-04-13 23:00:00+00:00
2025-05-27 11:16:55,973 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:56,146 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows


Day 23: ✅ 167 test rows, Run: 04-06


  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:56,155 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:56,156 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:16:56,156 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:16:56,169 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-27 11:16:56,170 - build_training_set - INFO - 🔒 Verbinding gesloten
2025-05-27 11:16:56,288 - bui

Day 24: ✅ 167 test rows, Run: 04-07


2025-05-27 11:16:56,586 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:56,587 - build_training_set - INFO - 🧠 Actuals van 2025-01-26 00:00:00+00:00 t/m 2025-04-08 23:00:00+00:00
2025-05-27 11:16:56,587 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-09 00:00:00+00:00, target range: 2025-04-09 00:00:00+00:00 → 2025-04-15 23:00:00+00:00
2025-05-27 11:16:56,605 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:56,749 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:56,758 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:56,758 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 25: ✅ 167 test rows, Run: 04-08


2025-05-27 11:16:56,893 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:56,894 - build_training_set - INFO - 🧠 Actuals van 2025-01-27 00:00:00+00:00 t/m 2025-04-09 23:00:00+00:00
2025-05-27 11:16:56,894 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-10 00:00:00+00:00, target range: 2025-04-10 00:00:00+00:00 → 2025-04-16 23:00:00+00:00
2025-05-27 11:16:56,913 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:57,055 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:57,064 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:57,064 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 26: ✅ 167 test rows, Run: 04-09


2025-05-27 11:16:57,199 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:57,199 - build_training_set - INFO - 🧠 Actuals van 2025-01-28 00:00:00+00:00 t/m 2025-04-10 23:00:00+00:00
2025-05-27 11:16:57,200 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-11 00:00:00+00:00, target range: 2025-04-11 00:00:00+00:00 → 2025-04-17 23:00:00+00:00
2025-05-27 11:16:57,240 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:57,395 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows


Day 27: ✅ 167 test rows, Run: 04-10


  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:57,404 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:57,404 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:16:57,405 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:16:57,417 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-27 11:16:57,419 - build_training_set - INFO - 🔒 Verbinding gesloten
2025-05-27 11:16:57,537 - bui

Day 28: ✅ 167 test rows, Run: 04-11


2025-05-27 11:16:57,837 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:57,837 - build_training_set - INFO - 🧠 Actuals van 2025-01-30 00:00:00+00:00 t/m 2025-04-12 23:00:00+00:00
2025-05-27 11:16:57,838 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-13 00:00:00+00:00, target range: 2025-04-13 00:00:00+00:00 → 2025-04-19 23:00:00+00:00
2025-05-27 11:16:57,855 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:57,995 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:58,003 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:58,004 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 29: ✅ 167 test rows, Run: 04-12
Day 30: ✅ 167 test rows, Run: 04-13

📊 OVERALL RMSE - XGBoost Model
Successful runs: 30/30
 iteration   run_date  valid_predictions  rmse
         1 2025-03-15                167  0.08
         2 2025-03-16                167  0.09
         3 2025-03-17                167  0.09
         4 2025-03-18                167  0.08
         5 2025-03-19                167  0.08
         6 2025-03-20                167  0.08
         7 2025-03-21                167  0.07
         8 2025-03-22                167  0.08
         9 2025-03-23                167  0.06
        10 2025-03-24                167  0.07
        11 2025-03-25                167  0.09
        12 2025-03-26                167  0.09
        13 2025-03-27                167  0.10
        14 2025-03-28                167  0.11
        15 2025-03-29                167  0.09
        16 2025-03-30                167  0.10
        17 2025-03-31                167  0.10
        18 2025-04-01      

In [None]:
# attempt to use Featre Selection Pipeline , SHAP values mainly

import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error
# Dynamic path setup
import sys
from pathlib import Path

# Find project root dynamically
current_dir = Path.cwd()
while current_dir.name != "ENEXIS" and current_dir.parent != current_dir:
    current_dir = current_dir.parent
project_root = current_dir

# Add utils to path
utils_path = project_root / "src" / "utils"
sys.path.append(str(utils_path))
from build_training_set import build_training_set

# Define feature columns and target
COMMON_FEATURES = ['is_dst', 'hour_cos', 'hour_sin','month','is_non_working_day','shortwave_radiation', 'temperature_2m'
    ]
# list of all available features 'cloud_cover', 'direct_normal_irradiance', 'diffuse_radiation','hour_cos', 'hour_sin', 'is_dst', 'is_non_working_day','is_weekend', 'month', 'shortwave_radiation','temperature_2m', 'yearday_cos', 'yearday_sin'
TRAIN_ONLY_FEATURES = ['Load','Flow_NO']  # <- Only used if known ex post

TRAIN_FEATURES = COMMON_FEATURES + TRAIN_ONLY_FEATURES
TEST_FEATURES = COMMON_FEATURES


target = 'Price'

# Initial training window
base_start = "2025-01-01 00:00:00"
base_end = "2025-03-14 23:00:00"
base_run = "2025-03-15 00:00:00"

rmse_results = []

print("🔍 Testing XGBoost Model - RMSE per forecast day")
print("=" * 60)

for i in range(30):
    start = pd.Timestamp(base_start) + pd.Timedelta(days=i)
    end = pd.Timestamp(base_end) + pd.Timedelta(days=i)
    run_date = pd.Timestamp(base_run) + pd.Timedelta(days=i)

    try:
        df = build_training_set(
            train_start=start.strftime("%Y-%m-%d %H:%M:%S"),
            train_end=end.strftime("%Y-%m-%d %H:%M:%S"),
            run_date=run_date.strftime("%Y-%m-%d %H:%M:%S")
        )

        if df is None or df.empty:
            print(f"Day {i+1}: ❌ No training data returned")
            continue

        df['target_datetime'] = pd.to_datetime(df['target_datetime'], utc=True)
        df = df.sort_values('target_datetime').set_index('target_datetime')

        # Zorg dat run_date ook in UTC is
        run_date_utc = run_date.tz_localize("UTC")

        # Split into training and testing sets
        train_data = df[df.index <= run_date_utc]
        test_data = df[df.index > run_date_utc]

        # Drop any missing data in training
        train_data = train_data.dropna(subset=features + [target])

        if test_data.empty or train_data.empty:
            print(f"Day {i+1}: ❌ Not enough data for training or testing")
            continue

        # Feature selection wrapper
        from sklearn.feature_selection import SelectFromModel

        full_X_train = train_data[TRAIN_FEATURES]
        full_y_train = train_data[target]

        # Train model on full feature set
        model = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)
        model.fit(full_X_train, full_y_train)

        # Feature selection
        selector = SelectFromModel(model, prefit=True, threshold="median")
        selected_columns = full_X_train.columns[selector.get_support()]

        # Apply selection to both train and test
        X_train = full_X_train[selected_columns]
        X_test = test_data.reindex(columns=selected_columns)

        # Aanvullen met NaN-kolommen waar nodig
        for col in TRAIN_FEATURES:
            if col not in test_data.columns:
                test_data[col] = np.nan

        X_test = test_data[TRAIN_FEATURES]
        y_test = test_data[target]

        model = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        rmse_results.append({
            'iteration': i + 1,
            'run_date': run_date.strftime('%Y-%m-%d'),
            'valid_predictions': len(test_data),
            'rmse': rmse
        })

        print(f"Day {i+1}: ✅ {len(test_data)} test rows, Run: {run_date.strftime('%m-%d')}")

    except Exception as e:
        print(f"Day {i+1}: ❌ Error: {e}")

# Create results dataframe
if rmse_results:
    rmse_df = pd.DataFrame(rmse_results)

    print(f"\n📊 OVERALL RMSE - XGBoost Model")
    print("=" * 80)
    print(f"Successful runs: {len(rmse_df)}/30")

    print(rmse_df[['iteration', 'run_date', 'valid_predictions', 'rmse']].round(2).to_string(index=False))

    print(f"\n📈 SUMMARY STATISTICS")
    print("-" * 40)
    print(rmse_df['rmse'].describe().round(2))

    print(f"\n📊 AVERAGE OVERALL RMSE")
    print("-" * 40)
    print(f"Mean RMSE: {rmse_df['rmse'].mean():.2f}")
else:
    print("❌ No runs completed successfully")

2025-05-27 11:20:34,689 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:20:34,695 - build_training_set - INFO - 🧠 Actuals van 2025-01-01 00:00:00+00:00 t/m 2025-03-14 23:00:00+00:00
2025-05-27 11:20:34,700 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-15 00:00:00+00:00, target range: 2025-03-15 00:00:00+00:00 → 2025-03-21 23:00:00+00:00
2025-05-27 11:20:34,812 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen


🔍 Testing XGBoost Model - RMSE per forecast day


2025-05-27 11:20:34,972 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:20:34,983 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:20:34,984 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:20:34,984 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:20:34,999 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 1: ❌ Error: feature_names mismatch: ['hour_sin', 'month', 'shortwave_radiation', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, is_non_working_day, temperature_2m, hour_cos


  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:20:35,674 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:20:35,679 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:20:35,681 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:20:35,718 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-27 11:20:35,720 - build_training_set - INFO - 🔒 Verbinding gesloten
2025-05-27 11:20:36,087 - bui

Day 2: ❌ Error: feature_names mismatch: ['hour_sin', 'month', 'shortwave_radiation', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, is_non_working_day, temperature_2m, hour_cos


  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:20:36,290 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:20:36,290 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:20:36,290 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:20:36,368 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-27 11:20:36,369 - build_training_set - INFO - 🔒 Verbinding gesloten
2025-05-27 11:20:36,670 - bui

Day 3: ❌ Error: feature_names mismatch: ['hour_sin', 'month', 'shortwave_radiation', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, is_non_working_day, temperature_2m, hour_cos


2025-05-27 11:20:36,890 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:20:36,901 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:20:36,902 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:20:36,902 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:20:36,917 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 4: ❌ Error: feature_names mismatch: ['hour_sin', 'month', 'shortwave_radiation', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, is_non_working_day, temperature_2m, hour_cos


2025-05-27 11:20:37,569 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:20:37,570 - build_training_set - INFO - 🧠 Actuals van 2025-01-06 00:00:00+00:00 t/m 2025-03-19 23:00:00+00:00
2025-05-27 11:20:37,570 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-20 00:00:00+00:00, target range: 2025-03-20 00:00:00+00:00 → 2025-03-26 23:00:00+00:00
2025-05-27 11:20:37,588 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:20:37,732 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:20:37,741 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:20:37,741 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 5: ❌ Error: feature_names mismatch: ['hour_cos', 'month', 'shortwave_radiation', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, is_non_working_day, hour_sin, temperature_2m


2025-05-27 11:20:37,981 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:20:37,981 - build_training_set - INFO - 🧠 Actuals van 2025-01-07 00:00:00+00:00 t/m 2025-03-20 23:00:00+00:00
2025-05-27 11:20:37,981 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-21 00:00:00+00:00, target range: 2025-03-21 00:00:00+00:00 → 2025-03-27 23:00:00+00:00
2025-05-27 11:20:38,039 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen


Day 6: ❌ Error: feature_names mismatch: ['hour_cos', 'month', 'shortwave_radiation', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, is_non_working_day, hour_sin, temperature_2m


2025-05-27 11:20:38,188 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:20:38,198 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:20:38,199 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:20:38,200 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:20:38,214 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 7: ❌ Error: feature_names mismatch: ['hour_cos', 'month', 'shortwave_radiation', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, is_non_working_day, hour_sin, temperature_2m


2025-05-27 11:20:38,836 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:20:38,836 - build_training_set - INFO - 🧠 Actuals van 2025-01-09 00:00:00+00:00 t/m 2025-03-22 23:00:00+00:00
2025-05-27 11:20:38,837 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-23 00:00:00+00:00, target range: 2025-03-23 00:00:00+00:00 → 2025-03-29 23:00:00+00:00
2025-05-27 11:20:38,854 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:20:38,993 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:20:39,002 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:20:39,002 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 8: ❌ Error: feature_names mismatch: ['hour_cos', 'month', 'shortwave_radiation', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, is_non_working_day, hour_sin, temperature_2m


2025-05-27 11:20:39,231 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:20:39,231 - build_training_set - INFO - 🧠 Actuals van 2025-01-10 00:00:00+00:00 t/m 2025-03-23 23:00:00+00:00
2025-05-27 11:20:39,231 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-24 00:00:00+00:00, target range: 2025-03-24 00:00:00+00:00 → 2025-03-30 23:00:00+00:00
2025-05-27 11:20:39,249 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:20:39,392 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:20:39,400 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:20:39,401 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 9: ❌ Error: feature_names mismatch: ['hour_cos', 'is_non_working_day', 'temperature_2m', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, hour_sin, shortwave_radiation, month


2025-05-27 11:20:39,629 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:20:39,630 - build_training_set - INFO - 🧠 Actuals van 2025-01-11 00:00:00+00:00 t/m 2025-03-24 23:00:00+00:00
2025-05-27 11:20:39,630 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-25 00:00:00+00:00, target range: 2025-03-25 00:00:00+00:00 → 2025-03-31 23:00:00+00:00
2025-05-27 11:20:39,649 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen


Day 10: ❌ Error: feature_names mismatch: ['hour_cos', 'hour_sin', 'shortwave_radiation', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, is_non_working_day, temperature_2m, month


2025-05-27 11:20:39,832 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:20:39,841 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:20:39,841 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:20:39,841 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:20:39,856 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 11: ❌ Error: feature_names mismatch: ['hour_cos', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, is_non_working_day, hour_sin, month


2025-05-27 11:20:40,492 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:20:40,493 - build_training_set - INFO - 🧠 Actuals van 2025-01-13 00:00:00+00:00 t/m 2025-03-26 23:00:00+00:00
2025-05-27 11:20:40,493 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-27 00:00:00+00:00, target range: 2025-03-27 00:00:00+00:00 → 2025-04-02 23:00:00+00:00
2025-05-27 11:20:40,510 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:20:40,653 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:20:40,661 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:20:40,661 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 12: ❌ Error: feature_names mismatch: ['hour_cos', 'is_non_working_day', 'shortwave_radiation', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, hour_sin, temperature_2m, month


2025-05-27 11:20:40,899 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:20:40,899 - build_training_set - INFO - 🧠 Actuals van 2025-01-14 00:00:00+00:00 t/m 2025-03-27 23:00:00+00:00
2025-05-27 11:20:40,899 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-28 00:00:00+00:00, target range: 2025-03-28 00:00:00+00:00 → 2025-04-03 23:00:00+00:00
2025-05-27 11:20:40,916 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:20:41,057 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:20:41,066 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:20:41,066 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 13: ❌ Error: feature_names mismatch: ['hour_cos', 'is_non_working_day', 'shortwave_radiation', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, hour_sin, temperature_2m, month


2025-05-27 11:20:41,306 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:20:41,306 - build_training_set - INFO - 🧠 Actuals van 2025-01-15 00:00:00+00:00 t/m 2025-03-28 23:00:00+00:00
2025-05-27 11:20:41,306 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-29 00:00:00+00:00, target range: 2025-03-29 00:00:00+00:00 → 2025-04-04 23:00:00+00:00
2025-05-27 11:20:41,326 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen


Day 14: ❌ Error: feature_names mismatch: ['hour_cos', 'is_non_working_day', 'shortwave_radiation', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, hour_sin, temperature_2m, month


2025-05-27 11:20:41,514 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:20:41,523 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:20:41,523 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:20:41,524 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:20:41,537 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 15: ❌ Error: feature_names mismatch: ['hour_cos', 'month', 'is_non_working_day', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, hour_sin, shortwave_radiation, temperature_2m


2025-05-27 11:20:42,155 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:20:42,156 - build_training_set - INFO - 🧠 Actuals van 2025-01-17 00:00:00+00:00 t/m 2025-03-30 23:00:00+00:00
2025-05-27 11:20:42,156 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-31 00:00:00+00:00, target range: 2025-03-31 00:00:00+00:00 → 2025-04-06 23:00:00+00:00
2025-05-27 11:20:42,173 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:20:42,318 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:20:42,327 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:20:42,327 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 16: ❌ Error: feature_names mismatch: ['hour_cos', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, is_non_working_day, hour_sin, month


2025-05-27 11:20:42,564 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:20:42,564 - build_training_set - INFO - 🧠 Actuals van 2025-01-18 00:00:00+00:00 t/m 2025-03-31 23:00:00+00:00
2025-05-27 11:20:42,564 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-01 00:00:00+00:00, target range: 2025-04-01 00:00:00+00:00 → 2025-04-07 23:00:00+00:00
2025-05-27 11:20:42,582 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen


Day 17: ❌ Error: feature_names mismatch: ['is_dst', 'hour_cos', 'temperature_2m', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_non_working_day, hour_sin, shortwave_radiation, month


2025-05-27 11:20:42,776 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:20:42,787 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:20:42,787 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:20:42,788 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:20:42,820 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 18: ❌ Error: feature_names mismatch: ['hour_cos', 'month', 'shortwave_radiation', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, is_non_working_day, hour_sin, temperature_2m


2025-05-27 11:20:43,462 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:20:43,462 - build_training_set - INFO - 🧠 Actuals van 2025-01-20 00:00:00+00:00 t/m 2025-04-02 23:00:00+00:00
2025-05-27 11:20:43,462 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-03 00:00:00+00:00, target range: 2025-04-03 00:00:00+00:00 → 2025-04-09 23:00:00+00:00
2025-05-27 11:20:43,480 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:20:43,620 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:20:43,628 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:20:43,629 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 19: ❌ Error: feature_names mismatch: ['hour_cos', 'month', 'shortwave_radiation', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, is_non_working_day, hour_sin, temperature_2m


2025-05-27 11:20:43,867 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:20:43,867 - build_training_set - INFO - 🧠 Actuals van 2025-01-21 00:00:00+00:00 t/m 2025-04-03 23:00:00+00:00
2025-05-27 11:20:43,867 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-04 00:00:00+00:00, target range: 2025-04-04 00:00:00+00:00 → 2025-04-10 23:00:00+00:00
2025-05-27 11:20:43,884 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:20:44,024 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:20:44,034 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:20:44,034 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 20: ❌ Error: feature_names mismatch: ['hour_cos', 'month', 'shortwave_radiation', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, is_non_working_day, hour_sin, temperature_2m


2025-05-27 11:20:44,271 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:20:44,271 - build_training_set - INFO - 🧠 Actuals van 2025-01-22 00:00:00+00:00 t/m 2025-04-04 23:00:00+00:00
2025-05-27 11:20:44,272 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-05 00:00:00+00:00, target range: 2025-04-05 00:00:00+00:00 → 2025-04-11 23:00:00+00:00
2025-05-27 11:20:44,289 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:20:44,429 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:20:44,438 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:20:44,438 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 21: ❌ Error: feature_names mismatch: ['hour_cos', 'month', 'shortwave_radiation', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, is_non_working_day, hour_sin, temperature_2m


2025-05-27 11:20:44,715 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:20:44,715 - build_training_set - INFO - 🧠 Actuals van 2025-01-23 00:00:00+00:00 t/m 2025-04-05 23:00:00+00:00
2025-05-27 11:20:44,715 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-06 00:00:00+00:00, target range: 2025-04-06 00:00:00+00:00 → 2025-04-12 23:00:00+00:00
2025-05-27 11:20:44,733 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:20:44,873 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:20:44,882 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:20:44,882 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 22: ❌ Error: feature_names mismatch: ['hour_cos', 'month', 'shortwave_radiation', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, is_non_working_day, hour_sin, temperature_2m


2025-05-27 11:20:45,121 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:20:45,122 - build_training_set - INFO - 🧠 Actuals van 2025-01-24 00:00:00+00:00 t/m 2025-04-06 23:00:00+00:00
2025-05-27 11:20:45,122 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-07 00:00:00+00:00, target range: 2025-04-07 00:00:00+00:00 → 2025-04-13 23:00:00+00:00
2025-05-27 11:20:45,140 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:20:45,282 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:20:45,291 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:20:45,291 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 23: ❌ Error: feature_names mismatch: ['hour_sin', 'month', 'shortwave_radiation', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, is_non_working_day, temperature_2m, hour_cos


2025-05-27 11:20:45,532 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:20:45,532 - build_training_set - INFO - 🧠 Actuals van 2025-01-25 00:00:00+00:00 t/m 2025-04-07 23:00:00+00:00
2025-05-27 11:20:45,533 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-08 00:00:00+00:00, target range: 2025-04-08 00:00:00+00:00 → 2025-04-14 23:00:00+00:00
2025-05-27 11:20:45,551 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:20:45,693 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:20:45,702 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:20:45,702 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 24: ❌ Error: feature_names mismatch: ['hour_cos', 'month', 'shortwave_radiation', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, is_non_working_day, hour_sin, temperature_2m


2025-05-27 11:20:45,939 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:20:45,940 - build_training_set - INFO - 🧠 Actuals van 2025-01-26 00:00:00+00:00 t/m 2025-04-08 23:00:00+00:00
2025-05-27 11:20:45,940 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-09 00:00:00+00:00, target range: 2025-04-09 00:00:00+00:00 → 2025-04-15 23:00:00+00:00
2025-05-27 11:20:45,958 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:20:46,135 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows


Day 25: ❌ Error: feature_names mismatch: ['hour_sin', 'month', 'shortwave_radiation', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, is_non_working_day, temperature_2m, hour_cos


  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:20:46,145 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:20:46,145 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:20:46,145 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:20:46,159 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-27 11:20:46,160 - build_training_set - INFO - 🔒 Verbinding gesloten
2025-05-27 11:20:46,391 - bui

Day 26: ❌ Error: feature_names mismatch: ['hour_sin', 'month', 'shortwave_radiation', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, is_non_working_day, temperature_2m, hour_cos


2025-05-27 11:20:46,792 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:20:46,792 - build_training_set - INFO - 🧠 Actuals van 2025-01-28 00:00:00+00:00 t/m 2025-04-10 23:00:00+00:00
2025-05-27 11:20:46,793 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-11 00:00:00+00:00, target range: 2025-04-11 00:00:00+00:00 → 2025-04-17 23:00:00+00:00
2025-05-27 11:20:46,810 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:20:46,951 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:20:46,960 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:20:46,960 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 27: ❌ Error: feature_names mismatch: ['hour_cos', 'month', 'shortwave_radiation', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, is_non_working_day, hour_sin, temperature_2m


2025-05-27 11:20:47,251 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:20:47,252 - build_training_set - INFO - 🧠 Actuals van 2025-01-29 00:00:00+00:00 t/m 2025-04-11 23:00:00+00:00
2025-05-27 11:20:47,252 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-12 00:00:00+00:00, target range: 2025-04-12 00:00:00+00:00 → 2025-04-18 23:00:00+00:00
2025-05-27 11:20:47,270 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:20:47,415 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:20:47,425 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:20:47,426 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 28: ❌ Error: feature_names mismatch: ['hour_cos', 'hour_sin', 'shortwave_radiation', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, is_non_working_day, temperature_2m, month


2025-05-27 11:20:47,665 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:20:47,665 - build_training_set - INFO - 🧠 Actuals van 2025-01-30 00:00:00+00:00 t/m 2025-04-12 23:00:00+00:00
2025-05-27 11:20:47,665 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-13 00:00:00+00:00, target range: 2025-04-13 00:00:00+00:00 → 2025-04-19 23:00:00+00:00
2025-05-27 11:20:47,682 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen


Day 29: ❌ Error: feature_names mismatch: ['hour_cos', 'hour_sin', 'shortwave_radiation', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, is_non_working_day, temperature_2m, month


2025-05-27 11:20:47,878 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:20:47,888 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:20:47,889 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:20:47,889 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:20:47,902 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 30: ❌ Error: feature_names mismatch: ['hour_cos', 'hour_sin', 'shortwave_radiation', 'Load', 'Flow_NO'] ['is_dst', 'hour_cos', 'hour_sin', 'month', 'is_non_working_day', 'shortwave_radiation', 'temperature_2m', 'Load', 'Flow_NO']
training data did not have the following fields: is_dst, is_non_working_day, temperature_2m, month
❌ No runs completed successfully


In [None]:
# Get feature importances from the trained XGBoost model
importances = model.feature_importances_
feature_names = model.feature_names_in_

# Create a DataFrame for better visualization
feat_imp_df = pd.DataFrame({
    'Feature': feature_names,
    'Importance': importances
}).sort_values(by='Importance', ascending=False)

# Display the feature importances
print(feat_imp_df)

# Optional: Plot feature importances
plt.figure(figsize=(10, 6))
sns.barplot(x='Importance', y='Feature', data=feat_imp_df, palette='viridis')
plt.title('Feature Importance (XGBoost)')
plt.tight_layout()
plt.show()

In [None]:
# Get feature importances from the last trained model
importances = model.feature_importances_
feature_names = model.feature_names_in_

# Create a DataFrame for better visualization
feat_imp_df = pd.DataFrame({
    'Feature': feature_names,
    'Importance': importances
}).sort_values(by='Importance', ascending=False)

# Display the feature importances
print(feat_imp_df)

# Optional: Plot feature importances
plt.figure(figsize=(10, 6))
sns.barplot(x='Importance', y='Feature', data=feat_imp_df, palette='viridis')
plt.title('Feature Importance (XGBoost)')
plt.tight_layout()
plt.show()