In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
color_pal = sns.color_palette()
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


In [21]:
import sqlite3

conn = sqlite3.connect('../data/WARP.db')
df = pd.read_sql_query("SELECT * FROM master_warp", conn)
conn.close()
print(df.columns)

Index(['Price', 'target_datetime', 'Load', 'shortwave_radiation',
       'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation',
       'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin',
       'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover',
       'weekday_sin', 'hour_sin', 'weekday_cos'],
      dtype='object')


In [22]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from datetime import timedelta

# Feature and target setup
features = [
    'Flow_NO','is_dst', 'hour_cos', 'hour_sin','month',
    'Load','is_non_working_day','shortwave_radiation', 'temperature_2m' 
] # excluding  'direct_normal_irradiance', 'diffuse_radiation','yearday_cos', 'yearday_sin', 'cloud_cover', 'Flow_GB',  'is_weekend',
target = 'Price'


# Safe datetime handling
if 'target_datetime' not in df.columns:
    print("'target_datetime' column not found in columns. Sorting by index instead.")
    df = df.sort_index()
else:
    df['target_datetime'] = pd.to_datetime(df['target_datetime'])
    df = df.set_index('target_datetime')


# Forecast settings
start_date = pd.Timestamp("2025-03-13 12:00", tz='UTC')
end_date = pd.Timestamp("2025-05-14 12:00", tz='UTC')
lag = timedelta(hours=36)
forecast_horizon = timedelta(hours=144)

# Store RMSEs
rmses = []

current_time = start_date
while current_time <= end_date:
    train_data = df[df.index < current_time]
    test_start = current_time + lag # check current time  
    test_end = test_start + forecast_horizon
    test_data = df[(df.index >= test_start) & (df.index < test_end)]

    if test_data.empty:
        print(f"No test data for forecast starting at {current_time}")
        current_time += timedelta(days=1)
        continue

    X_train = train_data[features]
    y_train = train_data[target]
    X_test = test_data[features]
    y_test = test_data[target]

    # Train and predict
    model = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Evaluate
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    rmses.append(rmse)

    print(f"Forecast origin: {current_time}, Predicting {test_start} to {test_end}, RMSE: {rmse:.3f}")

    current_time += timedelta(days=1)

# Summary
avg_rmse = np.mean(rmses)
print(f"\nAverage RMSE over {len(rmses)} runs: {avg_rmse:.5f}")

# Calculate average RMSE over the first 30 runs
if len(rmses) >= 30:
    avg_rmse_30 = np.mean(rmses[:30])
    print(f"Average RMSE over first 30 runs: {avg_rmse_30:.5f}")
else:
    print("Less than 30 runs available to calculate average RMSE.")

Forecast origin: 2025-03-13 12:00:00+00:00, Predicting 2025-03-15 00:00:00+00:00 to 2025-03-21 00:00:00+00:00, RMSE: 0.020
Forecast origin: 2025-03-14 12:00:00+00:00, Predicting 2025-03-16 00:00:00+00:00 to 2025-03-22 00:00:00+00:00, RMSE: 0.024
Forecast origin: 2025-03-15 12:00:00+00:00, Predicting 2025-03-17 00:00:00+00:00 to 2025-03-23 00:00:00+00:00, RMSE: 0.032
Forecast origin: 2025-03-16 12:00:00+00:00, Predicting 2025-03-18 00:00:00+00:00 to 2025-03-24 00:00:00+00:00, RMSE: 0.030
Forecast origin: 2025-03-17 12:00:00+00:00, Predicting 2025-03-19 00:00:00+00:00 to 2025-03-25 00:00:00+00:00, RMSE: 0.033
Forecast origin: 2025-03-18 12:00:00+00:00, Predicting 2025-03-20 00:00:00+00:00 to 2025-03-26 00:00:00+00:00, RMSE: 0.032
Forecast origin: 2025-03-19 12:00:00+00:00, Predicting 2025-03-21 00:00:00+00:00 to 2025-03-27 00:00:00+00:00, RMSE: 0.029
Forecast origin: 2025-03-20 12:00:00+00:00, Predicting 2025-03-22 00:00:00+00:00 to 2025-03-28 00:00:00+00:00, RMSE: 0.025
Forecast origin:

In [None]:
# no lag, 7 days forecast horizon. higly similar average RMSE to the 36 hours lag, 6 days forecast horizon.
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from datetime import timedelta

# Feature and target setup
features = [
    'Flow_NO','is_dst', 'hour_cos', 'hour_sin','month',
    'Load','is_non_working_day','shortwave_radiation', 'temperature_2m'
] # excluding  'direct_normal_irradiance', 'diffuse_radiation', 'cloud_cover', 'Flow_GB',  'is_weekend','yearday_cos', 'yearday_sin'
target = 'Price'


# Safe datetime handling
if 'target_datetime' not in df.columns:
    print("'target_datetime' column not found in columns. Sorting by index instead.")
    df = df.sort_index()
else:
    df['target_datetime'] = pd.to_datetime(df['target_datetime'])
    df = df.set_index('target_datetime')


# Forecast settings
start_date = pd.Timestamp("2025-03-13 12:00", tz='UTC')
end_date = pd.Timestamp("2025-05-14 12:00", tz='UTC')
lag = timedelta(hours=0)  # No lag
forecast_horizon = timedelta(hours=168)  # 7 days

# Store RMSEs
rmses = []

current_time = start_date
while current_time <= end_date:
    train_data = df[df.index < current_time]
    test_start = current_time + lag # check current time  
    test_end = test_start + forecast_horizon
    test_data = df[(df.index >= test_start) & (df.index < test_end)]

    if test_data.empty:
        print(f"No test data for forecast starting at {current_time}")
        current_time += timedelta(days=1)
        continue

    X_train = train_data[features]
    y_train = train_data[target]
    X_test = test_data[features]
    y_test = test_data[target]

    # Train and predict
    model = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Evaluate
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    rmses.append(rmse)

    print(f"Forecast origin: {current_time}, Predicting {test_start} to {test_end}, RMSE: {rmse:.3f}")

    current_time += timedelta(days=1)

# Summary
avg_rmse = np.mean(rmses)
print(f"\nAverage RMSE over {len(rmses)} runs: {avg_rmse:.5f}")

# Calculate average RMSE over the first 30 runs
if len(rmses) >= 30:
    avg_rmse_30 = np.mean(rmses[:30])
    print(f"Average RMSE over first 30 runs: {avg_rmse_30:.5f}")
else:
    print("Less than 30 runs available to calculate average RMSE.")

'target_datetime' column not found in columns. Sorting by index instead.
Forecast origin: 2025-03-13 12:00:00+00:00, Predicting 2025-03-13 12:00:00+00:00 to 2025-03-20 12:00:00+00:00, RMSE: 0.019
Forecast origin: 2025-03-14 12:00:00+00:00, Predicting 2025-03-14 12:00:00+00:00 to 2025-03-21 12:00:00+00:00, RMSE: 0.020
Forecast origin: 2025-03-15 12:00:00+00:00, Predicting 2025-03-15 12:00:00+00:00 to 2025-03-22 12:00:00+00:00, RMSE: 0.027
Forecast origin: 2025-03-16 12:00:00+00:00, Predicting 2025-03-16 12:00:00+00:00 to 2025-03-23 12:00:00+00:00, RMSE: 0.030
Forecast origin: 2025-03-17 12:00:00+00:00, Predicting 2025-03-17 12:00:00+00:00 to 2025-03-24 12:00:00+00:00, RMSE: 0.030
Forecast origin: 2025-03-18 12:00:00+00:00, Predicting 2025-03-18 12:00:00+00:00 to 2025-03-25 12:00:00+00:00, RMSE: 0.030
Forecast origin: 2025-03-19 12:00:00+00:00, Predicting 2025-03-19 12:00:00+00:00 to 2025-03-26 12:00:00+00:00, RMSE: 0.031
Forecast origin: 2025-03-20 12:00:00+00:00, Predicting 2025-03-20 

In [25]:
# now with hyper parameter tuning using gridsearchCV
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from datetime import timedelta

# Feature and target setup
features = [
    'Flow_NO','is_dst', 'hour_cos', 'hour_sin','month',
    'Load','is_non_working_day','shortwave_radiation', 'temperature_2m' 
]
target = 'Price'

# Safe datetime handling
if 'target_datetime' not in df.columns:
    print("'target_datetime' column not found in columns. Sorting by index instead.")
    df = df.sort_index()
else:
    df['target_datetime'] = pd.to_datetime(df['target_datetime'])
    df = df.set_index('target_datetime')

# Forecast settings
start_date = pd.Timestamp("2025-03-13 12:00", tz='UTC')
end_date = pd.Timestamp("2025-05-14 12:00", tz='UTC')
lag = timedelta(hours=36)
forecast_horizon = timedelta(hours=144)

# Store RMSEs
rmses = []

# Extensive grid
param_grid = {
    'n_estimators': [100, 300, 500],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.05, 0.1],
    'subsample': [0.6, 0.8, 1],
    'colsample_bytree': [0.6, 0.8, 1],
    'gamma': [0, 0.1, 0.3],
    'reg_alpha': [0, 0.1, 1],
    'reg_lambda': [1, 1.5, 2],
}

current_time = start_date
while current_time <= end_date:
    train_data = df[df.index < current_time]
    test_start = current_time + lag
    test_end = test_start + forecast_horizon
    test_data = df[(df.index >= test_start) & (df.index < test_end)]

    if test_data.empty:
        print(f"No test data for forecast starting at {current_time}")
        current_time += timedelta(days=1)
        continue

    X_train = train_data[features]
    y_train = train_data[target]
    X_test = test_data[features]
    y_test = test_data[target]

    try:
        base_model = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)
        grid_search = GridSearchCV(
            estimator=base_model,
            param_grid=param_grid,
            scoring='neg_root_mean_squared_error',
            cv=3,
            n_jobs=-1,
            verbose=0
        )
        grid_search.fit(X_train, y_train)
        model = grid_search.best_estimator_
        best_params = grid_search.best_params_

        y_pred = model.predict(X_test)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        rmses.append(rmse)

        print(f"Forecast origin: {current_time}, Predicting {test_start} to {test_end}, "
              f"RMSE: {rmse:.3f}, Best params: {best_params}")

    except Exception as e:
        print(f"Error on forecast starting at {current_time}: {e}")

    current_time += timedelta(days=1)

# Summary
avg_rmse = np.mean(rmses)
print(f"\nAverage RMSE over {len(rmses)} runs: {avg_rmse:.5f}")

# Average over first 30 runs
if len(rmses) >= 30:
    avg_rmse_30 = np.mean(rmses[:30])
    print(f"Average RMSE over first 30 runs: {avg_rmse_30:.5f}")
else:
    print("Less than 30 runs available to calculate average RMSE.")

'target_datetime' column not found in columns. Sorting by index instead.
Forecast origin: 2025-03-13 12:00:00+00:00, Predicting 2025-03-15 00:00:00+00:00 to 2025-03-21 00:00:00+00:00, RMSE: 0.020, Best params: {'colsample_bytree': 0.6, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 0, 'reg_lambda': 1.5, 'subsample': 1}
Forecast origin: 2025-03-14 12:00:00+00:00, Predicting 2025-03-16 00:00:00+00:00 to 2025-03-22 00:00:00+00:00, RMSE: 0.024, Best params: {'colsample_bytree': 0.6, 'gamma': 0, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 0, 'reg_lambda': 1.5, 'subsample': 0.6}
Forecast origin: 2025-03-15 12:00:00+00:00, Predicting 2025-03-17 00:00:00+00:00 to 2025-03-23 00:00:00+00:00, RMSE: 0.032, Best params: {'colsample_bytree': 0.6, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 300, 'reg_alpha': 0.1, 'reg_lambda': 1, 'subsample': 1}
Forecast origin: 2025-03-16 12:00:00+00:00, Predicting 2025-03-18 00:

In [27]:
# Collect best_params from each run if available in rmse_results
best_params_list = []
for result in rmse_results:
    if 'best_params' in result:
        best_params_list.append(result['best_params'])

if best_params_list:
    best_params_df = pd.DataFrame(best_params_list)
    print("Best hyper-parameter values across runs (top 10 shown):")
    print(best_params_df.head(10))
    print("\nMost frequent values for each hyper-parameter:")
    print(best_params_df.mode().iloc[0])
else:
    print("No best_params found in rmse_results. Make sure to store 'best_params' in each run.")

No best_params found in rmse_results. Make sure to store 'best_params' in each run.


In [17]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error
# Dynamic path setup
import sys
from pathlib import Path

# Find project root dynamically
current_dir = Path.cwd()
while current_dir.name != "ENEXIS" and current_dir.parent != current_dir:
    current_dir = current_dir.parent
project_root = current_dir

# Add utils to path
utils_path = project_root / "src" / "utils"
sys.path.append(str(utils_path))
from build_training_set import build_training_set

# Define feature columns and target
COMMON_FEATURES = ['is_dst', 'hour_cos', 'hour_sin','month','is_non_working_day','shortwave_radiation', 'temperature_2m'
    ]
# list of all available features 'cloud_cover', 'direct_normal_irradiance', 'diffuse_radiation','hour_cos', 'hour_sin', 'is_dst', 'is_non_working_day','is_weekend', 'month', 'shortwave_radiation','temperature_2m', 'yearday_cos', 'yearday_sin'
TRAIN_ONLY_FEATURES = ['Load','Flow_NO']  # <- Only used if known ex post

TRAIN_FEATURES = COMMON_FEATURES + TRAIN_ONLY_FEATURES
TEST_FEATURES = COMMON_FEATURES


target = 'Price'

# Initial training window
base_start = "2025-01-01 00:00:00"
base_end = "2025-03-14 23:00:00"
base_run = "2025-03-15 00:00:00"

rmse_results = []

print("🔍 Testing XGBoost Model - RMSE per forecast day")
print("=" * 60)

for i in range(30):
    start = pd.Timestamp(base_start) + pd.Timedelta(days=i)
    end = pd.Timestamp(base_end) + pd.Timedelta(days=i)
    run_date = pd.Timestamp(base_run) + pd.Timedelta(days=i)

    try:
        df = build_training_set(
            train_start=start.strftime("%Y-%m-%d %H:%M:%S"),
            train_end=end.strftime("%Y-%m-%d %H:%M:%S"),
            run_date=run_date.strftime("%Y-%m-%d %H:%M:%S")
        )

        if df is None or df.empty:
            print(f"Day {i+1}: ❌ No training data returned")
            continue

        df['target_datetime'] = pd.to_datetime(df['target_datetime'], utc=True)
        df = df.sort_values('target_datetime').set_index('target_datetime')

        # Zorg dat run_date ook in UTC is
        run_date_utc = run_date.tz_localize("UTC")

        # Split into training and testing sets
        train_data = df[df.index <= run_date_utc]
        test_data = df[df.index > run_date_utc]

        # Drop any missing data in training
        train_data = train_data.dropna(subset=features + [target])

        if test_data.empty or train_data.empty:
            print(f"Day {i+1}: ❌ Not enough data for training or testing")
            continue

        # Train model
        X_train = train_data[TRAIN_FEATURES]
        y_train = train_data[target]

        # Aanvullen met NaN-kolommen waar nodig
        for col in TRAIN_FEATURES:
            if col not in test_data.columns:
                test_data[col] = np.nan

        X_test = test_data[TRAIN_FEATURES]
        y_test = test_data[target]

        model = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        rmse_results.append({
            'iteration': i + 1,
            'run_date': run_date.strftime('%Y-%m-%d'),
            'valid_predictions': len(test_data),
            'rmse': rmse
        })

        print(f"Day {i+1}: ✅ {len(test_data)} test rows, Run: {run_date.strftime('%m-%d')}")

    except Exception as e:
        print(f"Day {i+1}: ❌ Error: {e}")

# Create results dataframe
if rmse_results:
    rmse_df = pd.DataFrame(rmse_results)

    print(f"\n📊 OVERALL RMSE - XGBoost Model")
    print("=" * 80)
    print(f"Successful runs: {len(rmse_df)}/30")

    print(rmse_df[['iteration', 'run_date', 'valid_predictions', 'rmse']].round(2).to_string(index=False))

    print(f"\n📈 SUMMARY STATISTICS")
    print("-" * 40)
    print(rmse_df['rmse'].describe().round(2))

    print(f"\n📊 AVERAGE OVERALL RMSE")
    print("-" * 40)
    print(f"Mean RMSE: {rmse_df['rmse'].mean():.2f}")
else:
    print("❌ No runs completed successfully")

2025-05-27 11:16:48,628 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:48,629 - build_training_set - INFO - 🧠 Actuals van 2025-01-01 00:00:00+00:00 t/m 2025-03-14 23:00:00+00:00
2025-05-27 11:16:48,629 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-15 00:00:00+00:00, target range: 2025-03-15 00:00:00+00:00 → 2025-03-21 23:00:00+00:00
2025-05-27 11:16:48,680 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen


🔍 Testing XGBoost Model - RMSE per forecast day


2025-05-27 11:16:48,841 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:48,856 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:48,856 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:16:48,857 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:16:48,869 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 1: ✅ 167 test rows, Run: 03-15


2025-05-27 11:16:49,226 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:49,235 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:49,235 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:16:49,235 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:16:49,249 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 2: ✅ 167 test rows, Run: 03-16


2025-05-27 11:16:49,684 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:49,684 - build_training_set - INFO - 🧠 Actuals van 2025-01-04 00:00:00+00:00 t/m 2025-03-17 23:00:00+00:00
2025-05-27 11:16:49,684 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-18 00:00:00+00:00, target range: 2025-03-18 00:00:00+00:00 → 2025-03-24 23:00:00+00:00
2025-05-27 11:16:49,701 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:49,851 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:49,861 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:49,861 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 3: ✅ 167 test rows, Run: 03-17


2025-05-27 11:16:49,997 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:49,997 - build_training_set - INFO - 🧠 Actuals van 2025-01-05 00:00:00+00:00 t/m 2025-03-18 23:00:00+00:00
2025-05-27 11:16:49,997 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-19 00:00:00+00:00, target range: 2025-03-19 00:00:00+00:00 → 2025-03-25 23:00:00+00:00
2025-05-27 11:16:50,016 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:50,161 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:50,170 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:50,170 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 4: ✅ 167 test rows, Run: 03-18


2025-05-27 11:16:50,345 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:50,345 - build_training_set - INFO - 🧠 Actuals van 2025-01-06 00:00:00+00:00 t/m 2025-03-19 23:00:00+00:00
2025-05-27 11:16:50,346 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-20 00:00:00+00:00, target range: 2025-03-20 00:00:00+00:00 → 2025-03-26 23:00:00+00:00
2025-05-27 11:16:50,364 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:50,509 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:50,518 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:50,518 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 5: ✅ 167 test rows, Run: 03-19


2025-05-27 11:16:50,643 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:50,644 - build_training_set - INFO - 🧠 Actuals van 2025-01-07 00:00:00+00:00 t/m 2025-03-20 23:00:00+00:00
2025-05-27 11:16:50,644 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-21 00:00:00+00:00, target range: 2025-03-21 00:00:00+00:00 → 2025-03-27 23:00:00+00:00
2025-05-27 11:16:50,661 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:50,810 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:50,820 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:50,820 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 6: ✅ 167 test rows, Run: 03-20


2025-05-27 11:16:50,951 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:50,951 - build_training_set - INFO - 🧠 Actuals van 2025-01-08 00:00:00+00:00 t/m 2025-03-21 23:00:00+00:00
2025-05-27 11:16:50,951 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-22 00:00:00+00:00, target range: 2025-03-22 00:00:00+00:00 → 2025-03-28 23:00:00+00:00
2025-05-27 11:16:50,969 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:51,112 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:51,121 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:51,121 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 7: ✅ 167 test rows, Run: 03-21


2025-05-27 11:16:51,254 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:51,254 - build_training_set - INFO - 🧠 Actuals van 2025-01-09 00:00:00+00:00 t/m 2025-03-22 23:00:00+00:00
2025-05-27 11:16:51,254 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-23 00:00:00+00:00, target range: 2025-03-23 00:00:00+00:00 → 2025-03-29 23:00:00+00:00
2025-05-27 11:16:51,285 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen


Day 8: ✅ 167 test rows, Run: 03-22


2025-05-27 11:16:51,466 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:51,477 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:51,478 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:16:51,478 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:16:51,491 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 9: ✅ 167 test rows, Run: 03-23


2025-05-27 11:16:51,900 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:51,900 - build_training_set - INFO - 🧠 Actuals van 2025-01-11 00:00:00+00:00 t/m 2025-03-24 23:00:00+00:00
2025-05-27 11:16:51,900 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-25 00:00:00+00:00, target range: 2025-03-25 00:00:00+00:00 → 2025-03-31 23:00:00+00:00
2025-05-27 11:16:51,917 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:52,061 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:52,070 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:52,071 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 10: ✅ 167 test rows, Run: 03-24


2025-05-27 11:16:52,197 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:52,197 - build_training_set - INFO - 🧠 Actuals van 2025-01-12 00:00:00+00:00 t/m 2025-03-25 23:00:00+00:00
2025-05-27 11:16:52,197 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-26 00:00:00+00:00, target range: 2025-03-26 00:00:00+00:00 → 2025-04-01 23:00:00+00:00
2025-05-27 11:16:52,215 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:52,357 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:52,366 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:52,366 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 11: ✅ 167 test rows, Run: 03-25


2025-05-27 11:16:52,538 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:52,539 - build_training_set - INFO - 🧠 Actuals van 2025-01-13 00:00:00+00:00 t/m 2025-03-26 23:00:00+00:00
2025-05-27 11:16:52,539 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-27 00:00:00+00:00, target range: 2025-03-27 00:00:00+00:00 → 2025-04-02 23:00:00+00:00
2025-05-27 11:16:52,555 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:52,696 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:52,704 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:52,704 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 12: ✅ 167 test rows, Run: 03-26


2025-05-27 11:16:52,829 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:52,830 - build_training_set - INFO - 🧠 Actuals van 2025-01-14 00:00:00+00:00 t/m 2025-03-27 23:00:00+00:00
2025-05-27 11:16:52,830 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-28 00:00:00+00:00, target range: 2025-03-28 00:00:00+00:00 → 2025-04-03 23:00:00+00:00
2025-05-27 11:16:52,847 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:52,987 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:52,996 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:52,996 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 13: ✅ 167 test rows, Run: 03-27


2025-05-27 11:16:53,127 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:53,127 - build_training_set - INFO - 🧠 Actuals van 2025-01-15 00:00:00+00:00 t/m 2025-03-28 23:00:00+00:00
2025-05-27 11:16:53,127 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-29 00:00:00+00:00, target range: 2025-03-29 00:00:00+00:00 → 2025-04-04 23:00:00+00:00
2025-05-27 11:16:53,145 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:53,288 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:53,298 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:53,298 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 14: ✅ 167 test rows, Run: 03-28


2025-05-27 11:16:53,425 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:53,426 - build_training_set - INFO - 🧠 Actuals van 2025-01-16 00:00:00+00:00 t/m 2025-03-29 23:00:00+00:00
2025-05-27 11:16:53,426 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-30 00:00:00+00:00, target range: 2025-03-30 00:00:00+00:00 → 2025-04-05 23:00:00+00:00
2025-05-27 11:16:53,444 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:53,620 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows


Day 15: ✅ 167 test rows, Run: 03-29


  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:53,630 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:53,630 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:16:53,630 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:16:53,642 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-27 11:16:53,642 - build_training_set - INFO - 🔒 Verbinding gesloten
2025-05-27 11:16:53,761 - bui

Day 16: ✅ 167 test rows, Run: 03-30


2025-05-27 11:16:54,051 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:54,051 - build_training_set - INFO - 🧠 Actuals van 2025-01-18 00:00:00+00:00 t/m 2025-03-31 23:00:00+00:00
2025-05-27 11:16:54,052 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-01 00:00:00+00:00, target range: 2025-04-01 00:00:00+00:00 → 2025-04-07 23:00:00+00:00
2025-05-27 11:16:54,068 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:54,212 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:54,221 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:54,222 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 17: ✅ 167 test rows, Run: 03-31


2025-05-27 11:16:54,362 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:54,363 - build_training_set - INFO - 🧠 Actuals van 2025-01-19 00:00:00+00:00 t/m 2025-04-01 23:00:00+00:00
2025-05-27 11:16:54,363 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-02 00:00:00+00:00, target range: 2025-04-02 00:00:00+00:00 → 2025-04-08 23:00:00+00:00
2025-05-27 11:16:54,382 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:54,523 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:54,531 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:54,531 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 18: ✅ 167 test rows, Run: 04-01


2025-05-27 11:16:54,713 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:54,713 - build_training_set - INFO - 🧠 Actuals van 2025-01-20 00:00:00+00:00 t/m 2025-04-02 23:00:00+00:00
2025-05-27 11:16:54,713 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-03 00:00:00+00:00, target range: 2025-04-03 00:00:00+00:00 → 2025-04-09 23:00:00+00:00
2025-05-27 11:16:54,731 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:54,908 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows


Day 19: ✅ 167 test rows, Run: 04-02


  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:54,917 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:54,917 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:16:54,917 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:16:54,929 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-27 11:16:54,930 - build_training_set - INFO - 🔒 Verbinding gesloten
2025-05-27 11:16:55,045 - bui

Day 20: ✅ 167 test rows, Run: 04-03


2025-05-27 11:16:55,342 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:55,342 - build_training_set - INFO - 🧠 Actuals van 2025-01-22 00:00:00+00:00 t/m 2025-04-04 23:00:00+00:00
2025-05-27 11:16:55,342 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-05 00:00:00+00:00, target range: 2025-04-05 00:00:00+00:00 → 2025-04-11 23:00:00+00:00
2025-05-27 11:16:55,359 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:55,500 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:55,509 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:55,509 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 21: ✅ 167 test rows, Run: 04-04


2025-05-27 11:16:55,650 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:55,650 - build_training_set - INFO - 🧠 Actuals van 2025-01-23 00:00:00+00:00 t/m 2025-04-05 23:00:00+00:00
2025-05-27 11:16:55,650 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-06 00:00:00+00:00, target range: 2025-04-06 00:00:00+00:00 → 2025-04-12 23:00:00+00:00
2025-05-27 11:16:55,669 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:55,814 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:55,822 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:55,822 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 22: ✅ 167 test rows, Run: 04-05


2025-05-27 11:16:55,952 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:55,953 - build_training_set - INFO - 🧠 Actuals van 2025-01-24 00:00:00+00:00 t/m 2025-04-06 23:00:00+00:00
2025-05-27 11:16:55,953 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-07 00:00:00+00:00, target range: 2025-04-07 00:00:00+00:00 → 2025-04-13 23:00:00+00:00
2025-05-27 11:16:55,973 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:56,146 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows


Day 23: ✅ 167 test rows, Run: 04-06


  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:56,155 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:56,156 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:16:56,156 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:16:56,169 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-27 11:16:56,170 - build_training_set - INFO - 🔒 Verbinding gesloten
2025-05-27 11:16:56,288 - bui

Day 24: ✅ 167 test rows, Run: 04-07


2025-05-27 11:16:56,586 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:56,587 - build_training_set - INFO - 🧠 Actuals van 2025-01-26 00:00:00+00:00 t/m 2025-04-08 23:00:00+00:00
2025-05-27 11:16:56,587 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-09 00:00:00+00:00, target range: 2025-04-09 00:00:00+00:00 → 2025-04-15 23:00:00+00:00
2025-05-27 11:16:56,605 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:56,749 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:56,758 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:56,758 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 25: ✅ 167 test rows, Run: 04-08


2025-05-27 11:16:56,893 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:56,894 - build_training_set - INFO - 🧠 Actuals van 2025-01-27 00:00:00+00:00 t/m 2025-04-09 23:00:00+00:00
2025-05-27 11:16:56,894 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-10 00:00:00+00:00, target range: 2025-04-10 00:00:00+00:00 → 2025-04-16 23:00:00+00:00
2025-05-27 11:16:56,913 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:57,055 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:57,064 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:57,064 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 26: ✅ 167 test rows, Run: 04-09


2025-05-27 11:16:57,199 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:57,199 - build_training_set - INFO - 🧠 Actuals van 2025-01-28 00:00:00+00:00 t/m 2025-04-10 23:00:00+00:00
2025-05-27 11:16:57,200 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-11 00:00:00+00:00, target range: 2025-04-11 00:00:00+00:00 → 2025-04-17 23:00:00+00:00
2025-05-27 11:16:57,240 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:57,395 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows


Day 27: ✅ 167 test rows, Run: 04-10


  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:57,404 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:57,404 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:16:57,405 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:16:57,417 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-27 11:16:57,419 - build_training_set - INFO - 🔒 Verbinding gesloten
2025-05-27 11:16:57,537 - bui

Day 28: ✅ 167 test rows, Run: 04-11


2025-05-27 11:16:57,837 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:16:57,837 - build_training_set - INFO - 🧠 Actuals van 2025-01-30 00:00:00+00:00 t/m 2025-04-12 23:00:00+00:00
2025-05-27 11:16:57,838 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-13 00:00:00+00:00, target range: 2025-04-13 00:00:00+00:00 → 2025-04-19 23:00:00+00:00
2025-05-27 11:16:57,855 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:16:57,995 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:16:58,003 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:16:58,004 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 29: ✅ 167 test rows, Run: 04-12
Day 30: ✅ 167 test rows, Run: 04-13

📊 OVERALL RMSE - XGBoost Model
Successful runs: 30/30
 iteration   run_date  valid_predictions  rmse
         1 2025-03-15                167  0.08
         2 2025-03-16                167  0.09
         3 2025-03-17                167  0.09
         4 2025-03-18                167  0.08
         5 2025-03-19                167  0.08
         6 2025-03-20                167  0.08
         7 2025-03-21                167  0.07
         8 2025-03-22                167  0.08
         9 2025-03-23                167  0.06
        10 2025-03-24                167  0.07
        11 2025-03-25                167  0.09
        12 2025-03-26                167  0.09
        13 2025-03-27                167  0.10
        14 2025-03-28                167  0.11
        15 2025-03-29                167  0.09
        16 2025-03-30                167  0.10
        17 2025-03-31                167  0.10
        18 2025-04-01      

In [23]:
# optimizing the code for feature selection 
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error
# Dynamic path setup
import sys
from pathlib import Path
from sklearn.model_selection import GridSearchCV

# Find project root dynamically
current_dir = Path.cwd()
while current_dir.name != "ENEXIS" and current_dir.parent != current_dir:
    current_dir = current_dir.parent
project_root = current_dir

# Add utils to path
utils_path = project_root / "src" / "utils"
sys.path.append(str(utils_path))
from build_training_set import build_training_set

# Define feature columns and target
COMMON_FEATURES = ['is_dst', 'hour_cos', 'hour_sin','month','is_non_working_day','shortwave_radiation', 'temperature_2m'
    ]
# list of all available features 'cloud_cover', 'direct_normal_irradiance', 'diffuse_radiation','hour_cos', 'hour_sin', 'is_dst', 'is_non_working_day','is_weekend', 'month', 'shortwave_radiation','temperature_2m', 'yearday_cos', 'yearday_sin'
TRAIN_ONLY_FEATURES = ['Load','Flow_NO']  # <- Only used if known ex post

TRAIN_FEATURES = COMMON_FEATURES + TRAIN_ONLY_FEATURES
TEST_FEATURES = COMMON_FEATURES


target = 'Price'

# Initial training window
base_start = "2025-01-01 00:00:00"
base_end = "2025-03-14 23:00:00"
base_run = "2025-03-15 00:00:00"

rmse_results = []

print("🔍 Testing XGBoost Model - RMSE per forecast day")
print("=" * 60)

# GridSearch parameter grid
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [3, 5],
    'learning_rate': [0.05, 0.1],
    'subsample': [0.8, 1],
    'colsample_bytree': [0.8, 1],
}

# Loop over forecast days
for i in range(30):
    start = pd.Timestamp(base_start) + pd.Timedelta(days=i)
    end = pd.Timestamp(base_end) + pd.Timedelta(days=i)
    run_date = pd.Timestamp(base_run) + pd.Timedelta(days=i)

    try:
        df = build_training_set(
            train_start=start.strftime("%Y-%m-%d %H:%M:%S"),
            train_end=end.strftime("%Y-%m-%d %H:%M:%S"),
            run_date=run_date.strftime("%Y-%m-%d %H:%M:%S")
        )

        if df is None or df.empty:
            print(f"Day {i+1}: ❌ No training data returned")
            continue

        df['target_datetime'] = pd.to_datetime(df['target_datetime'], utc=True)
        df = df.sort_values('target_datetime').set_index('target_datetime')

        # Zorg dat run_date ook in UTC is
        run_date_utc = run_date.tz_localize("UTC")

        # Split into training and testing sets
        train_data = df[df.index <= run_date_utc]
        test_data = df[df.index > run_date_utc]

        # Drop any missing data in training
        train_data = train_data.dropna(subset=TRAIN_FEATURES + [target])

        if test_data.empty or train_data.empty:
            print(f"Day {i+1}: ❌ Not enough data for training or testing")
            continue

        # Train model
        X_train = train_data[TRAIN_FEATURES]
        y_train = train_data[target]

        # Aanvullen met NaN-kolommen waar nodig
        for col in TRAIN_FEATURES:
            if col not in test_data.columns:
                test_data[col] = np.nan

        X_test = test_data[TRAIN_FEATURES]
        y_test = test_data[target]

        base_model = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)
        grid_search = GridSearchCV(
            estimator=base_model,
            param_grid=param_grid,
            scoring='neg_root_mean_squared_error',
            cv=3,
            n_jobs=-1,
            verbose=0
        )
        grid_search.fit(X_train, y_train)
        print(f"Day {i+1}: 🧪 Best params: {grid_search.best_params_}")
        model = grid_search.best_estimator_

        y_pred = model.predict(X_test)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        rmse_results.append({
            'iteration': i + 1,
            'run_date': run_date.strftime('%Y-%m-%d'),
            'valid_predictions': len(test_data),
            'rmse': rmse
        })

        print(f"Day {i+1}: ✅ {len(test_data)} test rows, Run: {run_date.strftime('%m-%d')}")

    except Exception as e:
        print(f"Day {i+1}: ❌ Error: {e}")

# Create results dataframe
if rmse_results:
    rmse_df = pd.DataFrame(rmse_results)

    print(f"\n📊 OVERALL RMSE - XGBoost Model")
    print("=" * 80)
    print(f"Successful runs: {len(rmse_df)}/30")

    print(rmse_df[['iteration', 'run_date', 'valid_predictions', 'rmse']].round(2).to_string(index=False))

    print(f"\n📈 SUMMARY STATISTICS")
    print("-" * 40)
    print(rmse_df['rmse'].describe().round(2))

    print(f"\n📊 AVERAGE OVERALL RMSE")
    print("-" * 40)
    print(f"Mean RMSE: {rmse_df['rmse'].mean():.2f}")
else:
    print("❌ No runs completed successfully")

2025-05-27 11:44:54,535 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:44:54,536 - build_training_set - INFO - 🧠 Actuals van 2025-01-01 00:00:00+00:00 t/m 2025-03-14 23:00:00+00:00
2025-05-27 11:44:54,536 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-15 00:00:00+00:00, target range: 2025-03-15 00:00:00+00:00 → 2025-03-21 23:00:00+00:00
2025-05-27 11:44:54,588 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen


🔍 Testing XGBoost Model - RMSE per forecast day


2025-05-27 11:44:54,822 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:44:54,834 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:44:54,834 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:44:54,835 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:44:54,851 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 1: 🧪 Best params: {'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'subsample': 1}
Day 1: ✅ 167 test rows, Run: 03-15


2025-05-27 11:44:58,841 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-27 11:44:58,842 - build_training_set - INFO - 🔒 Verbinding gesloten
2025-05-27 11:44:59,641 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:44:59,643 - build_training_set - INFO - 🧠 Actuals van 2025-01-03 00:00:00+00:00 t/m 2025-03-16 23:00:00+00:00
2025-05-27 11:44:59,644 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-17 00:00:00+00:00, target range: 2025-03-17 00:00:00+00:00 → 2025-03-23 23:00:00+00:00
2025-05-27 11:44:59,666 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:44:59,821 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:44:59,830 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:44:59,830 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load'

Day 2: 🧪 Best params: {'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}
Day 2: ✅ 167 test rows, Run: 03-16


2025-05-27 11:44:59,843 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-27 11:44:59,844 - build_training_set - INFO - 🔒 Verbinding gesloten
2025-05-27 11:45:00,766 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:45:00,767 - build_training_set - INFO - 🧠 Actuals van 2025-01-04 00:00:00+00:00 t/m 2025-03-17 23:00:00+00:00
2025-05-27 11:45:00,767 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-18 00:00:00+00:00, target range: 2025-03-18 00:00:00+00:00 → 2025-03-24 23:00:00+00:00
2025-05-27 11:45:00,784 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:45:00,936 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:45:00,945 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:45:00,945 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load'

Day 3: 🧪 Best params: {'colsample_bytree': 1, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}
Day 3: ✅ 167 test rows, Run: 03-17


2025-05-27 11:45:01,691 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:45:01,691 - build_training_set - INFO - 🧠 Actuals van 2025-01-05 00:00:00+00:00 t/m 2025-03-18 23:00:00+00:00
2025-05-27 11:45:01,692 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-19 00:00:00+00:00, target range: 2025-03-19 00:00:00+00:00 → 2025-03-25 23:00:00+00:00
2025-05-27 11:45:01,710 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:45:01,857 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:45:01,866 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:45:01,866 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 4: 🧪 Best params: {'colsample_bytree': 1, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 200, 'subsample': 0.8}
Day 4: ✅ 167 test rows, Run: 03-18


2025-05-27 11:45:02,543 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:45:02,544 - build_training_set - INFO - 🧠 Actuals van 2025-01-06 00:00:00+00:00 t/m 2025-03-19 23:00:00+00:00
2025-05-27 11:45:02,544 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-20 00:00:00+00:00, target range: 2025-03-20 00:00:00+00:00 → 2025-03-26 23:00:00+00:00
2025-05-27 11:45:02,562 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:45:02,716 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:45:02,726 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:45:02,726 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 5: 🧪 Best params: {'colsample_bytree': 1, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 200, 'subsample': 0.8}
Day 5: ✅ 167 test rows, Run: 03-19


2025-05-27 11:45:02,742 - build_training_set - INFO - 🔒 Verbinding gesloten
2025-05-27 11:45:03,701 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:45:03,702 - build_training_set - INFO - 🧠 Actuals van 2025-01-07 00:00:00+00:00 t/m 2025-03-20 23:00:00+00:00
2025-05-27 11:45:03,702 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-21 00:00:00+00:00, target range: 2025-03-21 00:00:00+00:00 → 2025-03-27 23:00:00+00:00
2025-05-27 11:45:03,726 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:45:03,872 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:45:03,881 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:45:03,881 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Fl

Day 6: 🧪 Best params: {'colsample_bytree': 1, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 200, 'subsample': 0.8}
Day 6: ✅ 167 test rows, Run: 03-20


2025-05-27 11:45:04,515 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:45:04,516 - build_training_set - INFO - 🧠 Actuals van 2025-01-08 00:00:00+00:00 t/m 2025-03-21 23:00:00+00:00
2025-05-27 11:45:04,516 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-22 00:00:00+00:00, target range: 2025-03-22 00:00:00+00:00 → 2025-03-28 23:00:00+00:00
2025-05-27 11:45:04,533 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:45:04,677 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:45:04,686 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:45:04,686 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 7: 🧪 Best params: {'colsample_bytree': 1, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}
Day 7: ✅ 167 test rows, Run: 03-21


2025-05-27 11:45:05,330 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:45:05,330 - build_training_set - INFO - 🧠 Actuals van 2025-01-09 00:00:00+00:00 t/m 2025-03-22 23:00:00+00:00
2025-05-27 11:45:05,331 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-23 00:00:00+00:00, target range: 2025-03-23 00:00:00+00:00 → 2025-03-29 23:00:00+00:00
2025-05-27 11:45:05,348 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:45:05,490 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:45:05,499 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:45:05,500 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 8: 🧪 Best params: {'colsample_bytree': 1, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}
Day 8: ✅ 167 test rows, Run: 03-22


2025-05-27 11:45:06,462 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:45:06,463 - build_training_set - INFO - 🧠 Actuals van 2025-01-10 00:00:00+00:00 t/m 2025-03-23 23:00:00+00:00
2025-05-27 11:45:06,463 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-24 00:00:00+00:00, target range: 2025-03-24 00:00:00+00:00 → 2025-03-30 23:00:00+00:00
2025-05-27 11:45:06,483 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen


Day 9: 🧪 Best params: {'colsample_bytree': 1, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200, 'subsample': 1}
Day 9: ✅ 167 test rows, Run: 03-23


2025-05-27 11:45:06,745 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:45:06,755 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:45:06,755 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:45:06,755 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:45:06,769 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 10: 🧪 Best params: {'colsample_bytree': 1, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}
Day 10: ✅ 167 test rows, Run: 03-24


2025-05-27 11:45:08,196 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:45:08,196 - build_training_set - INFO - 🧠 Actuals van 2025-01-12 00:00:00+00:00 t/m 2025-03-25 23:00:00+00:00
2025-05-27 11:45:08,196 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-26 00:00:00+00:00, target range: 2025-03-26 00:00:00+00:00 → 2025-04-01 23:00:00+00:00
2025-05-27 11:45:08,217 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:45:08,371 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:45:08,381 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:45:08,381 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 11: 🧪 Best params: {'colsample_bytree': 1, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}
Day 11: ✅ 167 test rows, Run: 03-25


2025-05-27 11:45:09,128 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:45:09,129 - build_training_set - INFO - 🧠 Actuals van 2025-01-13 00:00:00+00:00 t/m 2025-03-26 23:00:00+00:00
2025-05-27 11:45:09,129 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-27 00:00:00+00:00, target range: 2025-03-27 00:00:00+00:00 → 2025-04-02 23:00:00+00:00
2025-05-27 11:45:09,149 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:45:09,311 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:45:09,321 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:45:09,321 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 12: 🧪 Best params: {'colsample_bytree': 1, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}
Day 12: ✅ 167 test rows, Run: 03-26


2025-05-27 11:45:09,336 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-27 11:45:09,337 - build_training_set - INFO - 🔒 Verbinding gesloten
2025-05-27 11:45:10,218 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:45:10,219 - build_training_set - INFO - 🧠 Actuals van 2025-01-14 00:00:00+00:00 t/m 2025-03-27 23:00:00+00:00
2025-05-27 11:45:10,219 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-28 00:00:00+00:00, target range: 2025-03-28 00:00:00+00:00 → 2025-04-03 23:00:00+00:00
2025-05-27 11:45:10,241 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:45:10,411 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows


Day 13: 🧪 Best params: {'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 5, 'n_estimators': 100, 'subsample': 1}
Day 13: ✅ 167 test rows, Run: 03-27


  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:45:10,421 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:45:10,421 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:45:10,421 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:45:10,437 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-27 11:45:10,438 - build_training_set - INFO - 🔒 Verbinding gesloten
2025-05-27 11:45:11,083 - bui

Day 14: 🧪 Best params: {'colsample_bytree': 1, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}
Day 14: ✅ 167 test rows, Run: 03-28


2025-05-27 11:45:11,957 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:45:11,957 - build_training_set - INFO - 🧠 Actuals van 2025-01-16 00:00:00+00:00 t/m 2025-03-29 23:00:00+00:00
2025-05-27 11:45:11,958 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-30 00:00:00+00:00, target range: 2025-03-30 00:00:00+00:00 → 2025-04-05 23:00:00+00:00
2025-05-27 11:45:11,975 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:45:12,121 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:45:12,129 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:45:12,130 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 15: 🧪 Best params: {'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 5, 'n_estimators': 100, 'subsample': 0.8}
Day 15: ✅ 167 test rows, Run: 03-29


2025-05-27 11:45:12,772 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:45:12,773 - build_training_set - INFO - 🧠 Actuals van 2025-01-17 00:00:00+00:00 t/m 2025-03-30 23:00:00+00:00
2025-05-27 11:45:12,773 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-31 00:00:00+00:00, target range: 2025-03-31 00:00:00+00:00 → 2025-04-06 23:00:00+00:00
2025-05-27 11:45:12,789 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen


Day 16: 🧪 Best params: {'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'subsample': 1}
Day 16: ✅ 167 test rows, Run: 03-30


2025-05-27 11:45:13,009 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:45:13,018 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:45:13,018 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:45:13,019 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:45:13,035 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 17: 🧪 Best params: {'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}
Day 17: ✅ 167 test rows, Run: 03-31


2025-05-27 11:45:14,680 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:45:14,680 - build_training_set - INFO - 🧠 Actuals van 2025-01-19 00:00:00+00:00 t/m 2025-04-01 23:00:00+00:00
2025-05-27 11:45:14,680 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-02 00:00:00+00:00, target range: 2025-04-02 00:00:00+00:00 → 2025-04-08 23:00:00+00:00
2025-05-27 11:45:14,697 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:45:14,842 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:45:14,851 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:45:14,851 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 18: 🧪 Best params: {'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}
Day 18: ✅ 167 test rows, Run: 04-01


2025-05-27 11:45:15,493 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:45:15,493 - build_training_set - INFO - 🧠 Actuals van 2025-01-20 00:00:00+00:00 t/m 2025-04-02 23:00:00+00:00
2025-05-27 11:45:15,493 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-03 00:00:00+00:00, target range: 2025-04-03 00:00:00+00:00 → 2025-04-09 23:00:00+00:00
2025-05-27 11:45:15,511 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:45:15,657 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:45:15,665 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:45:15,666 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 19: 🧪 Best params: {'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}
Day 19: ✅ 167 test rows, Run: 04-02


2025-05-27 11:45:16,267 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:45:16,267 - build_training_set - INFO - 🧠 Actuals van 2025-01-21 00:00:00+00:00 t/m 2025-04-03 23:00:00+00:00
2025-05-27 11:45:16,268 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-04 00:00:00+00:00, target range: 2025-04-04 00:00:00+00:00 → 2025-04-10 23:00:00+00:00
2025-05-27 11:45:16,284 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen


Day 20: 🧪 Best params: {'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}
Day 20: ✅ 167 test rows, Run: 04-03


2025-05-27 11:45:16,503 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:45:16,512 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:45:16,512 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:45:16,513 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:45:16,525 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 21: 🧪 Best params: {'colsample_bytree': 1, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'subsample': 1}
Day 21: ✅ 167 test rows, Run: 04-04


2025-05-27 11:45:17,960 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:45:17,961 - build_training_set - INFO - 🧠 Actuals van 2025-01-23 00:00:00+00:00 t/m 2025-04-05 23:00:00+00:00
2025-05-27 11:45:17,961 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-06 00:00:00+00:00, target range: 2025-04-06 00:00:00+00:00 → 2025-04-12 23:00:00+00:00
2025-05-27 11:45:17,978 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:45:18,117 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:45:18,126 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:45:18,127 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 22: 🧪 Best params: {'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}
Day 22: ✅ 167 test rows, Run: 04-05


2025-05-27 11:45:18,767 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:45:18,767 - build_training_set - INFO - 🧠 Actuals van 2025-01-24 00:00:00+00:00 t/m 2025-04-06 23:00:00+00:00
2025-05-27 11:45:18,767 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-07 00:00:00+00:00, target range: 2025-04-07 00:00:00+00:00 → 2025-04-13 23:00:00+00:00
2025-05-27 11:45:18,785 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:45:18,928 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:45:18,937 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:45:18,937 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 23: 🧪 Best params: {'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}
Day 23: ✅ 167 test rows, Run: 04-06


2025-05-27 11:45:19,582 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:45:19,583 - build_training_set - INFO - 🧠 Actuals van 2025-01-25 00:00:00+00:00 t/m 2025-04-07 23:00:00+00:00
2025-05-27 11:45:19,583 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-08 00:00:00+00:00, target range: 2025-04-08 00:00:00+00:00 → 2025-04-14 23:00:00+00:00
2025-05-27 11:45:19,600 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen


Day 24: 🧪 Best params: {'colsample_bytree': 1, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}
Day 24: ✅ 167 test rows, Run: 04-07


2025-05-27 11:45:19,829 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:45:19,839 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:45:19,840 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:45:19,840 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:45:19,854 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 25: 🧪 Best params: {'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}
Day 25: ✅ 167 test rows, Run: 04-08


2025-05-27 11:45:21,429 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:45:21,429 - build_training_set - INFO - 🧠 Actuals van 2025-01-27 00:00:00+00:00 t/m 2025-04-09 23:00:00+00:00
2025-05-27 11:45:21,429 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-10 00:00:00+00:00, target range: 2025-04-10 00:00:00+00:00 → 2025-04-16 23:00:00+00:00
2025-05-27 11:45:21,445 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:45:21,594 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:45:21,603 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:45:21,603 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 26: 🧪 Best params: {'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}
Day 26: ✅ 167 test rows, Run: 04-09


2025-05-27 11:45:22,298 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:45:22,298 - build_training_set - INFO - 🧠 Actuals van 2025-01-28 00:00:00+00:00 t/m 2025-04-10 23:00:00+00:00
2025-05-27 11:45:22,298 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-11 00:00:00+00:00, target range: 2025-04-11 00:00:00+00:00 → 2025-04-17 23:00:00+00:00
2025-05-27 11:45:22,316 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:45:22,477 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:45:22,486 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:45:22,486 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 27: 🧪 Best params: {'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}
Day 27: ✅ 167 test rows, Run: 04-10


2025-05-27 11:45:22,498 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-27 11:45:22,499 - build_training_set - INFO - 🔒 Verbinding gesloten
2025-05-27 11:45:23,240 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:45:23,241 - build_training_set - INFO - 🧠 Actuals van 2025-01-29 00:00:00+00:00 t/m 2025-04-11 23:00:00+00:00
2025-05-27 11:45:23,241 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-12 00:00:00+00:00, target range: 2025-04-12 00:00:00+00:00 → 2025-04-18 23:00:00+00:00
2025-05-27 11:45:23,257 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:45:23,404 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:45:23,414 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:45:23,414 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load'

Day 28: 🧪 Best params: {'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}
Day 28: ✅ 167 test rows, Run: 04-11


2025-05-27 11:45:24,108 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:45:24,108 - build_training_set - INFO - 🧠 Actuals van 2025-01-30 00:00:00+00:00 t/m 2025-04-12 23:00:00+00:00
2025-05-27 11:45:24,108 - build_training_set - INFO - 📅 Forecast van run_date 2025-04-13 00:00:00+00:00, target range: 2025-04-13 00:00:00+00:00 → 2025-04-19 23:00:00+00:00
2025-05-27 11:45:24,125 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen
2025-05-27 11:45:24,280 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:45:24,289 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:45:24,289 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_

Day 29: 🧪 Best params: {'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}
Day 29: ✅ 167 test rows, Run: 04-12
Day 30: 🧪 Best params: {'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}
Day 30: ✅ 167 test rows, Run: 04-13

📊 OVERALL RMSE - XGBoost Model
Successful runs: 30/30
 iteration   run_date  valid_predictions  rmse
         1 2025-03-15                167  0.09
         2 2025-03-16                167  0.10
         3 2025-03-17                167  0.10
         4 2025-03-18                167  0.09
         5 2025-03-19                167  0.09
         6 2025-03-20                167  0.08
         7 2025-03-21                167  0.10
         8 2025-03-22                167  0.09
         9 2025-03-23                167  0.07
        10 2025-03-24                167  0.10
        11 2025-03-25                167  0.10
        12 2025-03-26                167  0.09
        

In [24]:
# andere setting hyperparameters in GridSearchCV


import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error
# Dynamic path setup
import sys
from pathlib import Path
from sklearn.model_selection import GridSearchCV

# Find project root dynamically
current_dir = Path.cwd()
while current_dir.name != "ENEXIS" and current_dir.parent != current_dir:
    current_dir = current_dir.parent
project_root = current_dir

# Add utils to path
utils_path = project_root / "src" / "utils"
sys.path.append(str(utils_path))
from build_training_set import build_training_set

# Define feature columns and target
COMMON_FEATURES = ['is_dst', 'hour_cos', 'hour_sin','month','is_non_working_day','shortwave_radiation', 'temperature_2m'
    ]
# list of all available features 'cloud_cover', 'direct_normal_irradiance', 'diffuse_radiation','hour_cos', 'hour_sin', 'is_dst', 'is_non_working_day','is_weekend', 'month', 'shortwave_radiation','temperature_2m', 'yearday_cos', 'yearday_sin'
TRAIN_ONLY_FEATURES = ['Load','Flow_NO']  # <- Only used if known ex post

TRAIN_FEATURES = COMMON_FEATURES + TRAIN_ONLY_FEATURES
TEST_FEATURES = COMMON_FEATURES


target = 'Price'

# Initial training window
base_start = "2025-01-01 00:00:00"
base_end = "2025-03-14 23:00:00"
base_run = "2025-03-15 00:00:00"

rmse_results = []

print("🔍 Testing XGBoost Model - RMSE per forecast day")
print("=" * 60)

# GridSearch parameter grid
param_grid = {
    'n_estimators': [100, 300, 500],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.05, 0.1],
    'subsample': [0.6, 0.8, 1],
    'colsample_bytree': [0.6, 0.8, 1],
    'gamma': [0, 0.1, 0.3],
    'reg_alpha': [0, 0.1, 1],
    'reg_lambda': [1, 1.5, 2],
}

# Loop over forecast days
for i in range(30):
    start = pd.Timestamp(base_start) + pd.Timedelta(days=i)
    end = pd.Timestamp(base_end) + pd.Timedelta(days=i)
    run_date = pd.Timestamp(base_run) + pd.Timedelta(days=i)

    try:
        df = build_training_set(
            train_start=start.strftime("%Y-%m-%d %H:%M:%S"),
            train_end=end.strftime("%Y-%m-%d %H:%M:%S"),
            run_date=run_date.strftime("%Y-%m-%d %H:%M:%S")
        )

        if df is None or df.empty:
            print(f"Day {i+1}: ❌ No training data returned")
            continue

        df['target_datetime'] = pd.to_datetime(df['target_datetime'], utc=True)
        df = df.sort_values('target_datetime').set_index('target_datetime')

        # Zorg dat run_date ook in UTC is
        run_date_utc = run_date.tz_localize("UTC")

        # Split into training and testing sets
        train_data = df[df.index <= run_date_utc]
        test_data = df[df.index > run_date_utc]

        # Drop any missing data in training
        train_data = train_data.dropna(subset=TRAIN_FEATURES + [target])

        if test_data.empty or train_data.empty:
            print(f"Day {i+1}: ❌ Not enough data for training or testing")
            continue

        # Train model
        X_train = train_data[TRAIN_FEATURES]
        y_train = train_data[target]

        # Aanvullen met NaN-kolommen waar nodig
        for col in TRAIN_FEATURES:
            if col not in test_data.columns:
                test_data[col] = np.nan

        X_test = test_data[TRAIN_FEATURES]
        y_test = test_data[target]

        base_model = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)
        grid_search = GridSearchCV(
            estimator=base_model,
            param_grid=param_grid,
            scoring='neg_root_mean_squared_error',
            cv=3,
            n_jobs=-1,
            verbose=0
        )
        grid_search.fit(X_train, y_train)
        print(f"Day {i+1}: 🧪 Best params: {grid_search.best_params_}")
        model = grid_search.best_estimator_

        y_pred = model.predict(X_test)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        rmse_results.append({
            'iteration': i + 1,
            'run_date': run_date.strftime('%Y-%m-%d'),
            'valid_predictions': len(test_data),
            'rmse': rmse
        })

        print(f"Day {i+1}: ✅ {len(test_data)} test rows, Run: {run_date.strftime('%m-%d')}")

    except Exception as e:
        print(f"Day {i+1}: ❌ Error: {e}")

# Create results dataframe
if rmse_results:
    rmse_df = pd.DataFrame(rmse_results)

    print(f"\n📊 OVERALL RMSE - XGBoost Model")
    print("=" * 80)
    print(f"Successful runs: {len(rmse_df)}/30")

    print(rmse_df[['iteration', 'run_date', 'valid_predictions', 'rmse']].round(2).to_string(index=False))

    print(f"\n📈 SUMMARY STATISTICS")
    print("-" * 40)
    print(rmse_df['rmse'].describe().round(2))

    print(f"\n📊 AVERAGE OVERALL RMSE")
    print("-" * 40)
    print(f"Mean RMSE: {rmse_df['rmse'].mean():.2f}")
else:
    print("❌ No runs completed successfully")

2025-05-27 11:49:28,630 - build_training_set - INFO - 🚀 Start build van trainingset
2025-05-27 11:49:28,632 - build_training_set - INFO - 🧠 Actuals van 2025-01-01 00:00:00+00:00 t/m 2025-03-14 23:00:00+00:00
2025-05-27 11:49:28,635 - build_training_set - INFO - 📅 Forecast van run_date 2025-03-15 00:00:00+00:00, target range: 2025-03-15 00:00:00+00:00 → 2025-03-21 23:00:00+00:00
2025-05-27 11:49:28,729 - build_training_set - INFO - ✅ Actuals geladen: 1752 rijen


🔍 Testing XGBoost Model - RMSE per forecast day


2025-05-27 11:49:28,893 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:49:28,904 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:49:28,904 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:49:28,905 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:49:28,921 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 1: 🧪 Best params: {'colsample_bytree': 0.6, 'gamma': 0, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 300, 'reg_alpha': 0, 'reg_lambda': 1, 'subsample': 0.6}
Day 1: ✅ 167 test rows, Run: 03-15


2025-05-27 11:51:37,835 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:51:37,845 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:51:37,846 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:51:37,846 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:51:37,862 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 2: 🧪 Best params: {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 300, 'reg_alpha': 0, 'reg_lambda': 1, 'subsample': 0.6}
Day 2: ✅ 167 test rows, Run: 03-16


2025-05-27 11:54:19,095 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:54:19,106 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:54:19,106 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:54:19,107 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:54:19,126 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 3: 🧪 Best params: {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 500, 'reg_alpha': 0, 'reg_lambda': 1.5, 'subsample': 0.8}
Day 3: ✅ 167 test rows, Run: 03-17


2025-05-27 11:57:11,253 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:57:11,263 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:57:11,263 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:57:11,264 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:57:11,278 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 4: 🧪 Best params: {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 500, 'reg_alpha': 0, 'reg_lambda': 1.5, 'subsample': 0.6}
Day 4: ✅ 167 test rows, Run: 03-18


2025-05-27 11:59:59,454 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 11:59:59,467 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 11:59:59,467 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 11:59:59,468 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 11:59:59,487 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 5: 🧪 Best params: {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 0, 'reg_lambda': 1, 'subsample': 0.6}
Day 5: ✅ 167 test rows, Run: 03-19


2025-05-27 12:02:53,277 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 12:02:53,289 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 12:02:53,289 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 12:02:53,289 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 12:02:53,308 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 6: 🧪 Best params: {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 300, 'reg_alpha': 0, 'reg_lambda': 1.5, 'subsample': 0.6}
Day 6: ✅ 167 test rows, Run: 03-20


2025-05-27 12:06:14,529 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 12:06:14,544 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 12:06:14,545 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 12:06:14,545 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 12:06:14,571 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 7: 🧪 Best params: {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 0, 'reg_lambda': 1.5, 'subsample': 0.6}
Day 7: ✅ 167 test rows, Run: 03-21


2025-05-27 12:10:16,241 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 12:10:16,257 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 12:10:16,257 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 12:10:16,259 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 12:10:16,277 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 8: 🧪 Best params: {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 0, 'reg_lambda': 1, 'subsample': 0.8}
Day 8: ✅ 167 test rows, Run: 03-22


2025-05-27 12:13:14,496 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 12:13:14,507 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 12:13:14,508 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 12:13:14,508 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 12:13:14,524 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 9: 🧪 Best params: {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 300, 'reg_alpha': 0, 'reg_lambda': 2, 'subsample': 0.6}
Day 9: ✅ 167 test rows, Run: 03-23


2025-05-27 12:16:29,234 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 12:16:29,245 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 12:16:29,245 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 12:16:29,245 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 12:16:29,260 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 10: 🧪 Best params: {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 0, 'reg_lambda': 2, 'subsample': 0.6}
Day 10: ✅ 167 test rows, Run: 03-24


2025-05-27 12:18:58,245 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 12:18:58,256 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 12:18:58,256 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 12:18:58,256 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 12:18:58,273 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 11: 🧪 Best params: {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 0, 'reg_lambda': 1, 'subsample': 0.8}
Day 11: ✅ 167 test rows, Run: 03-25


2025-05-27 12:21:27,935 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 12:21:27,945 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 12:21:27,945 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 12:21:27,946 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 12:21:27,963 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 12: 🧪 Best params: {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.05, 'max_depth': 5, 'n_estimators': 100, 'reg_alpha': 0, 'reg_lambda': 1, 'subsample': 0.6}
Day 12: ✅ 167 test rows, Run: 03-26


2025-05-27 12:23:57,258 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 12:23:57,269 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 12:23:57,269 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 12:23:57,270 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 12:23:57,294 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 13: 🧪 Best params: {'colsample_bytree': 0.6, 'gamma': 0, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 0, 'reg_lambda': 1, 'subsample': 1}
Day 13: ✅ 167 test rows, Run: 03-27


2025-05-27 12:26:32,154 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 12:26:32,164 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 12:26:32,164 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 12:26:32,164 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 12:26:32,182 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 14: 🧪 Best params: {'colsample_bytree': 0.8, 'gamma': 0, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 500, 'reg_alpha': 0, 'reg_lambda': 1, 'subsample': 0.6}
Day 14: ✅ 167 test rows, Run: 03-28


2025-05-27 12:29:06,372 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 12:29:06,382 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 12:29:06,382 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 12:29:06,382 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 12:29:06,400 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 15: 🧪 Best params: {'colsample_bytree': 0.8, 'gamma': 0, 'learning_rate': 0.05, 'max_depth': 5, 'n_estimators': 100, 'reg_alpha': 0, 'reg_lambda': 1, 'subsample': 0.6}
Day 15: ✅ 167 test rows, Run: 03-29


2025-05-27 12:31:42,375 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 12:31:42,386 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 12:31:42,386 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 12:31:42,386 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 12:31:42,402 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 16: 🧪 Best params: {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 0, 'reg_lambda': 1, 'subsample': 0.6}
Day 16: ✅ 167 test rows, Run: 03-30


2025-05-27 12:34:15,724 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 12:34:15,734 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 12:34:15,734 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 12:34:15,735 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 12:34:15,751 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 17: 🧪 Best params: {'colsample_bytree': 0.8, 'gamma': 0, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 500, 'reg_alpha': 0, 'reg_lambda': 1, 'subsample': 0.6}
Day 17: ✅ 167 test rows, Run: 03-31


2025-05-27 12:36:46,623 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 12:36:46,633 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 12:36:46,633 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 12:36:46,633 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 12:36:46,650 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 18: 🧪 Best params: {'colsample_bytree': 0.8, 'gamma': 0, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 500, 'reg_alpha': 0, 'reg_lambda': 1, 'subsample': 0.6}
Day 18: ✅ 167 test rows, Run: 04-01


2025-05-27 12:39:16,987 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 12:39:16,996 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 12:39:16,996 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 12:39:16,996 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 12:39:17,013 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 19: 🧪 Best params: {'colsample_bytree': 0.6, 'gamma': 0, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 0, 'reg_lambda': 1.5, 'subsample': 0.8}
Day 19: ✅ 167 test rows, Run: 04-02


2025-05-27 12:41:57,729 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 12:41:57,740 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 12:41:57,741 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 12:41:57,741 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 12:41:57,758 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 20: 🧪 Best params: {'colsample_bytree': 0.8, 'gamma': 0, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 500, 'reg_alpha': 0, 'reg_lambda': 1, 'subsample': 0.6}
Day 20: ✅ 167 test rows, Run: 04-03


2025-05-27 12:44:32,638 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 12:44:32,648 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 12:44:32,648 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 12:44:32,649 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 12:44:32,666 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 21: 🧪 Best params: {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 0.1, 'reg_lambda': 2, 'subsample': 1}
Day 21: ✅ 167 test rows, Run: 04-04


2025-05-27 12:47:07,594 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 12:47:07,604 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 12:47:07,604 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 12:47:07,604 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 12:47:07,621 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 22: 🧪 Best params: {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 300, 'reg_alpha': 0, 'reg_lambda': 1, 'subsample': 0.6}
Day 22: ✅ 167 test rows, Run: 04-05


2025-05-27 12:49:40,652 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 12:49:40,661 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 12:49:40,662 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 12:49:40,662 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 12:49:40,680 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 23: 🧪 Best params: {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 300, 'reg_alpha': 0, 'reg_lambda': 1, 'subsample': 0.6}
Day 23: ✅ 167 test rows, Run: 04-06


2025-05-27 12:52:18,791 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 12:52:18,800 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 12:52:18,801 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 12:52:18,801 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 12:52:18,817 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 24: 🧪 Best params: {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 300, 'reg_alpha': 0, 'reg_lambda': 1, 'subsample': 0.6}
Day 24: ✅ 167 test rows, Run: 04-07


2025-05-27 12:55:01,978 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 12:55:01,990 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 12:55:01,990 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 12:55:01,990 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 12:55:02,007 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 25: 🧪 Best params: {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 300, 'reg_alpha': 0, 'reg_lambda': 1, 'subsample': 0.6}
Day 25: ✅ 167 test rows, Run: 04-08


2025-05-27 12:57:48,269 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 12:57:48,279 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 12:57:48,279 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 12:57:48,279 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 12:57:48,298 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 26: 🧪 Best params: {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 300, 'reg_alpha': 0, 'reg_lambda': 1, 'subsample': 0.6}
Day 26: ✅ 167 test rows, Run: 04-09


2025-05-27 13:00:37,463 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 13:00:37,475 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 13:00:37,475 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 13:00:37,476 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 13:00:37,494 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 27: 🧪 Best params: {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 300, 'reg_alpha': 0, 'reg_lambda': 1, 'subsample': 0.6}
Day 27: ✅ 167 test rows, Run: 04-10


2025-05-27 13:03:21,036 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 13:03:21,046 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 13:03:21,046 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 13:03:21,046 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 13:03:21,063 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 28: 🧪 Best params: {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 300, 'reg_alpha': 0, 'reg_lambda': 1.5, 'subsample': 0.6}
Day 28: ✅ 167 test rows, Run: 04-11


2025-05-27 13:06:05,382 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 13:06:05,393 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 13:06:05,393 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 13:06:05,393 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 13:06:05,413 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 29: 🧪 Best params: {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 500, 'reg_alpha': 0.1, 'reg_lambda': 1, 'subsample': 0.8}
Day 29: ✅ 167 test rows, Run: 04-12


2025-05-27 13:08:52,078 - build_training_set - INFO - ✅ Added actual prices to 168 forecast rows
  df_combined = pd.concat([df_actuals, df_preds], ignore_index=True)
2025-05-27 13:08:52,088 - build_training_set - INFO - 📦 Eindtabel bevat: 1920 rijen, 31 kolommen
2025-05-27 13:08:52,089 - build_training_set - INFO - 🧾 Kolommen: ['Price', 'target_datetime', 'Load', 'shortwave_radiation', 'temperature_2m', 'direct_normal_irradiance', 'diffuse_radiation', 'Flow_NO', 'yearday_cos', 'Flow_GB', 'month', 'is_dst', 'yearday_sin', 'is_non_working_day', 'hour_cos', 'is_weekend', 'cloud_cover', 'weekday_sin', 'hour_sin', 'weekday_cos', 'apparent_temperature', 'day_of_week', 'day_of_year', 'direct_radiation', 'hour', 'is_holiday', 'local_datetime', 'run_date', 'snowfall', 'wind_direction_10m', 'wind_speed_10m']
2025-05-27 13:08:52,089 - build_training_set - INFO - ❓ Price NaN count: 0/1920 (0.0%)
2025-05-27 13:08:52,106 - build_training_set - INFO - ✅ Opgeslagen als training_set in WARP.db
2025-05-

Day 30: 🧪 Best params: {'colsample_bytree': 0.8, 'gamma': 0, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 0, 'reg_lambda': 1, 'subsample': 0.8}
Day 30: ✅ 167 test rows, Run: 04-13

📊 OVERALL RMSE - XGBoost Model
Successful runs: 30/30
 iteration   run_date  valid_predictions  rmse
         1 2025-03-15                167  0.08
         2 2025-03-16                167  0.10
         3 2025-03-17                167  0.10
         4 2025-03-18                167  0.10
         5 2025-03-19                167  0.10
         6 2025-03-20                167  0.09
         7 2025-03-21                167  0.10
         8 2025-03-22                167  0.09
         9 2025-03-23                167  0.06
        10 2025-03-24                167  0.08
        11 2025-03-25                167  0.10
        12 2025-03-26                167  0.10
        13 2025-03-27                167  0.11
        14 2025-03-28                167  0.11
        15 2025-03-29           

In [None]:
# Get feature importances from the trained XGBoost model
importances = model.feature_importances_
feature_names = model.feature_names_in_

# Create a DataFrame for better visualization
feat_imp_df = pd.DataFrame({
    'Feature': feature_names,
    'Importance': importances
}).sort_values(by='Importance', ascending=False)

# Display the feature importances
print(feat_imp_df)

# Optional: Plot feature importances
plt.figure(figsize=(10, 6))
sns.barplot(x='Importance', y='Feature', data=feat_imp_df, palette='viridis')
plt.title('Feature Importance (XGBoost)')
plt.tight_layout()
plt.show()

In [None]:
# Get feature importances from the last trained model
importances = model.feature_importances_
feature_names = model.feature_names_in_

# Create a DataFrame for better visualization
feat_imp_df = pd.DataFrame({
    'Feature': feature_names,
    'Importance': importances
}).sort_values(by='Importance', ascending=False)

# Display the feature importances
print(feat_imp_df)

# Optional: Plot feature importances
plt.figure(figsize=(10, 6))
sns.barplot(x='Importance', y='Feature', data=feat_imp_df, palette='viridis')
plt.title('Feature Importance (XGBoost)')
plt.tight_layout()
plt.show()