In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import sklearn.preprocessing
from sklearn.metrics import r2_score
from pandas import read_csv
from datetime import datetime
from keras.layers import Dense,Dropout,SimpleRNN,LSTM
from keras.models import Sequential

In [9]:
df = pd.read_csv('./hourly_predictions/hourly_charging_data.csv')
df.head()

Unnamed: 0,Hour,Chargers
0,2022-09-28 12:00:00,2.409
1,2022-09-28 13:00:00,6.0426
2,2022-09-28 14:00:00,6.0271
3,2022-09-28 15:00:00,6.0016
4,2022-09-28 16:00:00,7.1028


In [10]:
# 1. Convert Hour to DateTime type
df['Hour'] = pd.to_datetime(df['Hour'])

# 2. Set Hour as the DataFrame index
df = df.set_index('Hour')

# 3. Reindex to every hour in the range from the min to max timestamps
all_hours = pd.date_range(start=df.index.min(), end=df.index.max(), freq='H')
df_reindexed = df.reindex(all_hours)

# 4. Identify which rows are missing
missing_rows = df_reindexed[df_reindexed['Chargers'].isnull()]
print("Missing rows:")
print(missing_rows)

Missing rows:
                     Chargers
2022-10-05 13:00:00       NaN
2022-10-05 14:00:00       NaN
2022-10-05 15:00:00       NaN
2022-10-05 16:00:00       NaN
2022-10-05 17:00:00       NaN
...                       ...
2023-09-06 14:00:00       NaN
2023-09-06 15:00:00       NaN
2023-09-06 16:00:00       NaN
2024-03-31 02:00:00       NaN
2024-05-22 17:00:00       NaN

[1042 rows x 1 columns]


In [11]:
# Create a string key for month-day-hour, e.g. '09-06-03'
df_reindexed['month_day_hour'] = df_reindexed.index.strftime('%m-%d-%H')

# Compute the mean for each specific month/day/hour across all years
mdh_mean = df_reindexed.groupby('month_day_hour')['Chargers'].transform('mean')

# Fill missing values with that mean
df['Chargers'] = df_reindexed['Chargers'].fillna(mdh_mean)

# Clean up the extra grouping column
df_reindexed.drop(columns=['month_day_hour'], inplace=True)

df.isna().sum()
df

Unnamed: 0_level_0,Chargers
Hour,Unnamed: 1_level_1
2022-09-28 12:00:00,2.4090
2022-09-28 13:00:00,6.0426
2022-09-28 14:00:00,6.0271
2022-09-28 15:00:00,6.0016
2022-09-28 16:00:00,7.1028
...,...
2025-02-19 08:00:00,45.9840
2025-02-19 09:00:00,106.6935
2025-02-19 10:00:00,101.6063
2025-02-19 11:00:00,84.9264


In [None]:
"""
Multivariate, 24-step LSTM – trains with a 168-hour look-back,
saves both the network and the fitted MinMaxScaler.
"""

import pathlib, math, holidays, joblib    # ← NEW: joblib
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense
from tensorflow.keras.callbacks import EarlyStopping

# ───────────────────────── CONFIG ──────────────────────────
TARGET_COL   = "Chargers"
LOOK_BACK    = 72          # 7 days window (168 × 1 h)
N_FORECAST   = 24             # predict 24 h ahead
EPOCHS       = 50
BATCH_SIZE   = 256
PATIENCE     = 8
MODEL_PATH   = pathlib.Path("./models/chargers/lstm_chargers.keras")
SCALER_PATH  = pathlib.Path("./models/chargers/charger_scaler.joblib")  # ← NEW
START_DATE   = "2022-09-11"
END_DATE     = "2025-02-19"

# ───────────────────────── DATA LOAD ───────────────────────
df = df.loc[START_DATE:END_DATE].copy()      # replace with your loader

# ─────────────────────── FEATURE ENGINEERING ───────────────
be_holidays = set(holidays.country_holidays(
                  "BE", years=[2022, 2023, 2024, 2025]).keys())

def add_terugkomdag_feature(df):
    # List of 'terugkomdagen' dates
    terugkomdagen = [
        datetime(2023, 9, 13), datetime(2023, 10, 26), datetime(2023, 11, 14), datetime(2023, 12, 20),
        datetime(2024, 1, 12), datetime(2024, 2, 7), datetime(2024, 3, 14), datetime(2024, 4, 16),
        datetime(2024, 5, 13), datetime(2024, 6, 7), datetime(2024, 3, 16), datetime(2024, 10, 22),
        datetime(2024, 11, 28), datetime(2024, 12, 18), datetime(2025, 1, 10), datetime(2025, 2, 13),
        datetime(2025, 3, 18), datetime(2025, 4, 22), datetime(2025, 5, 12), datetime(2025, 6, 6)
    ]
    df['is_terugkomdag'] = df.index.to_series().dt.date.isin([d.date() for d in terugkomdagen]).astype(int)

    return df
def add_cumulative_ev_phev_feature(df):
    from datetime import datetime

    # List of (date, cumulative_count) from your analysis
    cumulative_data = {
        datetime(2024, 6, 20): 35,
        datetime(2024, 6, 25): 36,
        datetime(2024, 9, 5): 38,
        datetime(2024, 9, 12): 41,
        datetime(2024, 9, 27): 42,
        datetime(2024, 10, 15): 43,
        datetime(2024, 10, 29): 45,
        datetime(2024, 11, 5): 46,
        datetime(2024, 11, 26): 47,
        datetime(2025, 1, 9): 48,
        datetime(2025, 1, 23): 49,
        datetime(2025, 1, 28): 50,
        datetime(2025, 2, 4): 51,
    }

    # Turn it into a Series and reindex to all dates in your dataset
    ev_series = pd.Series(cumulative_data)
    ev_series = ev_series.reindex(df.index.union(ev_series.index)).sort_index().ffill().fillna(0)

    # Add it to your DataFrame
    df["cumulative_ev_phev_count"] = ev_series.reindex(df.index).astype(int)

    return df

add_terugkomdag_feature(df)
add_cumulative_ev_phev_feature(df)

df["hour"]           = df.index.hour
df["day_of_week"]    = df.index.dayofweek
df["month"]          = df.index.month
df["is_weekend"]     = (df["day_of_week"] >= 5).astype(int)
df["is_festive"]     = df.index.to_series().apply(
                         lambda d: int(d.date() in be_holidays))

df["working_hour"]   = df["hour"].between(8, 18).astype(int)
df["is_summer"]      = df["month"].isin([6, 7, 8]).astype(int)
df["is_winter"]      = df["month"].isin([12, 1, 2]).astype(int)
df["is_morning_peak"] = df["hour"].between(7, 9).astype(int)
df["is_evening_peak"] = df["hour"].between(17, 20).astype(int)
df["hour_sin"]       = np.sin(2*np.pi*df["hour"]/24)
df["hour_cos"]       = np.cos(2*np.pi*df["hour"]/24)
df["dow_sin"]        = np.sin(2*np.pi*df["day_of_week"]/7)
df["dow_cos"]        = np.cos(2*np.pi*df["day_of_week"]/7)

FEATURE_COLS = [c for c in df.columns if c != TARGET_COL]

# ─────────────── 1️⃣  CHRONOLOGICAL SPLIT  ────────────────
train_size = int(len(df)*0.8)
val_size   = int(len(df)*0.1)

df_train = df.iloc[:train_size]
df_val   = df.iloc[train_size:train_size+val_size]
df_test  = df.iloc[train_size+val_size:]

# ─────────────── 2️⃣  FIT & SAVE SCALER  ──────────────────
scaler = MinMaxScaler()                                     # DOCS :contentReference[oaicite:2]{index=2}
scaler.fit(df_train[[TARGET_COL] + FEATURE_COLS])           # fit **train only** :contentReference[oaicite:3]{index=3}
joblib.dump(scaler, SCALER_PATH)                            # persist scaler :contentReference[oaicite:4]{index=4}
print("Scaler saved →", SCALER_PATH)

def scale(frame):        # helper to apply the saved scaler
    cols = [TARGET_COL] + FEATURE_COLS
    return pd.DataFrame(scaler.transform(frame[cols]), columns=cols,
                        index=frame.index)

df_train_s, df_val_s, df_test_s = map(scale, (df_train, df_val, df_test))

# ─────────────── 3️⃣  BUILD INPUT / LABEL WINDOWS ─────────
def make_xy(frame, look_back, horizon):
    data = frame[[TARGET_COL]+FEATURE_COLS].values
    X, y = [], []
    for i in range(look_back, len(data)-horizon+1):
        X.append(data[i-look_back:i])
        y.append(data[i:i+horizon, 0])
    return np.array(X), np.array(y)

X_train, y_train = make_xy(df_train_s, LOOK_BACK, N_FORECAST)
X_val,   y_val   = make_xy(df_val_s,   LOOK_BACK, N_FORECAST)
X_test,  y_test  = make_xy(df_test_s,  LOOK_BACK, N_FORECAST)

# ─────────────── 4️⃣  DEFINE & TRAIN MODEL ────────────────
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(LOOK_BACK, X_train.shape[2])),
    Dropout(0.2),
    LSTM(64, return_sequences=True),
    Dropout(0.2),
    LSTM(64),
    Dropout(0.2),
    Dense(N_FORECAST)
])
model.compile(optimizer="adam", loss="mse")
cb = EarlyStopping(patience=PATIENCE, restore_best_weights=True)  # :contentReference[oaicite:5]{index=5}

model.fit(X_train, y_train,
          epochs=EPOCHS, batch_size=BATCH_SIZE,
          validation_data=(X_val, y_val), callbacks=[cb], verbose=2)

# ─────────────── 5️⃣  EVALUATE  ───────────────────────────
y_pred = model.predict(X_test)

rmse = math.sqrt(mean_squared_error(y_test, y_pred))
mae  = mean_absolute_error(y_test, y_pred)
r2   = r2_score(y_test, y_pred)
print(f"TEST 24-step →  RMSE={rmse:.3f}  MAE={mae:.3f}  R²={r2:.3f}")

# ─────────────── 6️⃣  SAVE MODEL (.keras format) ──────────
MODEL_PATH.parent.mkdir(parents=True, exist_ok=True)
model.save(MODEL_PATH)                                       # Keras v3 format :contentReference[oaicite:6]{index=6}
print("Model saved →", MODEL_PATH)


Scaler saved → models\chargers\charger_scaler.joblib
Epoch 1/50


  super().__init__(**kwargs)


62/62 - 15s - 246ms/step - loss: 0.0145 - val_loss: 0.0453
Epoch 2/50
62/62 - 12s - 201ms/step - loss: 0.0100 - val_loss: 0.0322
Epoch 3/50
62/62 - 18s - 292ms/step - loss: 0.0084 - val_loss: 0.0281
Epoch 4/50
62/62 - 20s - 319ms/step - loss: 0.0073 - val_loss: 0.0232
Epoch 5/50
62/62 - 20s - 316ms/step - loss: 0.0067 - val_loss: 0.0244
Epoch 6/50
62/62 - 20s - 324ms/step - loss: 0.0062 - val_loss: 0.0240
Epoch 7/50
62/62 - 20s - 320ms/step - loss: 0.0058 - val_loss: 0.0241
Epoch 8/50
62/62 - 20s - 319ms/step - loss: 0.0054 - val_loss: 0.0254
Epoch 9/50
62/62 - 17s - 278ms/step - loss: 0.0051 - val_loss: 0.0252
Epoch 10/50
62/62 - 23s - 379ms/step - loss: 0.0048 - val_loss: 0.0234
Epoch 11/50
62/62 - 20s - 328ms/step - loss: 0.0046 - val_loss: 0.0266
Epoch 12/50
62/62 - 20s - 330ms/step - loss: 0.0044 - val_loss: 0.0247
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 45ms/step
TEST 24-step →  RMSE=0.181  MAE=0.109  R²=0.591
Model saved → models\chargers\lstm_chargers.ke

In [13]:
# ───────────────────────── IMPORTS ─────────────────────────
import pathlib, math, holidays, joblib, warnings
from datetime import datetime

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt               # (kept because you may plot later)

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

warnings.filterwarnings("ignore", category=FutureWarning)   # optional

# ───────────────────────── CONFIG ──────────────────────────
TARGET_COL   = "Chargers"
LOOK_BACK    = 168         # 3 days of hourly lags
N_FORECAST   = 24          # predict 24 hours ahead
EPOCHS       = 50
BATCH_SIZE   = 256
PATIENCE     = 8

MODEL_PATH   = pathlib.Path("./models/chargers/ann_chargers.keras")
SCALER_PATH  = pathlib.Path("./models/chargers/charger_scaler.joblib")

START_DATE   = "2022-09-11"
END_DATE     = "2025-02-19"

# ───────────────────────── DATA LOAD ───────────────────────
# ▸ Replace this with your real loader (CSV, SQL, etc.)
#   The dataframe *must* have a DateTimeIndex at hourly frequency
#   and at least one column called "Chargers".
#

# ─────────────────────── FEATURE ENGINEERING ───────────────
be_holidays = set(
    holidays.country_holidays("BE", years=[2022, 2023, 2024, 2025]).keys()
)

def add_terugkomdag_feature(df):
    terugkomdagen = [
        datetime(2023,  9, 13), datetime(2023, 10, 26), datetime(2023, 11, 14),
        datetime(2023, 12, 20), datetime(2024,  1, 12), datetime(2024,  2,  7),
        datetime(2024,  3, 14), datetime(2024,  4, 16), datetime(2024,  5, 13),
        datetime(2024,  6,  7), datetime(2024,  3, 16), datetime(2024, 10, 22),
        datetime(2024, 11, 28), datetime(2024, 12, 18), datetime(2025,  1, 10),
        datetime(2025,  2, 13), datetime(2025,  3, 18), datetime(2025,  4, 22),
        datetime(2025,  5, 12), datetime(2025,  6,  6),
    ]
    df["is_terugkomdag"] = (
        df.index.to_series().dt.date.isin([d.date() for d in terugkomdagen]).astype(int)
    )
    return df

def add_cumulative_ev_phev_feature(df):
    cumulative_data = {
        datetime(2024,  6, 20): 35, datetime(2024,  6, 25): 36,
        datetime(2024,  9,  5): 38, datetime(2024,  9, 12): 41,
        datetime(2024,  9, 27): 42, datetime(2024, 10, 15): 43,
        datetime(2024, 10, 29): 45, datetime(2024, 11,  5): 46,
        datetime(2024, 11, 26): 47, datetime(2025,  1,  9): 48,
        datetime(2025,  1, 23): 49, datetime(2025,  1, 28): 50,
        datetime(2025,  2,  4): 51,
    }
    ev_series = (
        pd.Series(cumulative_data)
          .reindex(df.index.union(cumulative_data.keys()))
          .sort_index()
          .ffill()
          .fillna(0)
    )
    df["cumulative_ev_phev_count"] = ev_series.reindex(df.index).astype(int)
    return df

add_terugkomdag_feature(df)
add_cumulative_ev_phev_feature(df)

df["hour"]            = df.index.hour
df["day_of_week"]     = df.index.dayofweek
df["month"]           = df.index.month
df["is_weekend"]      = (df["day_of_week"] >= 5).astype(int)
df["is_festive"]      = df.index.to_series().apply(lambda d: int(d.date() in be_holidays))
df["working_hour"]    = df["hour"].between(8, 18).astype(int)
df["is_summer"]       = df["month"].isin([6, 7, 8]).astype(int)
df["is_winter"]       = df["month"].isin([12, 1, 2]).astype(int)
df["is_morning_peak"] = df["hour"].between(7,  9).astype(int)
df["is_evening_peak"] = df["hour"].between(17, 20).astype(int)
df["hour_sin"]        = np.sin(2*np.pi*df["hour"]/24)
df["hour_cos"]        = np.cos(2*np.pi*df["hour"]/24)
df["dow_sin"]         = np.sin(2*np.pi*df["day_of_week"]/7)
df["dow_cos"]         = np.cos(2*np.pi*df["day_of_week"]/7)

FEATURE_COLS = [c for c in df.columns if c != TARGET_COL]

# ─────────────── 1️⃣  CHRONOLOGICAL SPLIT  ────────────────
train_size = int(len(df) * 0.80)
val_size   = int(len(df) * 0.10)

df_train = df.iloc[:train_size]
df_val   = df.iloc[train_size:train_size + val_size]
df_test  = df.iloc[train_size + val_size:]

# ─────────────── 2️⃣  FIT & SAVE SCALER  ──────────────────
scaler = MinMaxScaler()
scaler.fit(df_train[[TARGET_COL] + FEATURE_COLS])
SCALER_PATH.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(scaler, SCALER_PATH)
print("Scaler saved →", SCALER_PATH)

def scale(frame):
    cols = [TARGET_COL] + FEATURE_COLS
    return pd.DataFrame(
        scaler.transform(frame[cols]),
        columns=cols,
        index=frame.index,
    )

df_train_s, df_val_s, df_test_s = map(scale, (df_train, df_val, df_test))

# ─────────────── 3️⃣  BUILD INPUT / LABEL WINDOWS ─────────
def make_xy(frame, look_back, horizon):
    """Return (X, y) where:
       • X shape = (samples, look_back, n_features)
       • y shape = (samples, horizon)   – multi-step forecast
    """
    data = frame[[TARGET_COL] + FEATURE_COLS].values
    X, y = [], []
    for i in range(look_back, len(data) - horizon + 1):
        X.append(data[i - look_back : i])
        y.append(data[i : i + horizon, 0])
    return np.array(X), np.array(y)

X_train, y_train = make_xy(df_train_s, LOOK_BACK, N_FORECAST)
X_val,   y_val   = make_xy(df_val_s,   LOOK_BACK, N_FORECAST)
X_test,  y_test  = make_xy(df_test_s,  LOOK_BACK, N_FORECAST)

# ─────────────── 3️⃣½  RESHAPE FOR ANN  ───────────────────
# Flatten time dimension → feed-forward ANN expects 2-D inputs
n_features = X_train.shape[2]

X_train_f = X_train.reshape(X_train.shape[0], -1)
X_val_f   = X_val.reshape(  X_val.shape[0],  -1)
X_test_f  = X_test.reshape( X_test.shape[0], -1)

# ─────────────── 4️⃣  DEFINE & TRAIN ANN  ────────────────
model = Sequential([
    InputLayer(input_shape=(LOOK_BACK * n_features,)),
    Dense(256, activation="relu"),
    Dropout(0.3),
    Dense(128, activation="relu"),
    Dropout(0.3),
    Dense(64, activation="relu"),
    Dense(N_FORECAST)        # 24 output neurons – one per hour ahead
])

model.compile(optimizer="adam", loss="mse")

cb = EarlyStopping(
    patience=PATIENCE,
    restore_best_weights=True,
    verbose=1
)

history = model.fit(
    X_train_f, y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(X_val_f, y_val),
    callbacks=[cb],
    verbose=2
)

# ─────────────── 5️⃣  EVALUATE  ───────────────────────────
y_pred = model.predict(X_test_f)

rmse = math.sqrt(mean_squared_error(y_test, y_pred))
mae  = mean_absolute_error(y_test, y_pred)
r2   = r2_score(y_test, y_pred)

print(f"TEST 24-step ➜  RMSE = {rmse:.3f}   MAE = {mae:.3f}   R² = {r2:.3f}")

# ─────────────── 6️⃣  SAVE MODEL (.keras format) ──────────
MODEL_PATH.parent.mkdir(parents=True, exist_ok=True)
model.save(MODEL_PATH)
print("Model saved →", MODEL_PATH)


Scaler saved → models\chargers\charger_scaler.joblib
Epoch 1/50




62/62 - 2s - 33ms/step - loss: 0.0347 - val_loss: 0.0570
Epoch 2/50
62/62 - 1s - 12ms/step - loss: 0.0120 - val_loss: 0.0414
Epoch 3/50
62/62 - 1s - 10ms/step - loss: 0.0095 - val_loss: 0.0312
Epoch 4/50
62/62 - 1s - 10ms/step - loss: 0.0082 - val_loss: 0.0264
Epoch 5/50
62/62 - 1s - 10ms/step - loss: 0.0074 - val_loss: 0.0249
Epoch 6/50
62/62 - 1s - 10ms/step - loss: 0.0069 - val_loss: 0.0237
Epoch 7/50
62/62 - 1s - 10ms/step - loss: 0.0068 - val_loss: 0.0225
Epoch 8/50
62/62 - 1s - 10ms/step - loss: 0.0064 - val_loss: 0.0249
Epoch 9/50
62/62 - 1s - 9ms/step - loss: 0.0063 - val_loss: 0.0225
Epoch 10/50
62/62 - 1s - 10ms/step - loss: 0.0061 - val_loss: 0.0202
Epoch 11/50
62/62 - 1s - 10ms/step - loss: 0.0059 - val_loss: 0.0215
Epoch 12/50
62/62 - 1s - 10ms/step - loss: 0.0059 - val_loss: 0.0229
Epoch 13/50
62/62 - 1s - 10ms/step - loss: 0.0057 - val_loss: 0.0241
Epoch 14/50
62/62 - 1s - 10ms/step - loss: 0.0056 - val_loss: 0.0247
Epoch 15/50
62/62 - 1s - 12ms/step - loss: 0.0055 - val

In [None]:
import pandas as pd, numpy as np, tensorflow as tf, holidays, joblib
import matplotlib.pyplot as plt
from datetime import datetime
from pathlib import Path

# ───────────────────────── CONFIG ──────────────────────────
MODEL_PATH  = Path("./models/chargers/lstm_chargers.keras")
SCALER_PATH = Path("./models/chargers/charger_scaler.joblib")
CSV_PATH    = "./3days_charging_data.csv"

DATE_COL    = "Date"
TARGET_COL  = "Chargers"
LOOK_BACK   = 48          # 3-day context
FORECAST_HR = 24          # 24-step output
HIST_HRS    = 72          # plot last 72 h

# Same feature order used during training
FEATURE_COLS = [
    "hour", "day_of_week", "month", "is_weekend", "is_festive", "working_hour",
    "is_summer", "is_winter", "is_morning_peak", "is_evening_peak",
    "hour_sin", "hour_cos", "dow_sin", "dow_cos",
    "is_terugkomdag", "cumulative_ev_phev_count"
]

# ─────────────── feature engineering helpers ───────────────
def add_features(df_in: pd.DataFrame) -> pd.DataFrame:
    be_holidays = set(holidays.country_holidays(
        "BE", years=[2022, 2023, 2024, 2025]).keys())

    terugkomdagen = {
        datetime(2023, 9, 13), datetime(2023,10,26), datetime(2023,11,14),
        datetime(2023,12,20), datetime(2024, 1,12), datetime(2024, 2, 7),
        datetime(2024, 3,14), datetime(2024, 4,16), datetime(2024, 5,13),
        datetime(2024, 6, 7), datetime(2024, 3,16), datetime(2024,10,22),
        datetime(2024,11,28), datetime(2024,12,18), datetime(2025, 1,10),
        datetime(2025, 2,13), datetime(2025, 3,18), datetime(2025, 4,22),
        datetime(2025, 5,12), datetime(2025, 6, 6)
    }

    cumulative_data = {
        datetime(2024, 6,20): 35, datetime(2024, 6,25): 36, datetime(2024, 9, 5): 38,
        datetime(2024, 9,12): 41, datetime(2024, 9,27): 42, datetime(2024,10,15): 43,
        datetime(2024,10,29): 45, datetime(2024,11, 5): 46, datetime(2024,11,26): 47,
        datetime(2025, 1, 9): 48, datetime(2025, 1,23): 49, datetime(2025, 1,28): 50,
        datetime(2025, 2, 4): 51,
    }

    df = df_in.copy()

    # calendar basics
    df["hour"]        = df.index.hour
    df["day_of_week"] = df.index.dayofweek
    df["month"]       = df.index.month

    # categorical flags
    df["is_weekend"]       = (df["day_of_week"] >= 5).astype(int)
    df["is_festive"]       = df.index.to_series().apply(lambda d: int(d.date() in be_holidays))
    df["working_hour"]     = df["hour"].between(8, 18).astype(int)
    df["is_summer"]        = df["month"].isin([6,7,8]).astype(int)
    df["is_winter"]        = df["month"].isin([12,1,2]).astype(int)
    df["is_morning_peak"]  = df["hour"].between(7, 9).astype(int)
    df["is_evening_peak"]  = df["hour"].between(17,20).astype(int)

    # cyclical encodings
    df["hour_sin"] = np.sin(2*np.pi*df["hour"]/24)
    df["hour_cos"] = np.cos(2*np.pi*df["hour"]/24)
    df["dow_sin"]  = np.sin(2*np.pi*df["day_of_week"]/7)
    df["dow_cos"]  = np.cos(2*np.pi*df["day_of_week"]/7)

    # business flags
    df["is_terugkomdag"] = df.index.to_series().dt.date.isin(
        [d.date() for d in terugkomdagen]).astype(int)

    ev_series = (pd.Series(cumulative_data)
                   .reindex(df.index.union(cumulative_data.keys()))
                   .sort_index().ffill().fillna(0))
    df["cumulative_ev_phev_count"] = ev_series.reindex(df.index).astype(int)
    return df


def load_prepared(path):
    df = pd.read_csv(path, parse_dates=[DATE_COL], index_col=DATE_COL).sort_index()
    return add_features(df)

# ────────────────────── reversible scaling ──────────────────────
def inverse_target_direct(vec, scaler):
    """Method 1: direct min/max formula."""
    tmin, tmax = scaler.data_min_[0], scaler.data_max_[0]
    return vec * (tmax - tmin) + tmin

def inverse_target_via_params(vec, scaler):
    """Method 2: using scaler.min_ and scaler.scale_."""
    # transform: X_scaled = X * scale_ + min_
    # invert:     X = (X_scaled - min_) / scale_
    return (vec - scaler.min_[0]) / scaler.scale_[0]

# ────────────────── forecasting & debug ──────────────────
def forecast_next_24h(model, scaler, df):
    # 1) build & scale the look-back window:
    window = df[[TARGET_COL] + FEATURE_COLS].tail(LOOK_BACK).values
    X = scaler.transform(window).reshape(1, LOOK_BACK, -1)
    # 2) get normalized 24-step output:
    preds_norm = model.predict(X, verbose=0)[0]

    # 3a) inverse-scale by direct formula:
    preds_direct = inverse_target_direct(preds_norm, scaler)
    # 3b) inverse-scale by scaler params:
    preds_params = inverse_target_via_params(preds_norm, scaler)

    # 4) quick sanity print:
    print("\npreds_norm   :", np.round(preds_norm[:5], 4))
    print("direct inv   :", np.round(preds_direct[:5], 4))
    print("params inv   :", np.round(preds_params[:5], 4))

    # 5) build timestamp index
    idx = pd.date_range(df.index[-1] + pd.Timedelta(hours=1),
                        periods=FORECAST_HR, freq="h")
    # 6) return DataFrame from *one* of the methods (they match)
    return pd.DataFrame({"Predicted_kWh": preds_direct}, index=idx)

if __name__ == "__main__":
    model  = tf.keras.models.load_model(MODEL_PATH)
    scaler = joblib.load(SCALER_PATH)

    # debug: check column ordering matches scaler
    print("Scaler expects:", list(scaler.feature_names_in_))
    print("We supply   :", [TARGET_COL] + FEATURE_COLS)

    df_hist    = load_prepared(CSV_PATH)
    forecast_df= forecast_next_24h(model, scaler, df_hist)

    # print results
    print("\nNext-day forecast (kWh):")
    print(forecast_df.to_string())

    past = df_hist[TARGET_COL].iloc[-HIST_HRS:]
    print("\nLast 72 h actual consumption (kWh):")
    print(past.to_string())

    # plot
    plt.figure(figsize=(12,4))
    plt.plot(past.index, past.values, label="Past 72 h")
    plt.plot(forecast_df.index, forecast_df["Predicted_kWh"], "-o",
             label="Forecast next 24 h")
    plt.title("Charger load – history vs forecast")
    plt.xlabel("Hour"); plt.ylabel("kWh"); plt.legend(); plt.tight_layout(); plt.show()
