In [1]:
import os, time, warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# ML / DL
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers, callbacks
import keras_tuner as kt

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [2]:
# ------------------------------------------------------------
# Run configuration: everything a reader might want to tune.
# ------------------------------------------------------------
LAGS          = list(range(1, 25))         # past 24 hours
STEP          = "H"                        # frequency alias handed to DataFrame.asfreq
MAX_EPOCHS    = 50                         # <<< reduced
EARLY_STOP    = 5                          # EarlyStopping patience, in epochs
TRIALS        = 20                         # max_trials for the KerasTuner search
EXEC_PER_TR   = 2                          # executions_per_trial for the KerasTuner search
BATCH_SIZE    = 64
KEY_DATES     = ["2024-01-01", "2024-02-01", "2024-03-01", "2024-04-01"]
SEED          = 42                         # single seed for Python, NumPy and TensorFlow

# Weight initialisation, dropout and batch shuffling are all random; seeding
# them up front makes a Restart & Run All reproduce the same numbers.
tf.keras.utils.set_random_seed(SEED)

# Paired with the "Run completed" line printed by the last cell.
print(f"Run started  {datetime.now()}")

Run started  2025-06-14 13:41:42.586647


## load data

In [None]:
# Long CSV column names -> short names used in the rest of the notebook.
column_aliases = {
    "time": "ts",
    "electricity": "pv",
    "irradiance_direct": "dir_irr",
    "irradiance_diffuse": "dif_irr",
    "temperature": "temp",
}

# Lines starting with "#" in the CSV are skipped as comments.
raw = pd.read_csv(
    "../../data/renewables/pv_with_weather_data.csv",
    comment="#",
    parse_dates=["time"],
)

# Index by timestamp and conform to the STEP grid; asfreq leaves NaN rows
# for any timestamps that are missing from the file.
df = raw.rename(columns=column_aliases).set_index("ts").asfreq(STEP)

span_start, span_end = df.index.min(), df.index.max()
print(f"Data span : {span_start} – {span_end} ({len(df):,} rows)")

## feature engineering

In [None]:
def make_features(data, lags, include_time=True, include_weather=True):
    """Build the supervised-learning table for the PV forecaster.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a datetime index and a ``pv`` column; ``dir_irr``,
        ``dif_irr`` and ``temp`` are also read when ``include_weather`` is set.
    lags : iterable of int
        Row offsets; each one yields a ``lag_<k>`` column holding ``pv``
        shifted down by ``k`` rows.
    include_time : bool, default True
        Add sine/cosine encodings of hour-of-day (period 24) and
        day-of-year (period 365).
    include_weather : bool, default True
        Copy the three weather columns through unchanged.

    Returns
    -------
    pandas.DataFrame
        Feature columns followed by a ``target`` column (the unshifted ``pv``),
        with every row containing a NaN removed.
    """
    pv = data["pv"]

    # Lagged copies of the target, in the order given by `lags`.
    X = pd.DataFrame(index=data.index).assign(
        **{f"lag_{lag}": pv.shift(lag) for lag in lags}
    )

    if include_time:
        # Cyclical encodings so the end of a period sits next to its start.
        hour_angle = 2 * np.pi * data.index.hour / 24
        doy_angle = 2 * np.pi * data.index.dayofyear / 365
        X = X.assign(
            sin_hour=np.sin(hour_angle),
            cos_hour=np.cos(hour_angle),
            sin_doy=np.sin(doy_angle),
            cos_doy=np.cos(doy_angle),
        )

    if include_weather:
        for col in ("dir_irr", "dif_irr", "temp"):
            X[col] = data[col]

    # Rows with any missing lag, feature or target are dropped.
    target = pv.rename("target")
    return X.join(target).dropna()

# Full design matrix (lags + time encodings + weather) for the whole series.
feats_all = make_features(data=df, lags=LAGS)
print("Feature matrix shape :", feats_all.shape)

## split

In [None]:
# ------------------------------------------------------------
# 3. SPLITS
# ------------------------------------------------------------
# Rows up to the end of 2023 are used for fitting and tuning;
# rows from 2024 onwards form the hold-out set.
train_val_end = "2023-12-31 23:00"
holdout_start = "2024-01-01 00:00"

train_val, hold_out = feats_all.loc[:train_val_end], feats_all.loc[holdout_start:]

# internal chronological 80/20 split (no shuffling: earlier rows train,
# later rows validate)
split_idx = int(0.8 * len(train_val))
train, ival = train_val.iloc[:split_idx], train_val.iloc[split_idx:]

def split_xy(frame):
    """Separate a feature table into a feature matrix and a target vector.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table containing a ``target`` column plus any number of feature columns.

    Returns
    -------
    tuple
        ``(X, y)``: the values of every column except ``target``, and the
        values of the ``target`` column.
    """
    features = frame.drop(columns="target")
    return features.values, frame["target"].values

# Raw feature / target arrays for each of the three splits.
(Xt_tr, yt_tr), (Xt_val, yt_val), (Xt_hold, yt_hold) = map(
    split_xy, (train, ival, hold_out)
)

# scalers: both are fitted on the training split only, then reused as-is
# for the validation and hold-out splits
x_scaler = StandardScaler().fit(Xt_tr)
y_scaler = StandardScaler().fit(yt_tr.reshape(-1, 1))


def scale_x(X):
    """Standardise a feature matrix with the training-fitted ``x_scaler``."""
    return x_scaler.transform(X)


def unscale_y(y):
    """Map scaled target values back to original units via ``y_scaler``; returns a 1-D array."""
    return y_scaler.inverse_transform(y.reshape(-1, 1)).ravel()


Xt_tr_s = scale_x(Xt_tr)
Xt_val_s = scale_x(Xt_val)
Xt_hold_s = scale_x(Xt_hold)
yt_tr_s = y_scaler.transform(yt_tr.reshape(-1, 1)).ravel()

print(f"Train: {len(train):,}  | Val: {len(ival):,} | Hold-out: {len(hold_out):,}")

## hyper-model

In [None]:
def hp_model(hp):
    """Build and compile one candidate network for the KerasTuner search.

    Parameters
    ----------
    hp : keras_tuner.HyperParameters
        Object the tuner passes in; the search space is declared through its
        ``Int`` / ``Choice`` / ``Float`` calls below.

    Returns
    -------
    Compiled Sequential model: ``n_layers`` Dense(relu, L2) + Dropout blocks
    followed by a single linear output unit, MSE loss and MAE metric.
    """
    input_width = Xt_tr_s.shape[1]

    # Search space: every hidden block shares the same width, dropout and L2.
    depth         = hp.Int("n_layers", 1, 3)
    width         = hp.Choice("units", [32, 64, 128])
    dropout_rate  = hp.Float("dropout", 0.0, 0.4, step=0.1)
    l2_strength   = hp.Float("l2", 1e-5, 1e-2, sampling="log")
    learning_rate = hp.Choice("lr", [1e-3, 3e-4, 1e-4])

    net = models.Sequential()
    net.add(layers.Input(shape=(input_width,)))
    for _block in range(depth):
        hidden = layers.Dense(
            width,
            activation="relu",
            kernel_regularizer=regularizers.l2(l2_strength),
        )
        net.add(hidden)
        net.add(layers.Dropout(dropout_rate))

    # Single linear unit: the network regresses the scaled target.
    net.add(layers.Dense(1))
    net.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate),
        loss="mse",
        metrics=["mae"],
    )
    return net

## Tuner

In [None]:
# Early stopping for each trial. No `monitor` is given, so the callback relies
# on the Keras default, which needs the validation data passed to the search.
es_cb = callbacks.EarlyStopping(patience=EARLY_STOP, restore_best_weights=True)

# Random search over the space declared in hp_model; trial results are
# written under tuner_dir/pv_nn.
tuner = kt.RandomSearch(
    hp_model,
    objective="val_loss",
    max_trials=TRIALS,
    executions_per_trial=EXEC_PER_TR,
    directory="tuner_dir",
    project_name="pv_nn",
)
tuner.search_space_summary()

# Validation targets are scaled with the training-fitted y_scaler so that
# val_loss is on the same scale as the training loss.
val_data = (Xt_val_s, y_scaler.transform(yt_val.reshape(-1,1)))

tuner.search(
    Xt_tr_s,
    yt_tr_s,
    validation_data=val_data,
    epochs=MAX_EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[es_cb],
    verbose=1,
)

best_hp = tuner.get_best_hyperparameters(1)[0]
print("\nBest HPs:", best_hp.values)

In [None]:
# Use the best hyperparameters found from previous tuning.
# They are hard-coded so the notebook can be re-run without repeating the search.
best_hp_values = {
    'n_layers': 1,
    'units': 128,
    'dropout': 0.0,
    'l2': 0.003479689743431989,
    'lr': 0.0003,
}
print("\nUsing Best HPs:", best_hp_values)

## retrain on full dataset

In [None]:
# best_model = tuner.hypermodel.build(best_hp)
# full_X = np.concatenate([Xt_tr_s, Xt_val_s])
# full_y = np.concatenate([yt_tr_s,
#                          y_scaler.transform(yt_val.reshape(-1,1)).ravel()])

# history = best_model.fit(
#     full_X, full_y,
#     epochs=MAX_EPOCHS,
#     batch_size=BATCH_SIZE,
#     callbacks=[es_cb],
#     verbose=0
# )

# Create the best model directly using the found hyperparameters
def create_best_model(hp_values):
    """Build and compile the network described by a plain hyperparameter dict.

    Parameters
    ----------
    hp_values : dict
        Must provide ``n_layers``, ``units``, ``dropout``, ``l2`` and ``lr``.

    Returns
    -------
    Compiled Sequential model with the same layout as ``hp_model``:
    ``n_layers`` Dense(relu, L2) + Dropout blocks, then one linear output
    unit, trained with MSE loss and reporting MAE.
    """
    # Input width follows the scaled training matrix defined earlier.
    stack = [layers.Input(shape=(Xt_tr_s.shape[1],))]

    for _block in range(hp_values['n_layers']):
        stack.append(
            layers.Dense(
                hp_values['units'],
                activation="relu",
                kernel_regularizer=regularizers.l2(hp_values['l2']),
            )
        )
        stack.append(layers.Dropout(hp_values['dropout']))

    stack.append(layers.Dense(1))

    net = models.Sequential(stack)
    net.compile(
        optimizer=tf.keras.optimizers.Adam(hp_values['lr']),
        loss="mse",
        metrics=["mae"],
    )
    return net

# Final model: rebuild from the chosen hyperparameters and fit on the
# training + validation rows together (the hold-out set stays untouched).
best_model = create_best_model(best_hp_values)
full_X = np.concatenate([Xt_tr_s, Xt_val_s])
full_y = np.concatenate([yt_tr_s,
                         y_scaler.transform(yt_val.reshape(-1,1)).ravel()])

# This fit has no validation data, so the callback must watch the training
# loss. Left at its default (`val_loss`), EarlyStopping never sees its metric:
# it never stops early, never restores the best weights, and training always
# runs for the full MAX_EPOCHS.
es_cb = callbacks.EarlyStopping(monitor="loss",
                                patience=EARLY_STOP,
                                restore_best_weights=True)
history = best_model.fit(
    full_X, full_y,
    epochs=MAX_EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[es_cb],
    verbose=0
)

## evaluation

In [None]:
def evaluate(m, Xs, y_true):
    """Score a fitted model against targets given in original units.

    Parameters
    ----------
    m : model exposing ``predict(Xs, verbose=0)``
    Xs : scaled feature matrix
    y_true : unscaled target values

    Returns
    -------
    tuple
        ``(mae, rmse, y_pred)`` where ``y_pred`` holds the predictions after
        being mapped back through ``unscale_y``.
    """
    # The model predicts in scaled space; undo the target scaling before scoring.
    y_pred = unscale_y(m.predict(Xs, verbose=0).flatten())

    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    return mae, rmse, y_pred


mae_hold, rmse_hold, y_hat_hold = evaluate(best_model, Xt_hold_s, yt_hold)
print(f"\nHold-out 2024  |  MAE={mae_hold:.3f}  RMSE={rmse_hold:.3f}")

## daily mae for requested dates

In [None]:
print("\nDaily MAE on selected dates (hold-out):")

# Actual vs predicted PV for every hold-out timestamp; reused by the plots below.
hold_df = pd.DataFrame(
    {"actual": yt_hold, "pred": y_hat_hold},
    index=hold_out.index,
)

for d in KEY_DATES:
    # Rows whose timestamp falls on calendar day `d`.
    on_day = hold_df.index.normalize() == pd.to_datetime(d)

    if not on_day.any():
        print(f"{d}: not in hold-out set")
        continue

    abs_err = (hold_df.loc[on_day, "actual"] - hold_df.loc[on_day, "pred"]).abs()
    print(f"{d}: {abs_err.mean():.3f}")

In [None]:
print("\nGenerating hourly prediction plots for key dates...")

# One stacked panel per key date.
fig, axes = plt.subplots(len(KEY_DATES), 1, figsize=(12, 4*len(KEY_DATES)))
if len(KEY_DATES) == 1:
    # With a single panel, wrap the lone Axes so the loop below can iterate.
    axes = [axes]

for ax, date_str in zip(axes, KEY_DATES):
    on_day = hold_df.index.normalize() == pd.to_datetime(date_str)

    if not on_day.any():
        # Nothing to draw for this date: leave a placeholder panel.
        ax.text(0.5, 0.5, f'{date_str}: No data available',
                ha='center', va='center', transform=ax.transAxes)
        ax.set_title(f'Data not available - {date_str}')
        continue

    day_data = hold_df.loc[on_day].copy()
    hours = day_data.index.hour

    ax.plot(hours, day_data["actual"], 'o-', label='Actual',
            color='blue', linewidth=2, markersize=4)
    ax.plot(hours, day_data["pred"], 's-', label='Predicted',
            color='red', linewidth=2, markersize=4, alpha=0.8)

    ax.set_title(f'Hourly PV Forecast vs Actual - {date_str}', fontsize=12, fontweight='bold')
    ax.set_xlabel('Hour of Day')
    ax.set_ylabel('PV Generation')
    ax.legend()
    ax.grid(True, alpha=0.3)
    ax.set_xticks(range(0, 24, 2))

    # Add MAE annotation in the top-left corner of the panel
    mae_day = (day_data["actual"] - day_data["pred"]).abs().mean()
    ax.text(0.02, 0.98, f'MAE: {mae_day:.4f}',
            transform=ax.transAxes, verticalalignment='top',
            bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8))

plt.tight_layout()
plt.show()

# Create a combined plot showing all key dates together
fig_all, ax_all = plt.subplots(figsize=(15, 6))
colors = ['blue', 'green', 'purple', 'orange', 'red', 'brown', 'pink', 'gray']
markers = ['o', 's', '^', 'D', 'v', '<', '>', 'p']

for i, date_str in enumerate(KEY_DATES):
    on_day = hold_df.index.normalize() == pd.to_datetime(date_str)
    if not on_day.any():
        continue

    day_data = hold_df.loc[on_day].copy()
    # Small per-date horizontal offset so overlapping curves stay readable.
    shifted_hours = day_data.index.hour + i*0.1

    # Cycle through the palettes when there are more dates than entries.
    colour = colors[i % len(colors)]
    marker = markers[i % len(markers)]

    # Actual data: solid line
    ax_all.plot(shifted_hours, day_data["actual"],
                marker=marker, linestyle='-',
                color=colour, label=f'Actual {date_str}',
                linewidth=2, markersize=6, alpha=0.8)

    # Predicted data: dashed line in the same colour
    ax_all.plot(shifted_hours, day_data["pred"],
                marker=marker, linestyle='--',
                color=colour, label=f'Predicted {date_str}',
                linewidth=2, markersize=4, alpha=0.6)

ax_all.set_title('Hourly PV Forecast vs Actual - All Key Dates Comparison', fontsize=14, fontweight='bold')
ax_all.set_xlabel('Hour of Day')
ax_all.set_ylabel('PV Generation')
ax_all.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
ax_all.grid(True, alpha=0.3)
ax_all.set_xticks(range(0, 24, 2))
plt.tight_layout()
plt.show()

In [None]:
# Training-loss curve of the final retrain, on a log scale.
fig_loss, ax_loss = plt.subplots(figsize=(5,3))
ax_loss.plot(history.history["loss"])
ax_loss.set_title("Training loss (full 2015–2023 retrain)")
ax_loss.set_xlabel("Epoch")
ax_loss.set_ylabel("MSE")
ax_loss.set_yscale("log")
ax_loss.grid(alpha=.3)
plt.show()

## hp landscape look


In [1]:


# trials = []
# for t in tuner.oracle.get_trials():
#     rec = t.hyperparameters.values.copy()
#     rec["val_loss"] = t.score
#     trials.append(rec)
# hp_df = pd.DataFrame(trials)

# sns.pairplot(hp_df, vars=[c for c in hp_df.columns if c!="val_loss"],
#              hue="val_loss", palette="viridis")
# plt.suptitle("HP pair-plot (colour = val_loss)", y=1.02)
# plt.show()

# # heat-map layers × units
# pivot = (hp_df
#          .groupby(["n_layers","units"], as_index=False)["val_loss"]
#          .mean()
#          .pivot(index="n_layers", columns="units", values="val_loss"))
# sns.heatmap(pivot, annot=True, fmt=".4f", cmap="YlGnBu", cbar_kws={'label':'mean val_loss'})
# plt.title("Validation loss – layers × units")
# plt.ylabel("n_layers"); plt.xlabel("units")
# plt.show()

# Final summary of the chosen hyperparameters.
# Each row is (label, key in best_hp_values, format spec).
print("\nBest model architecture:")
summary_rows = (
    ("Layers", "n_layers", ""),
    ("Units", "units", ""),
    ("Dropout", "dropout", ""),
    ("L2 regularization", "l2", ".6f"),
    ("Learning rate", "lr", ""),
)
for label, key, spec in summary_rows:
    print(f"- {label}: {best_hp_values[key]:{spec}}")
print(f"\nRun completed {datetime.now()}")


Best model architecture:


NameError: name 'best_hp_values' is not defined