# In [3]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from sklearn.metrics import mean_squared_error, mean_absolute_error
import warnings
warnings.filterwarnings("ignore", category=UserWarning)


# ====================
# 1. Load and Prepare Data
# ====================

# Location of the CSV file with the actual day-ahead prices.
actuals_path = "https://raw.githubusercontent.com/tvgerwe/ENEXIS/refs/heads/main/workspaces/redouan/GUI_ENERGY_PRICES_202501010000-202601010000.csv"

# Evaluation period.
start_date = pd.Timestamp("2025-02-24")
end_date = pd.Timestamp("2025-03-29")

# --- Actuals data ---
actuals_df = pd.read_csv(actuals_path, encoding="utf-8-sig")

# Keep the part of the "MTU (UTC)" string that follows the " - " separator;
# that timestamp is used as the anchor of every row.
actuals_df["date_anchor"] = actuals_df["MTU (UTC)"].astype(str).str.split(" - ").str[1]

# Parse the anchor; values that do not match the format become NaT.
actuals_df["actual_datetime"] = pd.to_datetime(
    actuals_df["date_anchor"], format="%d/%m/%Y %H:%M:%S", errors="coerce"
)

# Convert the price from EUR/MWh to EUR/kWh.
actuals_df["price_kwh"] = actuals_df["Day-ahead Price (EUR/MWh)"] / 1000

# Round down to the full hour. The lowercase "h" alias is used because the
# uppercase "H" alias is deprecated in pandas and triggers a warning.
# tz_localize(None) keeps the column timezone-naive.
actuals_df["target_hour"] = actuals_df["actual_datetime"].dt.floor("h").dt.tz_localize(None)

# Convenience columns: calendar date and hour of day.
actuals_df["target_date"] = actuals_df["target_hour"].dt.date
actuals_df["hour"] = actuals_df["target_hour"].dt.hour

# Order chronologically by date and hour.
actuals_df = actuals_df.sort_values(["target_date", "hour"])

# ====================
# 2. Generate Naive Forecasts (horizons of 1-7 days)
# ====================

# Every actual observation doubles as a forecast: the price seen at
# `issuance_hour` is reused as the prediction `y` for the same clock hour
# `horizon` days later.
forecast_base = actuals_df[["target_hour", "price_kwh"]].rename(
    columns={"target_hour": "issuance_hour", "price_kwh": "y"}
)

# One frame per horizon (1 through 7 days ahead).
naive_forecasts_list = [
    forecast_base.assign(
        target_hour=forecast_base["issuance_hour"] + pd.Timedelta(days=horizon),
        horizon=horizon,
    )[["issuance_hour", "target_hour", "y", "horizon"]]
    for horizon in range(1, 8)
]

# Stack all horizons into a single frame.
naive_forecasts = pd.concat(naive_forecasts_list, ignore_index=True)

# Keep only forecasts whose target falls inside the evaluation period
# (both bounds inclusive).
forecast_in_period = naive_forecasts["target_hour"].between(start_date, end_date)
naive_forecasts = naive_forecasts.loc[forecast_in_period].copy()

# Same restriction for the actuals.
actual_in_period = actuals_df["target_hour"].between(start_date, end_date)
filtered_actuals = actuals_df.loc[actual_in_period].copy()

# Attach the realised price to every forecast, discard rows where either
# value is missing, and order by target hour and horizon.
merged = (
    naive_forecasts.merge(
        filtered_actuals[["target_hour", "price_kwh"]],
        on="target_hour",
        how="inner",
    )
    .dropna(subset=["y", "price_kwh"])
    .sort_values(["target_hour", "horizon"])
)

# ====================
# 3. Evaluation: Compute the Error Metrics (overall and per horizon)
# ====================


def _forecast_error_metrics(actual, predicted, scale):
    """Return RMSE, MAE, MAPE (%) and MASE for one set of forecasts.

    Args:
        actual: Series with the realised prices.
        predicted: Series with the forecast prices, aligned with ``actual``.
        scale: Denominator used for MASE.

    Returns:
        Dict with the keys "RMSE", "MAE", "MAPE" and "MASE". MASE is NaN
        when ``scale`` equals zero.
    """
    mae_value = mean_absolute_error(actual, predicted)
    # Actuals equal to zero become NaN so they drop out of the MAPE mean
    # instead of producing a division by zero.
    relative_error = (actual - predicted) / actual.where(actual != 0)
    return {
        "RMSE": np.sqrt(mean_squared_error(actual, predicted)),
        "MAE": mae_value,
        "MAPE": relative_error.abs().mean() * 100,
        "MASE": mae_value / scale if scale != 0 else np.nan,
    }


y_true = merged["price_kwh"]
y_pred = merged["y"]

# MASE scale: mean absolute change between consecutive actual prices.
# `merged` repeats each target hour once per horizon, so differencing it
# directly yields mostly zeros and shrinks the scale. Use exactly one
# actual per target hour, in chronological order, instead.
hourly_actuals = (
    merged.sort_values("target_hour")
    .drop_duplicates(subset="target_hour")["price_kwh"]
)
mase_scale = hourly_actuals.diff().abs().mean()

overall_metrics = _forecast_error_metrics(y_true, y_pred, mase_scale)
rmse = overall_metrics["RMSE"]
mae = overall_metrics["MAE"]
mape = overall_metrics["MAPE"]
mase = overall_metrics["MASE"]

# Same metrics per horizon, used by the visualisation further down.
# Horizons without any rows are left out of the dict.
horizon_metrics = {}
for h in range(1, 8):
    horizon_data = merged[merged["horizon"] == h]
    if horizon_data.empty:
        continue
    horizon_metrics[h] = _forecast_error_metrics(
        horizon_data["price_kwh"], horizon_data["y"], mase_scale
    )

# ====================
# 4. Aggregation for the Overview Matrices
# ====================


def _rmse_matrix(frame, key, full_index=None):
    """Build an RMSE overview matrix with one row per value of ``key``.

    Args:
        frame: DataFrame with the columns ``key``, "horizon", "price_kwh"
            (actual) and "y" (forecast).
        key: Name of the column that defines the rows of the matrix.
        full_index: Optional iterable of row labels that must all be
            present; labels without data get NaN rows.

    Returns:
        DataFrame with the columns "Actuals" (mean actual price per row),
        the horizons 1-7 (RMSE per row and horizon) and "Total_rmse" (the
        unweighted mean of the available per-horizon RMSE values).
    """
    horizons = list(range(1, 8))

    # Vectorised RMSE: mean of the squared errors per group, then the root.
    # This avoids groupby.apply with a Python lambda.
    squared_error = (frame["price_kwh"] - frame["y"]) ** 2
    grouped_mse = squared_error.groupby([frame[key], frame["horizon"]]).mean()
    matrix = np.sqrt(grouped_mse).unstack("horizon")

    # Always expose a column for every horizon, even when it has no data.
    matrix = matrix.reindex(columns=horizons)
    if full_index is not None:
        matrix = matrix.reindex(index=pd.Index(full_index, name=key))
    matrix = matrix.sort_index()

    # Column assignment aligns the per-row mean actual price on the index.
    matrix["Actuals"] = frame.groupby(key)["price_kwh"].mean()
    matrix["Total_rmse"] = matrix[horizons].mean(axis=1, skipna=True)
    return matrix[["Actuals", *horizons, "Total_rmse"]]


# --- Aggregated hourly matrix: rows = hour of day (0-23), columns = horizon ---
merged["hour"] = merged["target_hour"].dt.hour
rmse_hour_pivot = _rmse_matrix(merged, "hour", full_index=range(24))

# --- Aggregated daily matrix: rows = target date, columns = horizon ---
merged["target_date"] = merged["target_hour"].dt.normalize()
rmse_day_pivot = _rmse_matrix(merged, "target_date")

#%%
# ====================
# 5. Visualisation
# ====================

# --- 5.1 Metric overview (naive model) ---
# A Plotly table takes one list per column: the metric names on the left
# and their formatted values on the right.
metric_names = ["RMSE", "MAE", "MAPE (%)", "MASE"]
metric_values = [f"{rmse:.4f}", f"{mae:.4f}", f"{mape:.2f}", f"{mase:.4f}"]
metrics_table_data = [metric_names, metric_values]

metrics_header = dict(
    values=["Metric", "Value"],
    fill_color='#B3E5FC',
    align='center',
    font=dict(size=14, color='black'),
)
metrics_cells = dict(
    values=metrics_table_data,
    fill_color='#F5F5F5',
    align='center',
    font=dict(size=12),
)
metrics_fig = go.Figure(data=[go.Table(header=metrics_header, cells=metrics_cells)])

metrics_layout = dict(
    title="Naive Model: Overall Forecast Evaluation Metrics",
    width=500,
    height=200,
    margin=dict(l=20, r=20, t=60, b=20),
)
metrics_fig.update_layout(**metrics_layout)

metrics_fig.show()

# --- 5.2 Metrics per horizon ---
# One formatted row per horizon that has metrics; horizons without an
# entry in `horizon_metrics` are skipped.
horizon_rows = [
    [
        f"Day {day}",
        f"{horizon_metrics[day]['RMSE']:.4f}",
        f"{horizon_metrics[day]['MAE']:.4f}",
        f"{horizon_metrics[day]['MAPE']:.2f}",
        f"{horizon_metrics[day]['MASE']:.4f}",
    ]
    for day in range(1, 8)
    if day in horizon_metrics
]

# First entry is the header row, the remaining entries are the data rows.
horizon_metrics_table = [["Horizon", "RMSE", "MAE", "MAPE (%)", "MASE"], *horizon_rows]

horizon_header = dict(
    values=horizon_metrics_table[0],
    fill_color='#B3E5FC',
    align='center',
    font=dict(size=14, color='black'),
)
horizon_cells = dict(
    # Plotly expects column-wise data, so the rows are transposed.
    values=list(zip(*horizon_metrics_table[1:])),
    fill_color='#F5F5F5',
    align='center',
    font=dict(size=12),
)
horizon_metrics_fig = go.Figure(
    data=[go.Table(header=horizon_header, cells=horizon_cells)]
)

horizon_layout = dict(
    title="Naive Model: Forecast Metrics by Horizon",
    width=800,
    height=300,
    margin=dict(l=20, r=20, t=60, b=20),
)
horizon_metrics_fig.update_layout(**horizon_layout)

horizon_metrics_fig.show()

# --- 5.3 Plotly time-series graph ---
# Restrict to the day-ahead forecasts (horizon == 1), in chronological order.
is_day_ahead = merged["horizon"] == 1
day_ahead_data = merged.loc[is_day_ahead].sort_values("target_hour")

# Line chart of actual versus predicted prices (day-ahead only).
timeseries_fig = go.Figure()

# (value column, legend label, line style) for each trace: the actual
# prices first, then the naive forecast.
trace_specs = [
    ("price_kwh", "Werkelijke prijs (kWh)", dict(color="blue")),
    ("y", "Naive voorspelling (kWh)", dict(color="orange", dash="dash")),
]
for value_column, legend_label, line_style in trace_specs:
    timeseries_fig.add_trace(go.Scatter(
        x=day_ahead_data["target_hour"],
        y=day_ahead_data[value_column],
        mode="lines+markers",
        name=legend_label,
        line=line_style,
    ))

# Both axes share the same light-gray grid on a white background.
axis_grid = dict(showgrid=True, gridcolor="lightgray")
timeseries_fig.update_layout(
    title="⚡ Naive Model: Voorspelling vs Realiteit (1-Day-Ahead)",
    xaxis_title="Datum & Uur",
    yaxis_title="Prijs (EUR/kWh)",
    paper_bgcolor="white",
    plot_bgcolor="white",
    xaxis=dict(axis_grid),
    yaxis=dict(axis_grid),
    width=1200,
    height=600,
)

timeseries_fig.show()

# --- 5.4 Plotly Tables voor Matrices ---
# Functie om DataFrame voor te bereiden voor Plotly tabel-visualisatie
def prepare_for_plotly_table(df, is_hourly=True):
    """Turn an RMSE matrix into display-ready strings for a Plotly table.

    Args:
        df: DataFrame that may contain the columns "Actuals", the integer
            horizons 1-7 and "Total_rmse". Any other column is dropped.
        is_hourly: When true, every index entry is rendered as "HH:00";
            otherwise every entry is rendered via strftime("%d-%m-%Y").

    Returns:
        A tuple ``(table, index_values)``: the renamed, reordered frame with
        its values formatted as strings, and the list of index labels.
        The input frame itself is not modified.
    """
    horizon_labels = {day: f"RMSE_day{day}" for day in range(1, 8)}

    # rename() returns a new frame and ignores mapping keys that are absent.
    table = df.rename(
        columns={
            "Actuals": "actual_price_kwh",
            "Total_rmse": "RMSE_total",
            **horizon_labels,
        }
    )

    # Fixed display order, restricted to the columns that actually exist.
    preferred_order = ["actual_price_kwh", *horizon_labels.values(), "RMSE_total"]
    table = table[[name for name in preferred_order if name in table.columns]]

    # Row labels: hour of day or calendar date.
    index_values = [
        f"{entry:02d}:00" if is_hourly else entry.strftime("%d-%m-%Y")
        for entry in table.index
    ]

    # Four decimals everywhere; the actual price also gets a euro sign.
    # Missing values are shown as empty cells.
    for name in table.columns:
        if name == "actual_price_kwh":
            prefix = "€"
        elif name.startswith("RMSE_"):
            prefix = ""
        else:
            continue
        table[name] = table[name].apply(
            lambda value, prefix=prefix: f"{prefix}{value:.4f}" if pd.notna(value) else ""
        )

    return table, index_values

# Format both matrices for display in a Plotly table.
rmse_hour_plotly, hour_index_values = prepare_for_plotly_table(rmse_hour_pivot, is_hourly=True)
rmse_day_plotly, day_index_values = prepare_for_plotly_table(rmse_day_pivot, is_hourly=False)

# Shared colours.
header_color = '#B3E5FC'  # light blue
cell_color = '#F5F5F5'    # light gray


def _matrix_table_figure(index_header, index_labels, table_df, title, height):
    """Return a Plotly table figure for one formatted RMSE matrix.

    The first table column shows ``index_labels`` under ``index_header``;
    the remaining columns are the columns of ``table_df`` in order.
    """
    column_names = list(table_df.columns)
    figure = go.Figure(data=[go.Table(
        header=dict(
            values=[index_header] + column_names,
            fill_color=header_color,
            align='center',
            font=dict(size=12, color='black'),
        ),
        cells=dict(
            values=[index_labels] + [table_df[name] for name in column_names],
            fill_color=cell_color,
            # Index column centred, value columns right-aligned.
            align=['center'] + ['right'] * len(column_names),
            font=dict(size=11),
        ),
    )])
    figure.update_layout(
        title=title,
        width=1000,
        height=height,
        margin=dict(l=20, r=20, t=60, b=20),
    )
    return figure


# Hourly matrix: build the table and display it.
hourly_table = _matrix_table_figure(
    'Hour',
    hour_index_values,
    rmse_hour_plotly,
    "Naive Model: Hourly Electricity Price Forecast Evaluation",
    600,
)
hourly_table.show()

# Daily matrix: build the table and display it.
daily_table = _matrix_table_figure(
    'Date',
    day_index_values,
    rmse_day_plotly,
    "Naive Model: Daily Electricity Price Forecast Evaluation",
    800,
)
daily_table.show()


# Notebook output: 'H' is deprecated and will be removed in a future version, please use 'h' instead.





