# 1. Environment Setup & Library Imports
This cell installs the `jupyter_dash` package (if not already present) and imports the core packages for data manipulation (Pandas, NumPy), visualization (Plotly, Dash, ipywidgets), and machine learning (TensorFlow, Scikit-Learn, XGBoost). It also suppresses warnings to ensure a clean output.

In [2]:
pip install jupyter_dash

Collecting jupyter_dash
  Downloading jupyter_dash-0.4.2-py3-none-any.whl.metadata (3.6 kB)
Collecting ansi2html (from jupyter_dash)
  Downloading ansi2html-1.9.2-py3-none-any.whl.metadata (3.7 kB)
Downloading jupyter_dash-0.4.2-py3-none-any.whl (23 kB)
Downloading ansi2html-1.9.2-py3-none-any.whl (17 kB)
Installing collected packages: ansi2html, jupyter_dash
Successfully installed ansi2html-1.9.2 jupyter_dash-0.4.2
Note: you may need to restart the kernel to use updated packages.


In [15]:

# Install necessary packages if running in a new environment
# !pip install dash pandas numpy scikit-learn tensorflow xgboost plotly
import os
import io
import base64
import datetime
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from dash import Dash, dcc, html, Input, Output, State, callback_context
# from jupyter_dash import JupyterDash # Deprecated, using standard Dash with jupyter_mode
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import xgboost as xgb
import warnings

import ipywidgets as widgets
from IPython.display import display
import plotly.graph_objects as go
warnings.filterwarnings('ignore')

# 2. Data Configuration & Helper Functions
Here we define the dataset file paths and categorize columns (Dates, Weather, Power). We also implement helper functions to identify column types and standardise numeric values, ensuring consistent data formatting across different datasets.

In [17]:
# Dataset location. Defaults to the Kaggle input directory; set the
# HYDRO_DATA_DIR environment variable to point at a local copy of the CSVs.
DATA_DIR = os.environ.get("HYDRO_DATA_DIR", "/kaggle/input/dataset1")

# Plant display name -> CSV file name inside DATA_DIR.
datasets = {
    "Gibe I": "Gibe1.csv",
    "Gibe III": "Gibe3.csv",
    "Koka": "Koka Plant.csv",
    "Tana Beles": "Tana_Beles.csv",
    "Tekeze": "Tekeze.csv",
    "Fincha": "fincha.csv",
}

# Column names treated as date columns.
DATE_COLS = ['Date_GC', 'Date_EC']
# Column names treated as weather columns (matched exactly).
WEATHER_COLS = ['T2M', 'PRECTOTCORR', 'ALLSKY_SFC_SW_DWN', 'RH2M', 'WS2M']
# A column whose name starts with any of these prefixes is treated as a power column.
POWER_PREFIXES = ['U', 'V', 'W', 'Max_ALoad', 'Min_ALoad', 'Auxiliary', 'Water_Level', 'Energy', 'Discharge']
def identify_columns(df):
    """Partition the column names of ``df`` into four lists.

    Returns ``(date_c, weather_c, power_c, other_c)``:
    - date_c: columns listed in DATE_COLS
    - weather_c: columns listed in WEATHER_COLS
    - power_c: columns starting with one of POWER_PREFIXES that are not weather columns
    - other_c: everything left over, except columns named 'Date_EC' or 'Date'

    Each list keeps the column order of ``df``.
    """
    names = df.columns.tolist()
    prefixes = tuple(POWER_PREFIXES)

    date_c = [name for name in names if name in DATE_COLS]
    weather_c = [name for name in names if name in WEATHER_COLS]
    power_c = [
        name for name in names
        if name.startswith(prefixes) and name not in weather_c
    ]

    # Anything already assigned to a group (or explicitly excluded) is not "other".
    claimed = set(date_c) | set(weather_c) | set(power_c) | {'Date_EC', 'Date'}
    other_c = [name for name in names if name not in claimed]

    return date_c, weather_c, power_c, other_c
def clean_numeric(x):
    """Convert a single cell value to a number.

    Strings have commas and spaces removed first (e.g. thousands separators);
    anything that still cannot be parsed becomes NaN.
    """
    value = x
    if isinstance(value, str):
        # Drop ',' and ' ' in one pass.
        value = value.translate({ord(','): None, ord(' '): None})
    return pd.to_numeric(value, errors='coerce')

# 3. Data Loading & Logic Separation
This section separates the preprocessing into more granular steps. `load_raw_formatted` handles loading and initial formatting, while `apply_cleaning` applies imputation and outlier treatments. This modularity enables the "Before vs. After" comparison. We then batch-process all datasets into `data_store` for modeling.

In [18]:
# %% [code]
def load_raw_formatted(filepath):
    """Read one plant CSV and apply formatting only (no imputation or clipping).

    - If a 'Date_GC' column exists, it is parsed into a sorted datetime index
      named 'Date' and the original DATE_COLS columns are dropped.
    - Every weather / power / other column is passed through ``clean_numeric``.

    Returns the formatted DataFrame, or None when the file does not exist.
    """
    try:
        frame = pd.read_csv(filepath)
    except FileNotFoundError:
        return None

    # Column groups are determined on the raw frame, before the date columns are dropped.
    _, weather_c, power_c, other_c = identify_columns(frame)

    # 1. Date formatting
    if 'Date_GC' in frame.columns:
        frame = (
            frame
            .assign(Date=pd.to_datetime(frame['Date_GC']))
            .set_index('Date')
            .sort_index()
        )
        frame = frame.drop(columns=[c for c in DATE_COLS if c in frame.columns])

    # 2. Numeric formatting (values may still contain NaN afterwards)
    for col in weather_c + power_c + other_c:
        if col not in frame.columns:
            continue
        frame[col] = frame[col].apply(clean_numeric)

    return frame
def apply_cleaning(df_in):
    """Return a cleaned copy of ``df_in``; the input frame is not modified.

    - Weather columns: NaN -> column mean, then clipped to
      [Q1 - 1.5*IQR, Q3 + 1.5*IQR].
    - Power and other columns: NaN -> 0, then clipped to the 5th-95th
      percentile range (percentiles are taken after the zero fill).
    """
    cleaned = df_in.copy()
    _, weather_c, power_c, other_c = identify_columns(cleaned)

    # 1. Weather columns: mean imputation + IQR fences
    for name in [c for c in weather_c if c in cleaned.columns]:
        filled = cleaned[name].fillna(cleaned[name].mean())
        q1, q3 = filled.quantile(0.25), filled.quantile(0.75)
        spread = q3 - q1
        cleaned[name] = np.clip(filled, q1 - 1.5 * spread, q3 + 1.5 * spread)

    # 2. Power / other columns: zero imputation + percentile capping
    for name in [c for c in power_c + other_c if c in cleaned.columns]:
        filled = cleaned[name].fillna(0)
        lower, upper = filled.quantile(0.05), filled.quantile(0.95)
        cleaned[name] = np.clip(filled, lower, upper)

    return cleaned
# Build data_store: plant name -> cleaned DataFrame used for modeling.
data_store = {}
print("Loading data for modeling...")
for plant_name, csv_name in datasets.items():
    csv_path = os.path.join(DATA_DIR, csv_name)

    if not os.path.exists(csv_path):
        print(f"Warning: {csv_name} NOT FOUND at {csv_path}")
        continue

    raw_frame = load_raw_formatted(csv_path)
    if raw_frame is None:
        # Loader reported a missing file; nothing to store for this plant.
        continue

    data_store[plant_name] = apply_cleaning(raw_frame)
    print(f"Loaded & Cleaned {plant_name}: {data_store[plant_name].shape}")

Loading data for modeling...
Loaded & Cleaned Gibe I: (4383, 13)
Loaded & Cleaned Gibe III: (3653, 21)
Loaded & Cleaned Koka: (4383, 14)
Loaded & Cleaned Tana Beles: (4383, 15)
Loaded & Cleaned Tekeze: (4383, 15)
Loaded & Cleaned Fincha: (4383, 15)


# 4. Interactive Preprocessing Dashboard (ipywidgets)
 This cell defines an ipywidgets control panel to visualize the preprocessing steps.
 **Instructions:**
 1. Run this cell.
 2. Select a Plant Dataset from the dropdown.
 3. Click "Run Preprocessing Validation" to execute the cleaning logic on the raw data.
 4. Observe the "Before" vs "After" metrics for Missing Values, Data Distribution (Box Plots), and general statistics.
5. Note: This app runs inside the notebook output area.

In [19]:
# -----------------------------
# Widgets
# -----------------------------
# Plant selector: one entry per configured dataset.
plant_dropdown = widgets.Dropdown(
    description='Plant:',
    options=[*datasets],
    style=dict(description_width='initial'),
)

# Button that triggers the before/after comparison.
run_button = widgets.Button(
    icon='play',
    button_style='success',
    description='Run Preprocessing Validation',
)

# Area that receives the summary table and the figures.
output = widgets.Output()

# -----------------------------
# Dashboard Logic
# -----------------------------
def _add_before_after_boxes(fig, df_before, df_after, columns):
    """Add a '(Before)' and an '(After)' box trace to ``fig`` for each listed column found in ``df_before``."""
    for col in columns:
        if col not in df_before.columns:
            continue
        fig.add_box(y=df_before[col], name=f"{col} (Before)", boxpoints='outliers')
        fig.add_box(y=df_after[col], name=f"{col} (After)", boxpoints='outliers')


def run_dashboard(b):
    """Button callback: compare the selected plant's data before and after cleaning.

    Loads the raw-formatted frame for the plant chosen in ``plant_dropdown``,
    applies ``apply_cleaning`` and renders, inside ``output``:
    a summary table, missing-value bars, weather and power box plots, and
    non-null counts per column.

    Parameters
    ----------
    b : the clicked button (passed by ipywidgets; not used).
    """
    output.clear_output()

    with output:
        plant = plant_dropdown.value
        filepath = os.path.join(DATA_DIR, datasets[plant])

        # Load data (RAW & CLEANED)
        df_before = load_raw_formatted(filepath)
        if df_before is None:
            # load_raw_formatted returns None for a missing file; without this
            # guard apply_cleaning would fail on None.copy().
            print(f"Warning: {datasets[plant]} NOT FOUND at {filepath}")
            return
        df_after = apply_cleaning(df_before)

        date_c, weather_c, power_c, other_c = identify_columns(df_before)

        # =============================
        # 1. Missing Values (Before vs After)
        # =============================
        miss_before = df_before.isnull().sum()
        miss_after = df_after.isnull().sum()

        # Only columns that had at least one missing value before cleaning are plotted.
        miss_cols = miss_before[miss_before > 0].index.tolist()

        fig_missing = go.Figure()
        if miss_cols:
            fig_missing.add_bar(x=miss_cols, y=miss_before[miss_cols], name='Before')
            fig_missing.add_bar(x=miss_cols, y=miss_after[miss_cols], name='After')
            fig_missing.update_layout(
                title=f"Missing Values Before vs After – {plant}",
                barmode='group'
            )
        else:
            fig_missing.update_layout(
                title=f"No Missing Values Detected – {plant}"
            )

        # =============================
        # 2. WEATHER OUTLIERS (IQR)
        # =============================
        fig_weather = go.Figure()
        _add_before_after_boxes(fig_weather, df_before, df_after, weather_c)
        fig_weather.update_layout(
            title=f"Weather Columns – IQR Outlier Treatment (Before vs After) – {plant}",
            boxmode='group'
        )

        # =============================
        # 3. POWER / OTHER OUTLIERS (5–95% Capping)
        # =============================
        fig_power = go.Figure()
        _add_before_after_boxes(fig_power, df_before, df_after, power_c + other_c)
        fig_power.update_layout(
            title=f"Power & Other Columns – Percentile Capping (Before vs After) – {plant}",
            boxmode='group'
        )

        # =============================
        # 4. Non-Null Count per Column
        # =============================
        non_null_before = df_before.count()
        non_null_after = df_after.count()

        fig_nonnull = go.Figure()
        fig_nonnull.add_bar(
            x=non_null_before.index,
            y=non_null_before.values,
            name='Before'
        )
        fig_nonnull.add_bar(
            x=non_null_after.index,
            y=non_null_after.values,
            name='After'
        )
        fig_nonnull.update_layout(
            title=f"Non-Null Count per Column – {plant}",
            barmode='group'
        )

        # =============================
        # 5. Summary Table
        # =============================
        display(widgets.HTML(f"""
        <h3>Preprocessing Summary – {plant}</h3>
        <table border="1" style="border-collapse:collapse">
            <tr><th>Metric</th><th>Before</th><th>After</th></tr>
            <tr><td>Rows</td><td>{len(df_before)}</td><td>{len(df_after)}</td></tr>
            <tr><td>Columns</td><td>{df_before.shape[1]}</td><td>{df_after.shape[1]}</td></tr>
            <tr><td>Total Missing Values</td>
                <td>{df_before.isnull().sum().sum()}</td>
                <td>{df_after.isnull().sum().sum()}</td>
            </tr>
            <tr><td>Weather Columns (IQR)</td><td colspan="2">{', '.join(weather_c)}</td></tr>
            <tr><td>Power Columns (5–95%)</td><td colspan="2">{', '.join(power_c)}</td></tr>
        </table>
        """))

        # =============================
        # Display Plots
        # =============================
        fig_missing.show()
        fig_weather.show()
        fig_power.show()
        fig_nonnull.show()

# -----------------------------
# Bind & Display
# -----------------------------
# Wire the button to its callback, then render the controls stacked above the output area.
run_button.on_click(run_dashboard)

dashboard_controls = [plant_dropdown, run_button, output]
display(widgets.VBox(dashboard_controls))


VBox(children=(Dropdown(description='Plant:', options=('Gibe I', 'Gibe III', 'Koka', 'Tana Beles', 'Tekeze', '‚Ä¶

# 5. Model Architecture & Training Utilities
Defines the deep learning models (LSTM, GRU) and the machine learning regressor (XGBoost). It includes the `train_predict_evaluate` function, which handles feature engineering (calendar/seasonality features), scaling, model training, forecasting on seasonally estimated future features, and performance evaluation.

In [20]:
import pandas as pd
import numpy as np
import datetime
import warnings
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import xgboost as xgb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

warnings.filterwarnings('ignore')

# ---------- HELPERS ----------

def prepare_features_direct(df, target_col, feature_cols=None):
    """Build the feature matrix and target vector for direct regression.

    Works on a copy of ``df``. Calendar features (month, day, dayofyear, year)
    derived from the datetime index are appended to ``feature_cols``.

    Parameters
    ----------
    df : DataFrame with a DatetimeIndex, or with a column whose name contains
        'date' (case-insensitive) that can be parsed and used as the index.
    target_col : name of the column to predict.
    feature_cols : iterable of feature column names; defaults to every numeric
        column except ``target_col``.

    Returns
    -------
    (X, y, x_cols, df) : feature array, target array, ordered feature names,
        and the processed frame (missing features filled with the column
        median, rows with a missing target dropped).

    Raises
    ------
    ValueError
        If the index is not a DatetimeIndex and no date-like column exists.
    """
    df = df.copy()
    if feature_cols is None:
        # Default fallback if not provided: use all numeric columns except the target
        feature_cols = [c for c in df.select_dtypes(include=np.number).columns if c != target_col]
    else:
        # Accept any iterable (tuple, Index, ...) so the list concatenation below is well defined.
        feature_cols = list(feature_cols)

    if not isinstance(df.index, pd.DatetimeIndex):
        # str() guards against non-string column labels.
        date_col = next((c for c in df.columns if 'date' in str(c).lower()), None)
        if date_col is None:
            raise ValueError(
                "prepare_features_direct needs a DatetimeIndex or a column whose name "
                "contains 'date' to derive the calendar features."
            )
        df[date_col] = pd.to_datetime(df[date_col])
        df = df.set_index(date_col)

    df['month'] = df.index.month
    df['day'] = df.index.day
    df['dayofyear'] = df.index.dayofyear
    df['year'] = df.index.year

    # X includes: provided features + calendar features
    x_cols = feature_cols + ['month', 'day', 'dayofyear', 'year']

    # Fill gaps in X with the column median, then drop rows without a target value.
    df[x_cols] = df[x_cols].fillna(df[x_cols].median())
    df = df.dropna(subset=[target_col])

    X = df[x_cols].values
    y = df[target_col].values

    return X, y, x_cols, df

def create_future_dataframe(df_historical, feature_cols, forecast_years):
    """Build the feature matrix for the forecast horizon.

    Future values of each feature are estimated as the historical median of
    that feature for the same calendar month; calendar features are computed
    from the future dates themselves.

    Parameters
    ----------
    df_historical : DataFrame with a datetime index (its last entry marks the
        end of history). It is not modified.
    feature_cols : feature column names, in the order used for training.
        Columns absent from ``df_historical`` are filled with 0.
    forecast_years : horizon in years; converted to ``int(forecast_years * 365)`` days.

    Returns
    -------
    (X_future, future_dates) : array with columns
        ``feature_cols + ['month', 'day', 'dayofyear', 'year']`` and the
        matching daily DatetimeIndex starting the day after the last
        historical date.
    """
    feature_cols = list(feature_cols)
    last_date = df_historical.index[-1]
    start_date = last_date + datetime.timedelta(days=1)
    days_to_predict = max(int(forecast_years * 365), 0)

    # `periods` gives exactly days_to_predict rows; a start/end range is
    # inclusive on both sides and would produce one extra day.
    future_dates = pd.date_range(start=start_date, periods=days_to_predict, freq='D')
    future_df = pd.DataFrame(index=future_dates)

    future_df['month'] = future_df.index.month
    future_df['day'] = future_df.index.day
    future_df['dayofyear'] = future_df.index.dayofyear
    future_df['year'] = future_df.index.year

    # Monthly medians are computed only for columns that exist, grouping by the
    # index month directly so the caller's frame is left untouched.
    present_cols = [c for c in feature_cols if c in df_historical.columns]
    monthly_stats = {}
    if present_cols:
        monthly_stats = (
            df_historical[present_cols]
            .groupby(df_historical.index.month)
            .median()
            .to_dict('index')
        )

    for col in feature_cols:
        if col in present_cols:
            # Months with no historical rows fall back to 0.
            future_df[col] = future_df['month'].map(
                lambda m, _col=col: monthly_stats.get(m, {}).get(_col, 0)
            )
        else:
            future_df[col] = 0

    # Assemble final X matrix in training column order
    x_cols = feature_cols + ['month', 'day', 'dayofyear', 'year']
    return future_df[x_cols].values, future_dates

def build_dl_regressor(model_type, input_dim):
    """Build and compile a single-timestep recurrent regressor.

    Architecture: LSTM(64) or GRU(64) on input shape ``(1, input_dim)``,
    Dropout(0.2), Dense(32, relu), Dense(1). Compiled with Adam
    (learning rate 0.001) and MSE loss.

    Parameters
    ----------
    model_type : 'LSTM' or 'GRU'.
    input_dim : number of features per sample.

    Raises
    ------
    ValueError
        If ``model_type`` is neither 'LSTM' nor 'GRU' (previously this
        silently produced a network without any recurrent layer).
    """
    if model_type not in ('LSTM', 'GRU'):
        raise ValueError(
            f"Unsupported model_type {model_type!r}; expected 'LSTM' or 'GRU'."
        )

    recurrent_layer = LSTM if model_type == 'LSTM' else GRU

    model = Sequential()
    # Input is (samples, 1, features): one timestep per sample.
    model.add(recurrent_layer(64, input_shape=(1, input_dim), return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1))  # Linear output for regression
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model

# ---------- MAIN FUNCTION ----------
def _predict_scaled(name, model, X_2d):
    """Return 1-D predictions (scaled target units) from one trained base model."""
    if name == 'XGBoost':
        return model.predict(X_2d)
    # Keras recurrent models expect (samples, timesteps=1, features).
    X_3d = X_2d.reshape((X_2d.shape[0], 1, X_2d.shape[1]))
    return model.predict(X_3d, verbose=0).flatten()


def _blend_predictions(selected_model, preds_by_model, weights):
    """Combine per-model predictions: weighted sum, plain mean, or the single model's output."""
    names = list(preds_by_model)
    if selected_model == 'Weighted_Avg':
        blended = np.zeros(len(preds_by_model[names[0]]), dtype=float)
        for name in names:
            blended += preds_by_model[name] * weights[name]
        return blended
    if selected_model == 'Ensemble':
        return np.mean([preds_by_model[name] for name in names], axis=0)
    return preds_by_model[names[0]]


def train_predict_evaluate(
    df: pd.DataFrame,
    target_col: str,
    selected_model: str,
    forecast_years: float,
    # Kept arguments for compatibility but they are less relevant in direct mode
    seq_length: int = 1,
    energy_lags: int = 0,
    keep_predictor_names = None,
    xgb_params: dict = None
):
    """Train the selected model(s) and forecast with a direct-regression strategy.

    Steps:
    1. Build [features + calendar features] -> target via ``prepare_features_direct``.
    2. Chronological 80/20 split; MinMax scalers are fitted on the training
       part only and then applied to the test and future data.
    3. Train LSTM / GRU / XGBoost (all three for 'Ensemble' and 'Weighted_Avg').
    4. Build future features with ``create_future_dataframe`` and predict the
       whole horizon at once (no recursive loop).
    5. Report MSE / MAE / R2 on the held-out test part in original units.

    Parameters
    ----------
    df : cleaned plant data with a datetime index.
    target_col : column to forecast.
    selected_model : 'LSTM', 'GRU', 'XGBoost', 'Ensemble' or 'Weighted_Avg'.
    forecast_years : forecast horizon in years.
    seq_length, energy_lags, keep_predictor_names : accepted for backward
        compatibility; not used.
    xgb_params : keyword arguments for ``xgb.XGBRegressor``; defaults are
        defined below.

    Returns
    -------
    (final_forecast, history, metrics_str, inv_y_test, inv_pred_test)
        On failure (unknown target, unknown model, too little data) the tuple
        is ``(None, None, <error message>, None, None)`` so the text slot
        always carries the text.
    """
    # 1. Setup Defaults
    if xgb_params is None:
        xgb_params = dict(
            n_estimators=100, learning_rate=0.03, max_depth=6,
            subsample=0.9, colsample_bytree=0.9,
            objective='reg:squarederror', n_jobs=-1, random_state=42
        )

    if target_col not in df.columns:
        return None, None, f"Target {target_col} not found!", None, None

    base_models = ('LSTM', 'GRU', 'XGBoost')
    if selected_model in ('Ensemble', 'Weighted_Avg'):
        types_to_train = list(base_models)
    elif selected_model in base_models:
        types_to_train = [selected_model]
    else:
        # Without this check no model is trained and the forecast step fails with IndexError.
        return None, None, f"Unknown model {selected_model!r}.", None, None

    # Identify feature columns (everything numeric except target)
    feature_cols = [c for c in df.select_dtypes(include=np.number).columns if c != target_col]

    # 2. Prepare Data
    X, y, x_col_names, df_processed = prepare_features_direct(df, target_col, feature_cols)
    if len(y) < 2:
        return None, None, f"Not enough rows with a {target_col} value to train and test.", None, None

    # Split chronologically first, then fit the scalers on the training part
    # only so test-set ranges do not leak into training.
    X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
        X, y, test_size=0.2, shuffle=False
    )

    scaler_x = MinMaxScaler()
    X_train = scaler_x.fit_transform(X_train_raw)
    X_test = scaler_x.transform(X_test_raw)

    scaler_y = MinMaxScaler()
    y_train = scaler_y.fit_transform(y_train_raw.reshape(-1, 1)).flatten()
    y_test = scaler_y.transform(y_test_raw.reshape(-1, 1)).flatten()

    # 3. Train Models
    models = {}
    history = {}
    input_dim = X_train.shape[1]

    for m_type in types_to_train:
        print(f"Training {m_type}...")

        if m_type == 'XGBoost':
            model = xgb.XGBRegressor(**xgb_params)
            model.fit(X_train, y_train)
            models[m_type] = model
            history[m_type] = {}  # XGB doesn't return keras-style history objects

        else:
            # Reshape X for RNN: (Samples, Timesteps=1, Features)
            X_train_dl = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
            X_test_dl = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

            model = build_dl_regressor(m_type, input_dim)
            es = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=0)

            # NOTE(review): the test split doubles as the early-stopping
            # validation set, so the reported test metrics are optimistic.
            hist = model.fit(
                X_train_dl, y_train,
                validation_data=(X_test_dl, y_test),
                epochs=50,
                batch_size=32,
                verbose=0,
                callbacks=[es]
            )
            models[m_type] = model
            history[m_type] = hist.history

    # Test-set predictions are computed once and reused for weights and metrics.
    test_preds = {name: _predict_scaled(name, m, X_test) for name, m in models.items()}

    # 4. Weights (if Weighted_Avg): inverse-RMSE weighting on the test split
    weights = {}
    if selected_model == 'Weighted_Avg':
        errors = {
            name: np.sqrt(mean_squared_error(y_test, p)) for name, p in test_preds.items()
        }
        inv = {k: 1.0 / (v + 1e-6) for k, v in errors.items()}
        total = sum(inv.values())
        weights = {k: v / total for k, v in inv.items()}

    # 5. Forecasting: direct prediction on estimated future features
    X_future, future_dates = create_future_dataframe(df_processed, feature_cols, forecast_years)
    X_future_scaled = scaler_x.transform(X_future)

    future_preds = {name: _predict_scaled(name, m, X_future_scaled) for name, m in models.items()}
    final_scaled_pred = _blend_predictions(selected_model, future_preds, weights)

    # Inverse Transform Target (Scale back to original units)
    final_forecast = scaler_y.inverse_transform(final_scaled_pred.reshape(-1, 1)).flatten()

    # 6. Metrics Calculation (on Test Set, original units)
    final_test_pred_scaled = _blend_predictions(selected_model, test_preds, weights)

    inv_y_test = scaler_y.inverse_transform(y_test.reshape(-1, 1)).flatten()
    inv_pred_test = scaler_y.inverse_transform(final_test_pred_scaled.reshape(-1, 1)).flatten()

    mse = mean_squared_error(inv_y_test, inv_pred_test)
    mae = mean_absolute_error(inv_y_test, inv_pred_test)
    r2 = r2_score(inv_y_test, inv_pred_test)

    metrics_str = f"{selected_model} Results:\nMSE: {mse:.2f}\nMAE: {mae:.2f}\nR2: {r2:.4f}"

    if selected_model == 'Weighted_Avg':
        w_str = "\n".join([f"{k}: {v:.3f}" for k, v in weights.items()])
        metrics_str += f"\n\nWeights:\n{w_str}"

    return final_forecast, history, metrics_str, inv_y_test, inv_pred_test

# 6. Forecasting Dashboard (Deployment)
 A widget-based control panel allows users to select a plant, target variable, and model to generate future forecasts. It displays the forecast plot, training loss history, and evaluation metrics, and provides a CSV download of the results.

In [25]:
# %% [code]
import ipywidgets as widgets
from IPython.display import display, FileLink
import plotly.graph_objects as go
import pandas as pd
import datetime

# Plants offered in the dashboard are exactly the ones loaded into data_store.
plants = [*data_store]


def _wide_label():
    """Style dict that lets a widget's description take its natural width."""
    return {'description_width': 'initial'}


# --- Widgets ---
w_plant = widgets.Dropdown(
    description='Plant:',
    options=plants,
    value=plants[0] if plants else None,
    style=_wide_label(),
)

# Options are filled in by update_targets once a plant is selected.
w_target = widgets.Dropdown(
    description='Target:',
    style=_wide_label(),
)

w_model = widgets.Dropdown(
    description='Model:',
    options=['LSTM', 'GRU', 'XGBoost', 'Ensemble', 'Weighted_Avg'],
    value='Weighted_Avg',
    style=_wide_label(),
)

# Forecast horizon in whole years (1-10, default 4).
w_years = widgets.IntSlider(
    description='Years:',
    value=4,
    min=1,
    max=10,
    step=1,
    style=_wide_label(),
)

w_run = widgets.Button(
    description='Run Forecast',
    icon='play',
    button_style='success',
)

output_area = widgets.Output()

# --- Logic: Update Targets to Only Energy & Water_Level ---
def update_targets(change=None):
    """Refresh the target dropdown for the currently selected plant.

    Only 'Energy' and 'Water_Level' are offered, and only when the plant's
    frame in ``data_store`` has that column. Does nothing when no plant is
    selected. ``change`` is the ipywidgets change event and is not inspected.
    """
    plant = w_plant.value
    if not plant:
        return

    available = data_store[plant].columns
    options = [candidate for candidate in ('Energy', 'Water_Level') if candidate in available]

    w_target.options = options
    w_target.value = options[0] if options else None


# Re-run whenever the plant selection changes, and once now to populate the dropdown.
w_plant.observe(update_targets, names='value')
update_targets()

# --- Run Logic ---
# Updated run_forecast (notebook cell) — show up to 7 years of historical data for clearer seasonality
def run_forecast(b):
    """Button callback: train the selected model, then plot and export the forecast.

    Reads the plant, target, model and horizon from the widgets, calls
    ``train_predict_evaluate`` and renders inside ``output_area``:
    the forecast plot (with up to 7 years of history), the training-loss
    curves when available, the evaluation metrics, and a download link to a
    CSV written to the current working directory.

    Parameters
    ----------
    b : the clicked button (passed by ipywidgets; not used).
    """
    MAX_HISTORY_YEARS = 7  # how much history to draw next to the forecast

    output_area.clear_output()
    with output_area:
        plant = w_plant.value
        target = w_target.value
        model_type = w_model.value
        years = w_years.value

        if not plant or not target:
            print("Please select all options.")
            return

        print(f"🔄 Running {model_type} forecast for {plant} - Target: {target} ({years} years)...")

        try:
            forecast, history, metrics_txt, val_actual, val_pred = train_predict_evaluate(
                data_store[plant], target, model_type, years
            )
        except Exception as e:
            # Training errors are shown in the output area instead of a raw traceback.
            print(f"❌ Error: {e}")
            return

        if forecast is None:
            # On failure one of the returned slots carries a text message; show it
            # rather than discarding it.
            reason = next((v for v in (metrics_txt, val_pred) if isinstance(v, str)), None)
            print(f"❌ Forecast failed: {reason}" if reason else "❌ Forecast failed.")
            return

        # Ensure the index is datetime
        df_plant = data_store[plant].copy()
        if not pd.api.types.is_datetime64_any_dtype(df_plant.index):
            try:
                df_plant.index = pd.to_datetime(df_plant.index)
            except Exception:
                print("❌ Error: plant dataframe index is not datetime and could not be converted.")
                return

        # Forecast dates: one per predicted value, starting the day after the last record
        last_date = df_plant.index[-1]
        future_dates = [last_date + datetime.timedelta(days=i) for i in range(1, len(forecast) + 1)]

        # HISTORICAL RANGE: up to MAX_HISTORY_YEARS (less if less data is available)
        max_hist_days = MAX_HISTORY_YEARS * 365
        hist_days = min(len(df_plant), max_hist_days)
        hist_series = df_plant[target].iloc[-hist_days:].copy()

        # 1. Forecast Plot
        fig_forecast = go.Figure()
        fig_forecast.add_trace(
            go.Scatter(
                x=hist_series.index,
                y=hist_series.values,
                mode='lines',
                name=f'Historical (last {hist_days} days ≈ {hist_days//365} years)',
                line=dict(color='blue', width=2),
                opacity=0.8
            )
        )
        fig_forecast.add_trace(
            go.Scatter(
                x=future_dates,
                y=forecast,
                mode='lines+markers',
                name='Forecast',
                line=dict(color='red', width=2),
                marker=dict(size=4)
            )
        )
        # Improve readability: vertical line separating history and forecast
        fig_forecast.add_vline(x=last_date, line=dict(color='gray', dash='dash'))
        fig_forecast.update_layout(
            title=f"{model_type} Forecast: {target} ({years} Years) — Historical shown up to {MAX_HISTORY_YEARS} years",
            xaxis_title="Date",
            yaxis_title=target,
            height=500,
            legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
        )
        fig_forecast.show()

        # 2. Training Loss Plot (defensive: entries without loss keys are skipped)
        fig_loss = go.Figure()
        for m_name, h in (history or {}).items():
            if not isinstance(h, dict):
                continue
            train_loss = h.get('loss') or h.get('training_loss') or []
            val_loss = h.get('val_loss') or h.get('validation_loss') or []
            if train_loss:
                fig_loss.add_trace(go.Scatter(
                    x=list(range(1, len(train_loss) + 1)),
                    y=train_loss,
                    mode='lines',
                    name=f'{m_name} Train Loss'
                ))
            if val_loss:
                fig_loss.add_trace(go.Scatter(
                    x=list(range(1, len(val_loss) + 1)),
                    y=val_loss,
                    mode='lines',
                    name=f'{m_name} Val Loss',
                    line=dict(dash='dash')
                ))
        if not fig_loss.data:
            print("\nNo training history available to plot (no 'loss' or 'val_loss' keys).")
        else:
            fig_loss.update_layout(
                title="Training Loss",
                xaxis_title="Epoch",
                yaxis_title="Loss",
                height=350
            )
            fig_loss.show()

        # 3. Metrics
        print("\n✅ Evaluation Metrics:")
        print(metrics_txt)

        # 4. CSV Download
        df_out = pd.DataFrame({
            'Date': future_dates,
            f'Forecast_{target}': forecast
        })
        filename = f"{plant}_{target}_Forecast_{years}Y.csv".replace(" ", "_")
        df_out.to_csv(filename, index=False)
        print("\n⬇️ Download Forecast:")
        display(FileLink(filename))

# Wire the run button to the forecast callback.
w_run.on_click(run_forecast)

# --- Layout ---
# Title, two rows of selectors, the run button, then the output area.
_ui_rows = [
    widgets.HTML("<h2 style='text-align:center;'>EEP Hydropower Forecasting Dashboard (Kaggle)</h2>"),
    widgets.HBox([w_plant, w_target]),
    widgets.HBox([w_model, w_years]),
    w_run,
    output_area,
]
ui = widgets.VBox(_ui_rows)

display(ui)

VBox(children=(HTML(value="<h2 style='text-align:center;'>EEP Hydropower Forecasting Dashboard (Kaggle)</h2>")‚Ä¶