# Temporal Fusion Transformer for Electricity Price Forecasting

This notebook implements TFT using PyTorch Forecasting for weekly electricity price prediction.

In [None]:
# Install required packages (uncomment if needed).
# Use %pip rather than !pip so the packages are installed into this kernel's environment.
# %pip install pytorch-forecasting pytorch-lightning torch pandas numpy matplotlib scikit-learn openpyxl xlrd

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from math import sqrt
import warnings
warnings.filterwarnings('ignore')

import torch
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger

from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import QuantileLoss, RMSE
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

# Report the library versions and GPU availability for this run in one go.
environment_report = [
    f"PyTorch version: {torch.__version__}",
    f"PyTorch Lightning version: {pl.__version__}",
    f"CUDA available: {torch.cuda.is_available()}",
]
print("\n".join(environment_report))

## 1. Data Preparation

In [None]:
def prepare_tft_data(csv_path, sheet_name, date_column='ini_date'):
    """
    Load one sheet of the Excel workbook and shape it for a TimeSeriesDataSet.

    The sheet is sorted by date, gaps are filled, and the columns the
    Temporal Fusion Transformer needs (``time_idx``, ``group`` and calendar
    features) are added.

    Args:
        csv_path: Path to the Excel workbook (read with ``pd.read_excel``;
            the parameter name is kept for backward compatibility).
        sheet_name: Name of the sheet to read. Also used as the group label
            when the sheet has no ``submarket`` column.
        date_column: Name of the date column.

    Returns:
        DataFrame sorted by ``date_column`` with these added columns:
        ``time_idx`` (consecutive integer per group), ``group`` (string),
        ``month``, ``quarter`` and ``week_of_year`` (string categories, as
        required for categorical inputs) and ``year`` (integer).
    """
    # Read data
    df = pd.read_excel(csv_path, sheet_name=sheet_name)

    # Convert date to datetime and put the rows in chronological order
    df[date_column] = pd.to_datetime(df[date_column])
    df = df.sort_values(date_column).reset_index(drop=True)

    # Handle missing values before deriving features so the derived columns
    # never see NaN. DataFrame.ffill()/bfill() replace the deprecated
    # fillna(method=...). NOTE: the trailing bfill fills leading gaps with
    # *later* observations; it only affects rows before the first valid value.
    df = df.ffill().bfill()

    # Group identifier (needed for TFT): submarket if available, else the sheet name.
    # Cast to str because group ids are used as categorical inputs.
    if 'submarket' in df.columns:
        group_labels = df['submarket'].astype(str)
    else:
        group_labels = pd.Series(str(sheet_name), index=df.index)

    # Time index: consecutive integer *within each group*. For a single group
    # this equals range(len(df)); with several submarkets in one sheet a global
    # counter would leave gaps inside every series.
    df['time_idx'] = df.groupby(group_labels).cumcount()
    df['group'] = group_labels

    # Calendar features. The ones used as categoricals must be string-typed
    # categories; numeric columns are rejected as categorical inputs.
    dates = df[date_column].dt
    df['month'] = dates.month.astype(str).astype('category')
    df['quarter'] = dates.quarter.astype(str).astype('category')
    df['year'] = dates.year
    df['week_of_year'] = dates.isocalendar().week.astype(str).astype('category')

    return df

In [None]:
# Configuration: everything a reader might want to tune lives in this cell.
CSV_PATH = 'FINAL_INPUTS_v2.xls'  # Excel workbook (read with pd.read_excel), one sheet per submarket
SHEET_NAME = 'southeast'  # Change to 'northeast', 'north', 'south' as needed
MAX_PREDICTION_LENGTH = 1  # Forecast horizon (1 week ahead)
MAX_ENCODER_LENGTH = 52    # Look back window (52 weeks = 1 year)
TRAINING_CUTOFF = None     # Will be set based on 70% split
BATCH_SIZE = 64
MAX_EPOCHS = 100
SEED = 42                  # Fixed seed so weight init, shuffling and dropout are repeatable

# Seed Python, NumPy and PyTorch before any data loader or model is created;
# without this every Restart & Run All produces different metrics.
pl.seed_everything(SEED, workers=True)

# Load and prepare data
df = prepare_tft_data(CSV_PATH, SHEET_NAME)

print(f"Data shape: {df.shape}")
print(f"Date range: {df['ini_date'].min()} to {df['ini_date'].max()}")
print(f"\nFirst few rows:")
print(df.head())
print(f"\nColumns: {df.columns.tolist()}")

In [None]:
# Set training cutoff (roughly 70% of the time steps for training, the rest for validation).
# The cutoff is a time_idx value; rows with time_idx <= TRAINING_CUTOFF form the training frame.
# Counting distinct time steps (not rows) keeps the cutoff valid when several
# groups share the same time_idx values; for a single series it equals len(df).
TRAINING_CUTOFF = int(df['time_idx'].nunique() * 0.7)

# Report the row counts that the `time_idx <= TRAINING_CUTOFF` filter actually produces
# (the cutoff row itself belongs to the training frame).
n_train_rows = int((df['time_idx'] <= TRAINING_CUTOFF).sum())
print(f"Training samples: {n_train_rows}")
print(f"Validation samples: {len(df) - n_train_rows}")
print(f"Training cutoff time_idx: {TRAINING_CUTOFF}")

## 2. Create TimeSeriesDataSet

In [None]:
# Define features
# Target variable
target = 'pld'

# Time-varying known features (known in the future)
# These are typically calendar features or planned interventions
time_varying_known_reals = []  # Add if you have future-known continuous variables
time_varying_known_categoricals = ['month', 'quarter', 'week_of_year']

# Time-varying unknown features (only known up to present)
# These are the features we want to use for prediction but don't know their future values
time_varying_unknown_reals = [
    'load_energy',
    'max_demand',
    'ena',
    'hidro_gen',
    'thermo_gen',
    'stored_energy',
    'exports',
    'imports'
]

# Static features (constant for each group)
static_categoricals = ['group']
static_reals = []  # Add if you have static continuous features

# Create training dataset from the rows up to and including the cutoff
training = TimeSeriesDataSet(
    df[lambda x: x.time_idx <= TRAINING_CUTOFF],
    time_idx='time_idx',
    target=target,
    group_ids=['group'],
    min_encoder_length=MAX_ENCODER_LENGTH // 2,  # Allow some flexibility
    max_encoder_length=MAX_ENCODER_LENGTH,
    min_prediction_length=1,
    max_prediction_length=MAX_PREDICTION_LENGTH,
    static_categoricals=static_categoricals,
    static_reals=static_reals,
    time_varying_known_categoricals=time_varying_known_categoricals,
    time_varying_known_reals=time_varying_known_reals,
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=time_varying_unknown_reals + [target],  # Add target to features
    target_normalizer=GroupNormalizer(
        groups=['group'], transformation='softplus'
    ),  # Use softplus for positive values
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    allow_missing_timesteps=False
)

# Create validation dataset.
# The full frame is passed so encoder windows can reach back into the training
# period, but min_prediction_idx restricts the *predicted* steps to those after
# the cutoff. (predict=True would keep only the last window of each series,
# i.e. a single validation sample instead of the hold-out period.)
validation = TimeSeriesDataSet.from_dataset(
    training,
    df,
    min_prediction_idx=TRAINING_CUTOFF + 1,
    stop_randomization=True,
)

# Create dataloaders
train_dataloader = training.to_dataloader(train=True, batch_size=BATCH_SIZE, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=BATCH_SIZE * 10, num_workers=0)

print(f"Training dataset size: {len(training)}")
print(f"Validation dataset size: {len(validation)}")

## 3. Configure and Train TFT Model

In [None]:
# Configure callbacks
# Stop when val_loss has not improved by at least min_delta for `patience` epochs.
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    min_delta=1e-4,
    patience=10,
    verbose=False,
    mode="min"
)

# Keep the checkpoint with the lowest val_loss. Without a monitored metric the
# default checkpoint callback only keeps the last epoch, so
# trainer.checkpoint_callback.best_model_path would point at the model from
# *after* the early-stopping patience window rather than the best one.
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    monitor="val_loss",
    mode="min",
    save_top_k=1,
)

lr_logger = LearningRateMonitor()

# Configure logger
logger = TensorBoardLogger("lightning_logs")

# Create trainer
trainer = pl.Trainer(
    max_epochs=MAX_EPOCHS,
    accelerator="auto",  # Automatically uses GPU if available
    devices="auto",
    gradient_clip_val=0.1,
    callbacks=[lr_logger, early_stop_callback, checkpoint_callback],
    logger=logger,
    enable_model_summary=True,
)

print("Trainer configured successfully")

In [None]:
# Hyperparameters for the Temporal Fusion Transformer, collected in one place
# so they can be inspected or tweaked before the model is built.
tft_hparams = dict(
    learning_rate=0.03,
    hidden_size=64,                   # width of the hidden layers
    attention_head_size=4,            # number of attention heads
    dropout=0.1,
    hidden_continuous_size=32,        # width used when processing continuous variables
    loss=QuantileLoss(),              # quantile loss -> probabilistic forecasts
    log_interval=10,                  # log every 10 batches
    reduce_on_plateau_patience=4,     # lower the learning rate when validation stalls
    optimizer="ranger",               # Ranger optimizer (RAdam combined with LookAhead)
)

# Build the model; input/output dimensions are taken from the training dataset.
tft = TemporalFusionTransformer.from_dataset(training, **tft_hparams)

parameter_count_k = tft.size()/1e3
print(f"Model size: {parameter_count_k:.1f}k parameters")
print("\nModel architecture:")
print(tft)

In [None]:
# Fit the TFT on the training loader, validating on the hold-out loader each epoch.
print("Starting training...")
trainer.fit(model=tft, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)
print("\nTraining completed!")

## 4. Load Best Model and Evaluate

In [None]:
# Restore the checkpoint that the trainer's checkpoint callback reports as best.
best_model_path = trainer.checkpoint_callback.best_model_path
print(f"Loading best model from: {best_model_path}")
best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)

# Collect the targets batch by batch (first element of each batch's y tuple),
# then run the restored model over the same loader to get point predictions.
print("\nGenerating predictions on validation set...")
target_batches = []
for _, batch_y in val_dataloader:
    target_batches.append(batch_y[0])
actuals = torch.cat(target_batches)
predictions = best_tft.predict(val_dataloader, mode="prediction", return_x=False)

print(f"Predictions shape: {predictions.shape}")
print(f"Actuals shape: {actuals.shape}")

In [None]:
# Calculate metrics
# Move both tensors to the CPU and flatten them to 1-D arrays of matching order.
actuals_np = actuals.cpu().numpy().flatten()
predictions_np = predictions.cpu().numpy().flatten()

# Remove any NaN values for metric calculation
mask = ~(np.isnan(actuals_np) | np.isnan(predictions_np))
actuals_clean = actuals_np[mask]
predictions_clean = predictions_np[mask]

rmse = sqrt(mean_squared_error(actuals_clean, predictions_clean))
mae = np.mean(np.abs(actuals_clean - predictions_clean))

# MAPE is undefined where the actual value is zero. Dividing anyway yields
# inf (silently, because warnings are suppressed in this notebook), so the
# percentage error is averaged over the non-zero actuals only.
nonzero_actuals = actuals_clean != 0
if nonzero_actuals.any():
    pct_errors = np.abs(
        (actuals_clean[nonzero_actuals] - predictions_clean[nonzero_actuals])
        / actuals_clean[nonzero_actuals]
    )
    mape = np.mean(pct_errors) * 100
else:
    mape = float('nan')

print(f"\n{'='*50}")
print(f"PERFORMANCE METRICS ({SHEET_NAME.upper()})")
print(f"{'='*50}")
print(f"RMSE: {rmse:.3f}")
print(f"MAE: {mae:.3f}")
print(f"MAPE: {mape:.2f}%")
print(f"{'='*50}")

## 5. Visualization

In [None]:
# Line chart of the validation targets against the TFT point forecasts.
fig, ax = plt.subplots(figsize=(16, 6))
for series, series_label in ((actuals_clean, 'Actual'), (predictions_clean, 'TFT Prediction')):
    ax.plot(series, label=series_label, alpha=0.7, linewidth=2)
ax.set_title(f'TFT Predictions vs Actuals - {SHEET_NAME.upper()}', fontsize=14, fontweight='bold')
ax.set_xlabel('Time Steps', fontsize=12)
ax.set_ylabel('Price (PLD)', fontsize=12)
ax.legend(fontsize=11)
ax.grid(True, alpha=0.3)
fig.tight_layout()
fig.savefig(f'tft_predictions_{SHEET_NAME}.png', dpi=150, bbox_inches='tight')
plt.show()

print(f"Plot saved as: tft_predictions_{SHEET_NAME}.png")

In [None]:
# Residual diagnostics: error over time (left) and error distribution (right).
residuals = actuals_clean - predictions_clean

fig, axes = plt.subplots(1, 2, figsize=(16, 5))
ax_time, ax_hist = axes

# Left panel: residuals in validation order, with a zero reference line.
ax_time.plot(residuals, alpha=0.7)
ax_time.axhline(y=0, color='r', linestyle='--', alpha=0.5)
ax_time.set_title('Residuals Over Time', fontsize=12, fontweight='bold')
ax_time.set_xlabel('Time Steps')
ax_time.set_ylabel('Residual')

# Right panel: histogram of the residuals, with a zero reference line.
ax_hist.hist(residuals, bins=50, edgecolor='black', alpha=0.7)
ax_hist.axvline(x=0, color='r', linestyle='--', alpha=0.5)
ax_hist.set_title('Residuals Distribution', fontsize=12, fontweight='bold')
ax_hist.set_xlabel('Residual')
ax_hist.set_ylabel('Frequency')

for panel in axes:
    panel.grid(True, alpha=0.3)

fig.tight_layout()
fig.savefig(f'tft_residuals_{SHEET_NAME}.png', dpi=150, bbox_inches='tight')
plt.show()

print(f"Residuals plot saved as: tft_residuals_{SHEET_NAME}.png")

In [None]:
# Predicted-vs-actual scatter with the y = x line as the "perfect forecast" reference.
fig, ax = plt.subplots(figsize=(8, 8))
ax.scatter(actuals_clean, predictions_clean, alpha=0.5, s=20)

# The diagonal spans the observed range of the actual prices.
diagonal = [actuals_clean.min(), actuals_clean.max()]
ax.plot(diagonal, diagonal, 'r--', lw=2, label='Perfect Prediction')

ax.set_xlabel('Actual Price', fontsize=12)
ax.set_ylabel('Predicted Price', fontsize=12)
ax.set_title(f'TFT: Predicted vs Actual - {SHEET_NAME.upper()}', fontsize=14, fontweight='bold')
ax.legend(fontsize=11)
ax.grid(True, alpha=0.3)
fig.tight_layout()
fig.savefig(f'tft_scatter_{SHEET_NAME}.png', dpi=150, bbox_inches='tight')
plt.show()

print(f"Scatter plot saved as: tft_scatter_{SHEET_NAME}.png")

## 6. Feature Importance and Interpretability

In [None]:
# Get interpretation/feature importance
print("Calculating feature importance...")

# interpret_output works on the model's *raw* output (attention and variable
# selection weights), not on dataset items, so run a raw prediction pass first.
# This uses the same Prediction-object API (.output / .x) as the uncertainty
# section of this notebook.
interpretation_predictions = best_tft.predict(val_dataloader, mode="raw", return_x=True)
interpretation = best_tft.interpret_output(interpretation_predictions.output, reduction="sum")

# Plot variable importance
print("\nVariable Importance:")
# plot_interpretation returns one figure per panel; save each of them rather
# than only whichever figure happens to be current.
interpretation_figures = best_tft.plot_interpretation(interpretation)
for panel_name, panel_figure in interpretation_figures.items():
    panel_figure.savefig(
        f'tft_interpretation_{SHEET_NAME}_{panel_name}.png', dpi=150, bbox_inches='tight'
    )
plt.show()

print(f"Interpretation plots saved as: tft_interpretation_{SHEET_NAME}_<panel>.png")

## 7. Compare with LSTM and DECOMP Benchmarks

In [None]:
# Optional comparison against previously exported benchmark forecasts.
# Any failure (missing file, sheet or column) is reported and the cell moves on.
try:
    benchmark_df = pd.read_excel('benchmark_consolidated.xlsx', sheet_name=SHEET_NAME)

    # RMSE of each benchmark forecast column against the benchmark's own actuals.
    benchmark_actuals = benchmark_df['actuals']
    lstm_rmse = sqrt(mean_squared_error(benchmark_actuals, benchmark_df['predictions']))
    decomp_rmse = sqrt(mean_squared_error(benchmark_actuals, benchmark_df['decomp_predictions_as_of']))

    print(f"\n{'='*50}")
    print(f"MODEL COMPARISON ({SHEET_NAME.upper()})")
    print(f"{'='*50}")
    print(f"TFT RMSE:          {rmse:.3f}")
    print(f"LSTM RMSE:         {lstm_rmse:.3f}")
    print(f"DECOMP RMSE:       {decomp_rmse:.3f}")
    # Positive percentages mean the TFT has the lower RMSE.
    print(f"\nTFT Improvement over LSTM:  {((lstm_rmse - rmse) / lstm_rmse * 100):.2f}%")
    print(f"TFT Improvement over DECOMP: {((decomp_rmse - rmse) / decomp_rmse * 100):.2f}%")
    print(f"{'='*50}")

except Exception as e:
    print(f"Could not load benchmark data: {e}")

## 8. Save Model and Results

In [None]:
# Save predictions to CSV
# The residual is computed here from the cleaned arrays instead of reusing the
# `residuals` variable created by the residual-plot cell, so this cell still
# works (and stays consistent) when the plotting cells are skipped.
results_df = pd.DataFrame({
    'actual': actuals_clean,
    'tft_prediction': predictions_clean,
    'residual': actuals_clean - predictions_clean
})

results_df.to_csv(f'tft_results_{SHEET_NAME}.csv', index=False)
print(f"Results saved to: tft_results_{SHEET_NAME}.csv")

# Save model
# Only the weights (state_dict) are stored; reloading them requires rebuilding
# the model with the same dataset definition and hyperparameters first.
torch.save(best_tft.state_dict(), f'tft_model_{SHEET_NAME}.pt')
print(f"Model saved to: tft_model_{SHEET_NAME}.pt")

## 9. Prediction Uncertainty on the Validation Set (Optional)

In [None]:
# Get raw predictions with quantiles for uncertainty estimation
raw_predictions = best_tft.predict(val_dataloader, mode="raw", return_x=True)

# Number of validation windows available for plotting. raw_predictions.x is a
# dict of batched tensors, so len() of it would count its keys rather than
# the samples; the first dimension of one of its tensors is the sample count.
n_validation_samples = raw_predictions.x["decoder_target"].shape[0]

# Plot predictions with uncertainty for (at most) the first five windows
for idx in range(min(5, n_validation_samples)):
    best_tft.plot_prediction(raw_predictions.x, raw_predictions.output, idx=idx, add_loss_to_title=True)
    plt.savefig(f'tft_uncertainty_{SHEET_NAME}_sample_{idx}.png', dpi=150, bbox_inches='tight')
    plt.show()

print(f"Uncertainty plots saved")

## Summary

This notebook demonstrates:
1. Data preparation for TFT with proper feature engineering
2. TimeSeriesDataSet creation with appropriate normalizers
3. TFT model configuration and training
4. Comprehensive evaluation metrics (RMSE, MAE, MAPE)
5. Multiple visualization types (predictions, residuals, scatter plots)
6. Feature importance interpretation
7. Comparison with the LSTM and DECOMP benchmarks
8. Uncertainty quantification with prediction intervals

To use this for different submarkets, simply change the `SHEET_NAME` variable to 'northeast', 'north', or 'south'.