# Transformer Model

In [1]:
import pandas as pd
import numpy as np
from backtesting import Backtest, Strategy
import pandas as pd
import numpy as np
from backtesting import Backtest, Strategy
import yfinance as yf
from sklearn.preprocessing import StandardScaler

# Set random seed
np.random.seed(42)

# Load and clean the CSV
df = pd.read_csv("../Data/^GSPC.csv")
df = df[df["Price"] != "Ticker"]
df = df[df["Price"] != "Date"]
df[['Close','High', 'Low', 'Open', 'Volume']] = df[['Close', 'High', 'Low', 'Open', 'Volume']].astype(float)
df['Price'] = pd.to_datetime(df['Price'])
df.set_index('Price', inplace=True)
df.index.name = None

# Define feature generation
def add_features(data):
    """Create additional technical indicators and prediction target while keeping alignment intact"""
    df = data.copy()

    # Technical indicators
    df['MA5'] = df['Close'].rolling(window=5).mean()
    df['MA10'] = df['Close'].rolling(window=10).mean()
    df['MA20'] = df['Close'].rolling(window=20).mean()
    df['Volatility'] = df['Close'].pct_change().rolling(window=10).std()
    df['Momentum'] = df['Close'] - df['Close'].shift(5)
    df['Return'] = df['Close'].pct_change()

    # Feature engineering
    df['X_MA5'] = (df['Close'] - df['MA5']) / df['Close']
    df['X_MA10'] = (df['Close'] - df['MA10']) / df['Close']
    df['X_MA20'] = (df['Close'] - df['MA20']) / df['Close']
    df['X_MA5_10'] = (df['MA5'] - df['MA10']) / df['Close']
    df['X_MA10_20'] = (df['MA10'] - df['MA20']) / df['Close']
    df['X_Volatility'] = df['Volatility']
    df['X_Momentum'] = df['Momentum']
    df['X_Return'] = df['Return']
    df['X_Return_5'] = df['Return'].rolling(5).sum()
    df['X_VOL_CHG'] = df['Volume'].pct_change(5)

    # Target: use binary or multi-class depending on your setup
    df['Target'] = np.where(df['Return'].shift(-1) > 0.005, 1,
                    np.where(df['Return'].shift(-1) < -0.005, -1, 0))

    return df  # ❗ Keep full index, no dropna()

# Helper functions to extract features and labels
def get_X(data):
    feature_columns = [col for col in data.columns if col.startswith('X_')]
    return data[feature_columns].values

def get_y(data):
    return data.Target.values

# Apply features
df = add_features(df)




In [7]:
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.optim import Adam

class SimpleTransformer(nn.Module):
    def __init__(self, input_dim, num_classes, d_model=32, nhead=4, num_layers=2):
        super().__init__()
        self.embedding = nn.Linear(input_dim, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(d_model, num_classes)
        
    def forward(self, x):
        # Input shape: [batch_size, seq_len, features]
        x = self.embedding(x)
        x = self.transformer_encoder(x)
        # Take the last sequence element for prediction
        x = x[:, -1, :]
        x = self.fc(x)
        return x

class TransformerStrategy(Strategy):
    price_delta = 0.01
    lookback_window = 200
    d_model = 32
    nhead = 4
    num_layers = 2
    learning_rate = 0.001
    batch_size = 32
    epochs = 3
    
    def init(self):
        # Feature + label prep
        full_df = add_features(self.data.df).fillna(method='ffill').fillna(method='bfill')
        self.all_data = full_df.loc[self.data.df.index.intersection(full_df.index)]
        
        # Extract features
        self.features = [col for col in self.all_data.columns if col.startswith("X_")]
        self.input_dim = len(self.features)
        
        # Initialize model
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = SimpleTransformer(
            input_dim=self.input_dim,
            num_classes=3,  # -1, 0, 1
            d_model=self.d_model,
            nhead=self.nhead,
            num_layers=self.num_layers
        ).to(self.device)
        
        self.optimizer = Adam(self.model.parameters(), lr=self.learning_rate)
        self.criterion = nn.CrossEntropyLoss()
        
        # Prediction tracking
        self.predictions = []
        self.actuals = []
        
    def next(self):
        # Current position in the backtest
        current_idx = len(self.data) - 1
        
        # Safety check - make sure we have enough data
        if current_idx < self.lookback_window:
            return
            
        # Get current data index
        current_candle_time = self.data.index[-1]
        
        try:
            # Find corresponding index in our all_data DataFrame
            idx_in_all_data = self.all_data.index.get_indexer([current_candle_time])[0]
            
            # Check if valid index found
            if idx_in_all_data < 0 or idx_in_all_data < self.lookback_window:
                return
                
            # Safe slicing with loc instead of iloc
            start_idx = self.all_data.index[idx_in_all_data - self.lookback_window + 1]
            end_idx = self.all_data.index[idx_in_all_data]
            train_data = self.all_data.loc[start_idx:end_idx].iloc[:-1]  # All but last point
            test_data = self.all_data.loc[end_idx:end_idx]  # Just the last point
            
            # Check if we have enough data
            if len(train_data) < self.lookback_window // 2:
                return
                
            # Extract features and target
            X_train = train_data[self.features].values
            y_train = train_data["Target"].values
            X_test = test_data[self.features].values
            y_true = test_data["Target"].values[0]
            
            # Convert to PyTorch tensors
            X_train_tensor = torch.FloatTensor(X_train).to(self.device)
            y_train_tensor = torch.LongTensor(y_train).to(self.device)
            # Reshape for transformer (batch, sequence, features)
            X_train_tensor = X_train_tensor.unsqueeze(0)  # [1, seq_len, features]
            
            # Train model dynamically on each call - FIX: Use the last target only
            self.model.train()
            for _ in range(self.epochs):
                self.optimizer.zero_grad()
                outputs = self.model(X_train_tensor)
                # Fix: use only the single prediction output and target
                loss = self.criterion(outputs, y_train_tensor[-1].unsqueeze(0))
                loss.backward()
                self.optimizer.step()
            
            # Make prediction
            self.model.eval()
            with torch.no_grad():
                X_test_tensor = torch.FloatTensor(X_test).unsqueeze(0).to(self.device)
                outputs = self.model(X_test_tensor)
                _, predicted = torch.max(outputs, 1)
                # Map from 0,1,2 to -1,0,1
                prediction_map = {0: -1, 1: 0, 2: 1}
                prediction = prediction_map[predicted.item()]
                
            # Log predictions and labels
            if prediction in [-1, 0, 1]:
                self.predictions.append(prediction)
                self.actuals.append(int(y_true))
                
            # Trade execution with take-profit and stop-loss
            close = self.data.Close[-1]
            tp_long = close * (1 + self.price_delta)
            sl_long = close * (1 - self.price_delta)
            tp_short = close * (1 - self.price_delta)
            sl_short = close * (1 + self.price_delta)
            
            # Trading logic
            if prediction == 1:
                if not self.position.is_long:
                    self.position.close()
                    self.buy(tp=tp_long, sl=sl_long)
                    
            elif prediction == -1:
                if not self.position.is_short:
                    self.position.close()
                    self.sell(tp=tp_short, sl=sl_short)
                    
            # Risk management with trailing stop
            for trade in self.trades:
                if self.data.index[-1] - trade.entry_time > pd.Timedelta(days=5):
                    if trade.is_long:
                        trade.sl = max(trade.sl, trade.entry_price)
                    else:
                        trade.sl = min(trade.sl, trade.entry_price)
                        
        except (IndexError, KeyError, RuntimeError) as e:
            # Handle exceptions gracefully to avoid breaking the backtest
            pass

In [8]:
from sklearn.metrics import f1_score, accuracy_score, precision_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

def evaluate_strategy(y_true, y_pred):

    print("Evaluation Metrics")
    print("Accuracy:", round(accuracy_score(y_true, y_pred), 4))
    print("Precision (macro):", round(precision_score(y_true, y_pred, average='macro'), 4))
    print("F1 Score (macro):", round(f1_score(y_true, y_pred, average='macro'), 4))

    cm = confusion_matrix(y_true, y_pred, labels=[-1, 0, 1])
    disp = ConfusionMatrixDisplay(cm, display_labels=['Down (-1)', 'Neutral (0)', 'Up (1)'])
    disp.plot(cmap='Blues')
    plt.title("Confusion Matrix")
    plt.show()

In [9]:
bt = Backtest(df, TransformerStrategy, cash=10_000, commission=.0002)
backtest = bt.run()
backtest

  full_df = add_features(self.data.df).fillna(method='ffill').fillna(method='bfill')


Backtest.run:   0%|          | 0/986 [00:00<?, ?bar/s]

Start                     2020-01-30 00:00:00
End                       2023-12-29 00:00:00
Duration                   1429 days 00:00:00
Exposure Time [%]                    44.78217
Equity Final [$]                   8262.92706
Equity Peak [$]                   10074.78499
Commissions [$]                     580.20524
Return [%]                          -17.37073
Buy & Hold Return [%]                45.25956
Return (Ann.) [%]                    -4.75489
Volatility (Ann.) [%]                 4.94544
CAGR [%]                             -3.30883
Sharpe Ratio                         -0.96147
Sortino Ratio                        -1.27674
Calmar Ratio                         -0.26276
Alpha [%]                           -14.29779
Beta                                  -0.0679
Max. Drawdown [%]                   -18.09604
Avg. Drawdown [%]                    -6.77822
Max. Drawdown Duration     1061 days 00:00:00
Avg. Drawdown Duration      376 days 00:00:00
# Trades                          

In [10]:
bt.plot()