In [4]:
import pandas as pd
import numpy as np
from backtesting import Backtest, Strategy
from xgboost import XGBClassifier

# Set random seed (NumPy's legacy global RNG) so NumPy-driven randomness is repeatable
np.random.seed(42)

# Load and clean the CSV
# Columns that arrive as text and must be numeric for the indicators and the backtest.
OHLCV_COLUMNS = ['Close', 'High', 'Low', 'Open', 'Volume']

df = pd.read_csv("../Data/^GSPC.csv")

# Drop the rows whose "Price" cell is "Ticker" or "Date" (presumably leftover
# header rows from the export -- TODO confirm against the raw file).
# The explicit .copy() makes the filtered frame independent of the frame read
# from disk, so the column assignments below do not raise SettingWithCopyWarning.
df = df[~df["Price"].isin(["Ticker", "Date"])].copy()

df[OHLCV_COLUMNS] = df[OHLCV_COLUMNS].astype(float)

# The "Price" column holds the dates; promote it to an unnamed DatetimeIndex.
df['Price'] = pd.to_datetime(df['Price'])
df = df.set_index('Price')
df.index.name = None

# Define feature generation
def add_features(data, threshold=0.005):
    """Return a copy of ``data`` with indicators, ``X_`` features and a ``Target`` label.

    The input frame is not modified and no rows are dropped, so the result
    shares the input's index. Rows inside a rolling/shift warm-up window hold
    NaN in the affected columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``Close`` and ``Volume`` columns.
    threshold : float, default 0.005
        Absolute next-bar return above which a row is labelled ``1`` (up) and
        below whose negative it is labelled ``-1`` (down). Everything else is
        labelled ``0``. The default reproduces the previous hard-coded value.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with the helper columns ``MA5``, ``MA10``, ``MA20``,
        ``Volatility``, ``Momentum``, ``Return``, the model inputs prefixed
        ``X_``, and the integer ``Target`` column.

    Notes
    -----
    The last row has no next-bar return (``shift(-1)`` yields NaN). Both
    comparisons are then False, so it is labelled ``0`` rather than being
    marked as unknown. Exclude it before training if that matters.
    """
    df = data.copy()
    close = df['Close']

    # Computed once and reused for Volatility, Return, X_Return_5 and Target.
    daily_return = close.pct_change()

    # Technical indicators
    for window in (5, 10, 20):
        df[f'MA{window}'] = close.rolling(window=window).mean()
    df['Volatility'] = daily_return.rolling(window=10).std()
    df['Momentum'] = close - close.shift(5)
    df['Return'] = daily_return

    # Feature engineering: moving-average gaps are scaled by the close so they
    # are expressed as a fraction of price.
    df['X_MA5'] = (close - df['MA5']) / close
    df['X_MA10'] = (close - df['MA10']) / close
    df['X_MA20'] = (close - df['MA20']) / close
    df['X_MA5_10'] = (df['MA5'] - df['MA10']) / close
    df['X_MA10_20'] = (df['MA10'] - df['MA20']) / close
    df['X_Volatility'] = df['Volatility']
    df['X_Momentum'] = df['Momentum']
    df['X_Return'] = daily_return
    df['X_Return_5'] = daily_return.rolling(5).sum()
    df['X_VOL_CHG'] = df['Volume'].pct_change(5)

    # Target: three-class label derived from the NEXT bar's return.
    next_return = daily_return.shift(-1)
    df['Target'] = np.where(next_return > threshold, 1,
                            np.where(next_return < -threshold, -1, 0))

    return df  # Keep full index, no dropna()

# Helper functions to extract features and labels
def get_X(data):
    """Return the model inputs of ``data`` as a 2-D NumPy array.

    Every column whose name begins with ``'X_'`` is selected, in the order the
    columns appear in the frame.
    """
    selected = list(filter(lambda name: name.startswith('X_'), data.columns))
    feature_frame = data[selected]
    return feature_frame.values

def get_y(data):
    """Return the ``Target`` column of ``data`` as a NumPy array."""
    labels = data.Target
    return labels.values

# Apply features: rebind `df` to the enriched copy returned by add_features
# (indicator, X_ feature and Target columns added; no rows removed).
df = add_features(df)

In [None]:
class XGBoostStrategy2(Strategy):
    """Long/short strategy driven by an XGBoost next-bar direction classifier.

    The model is fitted once, on the first bar at which at least
    ``train_size + lookback`` bars are available, using only the bars seen so
    far. Each sample is the ``lookback`` most recent close-to-close returns
    (newest first). Its label is 1 when the following close is higher, else 0.
    On every bar from then on the model forecasts the next bar's direction and
    the position is flipped to match.

    Attributes:
        lookback: Number of past returns in each feature vector.
        train_size: Bars required, on top of ``lookback``, before the model is
            trained and trading starts.
        long_size: ``size`` passed to ``buy`` when going long.
        short_size: ``size`` passed to ``sell`` when going short.
        model: The ``XGBClassifier`` instance.
        trained: True once the model has been fitted.
        predictions, actuals: Parallel lists for evaluating the forecasts.
            ``actuals[k]`` is the realised direction (1 = up, 0 = not up) of
            the bar that ``predictions[k]`` forecast. A pair is appended one
            bar after the forecast, once the outcome is known, so the final
            bar's forecast is never scored.
    """

    # Declared on the class (rather than assigned in init()) so that they can be
    # overridden per run through the backtesting.py parameter mechanism.
    lookback = 20      # Bars to use for features
    train_size = 100   # Minimum data needed
    long_size = .5     # Order size when going long
    short_size = .6    # Order size when going short

    def init(self):
        """Create the classifier and reset the bookkeeping state."""
        self.model = XGBClassifier(n_estimators=50, max_depth=3)

        # Track if model is trained
        self.trained = False
        self.predictions = []
        self.actuals = []

        # Forecast made on the previous bar, waiting for its outcome.
        self._pending_prediction = None

    @staticmethod
    def _window_features(closes):
        """Turn ``lookback + 1`` consecutive closes into ``lookback`` returns, newest first."""
        returns = closes[1:] / closes[:-1] - 1
        # Reverse to newest-first; copy so the model receives a contiguous array.
        return np.ascontiguousarray(returns[::-1])

    def _train(self, closes):
        """Fit the model on every bar seen so far whose next close is already known."""
        last_bar = len(closes) - 1
        # The current (last) bar is excluded: its next close is not available yet.
        sample_bars = range(self.lookback, last_bar)

        X = np.array([
            self._window_features(closes[bar - self.lookback:bar + 1])
            for bar in sample_bars
        ])
        # Target: next day's direction (1=up, 0=down)
        y = np.array([1 if closes[bar + 1] > closes[bar] else 0 for bar in sample_bars])

        self.model.fit(X, y)
        self.trained = True

    def next(self):
        """Score the previous forecast, forecast the next bar and align the position."""
        # Wait for enough data
        if len(self.data) < self.train_size + self.lookback:
            return

        closes = np.asarray(self.data.Close, dtype=float)

        # The previous bar's forecast can be scored now that this bar has closed.
        # The label uses the same definition as the training target.
        if self._pending_prediction is not None:
            self.predictions.append(self._pending_prediction)
            self.actuals.append(1 if closes[-1] > closes[-2] else 0)
            self._pending_prediction = None

        # Train model once at the beginning
        if not self.trained:
            self._train(closes)

        # Make prediction for current bar
        features = self._window_features(closes[-(self.lookback + 1):])
        prediction = int(self.model.predict(features.reshape(1, -1))[0])
        self._pending_prediction = prediction

        # Simple trading: flip the position to follow the forecast
        if prediction == 1 and not self.position.is_long:
            # Close any short positions
            if self.position.is_short:
                self.position.close()
            # Go long with the configured order size
            self.buy(size=self.long_size)
        elif prediction == 0 and not self.position.is_short:
            # Close any long positions
            if self.position.is_long:
                self.position.close()
            # Go short with the configured order size
            self.sell(size=self.short_size)

In [43]:
# Backtest XGBoostStrategy2 on the prepared frame with 10,000 starting cash.
# commission=.0002 is presumably a 0.02% per-trade rate -- confirm against the
# backtesting.py documentation for the installed version.
bt = Backtest(df, XGBoostStrategy2, cash=10_000, commission=.0002)
backtest = bt.run()
# Bare last expression: display the run's result as the cell output.
backtest

Backtest.run:   0%|          | 0/986 [00:00<?, ?it/s]

Start                     2020-01-30 00:00:00
End                       2023-12-29 00:00:00
Duration                   1429 days 00:00:00
Exposure Time [%]                    87.23404
Equity Final [$]                  10739.23737
Equity Peak [$]                   11082.13716
Commissions [$]                      393.7777
Return [%]                            7.39237
Buy & Hold Return [%]                45.25956
Return (Ann.) [%]                     1.83759
Volatility (Ann.) [%]                 6.26033
CAGR [%]                              1.26563
Sharpe Ratio                          0.29353
Sortino Ratio                         0.42183
Calmar Ratio                          0.14136
Alpha [%]                              1.9764
Beta                                  0.11966
Max. Drawdown [%]                   -12.99922
Avg. Drawdown [%]                     -1.3831
Max. Drawdown Duration      730 days 00:00:00
Avg. Drawdown Duration       58 days 00:00:00
# Trades                          