In [12]:
import pandas as pd
import numpy as np
from backtesting import Backtest, Strategy
import yfinance as yf
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier

# Set random seed
np.random.seed(42)

def compute_rsi(series, window=14):
    delta = series.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
    rs = gain / loss
    return 100 - (100 / (1 + rs))

def compute_macd(series, fast=12, slow=26, signal=9):
    ema_fast = series.ewm(span=fast, adjust=False).mean()
    ema_slow = series.ewm(span=slow, adjust=False).mean()
    macd = ema_fast - ema_slow
    signal_line = macd.ewm(span=signal, adjust=False).mean()
    return macd, signal_line

def compute_stochastic_oscillator(high, low, close, k_period=14, d_period=3):
    lowest_low = low.rolling(window=k_period).min()
    highest_high = high.rolling(window=k_period).max()
    stoch_k = 100 * (close - lowest_low) / (highest_high - lowest_low)
    stoch_d = stoch_k.rolling(window=d_period).mean()
    return stoch_k, stoch_d

def add_features_with_macro(ticker="^GSPC", start="2015-01-01", end="2024-01-01", prediction_horizon=5, big_move_threshold=0.01):
    # Download price data
    df = yf.download(ticker, start=start, end=end)

    # Download macro indicators
    vix = yf.download('^VIX', start=start, end=end)
    tnx = yf.download('^TNX', start=start, end=end)

    # Cleaning the multiindexed columns
    df.columns = [col if isinstance(col, str) else col[0] for col in df.columns]

    # Mid-price & returns
    df['Mid'] = (df['High'] + df['Low']) / 2
    df['Return'] = df['Close'].pct_change()
    df['Mid_Return'] = df['Mid'].pct_change()

    # Technical indicators
    df['MA5'] = df['Close'].rolling(window=5).mean()
    df['MA10'] = df['Close'].rolling(window=10).mean()
    df['MA20'] = df['Close'].rolling(window=20).mean()
    df['Volatility'] = df['Return'].rolling(window=10).std()
    df['Momentum'] = df['Close'] - df['Close'].shift(5)
    df['RSI'] = compute_rsi(df['Close'])
    df['MACD'], df['MACD_signal'] = compute_macd(df['Close'])
    df['Stoch_K'], df['Stoch_D'] = compute_stochastic_oscillator(df['High'], df['Low'], df['Close'])

    # Macro indicators (align by date index)
    df['VIX_Close'] = vix['Close']
    df['TNX_Close'] = tnx['Close']

    # Future return target
    df['Future_Return'] = df['Close'].shift(-prediction_horizon) / df['Close'] - 1

    # Multi-class label: big moves only
    df['Target'] = np.where(df['Future_Return'] > big_move_threshold, 1,
                    np.where(df['Future_Return'] < -big_move_threshold, -1, 0))

    # Feature columns (can customize if needed)
    df['X_MA5'] = (df['Close'] - df['MA5']) / df['Close']
    df['X_MA10'] = (df['Close'] - df['MA10']) / df['Close']
    df['X_MA20'] = (df['Close'] - df['MA20']) / df['Close']
    df['X_MA5_10'] = (df['MA5'] - df['MA10']) / df['Close']
    df['X_MA10_20'] = (df['MA10'] - df['MA20']) / df['Close']
    df['X_Volatility'] = df['Volatility']
    df['X_Momentum'] = df['Momentum']
    df['X_Return'] = df['Return']
    df['X_Return_5'] = df['Return'].rolling(5).sum()
    df['X_VOL_CHG'] = df['Volume'].pct_change(5)
    df['X_RSI'] = df['RSI']
    df['X_MACD'] = df['MACD']
    df['X_MACD_signal'] = df['MACD_signal']
    df['X_Stoch_K'] = df['Stoch_K']
    df['X_Stoch_D'] = df['Stoch_D']
    df['X_VIX'] = df['VIX_Close']
    df['X_TNX'] = df['TNX_Close']

    return df  # no dropna(), keep full index

# Helper functions to extract features and labels
def get_X(data):
    feature_columns = [col for col in data.columns if col.startswith('X_')]
    return data[feature_columns].values

def get_y(data):
    """Return target variable y, mapped to [0, 1, 2]"""
    y = data.Target.values
    return np.where(y == -1, 0, np.where(y == 0, 1, 2))


# Apply features
df = add_features_with_macro()
df = df.dropna(subset=["Open", "High", "Low", "Close", "Volume"])

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [30]:
class XGBoostStrategy(Strategy):
    n_estimators = 100
    max_depth = 5
    learning_rate = 0.1
    train_size = 500
    price_delta = 0.01  # take-profit and stop-loss % (1%)

    def init(self):
        # Load macro-enhanced data
        full_df = add_features_with_macro(ticker="^GSPC").fillna(method='ffill').fillna(method='bfill')

        # Align with backtesting data
        self.all_data = full_df.loc[self.data.df.index.intersection(full_df.index)]

        # Precompute features and mapping
        self.X_all = get_X(self.all_data)
        self.y_all = get_y(self.all_data)
        self.feature_index = {idx: i for i, idx in enumerate(self.all_data.index)}

        # Train XGBoost model once
        self.model = XGBClassifier(
            n_estimators=self.n_estimators,
            max_depth=self.max_depth,
            learning_rate=self.learning_rate,
            use_label_encoder=False,
            eval_metric='logloss',
            random_state=42
        )

        # Use only bars with enough data
        if len(self.X_all) >= self.train_size:
            X_train = self.X_all[:self.train_size]
            y_train = self.y_all[:self.train_size]
            self.model.fit(X_train, y_train)

    def next(self):
        current_index = self.data.index[-1]

        # Skip if current index not in all_data
        if current_index not in self.feature_index:
            return

        i = self.feature_index[current_index]
        features = self.X_all[i].reshape(1, -1)

        # Get prediction and confidence
        try:
            pred_class = self.model.predict(features)[0]
            confidence = max(self.model.predict_proba(features)[0])
        except:
            return

        # Map back to original labels: 0 → -1, 1 → 0, 2 → 1
        prediction = pred_class - 1
        close = self.data.Close[-1]
        size = min(0.5, confidence * 0.5)

        tp_long = close * (1 + self.price_delta)
        sl_long = close * (1 - self.price_delta)
        tp_short = close * (1 - self.price_delta)
        sl_short = close * (1 + self.price_delta)

        # Execute trade based on prediction
        if prediction == 1 and not self.position.is_long:
            if self.position.is_short:
                self.position.close()
            self.buy(size=size, tp=tp_long, sl=sl_long)

        elif prediction == -1 and not self.position.is_short:
            if self.position.is_long:
                self.position.close()
            self.sell(size=size, tp=tp_short, sl=sl_short)

        elif prediction == 0:
            if confidence > 0.75 and (self.position.is_long or self.position.is_short):
                self.position.close()


In [33]:
bt = Backtest(df, XGBoostStrategy, cash=10_000, commission=.0002)
bt.run()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
  full_df = add_features_with_macro(ticker="^GSPC").fillna(method='ffill').fillna(method='bfill')
Parameters: { "use_label_encoder" } are not used.



Start                     2015-01-02 00:00:00
End                       2023-12-29 00:00:00
Duration                   3283 days 00:00:00
Exposure Time [%]                    54.32862
Equity Final [$]                  11501.23916
Equity Peak [$]                   12597.03819
Commissions [$]                     988.71323
Return [%]                           15.01239
Buy & Hold Return [%]               131.74765
Return (Ann.) [%]                     1.56904
Volatility (Ann.) [%]                 2.47061
CAGR [%]                              1.07941
Sharpe Ratio                          0.63508
Sortino Ratio                         0.92697
Calmar Ratio                          0.15202
Alpha [%]                            11.91484
Beta                                  0.02351
Max. Drawdown [%]                   -10.32149
Avg. Drawdown [%]                    -0.43546
Max. Drawdown Duration     2306 days 00:00:00
Avg. Drawdown Duration       55 days 00:00:00
# Trades                          

In [34]:
bt.plot()

  fig = gridplot(
  fig = gridplot(
