In [5]:
from backtesting import Backtest, Strategy
import talib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import numpy as np

## Load Data

In [2]:
# Load the S&P 500 CSV. Rows whose first column holds the literal "Ticker" or
# "Date" are filtered out (presumably leftover header rows from the export -- verify).
df = pd.read_csv("../Data/^GSPC.csv")
df = df[~df["Price"].isin(["Ticker", "Date"])]
df = df.astype({col: float for col in ['Close', 'High', 'Low', 'Open', 'Volume']})
print(df.columns)
# The 'Price' column carries the timestamps: parse it and promote it to an
# unnamed index so the frame is keyed by date.
df = (
    df.assign(Price=pd.to_datetime(df['Price']))
      .set_index('Price')
      .rename_axis(None)
)

Index(['Price', 'Close', 'High', 'Low', 'Open', 'Volume', 'Return', 'MA5',
       'MA10', 'MA20', 'Volatility', 'Momentum', 'Target'],
      dtype='object')


## Trading Function

In [None]:
def add_features(data, threshold=0.005):
    """Derive the model's ``X_*`` feature columns and a ``Target`` label column.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``Close``, ``MA5``, ``MA10``, ``MA20``,
        ``Volatility``, ``Momentum``, ``Return`` and ``Volume``. An existing
        ``Target`` column is optional.
    threshold : float, default 0.005
        Absolute next-bar return above which a bar is labelled long (1) or,
        below its negative, short (-1). Only used when the target is built here.

    Returns
    -------
    pandas.DataFrame
        A copy of ``data`` with the added columns, restricted to rows in which
        no column is NaN and every ``X_*`` feature is finite.

    Notes
    -----
    * An existing ``Target`` whose values are exactly {0, 1} is kept untouched.
    * Any other existing ``Target`` is preserved as ``Target_orig`` and replaced
      by a {-1, 0, 1} label derived from the next bar's ``Return``.
    * When no ``Target`` column exists, the {-1, 0, 1} label is created.
    * The last row has no next-bar return and is therefore labelled 0.
    """
    df = data.copy()
    close = df.Close

    # Normalized price distances from MAs
    df['X_MA5'] = (close - df.MA5) / close
    df['X_MA10'] = (close - df.MA10) / close
    df['X_MA20'] = (close - df.MA20) / close

    # MA crossovers
    df['X_MA5_10'] = (df.MA5 - df.MA10) / close
    df['X_MA10_20'] = (df.MA10 - df.MA20) / close

    # Use existing Volatility and Momentum
    df['X_Volatility'] = df.Volatility
    df['X_Momentum'] = df.Momentum

    # Relative return features
    df['X_Return'] = df.Return
    df['X_Return_5'] = df.Return.rolling(5).sum()

    # Volume features
    df['X_VOL_CHG'] = df.Volume.pct_change(5)

    has_target = 'Target' in df.columns
    if not has_target or set(df.Target.dropna().unique()) != {0, 1}:
        if has_target:
            # Keep the caller's labels around for reference before overwriting.
            df['Target_orig'] = df.Target
        next_return = df.Return.shift(-1)
        df['Target'] = np.where(next_return > threshold, 1,     # Long if up more than threshold
                       np.where(next_return < -threshold, -1,   # Short if down more than threshold
                       0))                                      # No position if in between

    # A zero Volume or Close in a denominator produces +/-inf, which dropna()
    # would keep and which scikit-learn estimators reject; treat it as missing.
    feature_columns = [col for col in df.columns if col.startswith('X_')]
    df[feature_columns] = df[feature_columns].replace([np.inf, -np.inf], np.nan)

    return df.dropna()

def get_X(data):
    """Extract the model inputs: every column whose name begins with 'X_', as a 2-D array."""
    selected = []
    for name in data.columns:
        if name.startswith('X_'):
            selected.append(name)
    return data[selected].to_numpy()

def get_y(data):
    """Extract the labels: the ``Target`` column as a 1-D array."""
    target = data.Target
    return target.values


class RandomForestStrategy(Strategy):
    """Long/short strategy driven by a random-forest classifier fitted once in ``init``.

    The model is trained on the feature rows that fall within the first
    ``train_size`` bars. From the following bar on, each ``next`` call predicts
    a label for the current bar: 1 opens a long, -1 opens a short (closing the
    opposite position first), anything else opens nothing. Every entry carries
    a symmetric take-profit / stop-loss at ``price_delta`` around the close.
    Trades older than ``breakeven_days`` get their stop moved to the entry price.
    """
    n_estimators = 100   # Number of trees in the forest
    price_delta = 0.01   # 1% for take-profit and stop-loss
    train_size = 500     # Number of bars to use for training
    trade_size = 0.2     # Order size passed to buy()/sell()
    breakeven_days = 5   # Calendar days after which the stop moves to entry

    def init(self):
        """Build the feature table, fit the classifier and register the plot indicator.

        Raises
        ------
        ValueError
            If the training window does not contain at least two target classes.
        """
        # Prepare the data with features
        self.all_data = add_features(self.data.df)

        # Initialize the model - RandomForestClassifier
        self.model = RandomForestClassifier(
            n_estimators=self.n_estimators,
            max_depth=5,
            min_samples_split=10,
            random_state=42
        )

        # Select training rows by timestamp, not by position: add_features()
        # drops warm-up rows, so the first `train_size` rows of all_data would
        # reach past bar `train_size` and overlap the period that is traded.
        n_train_bars = max(1, min(self.train_size, len(self.data)))
        cutoff = self.data.index[n_train_bars - 1]
        train_data = self.all_data[self.all_data.index <= cutoff]
        X_train = get_X(train_data)
        y_train = get_y(train_data)

        # With {-1, 0, 1} labels, drop the neutral class so the model learns
        # long vs. short. Binary labels are used as they are; masking them to
        # +/-1 would leave a single class.
        if np.any(y_train == -1):
            mask = (y_train == 1) | (y_train == -1)
            X_train, y_train = X_train[mask], y_train[mask]

        if len(y_train) == 0 or len(np.unique(y_train)) < 2:
            raise ValueError(
                "Training window must contain at least two target classes; "
                "increase train_size or check the Target column."
            )

        self.model.fit(X_train, y_train)

        # Initialize prediction indicator only
        self.prediction = self.I(lambda: np.zeros(len(self.data)), name='prediction')

    def next(self):
        """Predict on the current bar, place orders and tighten stops on old trades."""
        # Skip if we don't have enough data
        if len(self.data) <= self.train_size:
            return

        # Check if current index exists in our processed data
        current_idx = self.data.index[-1]
        if current_idx not in self.all_data.index:
            return

        # Current bar's features (label-based lookup also copes with duplicate labels)
        features = get_X(self.all_data.loc[[current_idx]])

        # Predict on current features. Only input-validation failures are
        # treated as "no signal"; anything else should surface.
        try:
            prediction = self.model.predict(features)[0]
        except ValueError:
            prediction = 0
        # Update plot indicator
        self.prediction[-1] = prediction

        # Current prices
        close = self.data.Close[-1]

        # Set take-profit and stop-loss levels
        tp_long = close * (1 + self.price_delta)
        sl_long = close * (1 - self.price_delta)
        tp_short = close * (1 - self.price_delta)
        sl_short = close * (1 + self.price_delta)

        # Execute trades based on model prediction
        if prediction == 1 and not self.position.is_long:
            # Close any existing short position first
            if self.position.is_short:
                self.position.close()
            # Enter long position
            self.buy(size=self.trade_size, tp=tp_long, sl=sl_long)

        elif prediction == -1 and not self.position.is_short:
            # Close any existing long position first
            if self.position.is_long:
                self.position.close()
            # Enter short position
            self.sell(size=self.trade_size, tp=tp_short, sl=sl_short)

        # Risk management for existing trades
        hold_limit = pd.Timedelta(days=self.breakeven_days)
        for trade in self.trades:
            if current_idx - trade.entry_time <= hold_limit:
                continue
            entry = trade.entry_price
            current_sl = trade.sl
            if trade.is_long:
                # Move stop loss up to entry price to avoid bigger losses
                new_sl = entry if current_sl is None else max(current_sl, entry)
            else:
                # Move stop loss down to entry price to avoid bigger losses
                new_sl = entry if current_sl is None else min(current_sl, entry)
            # Assigning trade.sl replaces the stop order, so only do it on a real change.
            if new_sl != current_sl:
                trade.sl = new_sl


In [13]:
# Backtest the strategy on the loaded price frame.
bt = Backtest(
    df,
    RandomForestStrategy,
    cash=10_000,        # starting cash
    commission=0.0002,  # commission ratio applied by the backtester
    margin=0.05,        # margin ratio, i.e. leveraged trading -- see backtesting.py docs
)
bt.run()


Backtest.run:   0%|          | 0/986 [00:00<?, ?bar/s]

TypeError: '<=' not supported between instances of 'int' and 'Timestamp'

RuntimeError: First issue `backtest.run()` to obtain results.