In [1]:
from backtesting import Backtest, Strategy
import talib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import numpy as np



## Load Data

In [2]:
# Load the raw S&P 500 export. The file carries extra header rows whose first
# column reads "Ticker" / "Date"; they are filtered out before type conversion.
raw = pd.read_csv("../Data/^GSPC.csv")
price_cols = ['Close', 'High', 'Low', 'Open', 'Volume']

# Build the cleaned frame as a new object instead of assigning into a filtered
# slice, which avoids chained-assignment (SettingWithCopyWarning) pitfalls.
df = (
    raw[~raw["Price"].isin(["Ticker", "Date"])]
    .astype({col: float for col in price_cols})
)
print(df.columns)

# The "Price" column holds the bar dates; use it as an unnamed DatetimeIndex.
df['Price'] = pd.to_datetime(df['Price'])
df = df.set_index('Price')
df.index.name = None

Index(['Price', 'Close', 'High', 'Low', 'Open', 'Volume', 'Return', 'MA5',
       'MA10', 'MA20', 'Volatility', 'Momentum', 'Target'],
      dtype='object')


## Feature Helpers and Trading Strategy

In [19]:
def add_features(data, threshold=0.005):
    """Derive ``X_*`` model features (and, if needed, a 3-class target) from ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns ``Close``, ``Volume``, ``Return``, ``MA5``,
        ``MA10``, ``MA20``, ``Volatility`` and ``Momentum``. The input is not
        modified; a copy is used.
    threshold : float, default 0.005
        Size of the next-bar ``Return`` required for a long (+1) or short (-1)
        label. The default matches the documented 0.5% when ``Return`` is a
        fraction (assumed here -- pass 0.5 if ``Return`` is stored in percent).

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with the ``X_*`` columns added and every row that
        contains a NaN removed. When a ``Target`` column exists whose values
        are not exactly {0, 1}, it is saved as ``Target_orig`` and replaced by
        labels in {-1, 0, 1}.
    """
    df = data.copy()
    close = df.Close

    # Normalized price distances from the existing moving averages
    df['X_MA5'] = (close - df.MA5) / close
    df['X_MA10'] = (close - df.MA10) / close
    df['X_MA20'] = (close - df.MA20) / close

    # Moving-average spreads (crossover strength), scaled by price
    df['X_MA5_10'] = (df.MA5 - df.MA10) / close
    df['X_MA10_20'] = (df.MA10 - df.MA20) / close

    # Pass the precomputed indicators through as features
    df['X_Volatility'] = df.Volatility
    df['X_Momentum'] = df.Momentum

    # Return features: latest bar and 5-bar rolling sum
    df['X_Return'] = df.Return
    df['X_Return_5'] = df.Return.rolling(5).sum()

    # 5-bar percentage change in volume
    df['X_VOL_CHG'] = df.Volume.pct_change(5)

    # Rebuild Target only when it is not already binary {0, 1}; otherwise keep it.
    if 'Target' in df.columns and set(df.Target.dropna().unique()) != {0, 1}:
        df['Target_orig'] = df.Target
        next_return = df.Return.shift(-1)
        # The previous cut-offs (+/-1000) could never be reached by a return
        # series, so every label collapsed to 0. Use the documented threshold.
        # The final row has no next return (NaN); both comparisons are False
        # there, so it is labelled 0.
        df['Target'] = np.where(next_return > threshold, 1,      # long
                       np.where(next_return < -threshold, -1,    # short
                       0))                                       # flat

    return df.dropna()

def get_X(data):
    """Collect every column whose name begins with ``X_`` into a 2-D array."""
    selected = list(filter(lambda name: name.startswith('X_'), data.columns))
    feature_frame = data[selected]
    return feature_frame.values

def get_y(data):
    """Extract the ``Target`` column of ``data`` as an array of labels."""
    target = data.Target
    return target.values


class StabilizedRandomForestStrategy(Strategy):
    """Trade next-bar direction predicted by a periodically refit random forest.

    On every bar (once ``train_size + lookback`` bars are available) the
    strategy:

    1. fits the classifier if it has never been fit, or if ``retrain_period``
       bars have passed since the last fit, using the most recent
       ``train_window`` bars;
    2. predicts a class in {-1, 0, 1} for the latest bar and, when the class
       probability reaches ``confidence_threshold``, opens a long/short
       position with a stop-loss ``price_delta`` away from the close;
    3. tightens stops on trades at least 2 calendar days old and closes trades
       older than ``max_holding_days``.

    Features per bar: ``lookback`` trailing one-bar returns, the high-low range
    relative to the close, the 5/20-bar SMA ratio, and the one-bar volume
    change. Every feature vector has the same length.
    """
    n_estimators = 100  # Number of trees in the forest
    lookback = 20       # Bars to use for features
    train_size = 100    # Minimum data needed for training
    price_delta = 0.01  # 1% for stop-loss
    position_size = 0.1 # 10% of portfolio per trade
    retrain_period = 50 # Retrain every 50 bars
    confidence_threshold = 0.5 # Minimum confidence to trade
    train_window = 300  # Most recent bars considered when (re)fitting
    signal_threshold = 0.003  # Next-bar move (0.3%) needed for a +1/-1 label
    min_train_samples = 30    # Fit only when strictly more samples than this exist
    max_holding_days = 10     # Trades open longer than this many days are closed

    def init(self):
        """Create the classifier, training state, and the two plotted indicators."""
        self.model = RandomForestClassifier(
            n_estimators=self.n_estimators,
            max_depth=5,
            min_samples_split=10,
            random_state=42
        )

        # Track if model is trained and when it was last trained
        self.trained = False
        self.last_train_idx = 0

        # Zero-filled indicator arrays; next() writes the latest prediction and
        # its confidence into the last slot on every bar.
        self.prediction = self.I(lambda: np.zeros(len(self.data)), name='prediction')
        self.confidence = self.I(lambda: np.zeros(len(self.data)), name='confidence')

    def _features_at(self, i):
        """Build the feature vector for bar ``i`` (a non-negative index, >= lookback)."""
        close = self.data.Close

        # Trailing one-bar returns, most recent first
        features = [close[i - j] / close[i - j - 1] - 1 for j in range(self.lookback)]

        # High-Low range relative to the close
        features.append((self.data.High[i] - self.data.Low[i]) / close[i])

        # Moving average cross; a 20-bar window ending at i needs i >= 19.
        # Python's sum() is kept so values match the previous implementation.
        if i >= 19:
            sma5 = sum(close[i - 4:i + 1]) / 5
            sma20 = sum(close[i - 19:i + 1]) / 20
            features.append(sma5 / sma20 - 1)
        else:
            features.append(0)

        # Volume change. Always append a value: skipping the feature when the
        # previous volume is zero/missing would give rows of different lengths
        # and break both fitting and prediction.
        if hasattr(self.data, 'Volume') and self.data.Volume[i - 1] > 0:
            features.append(self.data.Volume[i] / self.data.Volume[i - 1] - 1)
        else:
            features.append(0.0)

        return features

    def _label_for(self, price_change_pct):
        """Map a next-bar return to 1 (up), -1 (down) or 0 (move too small)."""
        if price_change_pct > self.signal_threshold:
            return 1
        if price_change_pct < -self.signal_threshold:
            return -1
        return 0

    def _fit_model(self, current_idx):
        """Refit the classifier on the recent window ending at the latest bar."""
        close = self.data.Close
        start_idx = max(self.lookback, current_idx - self.train_window)

        X = []
        y = []
        # The label for bar i needs bar i+1, so the last usable i is current_idx - 2.
        for i in range(start_idx, current_idx - 1):
            X.append(self._features_at(i))
            y.append(self._label_for(close[i + 1] / close[i] - 1))

        # Only train if we have enough samples
        if len(X) > self.min_train_samples:
            self.model.fit(X, y)
            self.trained = True
            self.last_train_idx = current_idx
            print(f"Model trained/retrained on {len(X)} samples at bar {current_idx}")

    def _predict(self, i):
        """Return ``(prediction, confidence)`` for bar ``i``; ``(0, 0)`` if unavailable."""
        if not self.trained:
            return 0, 0

        features = self._features_at(i)
        try:
            # One predict_proba call gives both the class and its probability;
            # the predicted class is the arg-max of the class probabilities.
            pred_proba = self.model.predict_proba([features])[0]
            best = int(np.argmax(pred_proba))
            prediction = self.model.classes_[best]

            if prediction == 0:
                # For neutral prediction, use lowest confidence
                pred_confidence = min(pred_proba)
            else:
                pred_confidence = pred_proba[best]

            # Update indicators for plotting
            self.prediction[-1] = prediction
            self.confidence[-1] = pred_confidence
        except Exception as e:
            # Best effort: report the problem and stay flat for this bar.
            print(f"Prediction error: {e}")
            return 0, 0

        return prediction, pred_confidence

    def _manage_open_trades(self, close):
        """Trail stops on trades >= 2 days old and close those held too long."""
        for trade in self.trades:
            # Calendar days between the trade's entry and the latest bar
            days_in_trade = (self.data.index[-1] - trade.entry_time).days

            if days_in_trade >= 2:
                if trade.is_long:
                    # Tighter stop: half of the initial stop distance
                    new_sl = close * (1 - self.price_delta * 0.5)

                    # Move stop loss only if it would move it higher
                    if new_sl > trade.sl:
                        trade.sl = new_sl

                    # If very profitable (3%), keep the stop at least just above entry
                    if close > trade.entry_price * 1.03:
                        trade.sl = max(trade.entry_price * 1.001, trade.sl)

                elif trade.is_short:
                    # Tighter stop: half of the initial stop distance
                    new_sl = close * (1 + self.price_delta * 0.5)

                    # Move stop loss only if it would move it lower
                    if new_sl < trade.sl:
                        trade.sl = new_sl

                    # If very profitable (3%), keep the stop at least just below entry
                    if close < trade.entry_price * 0.97:
                        trade.sl = min(trade.entry_price * 0.999, trade.sl)

            # Close trades that have been open too long regardless of profit
            if days_in_trade > self.max_holding_days:
                trade.close()
                print(f"Closing trade after {days_in_trade} days")

    def next(self):
        """Per-bar step: (re)train if due, predict, act on the signal, manage trades."""
        current_idx = len(self.data)

        # Wait for enough data
        if current_idx < self.train_size + self.lookback:
            return

        # Train model initially OR retrain periodically
        retrain_due = current_idx - self.last_train_idx >= self.retrain_period
        if not self.trained or retrain_due:
            self._fit_model(current_idx)

        close = self.data.Close[-1]
        prediction, pred_confidence = self._predict(current_idx - 1)

        # Only trade if confidence is high enough
        if pred_confidence >= self.confidence_threshold:
            if prediction == 1 and not self.position.is_long:
                # Close any short positions first
                if self.position.is_short:
                    self.position.close()

                # Wait one bar before entering new position (avoid whipsaws):
                # only enter when no trade is currently open.
                if not self.trades:
                    sl_price = close * (1 - self.price_delta)
                    self.buy(size=self.position_size, sl=sl_price)

            elif prediction == -1 and not self.position.is_short:
                # Close any long positions first
                if self.position.is_long:
                    self.position.close()

                # Wait one bar before entering new position (avoid whipsaws):
                # only enter when no trade is currently open.
                if not self.trades:
                    sl_price = close * (1 + self.price_delta)
                    self.sell(size=self.position_size, sl=sl_price)

        # Position management for existing trades
        self._manage_open_trades(close)


In [20]:
# Set up a backtest of the strategy on the prepared price frame with 10,000
# starting cash and margin=0.05.
bt = Backtest(df, StabilizedRandomForestStrategy, cash=10_000, margin=0.05)
# Run it; as the last expression of the cell, the returned statistics are displayed.
bt.run()


Backtest.run:   0%|          | 0/986 [00:00<?, ?bar/s]

Model trained/retrained on 99 samples at bar 120
Model trained/retrained on 149 samples at bar 170
Model trained/retrained on 199 samples at bar 220
Model trained/retrained on 249 samples at bar 270
Model trained/retrained on 299 samples at bar 320
Closing trade after 12 days
Model trained/retrained on 299 samples at bar 370
Model trained/retrained on 299 samples at bar 420
Model trained/retrained on 299 samples at bar 470
Model trained/retrained on 299 samples at bar 520
Model trained/retrained on 299 samples at bar 570
Model trained/retrained on 299 samples at bar 620
Model trained/retrained on 299 samples at bar 670
Model trained/retrained on 299 samples at bar 720
Model trained/retrained on 299 samples at bar 770
Model trained/retrained on 299 samples at bar 820
Model trained/retrained on 299 samples at bar 870
Model trained/retrained on 299 samples at bar 920
Model trained/retrained on 299 samples at bar 970


Start                     2020-01-30 00:00:00
End                       2023-12-29 00:00:00
Duration                   1429 days 00:00:00
Exposure Time [%]                    32.42148
Equity Final [$]                  13348.54496
Equity Peak [$]                   16167.57112
Return [%]                           33.48545
Buy & Hold Return [%]                45.25956
Return (Ann.) [%]                     7.65289
Volatility (Ann.) [%]                15.25409
CAGR [%]                              5.22524
Sharpe Ratio                          0.50169
Sortino Ratio                         0.82828
Calmar Ratio                          0.35154
Alpha [%]                            26.04404
Beta                                  0.16442
Max. Drawdown [%]                   -21.76981
Avg. Drawdown [%]                    -3.36123
Max. Drawdown Duration      730 days 00:00:00
Avg. Drawdown Duration       41 days 00:00:00
# Trades                                  104
Win Rate [%]                      

In [21]:
# Plot the backtest held in `bt` (must be run after the backtest cell).
bt.plot()