In [1]:
import pandas as pd
import numpy as np
from backtesting import Backtest, Strategy
import yfinance as yf
from sklearn.preprocessing import StandardScaler

# Setting a random seed
np.random.seed(42)

# Cleaning and reading the csv file 
df = pd.read_csv("../Data/^GSPC.csv")
df = df[df["Price"] != "Ticker"]
df = df[df["Price"] != "Date"]
df[['Close','High', 'Low', 'Open', 'Volume']] = df[['Close', 'High', 'Low', 'Open', 'Volume']].astype(float)
df['Price'] = pd.to_datetime(df['Price'])
df.set_index('Price', inplace=True)
df.index.name = None

# This function adds technical indicators and features to the data 
def add_features(data):
    df = data.copy()

    # These are the technical indicators that we will use from the yfinance library
    df['MA5'] = df['Close'].rolling(window=5).mean()
    df['MA10'] = df['Close'].rolling(window=10).mean()
    df['MA20'] = df['Close'].rolling(window=20).mean()
    df['Volatility'] = df['Close'].pct_change().rolling(window=10).std()
    df['Momentum'] = df['Close'] - df['Close'].shift(5)
    df['Return'] = df['Close'].pct_change()

    # These are features that we engineered from the technical indicators that we will use for our model. We added the 
    # X_ prefix to the features to indicate that they are engineered features
    df['X_MA5'] = (df['Close'] - df['MA5']) / df['Close']
    df['X_MA10'] = (df['Close'] - df['MA10']) / df['Close']
    df['X_MA20'] = (df['Close'] - df['MA20']) / df['Close']
    df['X_MA5_10'] = (df['MA5'] - df['MA10']) / df['Close']
    df['X_MA10_20'] = (df['MA10'] - df['MA20']) / df['Close']
    df['X_Volatility'] = df['Volatility']
    df['X_Momentum'] = df['Momentum']
    df['X_Return'] = df['Return']
    df['X_Return_5'] = df['Return'].rolling(5).sum()
    df['X_VOL_CHG'] = df['Volume'].pct_change(5)
    # The 'Target' column is a ternary classification problem with labels -1, 0, and 1 indicating down, neutral, and up movements
    df['Target'] = np.where(df['Return'].shift(-1) > 0.005, 1,
                    np.where(df['Return'].shift(-1) < -0.005, -1, 0))

    return df  

# The get_X and get_y functions are helper functions that extract features and labels from the data
def get_X(data):
    feature_columns = [col for col in data.columns if col.startswith('X_')]
    return data[feature_columns].values

def get_y(data):
    return data.Target.values

# We apply the features to the dataframe 
df = add_features(df)



In [7]:
class BuyAndHoldStrategy(Strategy):
    def init(self):
        self.buy_executed = False

    def next(self):
        if not self.buy_executed:
            self.buy()
            self.buy_executed = True

In [8]:
from sklearn.metrics import f1_score, accuracy_score, precision_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

def evaluate_strategy(y_true, y_pred):

    print("Evaluation Metrics")
    print("Accuracy:", round(accuracy_score(y_true, y_pred), 4))
    print("Precision (macro):", round(precision_score(y_true, y_pred, average='macro'), 4))
    print("F1 Score (macro):", round(f1_score(y_true, y_pred, average='macro'), 4))

    cm = confusion_matrix(y_true, y_pred, labels=[-1, 0, 1])
    disp = ConfusionMatrixDisplay(cm, display_labels=['Down (-1)', 'Neutral (0)', 'Up (1)'])
    disp.plot(cmap='Blues')
    plt.title("Confusion Matrix")
    plt.show()

In [9]:
bt = Backtest(df, BuyAndHoldStrategy, cash=10_000, commission=.0002)
backtest = bt.run()
backtest

Start                     2020-01-30 00:00:00
End                       2023-12-29 00:00:00
Duration                   1429 days 00:00:00
Exposure Time [%]                         0.0
Equity Final [$]                   14600.5691
Equity Peak [$]                   14680.75904
Return [%]                           46.00569
Buy & Hold Return [%]                45.25956
Return (Ann.) [%]                     10.1455
Volatility (Ann.) [%]                25.08356
CAGR [%]                              6.90207
Sharpe Ratio                          0.40447
Sortino Ratio                         0.62889
Calmar Ratio                          0.30763
Alpha [%]                             2.14985
Beta                                  0.96899
Max. Drawdown [%]                   -32.97996
Avg. Drawdown [%]                    -2.37456
Max. Drawdown Duration      725 days 00:00:00
Avg. Drawdown Duration       27 days 00:00:00
# Trades                                    0
Win Rate [%]                      

In [5]:
bt.plot()

  fig = gridplot(
  fig = gridplot(
