# KNN Backtesting

In [31]:
import pandas as pd
import numpy as np
from backtesting import Backtest, Strategy

# Seed NumPy's global RNG so any NumPy-based randomness is repeatable
np.random.seed(42)

# Read the raw price export. Rows whose "Price" cell is the literal text
# "Ticker" or "Date" are dropped in a single pass before type conversion.
df = pd.read_csv("../Data/^GSPC.csv")
df = df[(df["Price"] != "Ticker") & (df["Price"] != "Date")]

# Price/volume columns are converted to floats
df[['Close', 'High', 'Low', 'Open', 'Volume']] = (
    df[['Close', 'High', 'Low', 'Open', 'Volume']].astype(float)
)

# The "Price" column holds the dates: parse it and promote it to an
# unnamed datetime index
df['Price'] = pd.to_datetime(df['Price'])
df = df.set_index('Price')
df.index.name = None

# Define feature generation
def add_features(data):
    """Return a copy of ``data`` extended with indicators, model inputs and a label.

    Reads the ``Close`` and ``Volume`` columns. Adds the raw indicators
    (``MA5``/``MA10``/``MA20``, ``Volatility``, ``Momentum``, ``Return``),
    the model inputs (every column prefixed ``X_``) and ``Target``.

    ``Target`` labels the *next* row's return: 1 when it exceeds +0.5%,
    -1 when it is below -0.5%, otherwise 0 (this includes the final row,
    whose next return is missing).

    No rows are dropped, so the result keeps the index of ``data``; rows
    without enough history hold NaN in the rolling columns. ``data`` itself
    is not modified.
    """
    out = data.copy()
    close = out['Close']
    daily_ret = close.pct_change()

    # Raw indicators
    for window in (5, 10, 20):
        out[f'MA{window}'] = close.rolling(window=window).mean()
    out['Volatility'] = daily_ret.rolling(window=10).std()
    out['Momentum'] = close - close.shift(5)
    out['Return'] = daily_ret

    # Model inputs: distance of the close from each average, and the gaps
    # between averages, all expressed relative to the close
    for window in (5, 10, 20):
        out[f'X_MA{window}'] = (close - out[f'MA{window}']) / close
    out['X_MA5_10'] = (out['MA5'] - out['MA10']) / close
    out['X_MA10_20'] = (out['MA10'] - out['MA20']) / close
    out['X_Volatility'] = out['Volatility']
    out['X_Momentum'] = out['Momentum']
    out['X_Return'] = daily_ret
    out['X_Return_5'] = daily_ret.rolling(5).sum()
    out['X_VOL_CHG'] = out['Volume'].pct_change(5)

    # Three-class label derived from the following row's return
    next_ret = daily_ret.shift(-1)
    labels = np.where(next_ret < -0.005, -1, 0)
    out['Target'] = np.where(next_ret > 0.005, 1, labels)

    return out

# Helper functions to extract features and labels
def get_X(data):
    """Return the values of every column whose name starts with ``X_`` as a 2-D array."""
    def is_feature(name):
        return name.startswith('X_')

    selected = list(filter(is_feature, data.columns))
    return data[selected].values

def get_y(data):
    """Return the ``Target`` column of ``data`` as an array of labels."""
    labels = data['Target']
    return labels.values

# Extend the cleaned price frame with the indicator, X_ and Target columns
df = df.pipe(add_features)


In [32]:
from sklearn.neighbors import KNeighborsClassifier

class strategy_class(Strategy):
    """Walk-forward k-nearest-neighbours strategy.

    On each bar with enough history, the classifier is refit on the
    ``train_window`` feature rows immediately preceding the current bar and
    then asked to label the current bar. A prediction of 1 switches the
    position to long, -1 switches it to short, and 0 leaves the position
    untouched.

    ``n_neighbors`` and ``train_window`` are declared as class variables,
    which is how backtesting.py exposes strategy parameters, so they can be
    overridden per run without editing the class. The defaults reproduce the
    previously hard-coded values (5 neighbours, 200 bars).
    """

    # Strategy parameters
    n_neighbors = 5      # k used by the KNN classifier
    train_window = 200   # number of preceding bars used to fit the model

    def init(self):
        """Build the model, the feature table and the prediction indicator."""
        self.model = KNeighborsClassifier(n_neighbors=self.n_neighbors)

        # Features are computed once over the full data set. Gaps are filled
        # forward and then backward so the classifier never sees NaN;
        # .ffill()/.bfill() replace the deprecated fillna(method=...) form.
        self.df = add_features(self.data.df).ffill().bfill()
        self.features = [col for col in self.df.columns if col.startswith("X_")]

        # Zero-filled indicator, same length as the data, that next() fills in
        self.pred = self.I(lambda: np.zeros(len(self.data)), name='pred')

    def next(self):
        """Refit on the trailing window, predict the current bar and trade on it."""
        current_index = self.data.index[-1]

        # Skip if current index not present in the processed feature DataFrame
        if current_index not in self.df.index:
            return

        df_index = self.df.index.get_loc(current_index)

        # Skip if not enough history to train
        if df_index < self.train_window:
            return

        # Training rows are strictly before the current bar; the current bar
        # is the single row to classify
        train = self.df.iloc[df_index - self.train_window:df_index]
        test = self.df.iloc[[df_index]]
        X_train = train[self.features].values
        y_train = train["Target"].values
        X_test = test[self.features].values

        # Fit and predict
        self.model.fit(X_train, y_train)
        pred = self.model.predict(X_test)[0]
        self.pred[-1] = pred

        # Execute trades: flip the position only when the signal disagrees
        # with the current side; a 0 prediction changes nothing
        if pred == 1:
            if not self.position.is_long:
                self.position.close()
                self.buy()
        elif pred == -1:
            if not self.position.is_short:
                self.position.close()
                self.sell()



In [35]:
# Configure the backtest: starting cash of 10,000 and a commission of 0.0002
bt = Backtest(
    df,
    strategy_class,
    cash=10_000,
    commission=0.0002,
)
# Left as a bare expression so the notebook displays the returned statistics
bt.run()


  self.df = add_features(self.data.df).fillna(method='ffill').fillna(method='bfill')


Start                     2020-01-30 00:00:00
End                       2023-12-29 00:00:00
Duration                   1429 days 00:00:00
Exposure Time [%]                    78.31814
Equity Final [$]                   8483.35977
Equity Peak [$]                   11350.51701
Commissions [$]                     590.67908
Return [%]                           -15.1664
Buy & Hold Return [%]                45.25956
Return (Ann.) [%]                     -4.1125
Volatility (Ann.) [%]                12.12823
CAGR [%]                             -2.85887
Sharpe Ratio                         -0.33908
Sortino Ratio                        -0.44856
Calmar Ratio                         -0.14602
Alpha [%]                           -11.92286
Beta                                 -0.07167
Max. Drawdown [%]                   -28.16397
Avg. Drawdown [%]                    -3.29207
Max. Drawdown Duration      921 days 00:00:00
Avg. Drawdown Duration       87 days 00:00:00
# Trades                          

In [36]:
# Plot the backtest held in `bt`
bt.plot()

  fig = gridplot(
  fig = gridplot(
