# Logistic Regression (Local CSV) Backtesting

In [8]:
import pandas as pd
import numpy as np
from backtesting import Backtest, Strategy

# Set random seed
np.random.seed(42)

# Load and clean the CSV
df = pd.read_csv("../Data/^GSPC.csv")
df = df[df["Price"] != "Ticker"]
df = df[df["Price"] != "Date"]
df[['Close','High', 'Low', 'Open', 'Volume']] = df[['Close', 'High', 'Low', 'Open', 'Volume']].astype(float)
df['Price'] = pd.to_datetime(df['Price'])
df.set_index('Price', inplace=True)
df.index.name = None

# Define feature generation
def add_features(data):
    """Create additional technical indicators and prediction target while keeping alignment intact"""
    df = data.copy()

    # Technical indicators
    df['MA5'] = df['Close'].rolling(window=5).mean()
    df['MA10'] = df['Close'].rolling(window=10).mean()
    df['MA20'] = df['Close'].rolling(window=20).mean()
    df['Volatility'] = df['Close'].pct_change().rolling(window=10).std()
    df['Momentum'] = df['Close'] - df['Close'].shift(5)
    df['Return'] = df['Close'].pct_change()

    # Feature engineering
    df['X_MA5'] = (df['Close'] - df['MA5']) / df['Close']
    df['X_MA10'] = (df['Close'] - df['MA10']) / df['Close']
    df['X_MA20'] = (df['Close'] - df['MA20']) / df['Close']
    df['X_MA5_10'] = (df['MA5'] - df['MA10']) / df['Close']
    df['X_MA10_20'] = (df['MA10'] - df['MA20']) / df['Close']
    df['X_Volatility'] = df['Volatility']
    df['X_Momentum'] = df['Momentum']
    df['X_Return'] = df['Return']
    df['X_Return_5'] = df['Return'].rolling(5).sum()
    df['X_VOL_CHG'] = df['Volume'].pct_change(5)

    # Target: use binary or multi-class depending on your setup
    df['Target'] = np.where(df['Return'].shift(-1) > 0.005, 1,
                    np.where(df['Return'].shift(-1) < -0.005, -1, 0))

    return df  # ❗ Keep full index, no dropna()

# Helper functions to extract features and labels
def get_X(data):
    feature_columns = [col for col in data.columns if col.startswith('X_')]
    return data[feature_columns].values

def get_y(data):
    return data.Target.values

# Apply features
df = add_features(df)


In [9]:
from sklearn.linear_model import LogisticRegression

class strategy_class(Strategy):
    def init(self):
        self.model = LogisticRegression(max_iter=1000, random_state=42)
        self.features = [col for col in self.data.df.columns if col.startswith("X_")]
        self.df = add_features(self.data.df).fillna(method='ffill').fillna(method='bfill')
        self.pred = self.I(lambda: np.zeros(len(self.df)), name='pred')

    def next(self):
        i = len(self.data)
        if i < 200:
            return
        train = self.df.iloc[i-200:i]
        test = self.df.iloc[[i-1]]
        X_train = train[self.features].values
        y_train = train["Target"].values
        X_test = test[self.features].values
        self.model.fit(X_train, y_train)
        pred = self.model.predict(X_test)[0]
        self.pred[-1] = pred
        if pred == 1:
            if not self.position.is_long:
                self.position.close()
                self.buy()
        elif pred == -1:
            if not self.position.is_short:
                self.position.close()
                self.sell()


In [10]:
bt = Backtest(df, strategy_class, cash=10_000, commission=.0002, margin=0.05)
bt.run()

  self.df = add_features(self.data.df).fillna(method='ffill').fillna(method='bfill')
  equity_log_returns = np.log(equity[1:] / equity[:-1])


Start                     2020-01-30 00:00:00
End                       2023-12-29 00:00:00
Duration                   1429 days 00:00:00
Exposure Time [%]                    30.39514
Equity Final [$]                          0.0
Equity Peak [$]                  200914.89822
Commissions [$]                    2764.20937
Return [%]                             -100.0
Buy & Hold Return [%]                45.25956
Return (Ann.) [%]                           0
Volatility (Ann.) [%]               241.75409
CAGR [%]                               -100.0
Sharpe Ratio                              0.0
Sortino Ratio                             0.0
Calmar Ratio                              0.0
Alpha [%]                                 NaN
Beta                                      NaN
Max. Drawdown [%]                      -100.0
Avg. Drawdown [%]                   -10.99124
Max. Drawdown Duration      716 days 00:00:00
Avg. Drawdown Duration       24 days 00:00:00
# Trades                          

In [11]:
bt.plot()

  fig = gridplot(
  fig = gridplot(
