In [1]:
# import libraries
import pandas as pd
import yfinance as yf
import hvplot.pandas
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Dropout

import warnings
warnings.filterwarnings('ignore')

In [2]:
# download the historical prices of pltr
pltr_df = yf.download(tickers = 'pltr', period='2Y', interval = '1h')
pltr_df

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume
Ticker,PLTR,PLTR,PLTR,PLTR,PLTR
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2023-07-03 13:30:00+00:00,15.319300,15.590000,15.220000,15.490000,12379571
2023-07-03 14:30:00+00:00,15.405000,15.420000,15.190000,15.310000,5627704
2023-07-03 15:30:00+00:00,15.470000,15.470000,15.340000,15.407100,3471561
2023-07-05 13:30:00+00:00,15.425000,15.570000,15.230000,15.436700,9888941
2023-07-05 14:30:00+00:00,15.345000,15.580000,15.330000,15.430000,6028562
...,...,...,...,...,...
2025-07-01 15:30:00+00:00,129.970093,130.389893,129.179993,130.100006,8717682
2025-07-01 16:30:00+00:00,130.080002,131.199997,129.800003,129.972900,9918720
2025-07-01 17:30:00+00:00,129.529999,130.354996,129.409607,130.154999,6948591
2025-07-01 18:30:00+00:00,130.305206,130.479004,129.009995,129.520004,7668921


In [3]:
# Remove multilevels of the column headers
pltr_df = pltr_df.droplevel(level = 1, axis = 1)

# Remove the name 'Price' from the headers
pltr_df.columns.name = None

pltr_df

Unnamed: 0_level_0,Close,High,Low,Open,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-07-03 13:30:00+00:00,15.319300,15.590000,15.220000,15.490000,12379571
2023-07-03 14:30:00+00:00,15.405000,15.420000,15.190000,15.310000,5627704
2023-07-03 15:30:00+00:00,15.470000,15.470000,15.340000,15.407100,3471561
2023-07-05 13:30:00+00:00,15.425000,15.570000,15.230000,15.436700,9888941
2023-07-05 14:30:00+00:00,15.345000,15.580000,15.330000,15.430000,6028562
...,...,...,...,...,...
2025-07-01 15:30:00+00:00,129.970093,130.389893,129.179993,130.100006,8717682
2025-07-01 16:30:00+00:00,130.080002,131.199997,129.800003,129.972900,9918720
2025-07-01 17:30:00+00:00,129.529999,130.354996,129.409607,130.154999,6948591
2025-07-01 18:30:00+00:00,130.305206,130.479004,129.009995,129.520004,7668921


In [4]:
# Round the values of the dataframe to 2 decimal points
signals_df = round(pltr_df[['Close', 'High', 'Low', 'Open', 'Volume']], 2)

# Change the index to the required timezone
signals_df.index = signals_df.index.tz_convert('US/Eastern')

# Display the data
signals_df

Unnamed: 0_level_0,Close,High,Low,Open,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-07-03 09:30:00-04:00,15.32,15.59,15.22,15.49,12379571
2023-07-03 10:30:00-04:00,15.40,15.42,15.19,15.31,5627704
2023-07-03 11:30:00-04:00,15.47,15.47,15.34,15.41,3471561
2023-07-05 09:30:00-04:00,15.43,15.57,15.23,15.44,9888941
2023-07-05 10:30:00-04:00,15.35,15.58,15.33,15.43,6028562
...,...,...,...,...,...
2025-07-01 11:30:00-04:00,129.97,130.39,129.18,130.10,8717682
2025-07-01 12:30:00-04:00,130.08,131.20,129.80,129.97,9918720
2025-07-01 13:30:00-04:00,129.53,130.35,129.41,130.15,6948591
2025-07-01 14:30:00-04:00,130.31,130.48,129.01,129.52,7668921


In [5]:
# visulise the closing price
signals_df['Close'].hvplot()

In [6]:
# Set the long and short windows
short_window = 20
long_window = 50

# Obtain the Exponential Moving Average of the Close prices with short and long windows
signals_df['EMA20_Close'] =  round(signals_df['Close'].ewm(span = short_window).mean(), 2)
signals_df['EMA50_Close'] =  round(signals_df['Close'].ewm(span = long_window).mean(), 2)

# Calculate Relative Strength Index (RSI)
delta = signals_df['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)

avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()

rs = avg_gain / avg_loss
signals_df['RSI'] = 100 - (100 / (1 + rs))

# Calculate the 14-day Average True Range (ATR) for volatility
high_low = signals_df['High'] - signals_df['Low']
high_close = (signals_df['High'] - signals_df['Close'].shift()).abs()
low_close = (signals_df['Low'] - signals_df['Close'].shift()).abs()
tr = pd.concat([high_low, high_close, low_close], axis=1)
signals_df['ATR'] = tr.max(axis=1).rolling(window=14).mean()

# Drop missing values
signals_df.dropna(inplace=True)

# view data
signals_df.head()

Unnamed: 0_level_0,Close,High,Low,Open,Volume,EMA20_Close,EMA50_Close,RSI,ATR
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-07-06 12:30:00-04:00,15.06,15.14,14.95,14.97,4736592,15.27,15.31,42.857143,0.274286
2023-07-06 13:30:00-04:00,15.13,15.16,15.01,15.05,5024314,15.25,15.29,44.973545,0.258571
2023-07-06 14:30:00-04:00,15.17,15.19,15.09,15.14,4433036,15.24,15.28,43.783784,0.249286
2023-07-06 15:30:00-04:00,15.13,15.18,15.1,15.17,4850850,15.23,15.27,40.659341,0.245714
2023-07-07 09:30:00-04:00,15.63,15.9,15.22,15.23,17897837,15.27,15.3,54.385965,0.276429


In [7]:
# Create signals
signals_df['Target'] = (signals_df['EMA20_Close'] > signals_df['EMA50_Close']).astype(int)

# Display data
signals_df.head()

Unnamed: 0_level_0,Close,High,Low,Open,Volume,EMA20_Close,EMA50_Close,RSI,ATR,Target
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-07-06 12:30:00-04:00,15.06,15.14,14.95,14.97,4736592,15.27,15.31,42.857143,0.274286,0
2023-07-06 13:30:00-04:00,15.13,15.16,15.01,15.05,5024314,15.25,15.29,44.973545,0.258571,0
2023-07-06 14:30:00-04:00,15.17,15.19,15.09,15.14,4433036,15.24,15.28,43.783784,0.249286,0
2023-07-06 15:30:00-04:00,15.13,15.18,15.1,15.17,4850850,15.23,15.27,40.659341,0.245714,0
2023-07-07 09:30:00-04:00,15.63,15.9,15.22,15.23,17897837,15.27,15.3,54.385965,0.276429,0


## Machine Learning

In [23]:
# Define the datasets X and y for machine learing
X = signals_df.drop(columns = ['Target'], axis = 1)
y = signals_df['Target']


# Split the data into test and train data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30, shuffle = False, random_state = 2)

In [34]:
# Scale the data using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Conduct PCA transformation
pca = PCA(n_components= 5)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

In [35]:
# Check if the training data label classes are balanced 
y_train.value_counts()

Target
1    1390
0    1041
Name: count, dtype: int64

### Gradienct Boosting Classifier

In [44]:
# initiate the model
gb_model = GradientBoostingClassifier(n_estimators = 300,
                                      max_depth = 5,
                                      learning_rate = .2,
                                      random_state = 1)

# fit the model with the training data
gb_model.fit(X_train_pca, y_train)


# Obtain the prediction with the model
y_pred_gb = gb_model.predict(X_test_pca)

# Generate the Classification report
print(classification_report(y_test, y_pred_gb))

              precision    recall  f1-score   support

           0       0.19      0.07      0.11       308
           1       0.69      0.87      0.77       735

    accuracy                           0.63      1043
   macro avg       0.44      0.47      0.44      1043
weighted avg       0.54      0.63      0.57      1043



### Supported Vector Machine

In [45]:
# Initialize and fit the data to SVC
svm_model = SVC(kernel = 'linear', gamma = 'auto', C=1.0)
svm_model.fit(X_train_pca, y_train)


# make predictions with the model
y_pred_svm = svm_model.predict(X_test_pca)

# Obtain the classification report of predictions against the test data
print(classification_report(y_test, y_pred_svm))

              precision    recall  f1-score   support

           0       0.91      0.32      0.48       308
           1       0.78      0.99      0.87       735

    accuracy                           0.79      1043
   macro avg       0.84      0.66      0.67      1043
weighted avg       0.82      0.79      0.75      1043



### Logistic Regression

In [46]:
# Initiate and fit the data to logistic regression model
lr_model = LogisticRegression(solver = 'liblinear')
lr_model.fit(X_train_pca, y_train)

# Make predictions with the model
y_pred_lr = lr_model.predict(X_test_pca)

# Obtain the classification report of the predictions against the test data
print(classification_report(y_test, y_pred_lr))

              precision    recall  f1-score   support

           0       0.95      0.19      0.31       308
           1       0.74      1.00      0.85       735

    accuracy                           0.76      1043
   macro avg       0.85      0.59      0.58      1043
weighted avg       0.81      0.76      0.69      1043



In [47]:
# Initialize and fit the data to RandomForestClassifier
model_rf = RandomForestClassifier(n_estimators = 500, max_depth = 5, random_state = 2)
model_rf.fit(X_train_pca, y_train)

# Obtain the predictions with the model
y_pred_rf = model_rf.predict(X_test_pca)

# Generate the classification report of the predictions against the test set
print(classification_report(y_test, y_pred_rf))

              precision    recall  f1-score   support

           0       0.19      0.02      0.04       308
           1       0.70      0.96      0.81       735

    accuracy                           0.68      1043
   macro avg       0.45      0.49      0.43      1043
weighted avg       0.55      0.68      0.58      1043



In [48]:
print(Counter(y_pred_rf))

NameError: name 'Counter' is not defined

In [49]:
# Initialize and fit the data to RandomForestClassifier
model_ada = AdaBoostClassifier(n_estimators = 250, learning_rate = 0.5,  random_state = 10)
model_ada.fit(X_train_pca, y_train)

# Obtain the predictions with the model
y_pred_ada = model_ada.predict(X_test_pca)

# Generate the classification report of the predictions against the test set
print(classification_report(y_test, y_pred_ada))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00       308
           1       0.70      1.00      0.83       735

    accuracy                           0.70      1043
   macro avg       0.35      0.50      0.41      1043
weighted avg       0.50      0.70      0.58      1043



In [50]:
print(Counter(y_pred_ada))

NameError: name 'Counter' is not defined

### LSTM

In [51]:
# Reshape input to 3D [samples, time_steps, features] to appropriately fir the data into the LSTM model
X_train_reshaped = np.reshape(X_train_pca, (X_train_pca.shape[0], X_train_pca.shape[1], 1))

# Initialized LSTM Classifier
lstm = Sequential()

# Creating the first hidden layer
lstm.add(LSTM(units = 70, return_sequences = True, input_shape = (X_train_pca.shape[1],1)))
lstm.add(Dropout(0.2))

# Creating the second hidden layer
lstm.add(LSTM(units = 60, return_sequences = True, activation = 'relu'))
lstm.add(Dropout(0.1))

# Creating the third hidden layer
lstm.add(LSTM(units = 60, return_sequences = True, activation = 'relu'))
lstm.add(Dropout(0.1))

# Creating the fourth hidden layer
lstm.add(LSTM(units = 60, activation = 'relu'))
lstm.add(Dropout(0.1))

# Creating the output layer to the model 
lstm.add(Dense(units = 1, activation= 'sigmoid'))

In [52]:
# Obtain the LSTM model symmary
lstm.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_4 (LSTM)               (None, 5, 70)             20160     
                                                                 
 dropout_4 (Dropout)         (None, 5, 70)             0         
                                                                 
 lstm_5 (LSTM)               (None, 5, 60)             31440     
                                                                 
 dropout_5 (Dropout)         (None, 5, 60)             0         
                                                                 
 lstm_6 (LSTM)               (None, 5, 60)             29040     
                                                                 
 dropout_6 (Dropout)         (None, 5, 60)             0         
                                                                 
 lstm_7 (LSTM)               (None, 60)               

In [53]:
# Compile the model
lstm.compile(optimizer = 'adam', loss = 'mean_squared_error', metrics = ['accuracy'])

In [54]:
# Fit the model
lstm.fit(X_train_reshaped, y_train, epochs=50, batch_size=32)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x198122cb970>

In [70]:
# Make predictions
y_pred = lstm.predict(X_test_pca)

# Obtain the labels in the required format
y_pred_lstm = np.where(y_pred > y_pred.mean(), 1, 0).ravel()




In [71]:
# Generate the classification report of the predictions against the test set
print(classification_report(y_test, y_pred_lstm))

              precision    recall  f1-score   support

           0       0.25      0.24      0.25       308
           1       0.69      0.70      0.70       735

    accuracy                           0.57      1043
   macro avg       0.47      0.47      0.47      1043
weighted avg       0.56      0.57      0.56      1043



## Backtesting

In [72]:
# Create function to accept the validation input data and model predictions
# Generate a data frome with the function with the corresponding returns based on the model predictions entered
def backtest_df(df, pred):
    initial_capital = 100000
    capital = initial_capital
    position = 0

    portfolio = []


    for i in range(len(df)):
        if pred[i] == 1:

            if position == 0:
                position = capital / df.iloc[i]['Close']
                capital -= position * df.iloc[i]['Close']

        elif pred[i] == 0 and position !=0:
            capital = capital + df.iloc[i]['Close'] * position
            position = 0

        portfolio_value = capital + position * df.iloc[i]['Close']
        portfolio.append(portfolio_value)

    portfolio_df = pd.DataFrame(portfolio , columns = ['Portfolio_Total'], index = df.index)

    portfolio_df['Model_Predictions'] = pred
    
    return portfolio_df[['Model_Predictions', 'Portfolio_Total']]
    

In [73]:
# Fucntion to generate dataframe with the validation input and cumulative returns and its plot based on the model predictions 
def cumualtive_returns(portfolio_df, X_test):
    
    # Merge
    merged_df = pd.concat([X_test, portfolio_df], axis = 1)
    
    # Obtain the Daily returns of the portfolio and clean the data
    merged_df['Portfolio_Returns'] = merged_df['Portfolio_Total'].pct_change()
    merged_df.dropna(inplace = True)
    
    # Calculate cumualtive returns at every hour
    merged_df['Cumulative_Returns'] = (1 + merged_df['Portfolio_Returns']).cumprod() - 1
    
    return merged_df
    

### Gradient Boosting Classifier

In [76]:
# Conduct backtest with the function created
gb_backtest = backtest_df(X_test, y_pred_gb)

# Obtain the cumulateive returns the backtesting algorithm generates
gb_backtest_cumulative = cumualtive_returns(gb_backtest, X_test)
gb_backtest_cumulative

Unnamed: 0_level_0,Close,High,Low,Open,Volume,EMA20_Close,EMA50_Close,RSI,ATR,Model_Predictions,Portfolio_Total,Portfolio_Returns,Cumulative_Returns
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2024-11-21 09:30:00-05:00,61.64,63.40,61.55,62.04,20509211,61.74,60.57,49.341142,0.957857,1,99243.278055,-0.007567,-0.007567
2024-11-21 10:30:00-05:00,62.72,62.94,61.36,61.61,8661118,61.83,60.66,60.053981,0.990000,1,100982.128482,0.017521,0.009821
2024-11-21 11:30:00-05:00,61.99,62.75,61.84,62.72,5477367,61.85,60.71,50.403226,0.990714,1,99806.794397,-0.011639,-0.001932
2024-11-21 12:30:00-05:00,61.18,62.09,60.95,61.98,6508591,61.79,60.73,45.509709,1.032143,1,98502.656577,-0.013067,-0.014973
2024-11-21 13:30:00-05:00,61.58,61.59,60.90,61.18,5399566,61.77,60.76,47.911833,1.053571,1,99146.675254,0.006538,-0.008533
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-07-01 11:30:00-04:00,129.97,130.39,129.18,130.10,8717682,136.39,138.38,34.910837,3.012857,1,198470.748842,-0.001230,0.984707
2025-07-01 12:30:00-04:00,130.08,131.20,129.80,129.97,9918720,135.79,138.06,32.165833,2.934286,1,198638.724393,0.000846,0.986387
2025-07-01 13:30:00-04:00,129.53,130.35,129.41,130.15,6948591,135.19,137.72,33.796470,2.812857,1,197798.846638,-0.004228,0.977988
2025-07-01 14:30:00-04:00,130.31,130.48,129.01,129.52,7668921,134.73,137.43,35.916269,2.805000,1,198989.946000,0.006022,0.989899


In [79]:
# Plot the cumulative returns of the model from the backtesting
gb_backtest_cumulative['Cumulative_Returns'].hvplot()

### Supported Vector Machine

In [80]:
# Conduct backtest with the function created
svm_backtest = backtest_df(X_test, y_pred_svm)

# Obtain the cumulateive returns the backtesting algorithm generates
svm_backtest_cumulative = cumualtive_returns(svm_backtest, X_test)
svm_backtest_cumulative

Unnamed: 0_level_0,Close,High,Low,Open,Volume,EMA20_Close,EMA50_Close,RSI,ATR,Model_Predictions,Portfolio_Total,Portfolio_Returns,Cumulative_Returns
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2024-11-21 09:30:00-05:00,61.64,63.40,61.55,62.04,20509211,61.74,60.57,49.341142,0.957857,1,99243.278055,-0.007567,-0.007567
2024-11-21 10:30:00-05:00,62.72,62.94,61.36,61.61,8661118,61.83,60.66,60.053981,0.990000,1,100982.128482,0.017521,0.009821
2024-11-21 11:30:00-05:00,61.99,62.75,61.84,62.72,5477367,61.85,60.71,50.403226,0.990714,1,99806.794397,-0.011639,-0.001932
2024-11-21 12:30:00-05:00,61.18,62.09,60.95,61.98,6508591,61.79,60.73,45.509709,1.032143,1,98502.656577,-0.013067,-0.014973
2024-11-21 13:30:00-05:00,61.58,61.59,60.90,61.18,5399566,61.77,60.76,47.911833,1.053571,1,99146.675254,0.006538,-0.008533
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-07-01 11:30:00-04:00,129.97,130.39,129.18,130.10,8717682,136.39,138.38,34.910837,3.012857,0,228929.623877,0.000000,1.289296
2025-07-01 12:30:00-04:00,130.08,131.20,129.80,129.97,9918720,135.79,138.06,32.165833,2.934286,0,228929.623877,0.000000,1.289296
2025-07-01 13:30:00-04:00,129.53,130.35,129.41,130.15,6948591,135.19,137.72,33.796470,2.812857,0,228929.623877,0.000000,1.289296
2025-07-01 14:30:00-04:00,130.31,130.48,129.01,129.52,7668921,134.73,137.43,35.916269,2.805000,0,228929.623877,0.000000,1.289296


In [81]:
# Plot the cumulative returns of the model from the backtesting
svm_backtest_cumulative['Cumulative_Returns'].hvplot()

### Logistic Regression

In [82]:
# Conduct backtest with the function created
lr_backtest = backtest_df(X_test, y_pred_lstm)

# Obtain the cumulateive returns the backtesting algorithm generates
lr_backtest_cumulative = cumualtive_returns(lr_backtest, X_test)
lr_backtest_cumulative

Unnamed: 0_level_0,Close,High,Low,Open,Volume,EMA20_Close,EMA50_Close,RSI,ATR,Model_Predictions,Portfolio_Total,Portfolio_Returns,Cumulative_Returns
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2024-11-21 09:30:00-05:00,61.64,63.40,61.55,62.04,20509211,61.74,60.57,49.341142,0.957857,1,99243.278055,-0.007567,-0.007567
2024-11-21 10:30:00-05:00,62.72,62.94,61.36,61.61,8661118,61.83,60.66,60.053981,0.990000,1,100982.128482,0.017521,0.009821
2024-11-21 11:30:00-05:00,61.99,62.75,61.84,62.72,5477367,61.85,60.71,50.403226,0.990714,1,99806.794397,-0.011639,-0.001932
2024-11-21 12:30:00-05:00,61.18,62.09,60.95,61.98,6508591,61.79,60.73,45.509709,1.032143,1,98502.656577,-0.013067,-0.014973
2024-11-21 13:30:00-05:00,61.58,61.59,60.90,61.18,5399566,61.77,60.76,47.911833,1.053571,1,99146.675254,0.006538,-0.008533
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-07-01 11:30:00-04:00,129.97,130.39,129.18,130.10,8717682,136.39,138.38,34.910837,3.012857,0,237277.121473,-0.001230,1.372771
2025-07-01 12:30:00-04:00,130.08,131.20,129.80,129.97,9918720,135.79,138.06,32.165833,2.934286,0,237277.121473,0.000000,1.372771
2025-07-01 13:30:00-04:00,129.53,130.35,129.41,130.15,6948591,135.19,137.72,33.796470,2.812857,0,237277.121473,0.000000,1.372771
2025-07-01 14:30:00-04:00,130.31,130.48,129.01,129.52,7668921,134.73,137.43,35.916269,2.805000,0,237277.121473,0.000000,1.372771


In [83]:
# Plot the cumulative returns of the model from the backtesting
lr_backtest_cumulative['Cumulative_Returns'].hvplot()

### LSTM

In [84]:
# Conduct backtest with the function created
lstm_backtest = backtest_df(X_test, y_pred_lstm)

# Obtain the cumulateive returns the backtesting algorithm generates
lstm_backtest_cumulative = cumualtive_returns(lstm_backtest, X_test)
lstm_backtest_cumulative

Unnamed: 0_level_0,Close,High,Low,Open,Volume,EMA20_Close,EMA50_Close,RSI,ATR,Model_Predictions,Portfolio_Total,Portfolio_Returns,Cumulative_Returns
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2024-11-21 09:30:00-05:00,61.64,63.40,61.55,62.04,20509211,61.74,60.57,49.341142,0.957857,1,99243.278055,-0.007567,-0.007567
2024-11-21 10:30:00-05:00,62.72,62.94,61.36,61.61,8661118,61.83,60.66,60.053981,0.990000,1,100982.128482,0.017521,0.009821
2024-11-21 11:30:00-05:00,61.99,62.75,61.84,62.72,5477367,61.85,60.71,50.403226,0.990714,1,99806.794397,-0.011639,-0.001932
2024-11-21 12:30:00-05:00,61.18,62.09,60.95,61.98,6508591,61.79,60.73,45.509709,1.032143,1,98502.656577,-0.013067,-0.014973
2024-11-21 13:30:00-05:00,61.58,61.59,60.90,61.18,5399566,61.77,60.76,47.911833,1.053571,1,99146.675254,0.006538,-0.008533
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-07-01 11:30:00-04:00,129.97,130.39,129.18,130.10,8717682,136.39,138.38,34.910837,3.012857,0,237277.121473,-0.001230,1.372771
2025-07-01 12:30:00-04:00,130.08,131.20,129.80,129.97,9918720,135.79,138.06,32.165833,2.934286,0,237277.121473,0.000000,1.372771
2025-07-01 13:30:00-04:00,129.53,130.35,129.41,130.15,6948591,135.19,137.72,33.796470,2.812857,0,237277.121473,0.000000,1.372771
2025-07-01 14:30:00-04:00,130.31,130.48,129.01,129.52,7668921,134.73,137.43,35.916269,2.805000,0,237277.121473,0.000000,1.372771


In [85]:
# Plot the cumulative returns of the model from the backtesting
lstm_backtest_cumulative['Cumulative_Returns'].hvplot()

In [86]:
# function to plot entry and exit
def plot_entry_exit(df, model:str):
    # Obtain exit and entry points (1 to buy, -1 to sell and 0 to hold)
    df['Entry/Exit'] = df['Model_Predictions'].diff()
    df['Entry/Exit'][0] = df['Model_Predictions'][0]
    df['Entry/Exit'] = df['Entry/Exit'].astype(int)
    df[['Model_Predictions', 'Entry/Exit']]
    

    # Plot the points to sell on the graph
    exit = df[df['Entry/Exit'] == -1]['Portfolio_Total'].hvplot.scatter(color = 'red',
                                           marker = 'v',
                                           legend = False,
                                           width = 1000,
                                           height = 500,
                                           size = 200, ylabel = 'Price in $')

    # Plot the points to buy on the graph
    entry = df[df['Entry/Exit'] == 1]['Portfolio_Total'].hvplot.scatter(color = 'green',
                                           marker = '^',
                                           legend = False,
                                           width = 1000,
                                           height = 500,
                                           size = 200, ylabel = 'Price in $')

    


    # plot cumulative returns through the model
    portfolio_price_chart = df['Portfolio_Total'].hvplot(color = 'lightgray')

    entry_exit_chart = portfolio_price_chart * entry * exit 

    entry_exit_chart.opts(title = f'Entry Exit Plot on the Portfolio Cumulative Returns with the {model} model',
                         height = 500,
                         width = 1000)

    return entry_exit_chart

#### Plot the entry and exit points the model generated on their respective cumulative returns

In [92]:
# Make the list of dataframes with the cumulative returns of each model
model_pred = [gb_backtest_cumulative, svm_backtest_cumulative, lr_backtest_cumulative, lstm_backtest_cumulative]

# List of names of the respective models
model_name = ['Gradient Boosting Classifier', 
             'Supported Vector Machine',
             'Logistic Regression Classifier',
             'LSTM']

# Loop throught the lists to pass them as arguments in to the function to generate plots
for i in range(len(model_name)):
    display(plot_entry_exit(model_pred[i], model_name[i]))


## Calculating Metrics

In [94]:
# Create a function to calculate sortino ratio
def sortino(df):
    # Convert the returns to numneric and drop 'NaN's for calulation
    daily_returns = pd.to_numeric(df['Portfolio_Returns']).dropna()


    # Calculate average daily return
    average_return = daily_returns.mean()

    # Calculate downside returns (only negative returns)
    downside_returns = daily_returns[daily_returns < 0]

    # Calculate the downside standard deviation
    downside_std = downside_returns.std()

    # Calculate the Sortino Ratio
    sortino_ratio = (average_return / downside_std) * np.sqrt(252)
    
    
    return sortino_ratio

In [101]:
# Create funciton to calculate the metrics in the dataframe format
def evaluation(eval_df, df):
    eval_df.loc['Annualized Returns'] = df['Portfolio_Returns'].mean() * 252
    eval_df.loc['Cumulative Returns'] = df['Cumulative_Returns'][-1]
    eval_df.loc['Annualized Volatility'] = df['Portfolio_Returns'].std() * np.sqrt(252)
    eval_df.loc['Sharpe Ratio'] = (df['Portfolio_Returns'].mean() * 252) / (df['Portfolio_Returns'].std() * np.sqrt(252))
    eval_df.loc['Sortino Ratio'] = sortino(df)
    
    return eval_df
    

In [102]:
metics = ['Annualized Returns',
          'Cumulative Returns',
          'Annualized Volatility',
          'Sharpe Ratio',
          'Sortino Ratio']

evaluation_df = pd.DataFrame(columns = ['Backtest'], index = metics)
evaluation_df

Unnamed: 0,Backtest
Annualized Returns,
Cumulative Returns,
Annualized Volatility,
Sharpe Ratio,
Sortino Ratio,


### Gradient Boosting Classifier

In [103]:
# Generate the performance mertics of the Gradient Boosting Classifier model
evaluation(evaluation_df, gb_backtest_cumulative)

Unnamed: 0,Backtest
Annualized Returns,0.212202
Cumulative Returns,0.995397
Annualized Volatility,0.302167
Sharpe Ratio,0.702267
Sortino Ratio,0.834599


### Supported Vector Machine

In [106]:
# Generate the performance mertics of the Supported Vector Machine model
evaluation(evaluation_df, svm_backtest_cumulative)

Unnamed: 0,Backtest
Annualized Returns,0.241612
Cumulative Returns,1.289296
Annualized Volatility,0.289679
Sharpe Ratio,0.834068
Sortino Ratio,1.035147


### Logistic Regression Classifier

In [107]:
# Generate the performance mertics of the Logistic Regression Classifier model
evaluation(evaluation_df, lr_backtest_cumulative)

Unnamed: 0,Backtest
Annualized Returns,0.248307
Cumulative Returns,1.372771
Annualized Volatility,0.282538
Sharpe Ratio,0.878845
Sortino Ratio,0.96732


#### LSTM

In [108]:
# Generate the performance mertics of the LSTM model
evaluation(evaluation_df, lr_backtest_cumulative)

Unnamed: 0,Backtest
Annualized Returns,0.248307
Cumulative Returns,1.372771
Annualized Volatility,0.282538
Sharpe Ratio,0.878845
Sortino Ratio,0.96732


## Post trade performance analysis of the strategies


In [111]:
# Create a function to obtain the records of the trades with the respective models
def performance(df, model:str):
    # Create the dataframe specifying the features of the trades along with its profit and loss
    performance_data = []


    for index, row in df.iterrows():
        if row['Entry/Exit'] == 1:
            entry_date = index
            entry_share_price = row['Close']
            share_size = abs(row['Portfolio_Total'] / row['Close'])
            entry_portfolio_holdings = row['Close'] * (abs(row['Portfolio_Total'] / row['Close']))

        elif row['Entry/Exit'] == -1 and entry_date is not None:
            exit_date = index
            exit_share_price = row['Close']
            share_size = abs(row['Portfolio_Total'] / row['Close'])
            exit_portfolio_holdings = row['Close'] * (abs(row['Portfolio_Total'] / row['Close']))
            profit_loss = exit_portfolio_holdings - entry_portfolio_holdings

            performance_data.append({
                'Stock': 'PLTR',
                'Entry Date': entry_date,
                'Exit Date': exit_date,
                'Entry Price': entry_share_price,
                'Exit Price': exit_share_price,
                'Shares': share_size,
                'Entry Portfolio Holding': entry_portfolio_holdings,
                'Exit Portfolio Holding': exit_portfolio_holdings,
                'Profit/Loss': profit_loss
            })



    performance_data_df = pd.DataFrame(performance_data)
    print(f'\n \nPerformance of {model} : ')
    
    return performance_data_df

In [112]:
for i in range(len(model_name)):
    display(performance(model_pred[i], model_name[i]))


 
Performance of Gradient Boosting Classifier : 


Unnamed: 0,Stock,Entry Date,Exit Date,Entry Price,Exit Price,Shares,Entry Portfolio Holding,Exit Portfolio Holding,Profit/Loss
0,PLTR,2024-11-21 09:30:00-05:00,2024-12-27 14:30:00-05:00,61.64,79.28,1610.046691,99243.278055,127644.501691,28401.223635
1,PLTR,2024-12-27 15:30:00-05:00,2024-12-31 09:30:00-05:00,79.13,77.36,1613.09872,127644.501691,124789.316957,-2855.184734
2,PLTR,2025-01-02 09:30:00-05:00,2025-03-26 11:30:00-04:00,74.96,92.74,1664.745424,124789.316957,154388.490589,29599.173632
3,PLTR,2025-03-26 14:30:00-04:00,2025-05-12 11:30:00-04:00,91.21,118.27,1692.670657,154388.490589,200192.158557,45803.667968
4,PLTR,2025-05-13 09:30:00-04:00,2025-05-15 15:30:00-04:00,123.33,128.08,1623.223535,200192.158557,207902.470348,7710.311791
5,PLTR,2025-05-16 09:30:00-04:00,2025-05-16 10:30:00-04:00,128.34,128.48,1619.935097,207902.470348,208129.261261,226.790914
6,PLTR,2025-05-16 11:30:00-04:00,2025-05-19 10:30:00-04:00,128.24,124.9,1622.966791,208129.261261,202708.55218,-5420.709082
7,PLTR,2025-05-20 09:30:00-04:00,2025-05-20 10:30:00-04:00,126.0,125.85,1608.798033,202708.55218,202467.232475,-241.319705
8,PLTR,2025-05-21 11:30:00-04:00,2025-05-21 12:30:00-04:00,126.57,121.59,1599.646302,202467.232475,194500.99389,-7966.238585
9,PLTR,2025-05-21 14:30:00-04:00,2025-05-23 13:30:00-04:00,120.19,124.6,1618.27934,194500.99389,201637.605779,7136.61189



 
Performance of Supported Vector Machine : 


Unnamed: 0,Stock,Entry Date,Exit Date,Entry Price,Exit Price,Shares,Entry Portfolio Holding,Exit Portfolio Holding,Profit/Loss
0,PLTR,2024-11-21 09:30:00-05:00,2025-01-02 12:30:00-05:00,61.64,73.82,1610.046691,99243.278055,118853.646756,19610.368701
1,PLTR,2025-01-02 13:30:00-05:00,2025-01-07 10:30:00-05:00,74.32,72.32,1599.214838,118853.646756,115655.21708,-3198.429676
2,PLTR,2025-01-07 11:30:00-05:00,2025-01-07 12:30:00-05:00,70.93,71.23,1630.554308,115655.21708,116144.383372,489.166292
3,PLTR,2025-01-14 09:30:00-05:00,2025-01-14 11:30:00-05:00,67.22,66.07,1727.824805,116144.383372,114157.384847,-1986.998525
4,PLTR,2025-01-14 13:30:00-05:00,2025-01-14 15:30:00-05:00,66.53,65.91,1715.878323,114157.384847,113093.540286,-1063.84456
5,PLTR,2025-01-15 09:30:00-05:00,2025-02-20 09:30:00-05:00,68.45,100.69,1652.206578,113093.540286,166360.680372,53267.140085
6,PLTR,2025-02-20 14:30:00-05:00,2025-02-21 15:30:00-05:00,107.08,101.33,1553.611135,166360.680372,157427.416344,-8933.264028
7,PLTR,2025-03-03 09:30:00-05:00,2025-03-03 15:30:00-05:00,88.88,83.51,1771.235557,157427.416344,147915.8814,-9511.534943
8,PLTR,2025-03-04 12:30:00-05:00,2025-03-06 15:30:00-05:00,83.14,80.4,1779.118131,147915.8814,143041.097722,-4874.783679
9,PLTR,2025-03-07 09:30:00-05:00,2025-03-10 10:30:00-04:00,81.99,79.46,1744.616389,143041.097722,138627.218258,-4413.879464



 
Performance of Logistic Regression Classifier : 


Unnamed: 0,Stock,Entry Date,Exit Date,Entry Price,Exit Price,Shares,Entry Portfolio Holding,Exit Portfolio Holding,Profit/Loss
0,PLTR,2024-11-21 09:30:00-05:00,2024-11-21 15:30:00-05:00,61.64,61.35,1610.046691,99243.278055,98776.364515,-466.91354
1,PLTR,2024-11-22 09:30:00-05:00,2024-11-27 11:30:00-05:00,62.21,64.54,1587.789174,98776.364515,102475.91329,3699.548775
2,PLTR,2024-11-27 12:30:00-05:00,2024-12-09 09:30:00-05:00,65.27,75.92,1570.030846,102475.91329,119196.741795,16720.828505
3,PLTR,2024-12-09 10:30:00-05:00,2024-12-27 13:30:00-05:00,72.66,78.79,1640.472637,119196.741795,129252.839059,10056.097264
4,PLTR,2025-01-02 09:30:00-05:00,2025-01-02 10:30:00-05:00,74.96,74.96,1724.290809,129252.839059,129252.839059,0.0
5,PLTR,2025-01-03 09:30:00-05:00,2025-01-07 11:30:00-05:00,77.85,70.93,1660.280527,129252.839059,117763.69781,-11489.14125
6,PLTR,2025-01-07 14:30:00-05:00,2025-01-07 15:30:00-05:00,69.97,70.0,1683.059852,117763.69781,117814.189605,50.491796
7,PLTR,2025-01-08 09:30:00-05:00,2025-01-08 10:30:00-05:00,67.82,67.27,1737.159976,117814.189605,116858.751618,-955.437987
8,PLTR,2025-01-08 12:30:00-05:00,2025-01-08 14:30:00-05:00,68.5,68.31,1705.967177,116858.751618,116534.617854,-324.133764
9,PLTR,2025-01-10 09:30:00-05:00,2025-01-10 10:30:00-05:00,66.17,65.32,1761.139759,116534.617854,115037.649059,-1496.968795



 
Performance of LSTM : 


Unnamed: 0,Stock,Entry Date,Exit Date,Entry Price,Exit Price,Shares,Entry Portfolio Holding,Exit Portfolio Holding,Profit/Loss
0,PLTR,2024-11-21 09:30:00-05:00,2024-11-21 15:30:00-05:00,61.64,61.35,1610.046691,99243.278055,98776.364515,-466.91354
1,PLTR,2024-11-22 09:30:00-05:00,2024-11-27 11:30:00-05:00,62.21,64.54,1587.789174,98776.364515,102475.91329,3699.548775
2,PLTR,2024-11-27 12:30:00-05:00,2024-12-09 09:30:00-05:00,65.27,75.92,1570.030846,102475.91329,119196.741795,16720.828505
3,PLTR,2024-12-09 10:30:00-05:00,2024-12-27 13:30:00-05:00,72.66,78.79,1640.472637,119196.741795,129252.839059,10056.097264
4,PLTR,2025-01-02 09:30:00-05:00,2025-01-02 10:30:00-05:00,74.96,74.96,1724.290809,129252.839059,129252.839059,0.0
5,PLTR,2025-01-03 09:30:00-05:00,2025-01-07 11:30:00-05:00,77.85,70.93,1660.280527,129252.839059,117763.69781,-11489.14125
6,PLTR,2025-01-07 14:30:00-05:00,2025-01-07 15:30:00-05:00,69.97,70.0,1683.059852,117763.69781,117814.189605,50.491796
7,PLTR,2025-01-08 09:30:00-05:00,2025-01-08 10:30:00-05:00,67.82,67.27,1737.159976,117814.189605,116858.751618,-955.437987
8,PLTR,2025-01-08 12:30:00-05:00,2025-01-08 14:30:00-05:00,68.5,68.31,1705.967177,116858.751618,116534.617854,-324.133764
9,PLTR,2025-01-10 09:30:00-05:00,2025-01-10 10:30:00-05:00,66.17,65.32,1761.139759,116534.617854,115037.649059,-1496.968795
