In [1]:
# import libraries
import pandas as pd
import yfinance as yf
import hvplot.pandas
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Dropout

import warnings
warnings.filterwarnings('ignore')

In [2]:
# download the historical prices of pltr
pltr_df = yf.download(tickers = 'pltr', period='2Y', interval = '1h')
pltr_df

# Remove multilevels of the column headers
pltr_df = pltr_df.droplevel(level = 1, axis = 1)

# Remove the name 'Price' from the headers
pltr_df.columns.name = None

pltr_df

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Close,High,Low,Open,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-07-03 13:30:00+00:00,15.319300,15.590000,15.220000,15.490000,12379571
2023-07-03 14:30:00+00:00,15.405000,15.420000,15.190000,15.310000,5627704
2023-07-03 15:30:00+00:00,15.470000,15.470000,15.340000,15.407100,3471561
2023-07-05 13:30:00+00:00,15.425000,15.570000,15.230000,15.436700,9888941
2023-07-05 14:30:00+00:00,15.345000,15.580000,15.330000,15.430000,6028562
...,...,...,...,...,...
2025-07-02 15:30:00+00:00,132.240005,132.320007,130.960007,131.490005,5249743
2025-07-02 16:30:00+00:00,132.660004,133.369995,132.110001,132.220001,6213849
2025-07-02 17:30:00+00:00,132.509995,133.300293,132.179993,132.670700,5675254
2025-07-02 18:30:00+00:00,132.169998,132.949997,131.639999,132.520004,5045756


In [3]:
# Round the values of the dataframe to 2 decimal points
signals_df = round(pltr_df[['Close', 'High', 'Low', 'Open', 'Volume']], 2)

# Change the index to the required timezone
signals_df.index = signals_df.index.tz_convert('US/Eastern')

# Display the data
signals_df

Unnamed: 0_level_0,Close,High,Low,Open,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-07-03 09:30:00-04:00,15.32,15.59,15.22,15.49,12379571
2023-07-03 10:30:00-04:00,15.40,15.42,15.19,15.31,5627704
2023-07-03 11:30:00-04:00,15.47,15.47,15.34,15.41,3471561
2023-07-05 09:30:00-04:00,15.43,15.57,15.23,15.44,9888941
2023-07-05 10:30:00-04:00,15.35,15.58,15.33,15.43,6028562
...,...,...,...,...,...
2025-07-02 11:30:00-04:00,132.24,132.32,130.96,131.49,5249743
2025-07-02 12:30:00-04:00,132.66,133.37,132.11,132.22,6213849
2025-07-02 13:30:00-04:00,132.51,133.30,132.18,132.67,5675254
2025-07-02 14:30:00-04:00,132.17,132.95,131.64,132.52,5045756


In [4]:
# visulise the closing price
signals_df['Close'].hvplot()

In [5]:
# Set the long and short windows
short_window = 20
long_window = 50

# Obtain the Exponential Moving Average of the Close prices with short and long windows
signals_df['EMA20_Close'] =  round(signals_df['Close'].ewm(span = short_window).mean(), 2)
signals_df['EMA50_Close'] =  round(signals_df['Close'].ewm(span = long_window).mean(), 2)

# Calculate Relative Strength Index (RSI)
delta = signals_df['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)

avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()

rs = avg_gain / avg_loss
signals_df['RSI'] = 100 - (100 / (1 + rs))

# Calculate the 14-day Average True Range (ATR) for volatility
high_low = signals_df['High'] - signals_df['Low']
high_close = (signals_df['High'] - signals_df['Close'].shift()).abs()
low_close = (signals_df['Low'] - signals_df['Close'].shift()).abs()
tr = pd.concat([high_low, high_close, low_close], axis=1)
signals_df['ATR'] = tr.max(axis=1).rolling(window=14).mean()

# Drop missing values
signals_df.dropna(inplace=True)

# view data
signals_df.head()

Unnamed: 0_level_0,Close,High,Low,Open,Volume,EMA20_Close,EMA50_Close,RSI,ATR
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-07-06 12:30:00-04:00,15.06,15.14,14.95,14.97,4736592,15.27,15.31,42.857143,0.274286
2023-07-06 13:30:00-04:00,15.13,15.16,15.01,15.05,5024314,15.25,15.29,44.973545,0.258571
2023-07-06 14:30:00-04:00,15.17,15.19,15.09,15.14,4433036,15.24,15.28,43.783784,0.249286
2023-07-06 15:30:00-04:00,15.13,15.18,15.1,15.17,4850850,15.23,15.27,40.659341,0.245714
2023-07-07 09:30:00-04:00,15.63,15.9,15.22,15.23,17897837,15.27,15.3,54.385965,0.276429


In [6]:
# Create signals
signals_df['Target'] = (signals_df['EMA20_Close'] > signals_df['EMA50_Close']).astype(int)

# Display data
signals_df.head()

Unnamed: 0_level_0,Close,High,Low,Open,Volume,EMA20_Close,EMA50_Close,RSI,ATR,Target
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-07-06 12:30:00-04:00,15.06,15.14,14.95,14.97,4736592,15.27,15.31,42.857143,0.274286,0
2023-07-06 13:30:00-04:00,15.13,15.16,15.01,15.05,5024314,15.25,15.29,44.973545,0.258571,0
2023-07-06 14:30:00-04:00,15.17,15.19,15.09,15.14,4433036,15.24,15.28,43.783784,0.249286,0
2023-07-06 15:30:00-04:00,15.13,15.18,15.1,15.17,4850850,15.23,15.27,40.659341,0.245714,0
2023-07-07 09:30:00-04:00,15.63,15.9,15.22,15.23,17897837,15.27,15.3,54.385965,0.276429,0


## Machine Learning

In [7]:
# Define the datasets X and y for machine learing
X = signals_df.drop(columns = ['Target'], axis = 1)
y = signals_df['Target']


# Split the data into test and train data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30, shuffle = False, random_state = 2)

In [8]:
# Scale the data using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Conduct PCA transformation
pca = PCA(n_components= 5)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

In [9]:
# Check if the training data label classes are balanced 
y_train.value_counts()

Target
1    1395
0    1041
Name: count, dtype: int64

### Gradienct Boosting Classifier

In [10]:
# initiate the model
gb_model = GradientBoostingClassifier(n_estimators = 300,
                                      max_depth = 3,
                                      learning_rate = .1,
                                      random_state = 1)

# fit the model with the training data
gb_model.fit(X_train_pca, y_train)


# Obtain the prediction with the model
y_pred_gb = gb_model.predict(X_test_pca)

# Generate the Classification report
print('Gradient Boositing Classifier',classification_report(y_test, y_pred_gb))

Gradient Boositing Classifier               precision    recall  f1-score   support

           0       0.23      0.10      0.13       315
           1       0.69      0.86      0.76       730

    accuracy                           0.63      1045
   macro avg       0.46      0.48      0.45      1045
weighted avg       0.55      0.63      0.57      1045



### Supported Vector Machine

In [11]:
# Initialize and fit the data to SVC
svm_model = SVC(kernel = 'linear', gamma = 'auto', C=1.0)
svm_model.fit(X_train_pca, y_train)


# make predictions with the model
y_pred_svm = svm_model.predict(X_test_pca)

# Obtain the classification report of predictions against the test data
print('Supported Vector Machine \n',classification_report(y_test, y_pred_svm))

Supported Vector Machine 
               precision    recall  f1-score   support

           0       0.92      0.32      0.47       315
           1       0.77      0.99      0.87       730

    accuracy                           0.79      1045
   macro avg       0.84      0.65      0.67      1045
weighted avg       0.81      0.79      0.75      1045



### Logistic Regression

In [12]:
# Initiate and fit the data to logistic regression model
lr_model = LogisticRegression(solver = 'liblinear')
lr_model.fit(X_train_pca, y_train)

# Make predictions with the model
y_pred_lr = lr_model.predict(X_test_pca)

# Obtain the classification report of the predictions against the test data
print('Logistic Regression\n',classification_report(y_test, y_pred_lr))

Logistic Regression
               precision    recall  f1-score   support

           0       0.95      0.17      0.29       315
           1       0.74      1.00      0.85       730

    accuracy                           0.75      1045
   macro avg       0.84      0.59      0.57      1045
weighted avg       0.80      0.75      0.68      1045



### LSTM

In [13]:
# Reshape input to 3D [samples, time_steps, features] to appropriately fir the data into the LSTM model
X_train_reshaped = np.reshape(X_train_pca, (X_train_pca.shape[0], X_train_pca.shape[1], 1))

# Initialized LSTM Classifier
lstm = Sequential()

# Creating the first hidden layer
lstm.add(LSTM(units = 70, return_sequences = True, input_shape = (X_train_pca.shape[1],1)))
lstm.add(Dropout(0.2))

# Creating the second hidden layer
lstm.add(LSTM(units = 60, return_sequences = True, activation = 'relu'))
lstm.add(Dropout(0.1))

# Creating the third hidden layer
lstm.add(LSTM(units = 60, return_sequences = True, activation = 'relu'))
lstm.add(Dropout(0.1))

# Creating the fourth hidden layer
lstm.add(LSTM(units = 60, activation = 'relu'))
lstm.add(Dropout(0.1))

# Creating the output layer to the model 
lstm.add(Dense(units = 1, activation= 'sigmoid'))

In [14]:
# Obtain the LSTM model symmary
lstm.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 5, 70)             20160     
                                                                 
 dropout (Dropout)           (None, 5, 70)             0         
                                                                 
 lstm_1 (LSTM)               (None, 5, 60)             31440     
                                                                 
 dropout_1 (Dropout)         (None, 5, 60)             0         
                                                                 
 lstm_2 (LSTM)               (None, 5, 60)             29040     
                                                                 
 dropout_2 (Dropout)         (None, 5, 60)             0         
                                                                 
 lstm_3 (LSTM)               (None, 60)                2

In [15]:
# Compile the model
lstm.compile(optimizer = 'adam', loss = 'mean_squared_error', metrics = ['accuracy'])

In [16]:
# Fit the model
lstm.fit(X_train_reshaped, y_train, epochs=50, batch_size=32)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x22ea334aec0>

In [17]:
# Make predictions
y_pred = lstm.predict(X_test_pca)

# Obtain the labels in the required format
y_pred_lstm = np.where(y_pred > y_pred.mean(), 1, 0).ravel()



In [18]:
# Generate the classification report of the predictions against the test set
print('LSTM',classification_report(y_test, y_pred_lstm))

LSTM               precision    recall  f1-score   support

           0       0.36      0.36      0.36       315
           1       0.72      0.72      0.72       730

    accuracy                           0.61      1045
   macro avg       0.54      0.54      0.54      1045
weighted avg       0.61      0.61      0.61      1045



## Backtesting

In [19]:
# Create function to accept the validation input data and model predictions
# Generate a data frome through the function with the corresponding returns based on the model predictions entered
def backtest_df(df, pred):
    pred = np.roll(pred, 1)
    pred[0] = 0
    capital, position = 100000, 0
    portfolio = []
    
    for i in range(len(df)):
        close_price = df.iloc[i]['Close']
        if pred[i] == 1 and position == 0:
            position = capital / close_price
            capital = 0
            
        elif pred[i] == 0 and position != 0:
            capital = position * close_price
            position = 0
            
        portfolio_value = capital + position * close_price
        portfolio.append(portfolio_value)
        
    return pd.DataFrame({'Model_Predictions': pred, 'Portfolio_Total': portfolio}, index=df.index)

In [20]:
# Function to generate dataframe with the validation input and cumulative returns and its plot based on the model predictions 
def cumualtive_returns(portfolio_df, X_test):
    
    # Merge
    merged_df = pd.concat([X_test, portfolio_df], axis = 1)
    
    # Obtain the Daily returns of the portfolio and clean the data
    merged_df['Portfolio_Returns'] = merged_df['Portfolio_Total'].pct_change()
    merged_df.dropna(inplace = True)
    
    # Calculate cumualtive returns at every hour
    merged_df['Cumulative_Returns'] = (1 + merged_df['Portfolio_Returns']).cumprod() - 1
    
    return merged_df

##### Gradient Boosting Classifier

In [21]:
# Conduct backtest with the function created
gb_backtest = backtest_df(X_test, y_pred_gb)

# Obtain the cumulateive returns the backtesting algorithm generates
gb_backtest_cumulative = cumualtive_returns(gb_backtest, X_test)
gb_backtest_cumulative

Unnamed: 0_level_0,Close,High,Low,Open,Volume,EMA20_Close,EMA50_Close,RSI,ATR,Model_Predictions,Portfolio_Total,Portfolio_Returns,Cumulative_Returns
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2024-11-21 14:30:00-05:00,61.58,61.74,61.30,61.59,4064551,61.75,60.79,45.707376,1.046429,1,100000.000000,0.000000,0.000000
2024-11-21 15:30:00-05:00,61.35,61.72,61.24,61.57,3388214,61.71,60.81,39.641944,1.027143,1,99626.502111,-0.003735,-0.003735
2024-11-22 09:30:00-05:00,62.21,63.04,61.37,61.60,15777196,61.76,60.87,63.665595,0.925000,1,101023.059435,0.014018,0.010231
2024-11-22 10:30:00-05:00,63.34,63.47,62.14,62.21,11239611,61.91,60.97,68.870523,0.956429,1,102858.070802,0.018164,0.028581
2024-11-22 11:30:00-05:00,64.04,64.38,63.00,63.35,12237053,62.11,61.09,68.347339,0.975000,1,103994.803508,0.011051,0.039948
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-07-02 11:30:00-04:00,132.24,132.32,130.96,131.49,5249743,133.70,136.57,28.648649,1.957143,0,197704.492650,0.005627,0.977045
2025-07-02 12:30:00-04:00,132.66,133.37,132.11,132.22,6213849,133.60,136.42,32.770745,1.904286,0,197704.492650,0.000000,0.977045
2025-07-02 13:30:00-04:00,132.51,133.30,132.18,132.67,5675254,133.50,136.26,32.002801,1.927143,0,197704.492650,0.000000,0.977045
2025-07-02 14:30:00-04:00,132.17,132.95,131.64,132.52,5045756,133.37,136.10,37.064071,1.834286,0,197704.492650,0.000000,0.977045


In [22]:
# Plot the cumulative returns of the model from the backtesting
gb_backtest_cumulative['Cumulative_Returns'].hvplot()

##### Supported Vector Machine

In [23]:
# Conduct backtest with the function created
svm_backtest = backtest_df(X_test, y_pred_svm)

# Obtain the cumulateive returns the backtesting algorithm generates
svm_backtest_cumulative = cumualtive_returns(svm_backtest, X_test)
svm_backtest_cumulative

Unnamed: 0_level_0,Close,High,Low,Open,Volume,EMA20_Close,EMA50_Close,RSI,ATR,Model_Predictions,Portfolio_Total,Portfolio_Returns,Cumulative_Returns
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2024-11-21 14:30:00-05:00,61.58,61.74,61.30,61.59,4064551,61.75,60.79,45.707376,1.046429,1,100000.000000,0.000000,0.000000
2024-11-21 15:30:00-05:00,61.35,61.72,61.24,61.57,3388214,61.71,60.81,39.641944,1.027143,1,99626.502111,-0.003735,-0.003735
2024-11-22 09:30:00-05:00,62.21,63.04,61.37,61.60,15777196,61.76,60.87,63.665595,0.925000,1,101023.059435,0.014018,0.010231
2024-11-22 10:30:00-05:00,63.34,63.47,62.14,62.21,11239611,61.91,60.97,68.870523,0.956429,1,102858.070802,0.018164,0.028581
2024-11-22 11:30:00-05:00,64.04,64.38,63.00,63.35,12237053,62.11,61.09,68.347339,0.975000,1,103994.803508,0.011051,0.039948
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-07-02 11:30:00-04:00,132.24,132.32,130.96,131.49,5249743,133.70,136.57,28.648649,1.957143,1,207517.793175,0.005627,1.075178
2025-07-02 12:30:00-04:00,132.66,133.37,132.11,132.22,6213849,133.60,136.42,32.770745,1.904286,1,208176.878725,0.003176,1.081769
2025-07-02 13:30:00-04:00,132.51,133.30,132.18,132.67,5675254,133.50,136.26,32.002801,1.927143,1,207941.491029,-0.001131,1.079415
2025-07-02 14:30:00-04:00,132.17,132.95,131.64,132.52,5045756,133.37,136.10,37.064071,1.834286,1,207407.945584,-0.002566,1.074079


In [24]:
# Plot the cumulative returns of the model from the backtesting
svm_backtest_cumulative['Cumulative_Returns'].hvplot()

##### Logistic Regression

In [25]:
# Conduct backtest with the function created
lr_backtest = backtest_df(X_test, y_pred_lr)

# Obtain the cumulateive returns the backtesting algorithm generates
lr_backtest_cumulative = cumualtive_returns(lr_backtest, X_test)
lr_backtest_cumulative

Unnamed: 0_level_0,Close,High,Low,Open,Volume,EMA20_Close,EMA50_Close,RSI,ATR,Model_Predictions,Portfolio_Total,Portfolio_Returns,Cumulative_Returns
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2024-11-21 14:30:00-05:00,61.58,61.74,61.30,61.59,4064551,61.75,60.79,45.707376,1.046429,1,100000.000000,0.000000,0.000000
2024-11-21 15:30:00-05:00,61.35,61.72,61.24,61.57,3388214,61.71,60.81,39.641944,1.027143,1,99626.502111,-0.003735,-0.003735
2024-11-22 09:30:00-05:00,62.21,63.04,61.37,61.60,15777196,61.76,60.87,63.665595,0.925000,1,101023.059435,0.014018,0.010231
2024-11-22 10:30:00-05:00,63.34,63.47,62.14,62.21,11239611,61.91,60.97,68.870523,0.956429,1,102858.070802,0.018164,0.028581
2024-11-22 11:30:00-05:00,64.04,64.38,63.00,63.35,12237053,62.11,61.09,68.347339,0.975000,1,103994.803508,0.011051,0.039948
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-07-02 11:30:00-04:00,132.24,132.32,130.96,131.49,5249743,133.70,136.57,28.648649,1.957143,1,184560.866567,0.005627,0.845609
2025-07-02 12:30:00-04:00,132.66,133.37,132.11,132.22,6213849,133.60,136.42,32.770745,1.904286,1,185147.039918,0.003176,0.851470
2025-07-02 13:30:00-04:00,132.51,133.30,132.18,132.67,5675254,133.50,136.26,32.002801,1.927143,1,184937.692293,-0.001131,0.849377
2025-07-02 14:30:00-04:00,132.17,132.95,131.64,132.52,5045756,133.37,136.10,37.064071,1.834286,1,184463.171009,-0.002566,0.844632


In [26]:
# Plot the cumulative returns of the model from the backtesting
lr_backtest_cumulative['Cumulative_Returns'].hvplot()

##### LSTM

In [27]:
# Conduct backtest with the function created
lstm_backtest = backtest_df(X_test, y_pred_lstm)

# Obtain the cumulateive returns the backtesting algorithm generates
lstm_backtest_cumulative = cumualtive_returns(lstm_backtest, X_test)
lstm_backtest_cumulative

Unnamed: 0_level_0,Close,High,Low,Open,Volume,EMA20_Close,EMA50_Close,RSI,ATR,Model_Predictions,Portfolio_Total,Portfolio_Returns,Cumulative_Returns
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2024-11-21 14:30:00-05:00,61.58,61.74,61.30,61.59,4064551,61.75,60.79,45.707376,1.046429,1,100000.000000,0.000000,0.000000
2024-11-21 15:30:00-05:00,61.35,61.72,61.24,61.57,3388214,61.71,60.81,39.641944,1.027143,1,99626.502111,-0.003735,-0.003735
2024-11-22 09:30:00-05:00,62.21,63.04,61.37,61.60,15777196,61.76,60.87,63.665595,0.925000,1,101023.059435,0.014018,0.010231
2024-11-22 10:30:00-05:00,63.34,63.47,62.14,62.21,11239611,61.91,60.97,68.870523,0.956429,1,102858.070802,0.018164,0.028581
2024-11-22 11:30:00-05:00,64.04,64.38,63.00,63.35,12237053,62.11,61.09,68.347339,0.975000,1,103994.803508,0.011051,0.039948
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-07-02 11:30:00-04:00,132.24,132.32,130.96,131.49,5249743,133.70,136.57,28.648649,1.957143,0,148876.746682,0.000000,0.488767
2025-07-02 12:30:00-04:00,132.66,133.37,132.11,132.22,6213849,133.60,136.42,32.770745,1.904286,0,148876.746682,0.000000,0.488767
2025-07-02 13:30:00-04:00,132.51,133.30,132.18,132.67,5675254,133.50,136.26,32.002801,1.927143,0,148876.746682,0.000000,0.488767
2025-07-02 14:30:00-04:00,132.17,132.95,131.64,132.52,5045756,133.37,136.10,37.064071,1.834286,0,148876.746682,0.000000,0.488767


In [28]:
# Plot the cumulative returns of the model from the backtesting
lstm_backtest_cumulative['Cumulative_Returns'].hvplot()

#### Plot the entry and exit points the model generated on their respective cumulative returns

In [29]:
# function to plot entry and exit
def plot_entry_exit(df, model:str):
    # Obtain exit and entry points (1 to buy, -1 to sell and 0 to hold)
    df['Entry/Exit'] = df['Model_Predictions'].diff()
    df['Entry/Exit'][0] = df['Model_Predictions'][0]
    df['Entry/Exit'] = df['Entry/Exit'].astype(int)
    df[['Model_Predictions', 'Entry/Exit']]
    

    # Plot the points to sell on the graph
    exit = df[df['Entry/Exit'] == -1]['Portfolio_Total'].hvplot.scatter(color = 'red',
                                           marker = 'v',
                                           legend = False,
                                           width = 1000,
                                           height = 500,
                                           size = 200, ylabel = 'Price in $')

    # Plot the points to buy on the graph
    entry = df[df['Entry/Exit'] == 1]['Portfolio_Total'].hvplot.scatter(color = 'green',
                                           marker = '^',
                                           legend = False,
                                           width = 1000,
                                           height = 500,
                                           size = 200, ylabel = 'Price in $')

    


    # plot cumulative returns through the model
    portfolio_price_chart = df['Portfolio_Total'].hvplot(color = 'lightgray')

    entry_exit_chart = portfolio_price_chart * entry * exit 

    entry_exit_chart.opts(title = f'Entry Exit Plot on the Portfolio Total with the {model} model',
                         height = 500,
                         width = 1000)

    return entry_exit_chart

In [30]:
# Make the list of dataframes with the cumulative returns of each model
model_pred = [gb_backtest_cumulative, svm_backtest_cumulative, lr_backtest_cumulative, lstm_backtest_cumulative]

# List of names of the respective models
model_name = ['Gradient Boosting Classifier', 
             'Supported Vector Machine',
             'Logistic Regression Classifier',
             'LSTM']

# Loop throught the lists to pass them as arguments in to the function to generate plots
for i in range(len(model_name)):
    display(plot_entry_exit(model_pred[i], model_name[i]))

## Calculating Metrics - Post trade performance analysis of the strategy and models

#### Profit and Loss of the trades executed

In [31]:
# Create a function to obtain the records of the trades with the respective models
def performance(df, model:str):
    # Create the dataframe specifying the features of the trades along with its profit and loss
    performance_data = []
    
    # Initialize entry_date as None
    entry_date = None

    for index, row in df.iterrows():
        if row['Entry/Exit'] == 1:
            entry_date = index
            entry_share_price = row['Close']
            share_size = abs(row['Portfolio_Total'] / row['Close'])
            entry_portfolio_holdings = row['Close'] * (abs(row['Portfolio_Total'] / row['Close']))

        elif row['Entry/Exit'] == -1 and entry_date is not None:
            exit_date = index
            exit_share_price = row['Close']
            share_size = abs(row['Portfolio_Total'] / row['Close'])
            exit_portfolio_holdings = row['Close'] * (abs(row['Portfolio_Total'] / row['Close']))
            profit_loss = exit_portfolio_holdings - entry_portfolio_holdings

            performance_data.append({
                'Stock': 'PLTR',
                'Entry Date': entry_date,
                'Exit Date': exit_date,
                'Entry Price': entry_share_price,
                'Exit Price': exit_share_price,
                'Shares': share_size,
                'Entry Portfolio Holding': entry_portfolio_holdings,
                'Exit Portfolio Holding': exit_portfolio_holdings,
                'Profit/Loss': profit_loss
            })



    performance_data_df = pd.DataFrame(performance_data)
    print(f'\n \nPerformance of {model} : ')
    
    return performance_data_df

In [32]:
# Loop through the list of model names and generate the performance of each model
for i in range(len(model_name)):
    display(performance(model_pred[i], model_name[i]))


 
Performance of Gradient Boosting Classifier : 


Unnamed: 0,Stock,Entry Date,Exit Date,Entry Price,Exit Price,Shares,Entry Portfolio Holding,Exit Portfolio Holding,Profit/Loss
0,PLTR,2024-11-21 14:30:00-05:00,2024-12-27 15:30:00-05:00,61.58,79.13,1623.903865,100000.0,128499.512829,28499.512829
1,PLTR,2024-12-30 10:30:00-05:00,2024-12-30 15:30:00-05:00,78.16,77.13,1644.057227,128499.512829,126806.133885,-1693.378943
2,PLTR,2024-12-31 10:30:00-05:00,2024-12-31 11:30:00-05:00,76.81,76.42,1650.906573,126806.133885,126162.280322,-643.853564
3,PLTR,2025-01-02 10:30:00-05:00,2025-03-26 13:30:00-04:00,74.96,92.06,1683.06137,126162.280322,154942.629755,28780.349433
4,PLTR,2025-03-26 15:30:00-04:00,2025-03-27 09:30:00-04:00,92.25,92.36,1679.594902,154942.629755,155127.385194,184.755439
5,PLTR,2025-03-27 10:30:00-04:00,2025-05-12 09:30:00-04:00,92.63,115.65,1674.699182,155127.385194,193678.960355,38551.575161
6,PLTR,2025-05-12 10:30:00-04:00,2025-05-12 12:30:00-04:00,118.43,117.64,1635.387658,193678.960355,192387.004105,-1291.95625
7,PLTR,2025-05-13 10:30:00-04:00,2025-05-16 09:30:00-04:00,126.59,128.34,1519.764627,192387.004105,195046.592202,2659.588097
8,PLTR,2025-05-16 10:30:00-04:00,2025-05-19 11:30:00-04:00,128.48,125.39,1518.108594,195046.592202,190355.636646,-4690.955557
9,PLTR,2025-05-20 10:30:00-04:00,2025-05-20 11:30:00-04:00,125.85,126.21,1512.559687,190355.636646,190900.158133,544.521487



 
Performance of Supported Vector Machine : 


Unnamed: 0,Stock,Entry Date,Exit Date,Entry Price,Exit Price,Shares,Entry Portfolio Holding,Exit Portfolio Holding,Profit/Loss
0,PLTR,2024-11-21 14:30:00-05:00,2025-01-02 13:30:00-05:00,61.58,74.32,1623.903865,100000.0,120688.535239,20688.535239
1,PLTR,2025-01-02 14:30:00-05:00,2025-01-07 11:30:00-05:00,74.85,70.93,1612.40528,120688.535239,114367.90654,-6320.628699
2,PLTR,2025-01-07 12:30:00-05:00,2025-01-07 13:30:00-05:00,71.23,70.97,1605.614299,114367.90654,113950.446822,-417.459718
3,PLTR,2025-01-14 10:30:00-05:00,2025-01-14 12:30:00-05:00,66.16,66.27,1722.346536,113950.446822,114139.904941,189.458119
4,PLTR,2025-01-14 14:30:00-05:00,2025-01-15 09:30:00-05:00,65.56,68.45,1740.99916,114139.904941,119171.392514,5031.487573
5,PLTR,2025-01-15 10:30:00-05:00,2025-02-20 10:30:00-05:00,67.67,100.18,1761.066832,119171.392514,176423.675218,57252.282705
6,PLTR,2025-02-20 15:30:00-05:00,2025-02-24 09:30:00-05:00,106.26,89.68,1660.301856,176423.675218,148895.870446,-27527.804772
7,PLTR,2025-03-03 10:30:00-05:00,2025-03-04 09:30:00-05:00,87.98,80.77,1692.38316,148895.870446,136693.78786,-12202.082586
8,PLTR,2025-03-04 13:30:00-05:00,2025-03-07 09:30:00-05:00,85.85,81.99,1592.239812,136693.78786,130547.742185,-6146.045674
9,PLTR,2025-03-07 10:30:00-05:00,2025-03-10 11:30:00-04:00,80.24,78.74,1626.96588,130547.742185,128107.293366,-2440.44882



 
Performance of Logistic Regression Classifier : 


Unnamed: 0,Stock,Entry Date,Exit Date,Entry Price,Exit Price,Shares,Entry Portfolio Holding,Exit Portfolio Holding,Profit/Loss
0,PLTR,2024-11-21 14:30:00-05:00,2025-01-07 11:30:00-05:00,61.58,70.93,1623.903865,100000.0,115183.501137,15183.501137
1,PLTR,2025-01-07 12:30:00-05:00,2025-01-08 10:30:00-05:00,71.23,67.27,1617.064455,115183.501137,108779.925895,-6403.575242
2,PLTR,2025-01-08 14:30:00-05:00,2025-01-10 10:30:00-05:00,68.31,65.32,1592.445116,108779.925895,104018.514997,-4761.410898
3,PLTR,2025-01-10 14:30:00-05:00,2025-01-13 10:30:00-05:00,67.15,64.54,1549.047133,104018.514997,99975.501979,-4043.013018
4,PLTR,2025-01-14 10:30:00-05:00,2025-02-20 10:30:00-05:00,66.16,100.18,1511.117019,99975.501979,151383.702966,51408.200987
5,PLTR,2025-02-20 13:30:00-05:00,2025-02-24 10:30:00-05:00,103.78,93.51,1458.698236,151383.702966,136402.872079,-14980.830887
6,PLTR,2025-02-27 10:30:00-05:00,2025-02-27 15:30:00-05:00,90.14,84.77,1513.233549,136402.872079,128276.807923,-8126.064156
7,PLTR,2025-03-03 09:30:00-05:00,2025-03-04 10:30:00-05:00,88.88,80.34,1443.258415,128276.807923,115951.381059,-12325.426864
8,PLTR,2025-03-04 12:30:00-05:00,2025-03-10 14:30:00-04:00,83.14,76.12,1394.652166,115951.381059,106160.922856,-9790.458203
9,PLTR,2025-03-11 09:30:00-04:00,2025-03-31 10:30:00-04:00,75.87,82.76,1399.247698,106160.922856,115801.739496,9640.81664



 
Performance of LSTM : 


Unnamed: 0,Stock,Entry Date,Exit Date,Entry Price,Exit Price,Shares,Entry Portfolio Holding,Exit Portfolio Holding,Profit/Loss
0,PLTR,2024-11-21 14:30:00-05:00,2024-12-09 10:30:00-05:00,61.58,72.66,1623.903865,100000.0,117992.854823,17992.854823
1,PLTR,2024-12-09 11:30:00-05:00,2024-12-27 13:30:00-05:00,72.54,78.79,1626.590224,117992.854823,128159.043721,10166.188898
2,PLTR,2025-01-02 10:30:00-05:00,2025-01-02 11:30:00-05:00,74.96,74.23,1709.699089,128159.043721,126910.963386,-1248.080335
3,PLTR,2025-01-03 10:30:00-05:00,2025-01-07 11:30:00-05:00,79.08,70.93,1604.842734,126910.963386,113831.495106,-13079.46828
4,PLTR,2025-01-08 10:30:00-05:00,2025-01-08 11:30:00-05:00,67.27,66.61,1692.158393,113831.495106,112714.670566,-1116.824539
5,PLTR,2025-01-10 10:30:00-05:00,2025-01-10 11:30:00-05:00,65.32,66.47,1725.576708,112714.670566,114699.08378,1984.413214
6,PLTR,2025-01-10 13:30:00-05:00,2025-01-13 13:30:00-05:00,67.43,63.9,1701.009696,114699.08378,108694.519555,-6004.564226
7,PLTR,2025-01-14 09:30:00-05:00,2025-01-27 13:30:00-05:00,67.22,74.37,1616.996721,108694.519555,120256.046106,11561.526552
8,PLTR,2025-01-27 14:30:00-05:00,2025-02-11 11:30:00-05:00,74.2,114.14,1620.70143,120256.046106,184986.861221,64730.815114
9,PLTR,2025-02-12 10:30:00-05:00,2025-02-12 11:30:00-05:00,115.29,115.76,1604.535183,184986.861221,185740.992757,754.131536


In [33]:
# Create a function to calculate sortino ratio
def sortino(df):
    # Convert the returns to numneric and drop 'NaN's for calulation
    daily_returns = pd.to_numeric(df['Portfolio_Returns']).dropna()


    # Calculate average daily return
    average_return = daily_returns.mean()

    # Calculate downside returns (only negative returns)
    downside_returns = daily_returns[daily_returns < 0]

    # Calculate the downside standard deviation
    downside_std = downside_returns.std()

    # Calculate the Sortino Ratio
    sortino_ratio = (average_return / downside_std) * np.sqrt(252)
    
    
    return sortino_ratio

In [45]:
# Create funciton to calculate the metrics in the dataframe format
def evaluation(df):
    
    # Create dataframe to record the perfoemance metrics
    metics = ['Annualized Returns',
              'Cumulative Returns',
              'Annualized Volatility',
              'Sharpe Ratio',
              'Sortino Ratio']

    eval_df = pd.DataFrame(columns = ['Backtest'], index = metics)
    
    eval_df.loc['Annualized Returns'] = df['Portfolio_Returns'].mean() * 252
    eval_df.loc['Cumulative Returns'] = df['Cumulative_Returns'][-1]
    eval_df.loc['Annualized Volatility'] = df['Portfolio_Returns'].std() * np.sqrt(252)
    eval_df.loc['Sharpe Ratio'] = (df['Portfolio_Returns'].mean() * 252) / (df['Portfolio_Returns'].std() * np.sqrt(252))
    eval_df.loc['Sortino Ratio'] = sortino(df)
    
    
    return eval_df

In [35]:
# Create dataframe to record the perfoemance metrics
metics = ['Annualized Returns',
          'Cumulative Returns',
          'Annualized Volatility',
          'Sharpe Ratio',
          'Sortino Ratio']

evaluation_df = pd.DataFrame(columns = ['Backtest'], index = metics)
evaluation_df

Unnamed: 0,Backtest
Annualized Returns,
Cumulative Returns,
Annualized Volatility,
Sharpe Ratio,
Sortino Ratio,


##### Gradient Boosting Classifier

In [46]:
# Generate the performance mertics of the Gradient Boosting Classifier model
gb_backtest = evaluation(gb_backtest_cumulative)
gb_backtest

Unnamed: 0,Backtest
Annualized Returns,0.20887
Cumulative Returns,0.977045
Annualized Volatility,0.299601
Sharpe Ratio,0.69716
Sortino Ratio,0.826354


##### Supported Vector Machine

In [49]:
# Generate the performance mertics of the Supported Vector Machine model
svm_backtest = evaluation(svm_backtest_cumulative)
svm_backtest

Unnamed: 0,Backtest
Annualized Returns,0.218922
Cumulative Returns,1.073609
Annualized Volatility,0.294688
Sharpe Ratio,0.742895
Sortino Ratio,0.886705


##### Logistic Regression Classifier

In [50]:
# Generate the performance mertics of the Logistic Regression Classifier model
lr_backtest = evaluation(lr_backtest_cumulative)
lr_backtest

Unnamed: 0,Backtest
Annualized Returns,0.192978
Cumulative Returns,0.844213
Annualized Volatility,0.302611
Sharpe Ratio,0.637711
Sortino Ratio,0.78507


##### LSTM

In [51]:
# Generate the performance mertics of the LSTM model
lstm_backtest = evaluation(lstm_backtest_cumulative)
lstm_backtest

Unnamed: 0,Backtest
Annualized Returns,0.134146
Cumulative Returns,0.488767
Annualized Volatility,0.277866
Sharpe Ratio,0.482773
Sortino Ratio,0.500987


In [55]:
# Concatinate the metrics of all models
consolidated_metrics_df = pd.concat([gb_backtest, svm_backtest, lr_backtest, lstm_backtest], axis = 1)

# Rename columns
consolidated_metrics_df.columns = ['Gradient Boosting', 'Support Vector Machine', 'Logistic Regression', 'LSTM']

# Display the final result
consolidated_metrics_df


Unnamed: 0,Gradient Boosting,Support Vector Machine,Logistic Regression,LSTM
Annualized Returns,0.20887,0.218922,0.192978,0.134146
Cumulative Returns,0.977045,1.073609,0.844213,0.488767
Annualized Volatility,0.299601,0.294688,0.302611,0.277866
Sharpe Ratio,0.69716,0.742895,0.637711,0.482773
Sortino Ratio,0.826354,0.886705,0.78507,0.500987
