In [1]:
# import libraries
import pandas as pd
import yfinance as yf
import hvplot.pandas
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Dropout

import warnings
warnings.filterwarnings('ignore')

In [2]:
# download the historical prices of pltr
pltr_df = yf.download(tickers = 'pltr', period='2Y', interval = '1h')
pltr_df

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume
Ticker,PLTR,PLTR,PLTR,PLTR,PLTR
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2023-07-03 13:30:00+00:00,15.319300,15.590000,15.220000,15.490000,12379571
2023-07-03 14:30:00+00:00,15.405000,15.420000,15.190000,15.310000,5627704
2023-07-03 15:30:00+00:00,15.470000,15.470000,15.340000,15.407100,3471561
2023-07-05 13:30:00+00:00,15.425000,15.570000,15.230000,15.436700,9888941
2023-07-05 14:30:00+00:00,15.345000,15.580000,15.330000,15.430000,6028562
...,...,...,...,...,...
2025-07-02 14:30:00+00:00,131.496307,132.520004,131.350006,131.850006,8051590
2025-07-02 15:30:00+00:00,132.240005,132.320007,130.960007,131.490005,5249743
2025-07-02 16:30:00+00:00,132.660004,133.369995,132.110001,132.220001,6213849
2025-07-02 17:30:00+00:00,132.509995,133.300293,132.179993,132.670700,5675254


In [3]:
# Remove multilevels of the column headers
pltr_df = pltr_df.droplevel(level = 1, axis = 1)

# Remove the name 'Price' from the headers
pltr_df.columns.name = None

pltr_df

Unnamed: 0_level_0,Close,High,Low,Open,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-07-03 13:30:00+00:00,15.319300,15.590000,15.220000,15.490000,12379571
2023-07-03 14:30:00+00:00,15.405000,15.420000,15.190000,15.310000,5627704
2023-07-03 15:30:00+00:00,15.470000,15.470000,15.340000,15.407100,3471561
2023-07-05 13:30:00+00:00,15.425000,15.570000,15.230000,15.436700,9888941
2023-07-05 14:30:00+00:00,15.345000,15.580000,15.330000,15.430000,6028562
...,...,...,...,...,...
2025-07-02 14:30:00+00:00,131.496307,132.520004,131.350006,131.850006,8051590
2025-07-02 15:30:00+00:00,132.240005,132.320007,130.960007,131.490005,5249743
2025-07-02 16:30:00+00:00,132.660004,133.369995,132.110001,132.220001,6213849
2025-07-02 17:30:00+00:00,132.509995,133.300293,132.179993,132.670700,5675254


In [4]:
# Round the values of the dataframe to 2 decimal points
signals_df = round(pltr_df[['Close', 'High', 'Low', 'Open', 'Volume']], 2)

# Change the index to the required timezone
signals_df.index = signals_df.index.tz_convert('US/Eastern')

# Display the data
signals_df

Unnamed: 0_level_0,Close,High,Low,Open,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-07-03 09:30:00-04:00,15.32,15.59,15.22,15.49,12379571
2023-07-03 10:30:00-04:00,15.40,15.42,15.19,15.31,5627704
2023-07-03 11:30:00-04:00,15.47,15.47,15.34,15.41,3471561
2023-07-05 09:30:00-04:00,15.43,15.57,15.23,15.44,9888941
2023-07-05 10:30:00-04:00,15.35,15.58,15.33,15.43,6028562
...,...,...,...,...,...
2025-07-02 10:30:00-04:00,131.50,132.52,131.35,131.85,8051590
2025-07-02 11:30:00-04:00,132.24,132.32,130.96,131.49,5249743
2025-07-02 12:30:00-04:00,132.66,133.37,132.11,132.22,6213849
2025-07-02 13:30:00-04:00,132.51,133.30,132.18,132.67,5675254


In [5]:
# visulise the closing price
signals_df['Close'].hvplot()

In [6]:
# Set the long and short windows
short_window = 20
long_window = 50

# Obtain the Exponential Moving Average of the Close prices with short and long windows
signals_df['EMA20_Close'] =  round(signals_df['Close'].ewm(span = short_window).mean(), 2)
signals_df['EMA50_Close'] =  round(signals_df['Close'].ewm(span = long_window).mean(), 2)

# Calculate Relative Strength Index (RSI)
delta = signals_df['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)

avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()

rs = avg_gain / avg_loss
signals_df['RSI'] = 100 - (100 / (1 + rs))

# Calculate the 14-day Average True Range (ATR) for volatility
high_low = signals_df['High'] - signals_df['Low']
high_close = (signals_df['High'] - signals_df['Close'].shift()).abs()
low_close = (signals_df['Low'] - signals_df['Close'].shift()).abs()
tr = pd.concat([high_low, high_close, low_close], axis=1)
signals_df['ATR'] = tr.max(axis=1).rolling(window=14).mean()

# Drop missing values
signals_df.dropna(inplace=True)

# view data
signals_df.head()

Unnamed: 0_level_0,Close,High,Low,Open,Volume,EMA20_Close,EMA50_Close,RSI,ATR
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-07-06 12:30:00-04:00,15.06,15.14,14.95,14.97,4736592,15.27,15.31,42.857143,0.274286
2023-07-06 13:30:00-04:00,15.13,15.16,15.01,15.05,5024314,15.25,15.29,44.973545,0.258571
2023-07-06 14:30:00-04:00,15.17,15.19,15.09,15.14,4433036,15.24,15.28,43.783784,0.249286
2023-07-06 15:30:00-04:00,15.13,15.18,15.1,15.17,4850850,15.23,15.27,40.659341,0.245714
2023-07-07 09:30:00-04:00,15.63,15.9,15.22,15.23,17897837,15.27,15.3,54.385965,0.276429


In [7]:
# Create signals
signals_df['Target'] = (signals_df['EMA20_Close'] > signals_df['EMA50_Close']).astype(int)

# Display data
signals_df.head()

Unnamed: 0_level_0,Close,High,Low,Open,Volume,EMA20_Close,EMA50_Close,RSI,ATR,Target
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-07-06 12:30:00-04:00,15.06,15.14,14.95,14.97,4736592,15.27,15.31,42.857143,0.274286,0
2023-07-06 13:30:00-04:00,15.13,15.16,15.01,15.05,5024314,15.25,15.29,44.973545,0.258571,0
2023-07-06 14:30:00-04:00,15.17,15.19,15.09,15.14,4433036,15.24,15.28,43.783784,0.249286,0
2023-07-06 15:30:00-04:00,15.13,15.18,15.1,15.17,4850850,15.23,15.27,40.659341,0.245714,0
2023-07-07 09:30:00-04:00,15.63,15.9,15.22,15.23,17897837,15.27,15.3,54.385965,0.276429,0


## Machine Learning

In [8]:
# Define the datasets X and y for machine learing
X = signals_df.drop(columns = ['Target'], axis = 1)
y = signals_df['Target']


# Split the data into test and train data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30, shuffle = False, random_state = 2)

In [9]:
# Scale the data using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Conduct PCA transformation
pca = PCA(n_components= 5)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

In [10]:
# Check if the training data label classes are balanced 
y_train.value_counts()

Target
1    1395
0    1041
Name: count, dtype: int64

### Gradienct Boosting Classifier

In [11]:
# initiate the model
gb_model = GradientBoostingClassifier(n_estimators = 300,
                                      max_depth = 5,
                                      learning_rate = .2,
                                      random_state = 1)

# fit the model with the training data
gb_model.fit(X_train_pca, y_train)


# Obtain the prediction with the model
y_pred_gb = gb_model.predict(X_test_pca)

# Generate the Classification report
print(classification_report(y_test, y_pred_gb))

              precision    recall  f1-score   support

           0       0.23      0.13      0.17       314
           1       0.69      0.81      0.74       730

    accuracy                           0.61      1044
   macro avg       0.46      0.47      0.46      1044
weighted avg       0.55      0.61      0.57      1044



### Supported Vector Machine

In [12]:
# Initialize and fit the data to SVC
svm_model = SVC(kernel = 'linear', gamma = 'auto', C=1.0)
svm_model.fit(X_train_pca, y_train)


# make predictions with the model
y_pred_svm = svm_model.predict(X_test_pca)

# Obtain the classification report of predictions against the test data
print(classification_report(y_test, y_pred_svm))

              precision    recall  f1-score   support

           0       0.92      0.32      0.47       314
           1       0.77      0.99      0.87       730

    accuracy                           0.79      1044
   macro avg       0.84      0.65      0.67      1044
weighted avg       0.82      0.79      0.75      1044



### Logistic Regression

In [13]:
# Initiate and fit the data to logistic regression model
lr_model = LogisticRegression(solver = 'liblinear')
lr_model.fit(X_train_pca, y_train)

# Make predictions with the model
y_pred_lr = lr_model.predict(X_test_pca)

# Obtain the classification report of the predictions against the test data
print(classification_report(y_test, y_pred_lr))

              precision    recall  f1-score   support

           0       0.95      0.18      0.30       314
           1       0.74      1.00      0.85       730

    accuracy                           0.75      1044
   macro avg       0.84      0.59      0.57      1044
weighted avg       0.80      0.75      0.68      1044



In [14]:
# Initialize and fit the data to RandomForestClassifier
model_rf = RandomForestClassifier(n_estimators = 500, max_depth = 5, random_state = 2)
model_rf.fit(X_train_pca, y_train)

# Obtain the predictions with the model
y_pred_rf = model_rf.predict(X_test_pca)

# Generate the classification report of the predictions against the test set
print(classification_report(y_test, y_pred_rf))

NameError: name 'RandomForestClassifier' is not defined

In [None]:
print(Counter(y_pred_rf))

In [None]:
# Initialize and fit the data to RandomForestClassifier
model_ada = AdaBoostClassifier(n_estimators = 250, learning_rate = 0.5,  random_state = 10)
model_ada.fit(X_train_pca, y_train)

# Obtain the predictions with the model
y_pred_ada = model_ada.predict(X_test_pca)

# Generate the classification report of the predictions against the test set
print(classification_report(y_test, y_pred_ada))

In [15]:
print(Counter(y_pred_ada))

NameError: name 'Counter' is not defined

### LSTM

In [16]:
# Reshape input to 3D [samples, time_steps, features] to appropriately fir the data into the LSTM model
X_train_reshaped = np.reshape(X_train_pca, (X_train_pca.shape[0], X_train_pca.shape[1], 1))

# Initialized LSTM Classifier
lstm = Sequential()

# Creating the first hidden layer
lstm.add(LSTM(units = 70, return_sequences = True, input_shape = (X_train_pca.shape[1],1)))
lstm.add(Dropout(0.2))

# Creating the second hidden layer
lstm.add(LSTM(units = 60, return_sequences = True, activation = 'relu'))
lstm.add(Dropout(0.1))

# Creating the third hidden layer
lstm.add(LSTM(units = 60, return_sequences = True, activation = 'relu'))
lstm.add(Dropout(0.1))

# Creating the fourth hidden layer
lstm.add(LSTM(units = 60, activation = 'relu'))
lstm.add(Dropout(0.1))

# Creating the output layer to the model 
lstm.add(Dense(units = 1, activation= 'sigmoid'))

In [17]:
# Obtain the LSTM model symmary
lstm.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 5, 70)             20160     
                                                                 
 dropout (Dropout)           (None, 5, 70)             0         
                                                                 
 lstm_1 (LSTM)               (None, 5, 60)             31440     
                                                                 
 dropout_1 (Dropout)         (None, 5, 60)             0         
                                                                 
 lstm_2 (LSTM)               (None, 5, 60)             29040     
                                                                 
 dropout_2 (Dropout)         (None, 5, 60)             0         
                                                                 
 lstm_3 (LSTM)               (None, 60)                2

In [18]:
# Compile the model
lstm.compile(optimizer = 'adam', loss = 'mean_squared_error', metrics = ['accuracy'])

In [19]:
# Fit the model
lstm.fit(X_train_reshaped, y_train, epochs=50, batch_size=32)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x2167e5df820>

In [20]:
# Make predictions
y_pred = lstm.predict(X_test_pca)

# Obtain the labels in the required format
y_pred_lstm = np.where(y_pred > y_pred.mean(), 1, 0).ravel()




In [21]:
# Generate the classification report of the predictions against the test set
print(classification_report(y_test, y_pred_lstm))

              precision    recall  f1-score   support

           0       0.39      0.34      0.37       314
           1       0.73      0.77      0.75       730

    accuracy                           0.64      1044
   macro avg       0.56      0.56      0.56      1044
weighted avg       0.63      0.64      0.64      1044



## Backtesting

In [22]:
# Create function to accept the validation input data and model predictions
# Generate a data frome with the function with the corresponding returns based on the model predictions entered
def backtest_df(df, pred):
    initial_capital = 100000
    capital = initial_capital
    position = 0

    portfolio = []


    for i in range(len(df)):
        if pred[i] == 1:

            if position == 0:
                position = capital / df.iloc[i]['Close']
                capital -= position * df.iloc[i]['Close']

        elif pred[i] == 0 and position !=0:
            capital = capital + df.iloc[i]['Close'] * position
            position = 0

        portfolio_value = capital + position * df.iloc[i]['Close']
        portfolio.append(portfolio_value)

    portfolio_df = pd.DataFrame(portfolio , columns = ['Portfolio_Total'], index = df.index)

    portfolio_df['Model_Predictions'] = pred
    
    return portfolio_df[['Model_Predictions', 'Portfolio_Total']]
    

In [23]:
# Fucntion to generate dataframe with the validation input and cumulative returns and its plot based on the model predictions 
def cumualtive_returns(portfolio_df, X_test):
    
    # Merge
    merged_df = pd.concat([X_test, portfolio_df], axis = 1)
    
    # Obtain the Daily returns of the portfolio and clean the data
    merged_df['Portfolio_Returns'] = merged_df['Portfolio_Total'].pct_change()
    merged_df.dropna(inplace = True)
    
    # Calculate cumualtive returns at every hour
    merged_df['Cumulative_Returns'] = (1 + merged_df['Portfolio_Returns']).cumprod() - 1
    
    return merged_df
    

### Gradient Boosting Classifier

In [24]:
# Conduct backtest with the function created
gb_backtest = backtest_df(X_test, y_pred_gb)

# Obtain the cumulateive returns the backtesting algorithm generates
gb_backtest_cumulative = cumualtive_returns(gb_backtest, X_test)
gb_backtest_cumulative

Unnamed: 0_level_0,Close,High,Low,Open,Volume,EMA20_Close,EMA50_Close,RSI,ATR,Model_Predictions,Portfolio_Total,Portfolio_Returns,Cumulative_Returns
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2024-11-21 14:30:00-05:00,61.58,61.74,61.30,61.59,4064551,61.75,60.79,45.707376,1.046429,1,100000.000000,0.000000,0.000000
2024-11-21 15:30:00-05:00,61.35,61.72,61.24,61.57,3388214,61.71,60.81,39.641944,1.027143,0,99626.502111,-0.003735,-0.003735
2024-11-22 09:30:00-05:00,62.21,63.04,61.37,61.60,15777196,61.76,60.87,63.665595,0.925000,1,99626.502111,0.000000,-0.003735
2024-11-22 10:30:00-05:00,63.34,63.47,62.14,62.21,11239611,61.91,60.97,68.870523,0.956429,1,101436.146017,0.018164,0.014361
2024-11-22 11:30:00-05:00,64.04,64.38,63.00,63.35,12237053,62.11,61.09,68.347339,0.975000,1,102557.164366,0.011051,0.025572
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-07-02 10:30:00-04:00,131.50,132.52,131.35,131.85,8051590,133.86,136.75,30.434783,1.972857,0,208290.396954,-0.002655,1.082904
2025-07-02 11:30:00-04:00,132.24,132.32,130.96,131.49,5249743,133.70,136.57,28.648649,1.957143,0,208290.396954,0.000000,1.082904
2025-07-02 12:30:00-04:00,132.66,133.37,132.11,132.22,6213849,133.60,136.42,32.770745,1.904286,0,208290.396954,0.000000,1.082904
2025-07-02 13:30:00-04:00,132.51,133.30,132.18,132.67,5675254,133.50,136.26,32.002801,1.927143,0,208290.396954,0.000000,1.082904


In [25]:
# Plot the cumulative returns of the model from the backtesting
gb_backtest_cumulative['Cumulative_Returns'].hvplot()

### Supported Vector Machine

In [26]:
# Conduct backtest with the function created
svm_backtest = backtest_df(X_test, y_pred_svm)

# Obtain the cumulateive returns the backtesting algorithm generates
svm_backtest_cumulative = cumualtive_returns(svm_backtest, X_test)
svm_backtest_cumulative

Unnamed: 0_level_0,Close,High,Low,Open,Volume,EMA20_Close,EMA50_Close,RSI,ATR,Model_Predictions,Portfolio_Total,Portfolio_Returns,Cumulative_Returns
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2024-11-21 14:30:00-05:00,61.58,61.74,61.30,61.59,4064551,61.75,60.79,45.707376,1.046429,1,100000.000000,0.000000,0.000000
2024-11-21 15:30:00-05:00,61.35,61.72,61.24,61.57,3388214,61.71,60.81,39.641944,1.027143,1,99626.502111,-0.003735,-0.003735
2024-11-22 09:30:00-05:00,62.21,63.04,61.37,61.60,15777196,61.76,60.87,63.665595,0.925000,1,101023.059435,0.014018,0.010231
2024-11-22 10:30:00-05:00,63.34,63.47,62.14,62.21,11239611,61.91,60.97,68.870523,0.956429,1,102858.070802,0.018164,0.028581
2024-11-22 11:30:00-05:00,64.04,64.38,63.00,63.35,12237053,62.11,61.09,68.347339,0.975000,1,103994.803508,0.011051,0.039948
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-07-02 10:30:00-04:00,131.50,132.52,131.35,131.85,8051590,133.86,136.75,30.434783,1.972857,1,229387.908754,-0.002655,1.293879
2025-07-02 11:30:00-04:00,132.24,132.32,130.96,131.49,5249743,133.70,136.57,28.648649,1.957143,1,230678.760864,0.005627,1.306788
2025-07-02 12:30:00-04:00,132.66,133.37,132.11,132.22,6213849,133.60,136.42,32.770745,1.904286,1,231411.406656,0.003176,1.314114
2025-07-02 13:30:00-04:00,132.51,133.30,132.18,132.67,5675254,133.50,136.26,32.002801,1.927143,1,231149.747445,-0.001131,1.311497


In [27]:
# Plot the cumulative returns of the model from the backtesting
svm_backtest_cumulative['Cumulative_Returns'].hvplot()

### Logistic Regression

In [28]:
# Conduct backtest with the function created
lr_backtest = backtest_df(X_test, y_pred_lr)

# Obtain the cumulateive returns the backtesting algorithm generates
lr_backtest_cumulative = cumualtive_returns(lr_backtest, X_test)
lr_backtest_cumulative

Unnamed: 0_level_0,Close,High,Low,Open,Volume,EMA20_Close,EMA50_Close,RSI,ATR,Model_Predictions,Portfolio_Total,Portfolio_Returns,Cumulative_Returns
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2024-11-21 14:30:00-05:00,61.58,61.74,61.30,61.59,4064551,61.75,60.79,45.707376,1.046429,1,100000.000000,0.000000,0.000000
2024-11-21 15:30:00-05:00,61.35,61.72,61.24,61.57,3388214,61.71,60.81,39.641944,1.027143,1,99626.502111,-0.003735,-0.003735
2024-11-22 09:30:00-05:00,62.21,63.04,61.37,61.60,15777196,61.76,60.87,63.665595,0.925000,1,101023.059435,0.014018,0.010231
2024-11-22 10:30:00-05:00,63.34,63.47,62.14,62.21,11239611,61.91,60.97,68.870523,0.956429,1,102858.070802,0.018164,0.028581
2024-11-22 11:30:00-05:00,64.04,64.38,63.00,63.35,12237053,62.11,61.09,68.347339,0.975000,1,103994.803508,0.011051,0.039948
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-07-02 10:30:00-04:00,131.50,132.52,131.35,131.85,8051590,133.86,136.75,30.434783,1.972857,1,188588.149109,-0.002655,0.885881
2025-07-02 11:30:00-04:00,132.24,132.32,130.96,131.49,5249743,133.70,136.57,28.648649,1.957143,1,189649.405613,0.005627,0.896494
2025-07-02 12:30:00-04:00,132.66,133.37,132.11,132.22,6213849,133.60,136.42,32.770745,1.904286,1,190251.740386,0.003176,0.902517
2025-07-02 13:30:00-04:00,132.51,133.30,132.18,132.67,5675254,133.50,136.26,32.002801,1.927143,1,190036.620824,-0.001131,0.900366


In [29]:
# Plot the cumulative returns of the model from the backtesting
lr_backtest_cumulative['Cumulative_Returns'].hvplot()

### LSTM

In [30]:
# Conduct backtest with the function created
lstm_backtest = backtest_df(X_test, y_pred_lstm)

# Obtain the cumulateive returns the backtesting algorithm generates
lstm_backtest_cumulative = cumualtive_returns(lstm_backtest, X_test)
lstm_backtest_cumulative

Unnamed: 0_level_0,Close,High,Low,Open,Volume,EMA20_Close,EMA50_Close,RSI,ATR,Model_Predictions,Portfolio_Total,Portfolio_Returns,Cumulative_Returns
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2024-11-21 14:30:00-05:00,61.58,61.74,61.30,61.59,4064551,61.75,60.79,45.707376,1.046429,1,100000.000000,0.000000,0.000000
2024-11-21 15:30:00-05:00,61.35,61.72,61.24,61.57,3388214,61.71,60.81,39.641944,1.027143,1,99626.502111,-0.003735,-0.003735
2024-11-22 09:30:00-05:00,62.21,63.04,61.37,61.60,15777196,61.76,60.87,63.665595,0.925000,1,101023.059435,0.014018,0.010231
2024-11-22 10:30:00-05:00,63.34,63.47,62.14,62.21,11239611,61.91,60.97,68.870523,0.956429,1,102858.070802,0.018164,0.028581
2024-11-22 11:30:00-05:00,64.04,64.38,63.00,63.35,12237053,62.11,61.09,68.347339,0.975000,1,103994.803508,0.011051,0.039948
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-07-02 10:30:00-04:00,131.50,132.52,131.35,131.85,8051590,133.86,136.75,30.434783,1.972857,0,198659.723157,0.000000,0.986597
2025-07-02 11:30:00-04:00,132.24,132.32,130.96,131.49,5249743,133.70,136.57,28.648649,1.957143,0,198659.723157,0.000000,0.986597
2025-07-02 12:30:00-04:00,132.66,133.37,132.11,132.22,6213849,133.60,136.42,32.770745,1.904286,0,198659.723157,0.000000,0.986597
2025-07-02 13:30:00-04:00,132.51,133.30,132.18,132.67,5675254,133.50,136.26,32.002801,1.927143,0,198659.723157,0.000000,0.986597


In [31]:
# Plot the cumulative returns of the model from the backtesting
lstm_backtest_cumulative['Cumulative_Returns'].hvplot()

In [32]:
# function to plot entry and exit
def plot_entry_exit(df, model:str):
    # Obtain exit and entry points (1 to buy, -1 to sell and 0 to hold)
    df['Entry/Exit'] = df['Model_Predictions'].diff()
    df['Entry/Exit'][0] = df['Model_Predictions'][0]
    df['Entry/Exit'] = df['Entry/Exit'].astype(int)
    df[['Model_Predictions', 'Entry/Exit']]
    

    # Plot the points to sell on the graph
    exit = df[df['Entry/Exit'] == -1]['Portfolio_Total'].hvplot.scatter(color = 'red',
                                           marker = 'v',
                                           legend = False,
                                           width = 1000,
                                           height = 500,
                                           size = 200, ylabel = 'Price in $')

    # Plot the points to buy on the graph
    entry = df[df['Entry/Exit'] == 1]['Portfolio_Total'].hvplot.scatter(color = 'green',
                                           marker = '^',
                                           legend = False,
                                           width = 1000,
                                           height = 500,
                                           size = 200, ylabel = 'Price in $')

    


    # plot cumulative returns through the model
    portfolio_price_chart = df['Portfolio_Total'].hvplot(color = 'lightgray')

    entry_exit_chart = portfolio_price_chart * entry * exit 

    entry_exit_chart.opts(title = f'Entry Exit Plot on the Portfolio Cumulative Returns with the {model} model',
                         height = 500,
                         width = 1000)

    return entry_exit_chart

#### Plot the entry and exit points the model generated on their respective cumulative returns

In [33]:
# Make the list of dataframes with the cumulative returns of each model
model_pred = [gb_backtest_cumulative, svm_backtest_cumulative, lr_backtest_cumulative, lstm_backtest_cumulative]

# List of names of the respective models
model_name = ['Gradient Boosting Classifier', 
             'Supported Vector Machine',
             'Logistic Regression Classifier',
             'LSTM']

# Loop throught the lists to pass them as arguments in to the function to generate plots
for i in range(len(model_name)):
    display(plot_entry_exit(model_pred[i], model_name[i]))


## Calculating Metrics - Post trade performance analysis of the strategy and models

In [34]:
# Create a function to calculate sortino ratio
def sortino(df):
    # Convert the returns to numneric and drop 'NaN's for calulation
    daily_returns = pd.to_numeric(df['Portfolio_Returns']).dropna()


    # Calculate average daily return
    average_return = daily_returns.mean()

    # Calculate downside returns (only negative returns)
    downside_returns = daily_returns[daily_returns < 0]

    # Calculate the downside standard deviation
    downside_std = downside_returns.std()

    # Calculate the Sortino Ratio
    sortino_ratio = (average_return / downside_std) * np.sqrt(252)
    
    
    return sortino_ratio

In [35]:
# Create funciton to calculate the metrics in the dataframe format
def evaluation(eval_df, df):
    eval_df.loc['Annualized Returns'] = df['Portfolio_Returns'].mean() * 252
    eval_df.loc['Cumulative Returns'] = df['Cumulative_Returns'][-1]
    eval_df.loc['Annualized Volatility'] = df['Portfolio_Returns'].std() * np.sqrt(252)
    eval_df.loc['Sharpe Ratio'] = (df['Portfolio_Returns'].mean() * 252) / (df['Portfolio_Returns'].std() * np.sqrt(252))
    eval_df.loc['Sortino Ratio'] = sortino(df)
    
    
    return eval_df
    

In [36]:
metics = ['Annualized Returns',
          'Cumulative Returns',
          'Annualized Volatility',
          'Sharpe Ratio',
          'Sortino Ratio']

evaluation_df = pd.DataFrame(columns = ['Backtest'], index = metics)
evaluation_df

Unnamed: 0,Backtest
Annualized Returns,
Cumulative Returns,
Annualized Volatility,
Sharpe Ratio,
Sortino Ratio,


### Gradient Boosting Classifier

In [46]:
# Generate the performance mertics of the Gradient Boosting Classifier model
gb_backtest = evaluation(evaluation_df, gb_backtest_cumulative)
gb_backtest

Unnamed: 0,Backtest
Annualized Returns,0.220992
Cumulative Returns,1.082904
Annualized Volatility,0.297446
Sharpe Ratio,0.742966
Sortino Ratio,0.855865


### Supported Vector Machine

In [45]:
# Generate the performance mertics of the Supported Vector Machine model
svm_backtest = evaluation(evaluation_df, svm_backtest_cumulative)
svm_backtest

Unnamed: 0,Backtest
Annualized Returns,0.243962
Cumulative Returns,1.314637
Annualized Volatility,0.289262
Sharpe Ratio,0.843395
Sortino Ratio,1.043924


### Logistic Regression Classifier

In [44]:
# Generate the performance mertics of the Logistic Regression Classifier model
lr_backtest = evaluation(evaluation_df, lr_backtest_cumulative)
lr_backtest

Unnamed: 0,Backtest
Annualized Returns,0.200745
Cumulative Returns,0.902948
Annualized Volatility,0.302768
Sharpe Ratio,0.663032
Sortino Ratio,0.814278


#### LSTM

In [43]:
# Generate the performance mertics of the LSTM model
lstm_backtest = evaluation(evaluation_df, lstm_backtest_cumulative)
lstm_backtest

Unnamed: 0,Backtest
Annualized Returns,0.20527
Cumulative Returns,0.986597
Annualized Volatility,0.282763
Sharpe Ratio,0.725942
Sortino Ratio,0.790501


In [41]:
# Create a function to obtain the records of the trades with the respective models
def performance(df, model:str):
    # Create the dataframe specifying the features of the trades along with its profit and loss
    performance_data = []
    
    # Initialize entry_date as None
    entry_date = None

    for index, row in df.iterrows():
        if row['Entry/Exit'] == 1:
            entry_date = index
            entry_share_price = row['Close']
            share_size = abs(row['Portfolio_Total'] / row['Close'])
            entry_portfolio_holdings = row['Close'] * (abs(row['Portfolio_Total'] / row['Close']))

        elif row['Entry/Exit'] == -1 and entry_date is not None:
            exit_date = index
            exit_share_price = row['Close']
            share_size = abs(row['Portfolio_Total'] / row['Close'])
            exit_portfolio_holdings = row['Close'] * (abs(row['Portfolio_Total'] / row['Close']))
            profit_loss = exit_portfolio_holdings - entry_portfolio_holdings

            performance_data.append({
                'Stock': 'PLTR',
                'Entry Date': entry_date,
                'Exit Date': exit_date,
                'Entry Price': entry_share_price,
                'Exit Price': exit_share_price,
                'Shares': share_size,
                'Entry Portfolio Holding': entry_portfolio_holdings,
                'Exit Portfolio Holding': exit_portfolio_holdings,
                'Profit/Loss': profit_loss
            })



    performance_data_df = pd.DataFrame(performance_data)
    print(f'\n \nPerformance of {model} : ')
    
    return performance_data_df

In [42]:
for i in range(len(model_name)):
    display(performance(model_pred[i], model_name[i]))


 
Performance of Gradient Boosting Classifier : 


Unnamed: 0,Stock,Entry Date,Exit Date,Entry Price,Exit Price,Shares,Entry Portfolio Holding,Exit Portfolio Holding,Profit/Loss
0,PLTR,2024-11-21 14:30:00-05:00,2024-11-21 15:30:00-05:00,61.58,61.35,1623.903865,100000.0,99626.502111,-373.497889
1,PLTR,2024-11-22 09:30:00-05:00,2024-12-27 14:30:00-05:00,62.21,79.28,1601.454784,99626.502111,126963.335274,27336.833162
2,PLTR,2024-12-30 09:30:00-05:00,2024-12-30 10:30:00-05:00,76.9,78.16,1651.018664,126963.335274,129043.61879,2080.283517
3,PLTR,2024-12-30 11:30:00-05:00,2024-12-30 12:30:00-05:00,78.62,78.47,1641.358672,129043.61879,128797.41499,-246.203801
4,PLTR,2025-01-02 09:30:00-05:00,2025-01-02 13:30:00-05:00,74.96,74.32,1718.215248,128797.41499,127697.757231,-1099.657759
5,PLTR,2025-01-03 09:30:00-05:00,2025-01-13 12:30:00-05:00,77.85,64.6,1640.305167,127697.757231,105963.713772,-21734.043459
6,PLTR,2025-01-13 15:30:00-05:00,2025-01-30 12:30:00-05:00,64.97,80.43,1630.963734,105963.713772,131178.413093,25214.699321
7,PLTR,2025-01-30 13:30:00-05:00,2025-03-26 11:30:00-04:00,81.04,92.74,1618.68723,131178.413093,150117.05368,18938.640587
8,PLTR,2025-03-26 14:30:00-04:00,2025-03-26 15:30:00-04:00,91.21,92.25,1645.839861,150117.05368,151828.727135,1711.673455
9,PLTR,2025-03-27 09:30:00-04:00,2025-03-27 12:30:00-04:00,92.36,91.89,1643.879679,151828.727135,151056.103686,-772.623449



 
Performance of Supported Vector Machine : 


Unnamed: 0,Stock,Entry Date,Exit Date,Entry Price,Exit Price,Shares,Entry Portfolio Holding,Exit Portfolio Holding,Profit/Loss
0,PLTR,2024-11-21 14:30:00-05:00,2025-01-02 12:30:00-05:00,61.58,73.82,1623.903865,100000.0,119876.583306,19876.583306
1,PLTR,2025-01-02 13:30:00-05:00,2025-01-07 10:30:00-05:00,74.32,72.32,1612.978785,119876.583306,116650.625736,-3225.95757
2,PLTR,2025-01-07 11:30:00-05:00,2025-01-07 12:30:00-05:00,70.93,71.23,1644.587984,116650.625736,117144.002131,493.376395
3,PLTR,2025-01-14 09:30:00-05:00,2025-01-14 11:30:00-05:00,67.22,66.07,1742.695658,117144.002131,115139.902125,-2004.100007
4,PLTR,2025-01-14 13:30:00-05:00,2025-01-14 15:30:00-05:00,66.53,65.91,1730.646357,115139.902125,114066.901383,-1073.000741
5,PLTR,2025-01-15 09:30:00-05:00,2025-02-20 09:30:00-05:00,68.45,100.69,1666.426609,114066.901383,167792.495256,53725.593873
6,PLTR,2025-02-20 14:30:00-05:00,2025-02-21 15:30:00-05:00,107.08,101.33,1566.982586,167792.495256,158782.34539,-9010.149867
7,PLTR,2025-03-03 09:30:00-05:00,2025-03-03 15:30:00-05:00,88.88,83.51,1786.480034,158782.34539,149188.947609,-9593.397781
8,PLTR,2025-03-04 12:30:00-05:00,2025-03-06 15:30:00-05:00,83.14,80.4,1794.43045,149188.947609,144272.208176,-4916.739433
9,PLTR,2025-03-07 09:30:00-05:00,2025-03-10 10:30:00-04:00,81.99,79.46,1759.631762,144272.208176,139820.339818,-4451.868358



 
Performance of Logistic Regression Classifier : 


Unnamed: 0,Stock,Entry Date,Exit Date,Entry Price,Exit Price,Shares,Entry Portfolio Holding,Exit Portfolio Holding,Profit/Loss
0,PLTR,2024-11-21 14:30:00-05:00,2025-01-07 10:30:00-05:00,61.58,72.32,1623.903865,100000.0,117440.727509,17440.727509
1,PLTR,2025-01-07 11:30:00-05:00,2025-01-08 09:30:00-05:00,70.93,67.82,1655.727161,117440.727509,112291.416039,-5149.31147
2,PLTR,2025-01-08 13:30:00-05:00,2025-01-10 09:30:00-05:00,68.96,66.17,1628.355801,112291.416039,107748.303354,-4543.112685
3,PLTR,2025-01-10 13:30:00-05:00,2025-01-13 09:30:00-05:00,67.43,64.08,1597.928272,107748.303354,102395.243644,-5353.05971
4,PLTR,2025-01-14 09:30:00-05:00,2025-02-20 09:30:00-05:00,67.22,100.69,1523.285386,102395.243644,153379.605513,50984.361868
5,PLTR,2025-02-20 12:30:00-05:00,2025-02-24 09:30:00-05:00,102.89,89.68,1490.714409,153379.605513,133687.268173,-19692.337339
6,PLTR,2025-02-27 09:30:00-05:00,2025-02-27 14:30:00-05:00,89.7,84.24,1490.382031,133687.268173,125549.782285,-8137.485889
7,PLTR,2025-02-28 15:30:00-05:00,2025-03-04 09:30:00-05:00,84.92,80.77,1478.447742,125549.782285,119414.224154,-6135.558131
8,PLTR,2025-03-04 11:30:00-05:00,2025-03-10 13:30:00-04:00,82.98,75.41,1439.072357,119414.224154,108520.446414,-10893.77774
9,PLTR,2025-03-10 15:30:00-04:00,2025-03-31 09:30:00-04:00,76.38,81.67,1420.796628,108520.446414,116036.460574,7516.01416



 
Performance of LSTM : 


Unnamed: 0,Stock,Entry Date,Exit Date,Entry Price,Exit Price,Shares,Entry Portfolio Holding,Exit Portfolio Holding,Profit/Loss
0,PLTR,2024-11-21 14:30:00-05:00,2024-12-27 13:30:00-05:00,61.58,78.79,1623.903865,100000.0,127947.385515,27947.385515
1,PLTR,2024-12-31 09:30:00-05:00,2024-12-31 10:30:00-05:00,77.36,76.81,1653.921736,127947.385515,127037.72856,-909.656955
2,PLTR,2025-01-02 09:30:00-05:00,2025-01-02 10:30:00-05:00,74.96,74.96,1694.740242,127037.72856,127037.72856,0.0
3,PLTR,2025-01-03 09:30:00-05:00,2025-01-07 10:30:00-05:00,77.85,72.32,1631.826956,127037.72856,118013.725491,-9024.003069
4,PLTR,2025-01-08 09:30:00-05:00,2025-01-08 10:30:00-05:00,67.82,67.27,1740.102116,118013.725491,117056.669327,-957.056164
5,PLTR,2025-01-10 09:30:00-05:00,2025-01-10 10:30:00-05:00,66.17,65.32,1769.029308,117056.669327,115552.994415,-1503.674912
6,PLTR,2025-01-10 12:30:00-05:00,2025-01-13 12:30:00-05:00,67.43,64.6,1713.673356,115552.994415,110703.298817,-4849.695598
7,PLTR,2025-01-13 15:30:00-05:00,2025-01-27 12:30:00-05:00,64.97,72.94,1703.914096,110703.298817,124283.494162,13580.195345
8,PLTR,2025-01-27 13:30:00-05:00,2025-02-11 10:30:00-05:00,74.37,113.62,1671.150923,124283.494162,189876.167899,65592.673737
9,PLTR,2025-02-11 14:30:00-05:00,2025-02-12 15:30:00-05:00,113.04,117.42,1679.725477,189876.167899,197233.365488,7357.197588


In [50]:
# Consolidate the performance metrics of differnt models
consolidated_metrics = pd.concat([gb_backtest, svm_backtest, lr_backtest, lstm_backtest], axis = 1)

# Rename the columns for identification of the repective models to its numbers
columns = ['Grdaient Boosting', 'Supported Vector Machine', 'Logistic Regression', 'LSTM']
consolidated_metrics. columns = columns

# Display the data
consolidated_metrics

Unnamed: 0,Grdaient Boosting,Supported Vector Machine,Logistic Regression,LSTM
Annualized Returns,0.220992,0.220992,0.220992,0.220992
Cumulative Returns,1.082904,1.082904,1.082904,1.082904
Annualized Volatility,0.297446,0.297446,0.297446,0.297446
Sharpe Ratio,0.742966,0.742966,0.742966,0.742966
Sortino Ratio,0.855865,0.855865,0.855865,0.855865
