In [1]:
# import libraries
import pandas as pd
import yfinance as yf
import hvplot.pandas
import numpy as np
from pandas.tseries.offsets import DateOffset
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report

import warnings
warnings.filterwarnings('ignore')

In [2]:
# download the historical prices of pltr
pltr_df = yf.download(tickers = 'pltr', period='2Y', interval = '1h')
pltr_df

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume
Ticker,PLTR,PLTR,PLTR,PLTR,PLTR
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2023-06-12 13:30:00+00:00,15.820000,16.030001,15.470800,15.500000,39342018
2023-06-12 14:30:00+00:00,15.375000,15.900000,15.330000,15.820000,15215703
2023-06-12 15:30:00+00:00,15.440000,15.580000,15.340000,15.380000,7366453
2023-06-12 16:30:00+00:00,15.520100,15.600000,15.440000,15.445000,4768534
2023-06-12 17:30:00+00:00,15.572300,15.680000,15.520000,15.530000,5788694
...,...,...,...,...,...
2025-06-11 15:30:00+00:00,137.340500,137.589996,136.059998,137.414703,10186653
2025-06-11 16:30:00+00:00,137.809998,139.750000,137.110001,137.345001,8773641
2025-06-11 17:30:00+00:00,136.264999,138.580002,135.603607,137.820007,9207376
2025-06-11 18:30:00+00:00,136.804993,136.990005,135.779999,136.220001,7514670


In [3]:
# Remove multilevels of the column headers
pltr_df = pltr_df.droplevel(level = 1, axis = 1)

# Remove the name 'Price' from the headers
pltr_df.columns.name = None

pltr_df

Unnamed: 0_level_0,Close,High,Low,Open,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-06-12 13:30:00+00:00,15.820000,16.030001,15.470800,15.500000,39342018
2023-06-12 14:30:00+00:00,15.375000,15.900000,15.330000,15.820000,15215703
2023-06-12 15:30:00+00:00,15.440000,15.580000,15.340000,15.380000,7366453
2023-06-12 16:30:00+00:00,15.520100,15.600000,15.440000,15.445000,4768534
2023-06-12 17:30:00+00:00,15.572300,15.680000,15.520000,15.530000,5788694
...,...,...,...,...,...
2025-06-11 15:30:00+00:00,137.340500,137.589996,136.059998,137.414703,10186653
2025-06-11 16:30:00+00:00,137.809998,139.750000,137.110001,137.345001,8773641
2025-06-11 17:30:00+00:00,136.264999,138.580002,135.603607,137.820007,9207376
2025-06-11 18:30:00+00:00,136.804993,136.990005,135.779999,136.220001,7514670


In [4]:
# Round the values of the dataframe to 2 decimal points
pltr_df = round(pltr_df[['Close', 'High', 'Low', 'Open', 'Volume']], 2)

# Display the data
pltr_df

Unnamed: 0_level_0,Close,High,Low,Open,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-06-12 13:30:00+00:00,15.82,16.03,15.47,15.50,39342018
2023-06-12 14:30:00+00:00,15.38,15.90,15.33,15.82,15215703
2023-06-12 15:30:00+00:00,15.44,15.58,15.34,15.38,7366453
2023-06-12 16:30:00+00:00,15.52,15.60,15.44,15.44,4768534
2023-06-12 17:30:00+00:00,15.57,15.68,15.52,15.53,5788694
...,...,...,...,...,...
2025-06-11 15:30:00+00:00,137.34,137.59,136.06,137.41,10186653
2025-06-11 16:30:00+00:00,137.81,139.75,137.11,137.35,8773641
2025-06-11 17:30:00+00:00,136.26,138.58,135.60,137.82,9207376
2025-06-11 18:30:00+00:00,136.80,136.99,135.78,136.22,7514670


In [5]:
# Copy pltr_df to a new dataframe for further analysis
signals_df = pltr_df[['Close']]

# visulise the data
signals_df.hvplot()

In [6]:
# Set the long and short windows
short_window = 20
long_window = 50

# Obtain the Exponential Moving Average of the Close prices with short and long windows
signals_df['EMA20_Close'] =  round(signals_df['Close'].ewm(span = short_window).mean(), 2)
signals_df['EMA50_Close'] =  round(signals_df['Close'].ewm(span = long_window).mean(), 2)


# view data
signals_df.head()

Unnamed: 0_level_0,Close,EMA20_Close,EMA50_Close
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-06-12 13:30:00+00:00,15.82,15.82,15.82
2023-06-12 14:30:00+00:00,15.38,15.59,15.6
2023-06-12 15:30:00+00:00,15.44,15.53,15.54
2023-06-12 16:30:00+00:00,15.52,15.53,15.54
2023-06-12 17:30:00+00:00,15.57,15.54,15.54


In [7]:
# Obtain the points of buy and sell using the 20 and 50 day exponential moving averages
buy_points = (signals_df['EMA20_Close'] > signals_df['EMA50_Close']) & (signals_df['EMA20_Close'].shift(1) <= signals_df['EMA50_Close'].shift(1))
sell_points = (signals_df['EMA20_Close'] < signals_df['EMA50_Close']) & (signals_df['EMA20_Close'].shift(1) >= signals_df['EMA50_Close'].shift(1))

# Combine the buy and sell points to obtain the all the crossover points of the EMAs
crossover_points = buy_points | sell_points

# Obtain the first crossover point
first_buy_point = crossover_points.idxmax()

# Mark the crossover points of the EMAs with 1s 
signals_df['Signal'] = np.where((signals_df['EMA20_Close'] > signals_df['EMA50_Close']) & 
                                         (signals_df.index > first_buy_point), 1, 0)

# View data
signals_df.head()

Unnamed: 0_level_0,Close,EMA20_Close,EMA50_Close,Signal
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-06-12 13:30:00+00:00,15.82,15.82,15.82,0
2023-06-12 14:30:00+00:00,15.38,15.59,15.6,0
2023-06-12 15:30:00+00:00,15.44,15.53,15.54,0
2023-06-12 16:30:00+00:00,15.52,15.53,15.54,0
2023-06-12 17:30:00+00:00,15.57,15.54,15.54,0


In [8]:
# Label the exit and entry points with Buy as 1, Sell as -1 and Hold as 0
signals_df['Entry/Exit'] = signals_df['Signal'].diff()
signals_df.dropna(inplace = True)

signals_df.head()

Unnamed: 0_level_0,Close,EMA20_Close,EMA50_Close,Signal,Entry/Exit
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-06-12 14:30:00+00:00,15.38,15.59,15.6,0,0.0
2023-06-12 15:30:00+00:00,15.44,15.53,15.54,0,0.0
2023-06-12 16:30:00+00:00,15.52,15.53,15.54,0,0.0
2023-06-12 17:30:00+00:00,15.57,15.54,15.54,0,0.0
2023-06-12 18:30:00+00:00,15.57,15.55,15.55,0,0.0


In [9]:
# Create a function to obtain the dataframe with the dates around the trades alone
def subset_crossover(df, crossovers):
    crossindex = np.where(crossovers)[0]
    row_ranges = []
    for index in crossindex:
        start = max(index-1, 0)
        end = min(index+1, len(df))
        row_ranges.extend(range(start, end))
    
    # Add the last row of the main dataframe
    row_ranges.append(len(df) - 1)
    
    unique_rows = sorted(set(row_ranges))
    
    return df.iloc[unique_rows]


In [10]:
# Call the function to create the dataframe with only the dates around the trades
crossovers_df = subset_crossover(signals_df, crossover_points)
crossovers_df

Unnamed: 0_level_0,Close,EMA20_Close,EMA50_Close,Signal,Entry/Exit
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-06-12 14:30:00+00:00,15.38,15.59,15.60,0,0.0
2023-06-12 15:30:00+00:00,15.44,15.53,15.54,0,0.0
2023-06-12 19:30:00+00:00,15.65,15.57,15.56,1,1.0
2023-06-13 13:30:00+00:00,15.43,15.54,15.55,0,-1.0
2023-06-13 14:30:00+00:00,15.71,15.57,15.57,0,0.0
...,...,...,...,...,...
2025-06-05 19:30:00+00:00,119.91,127.44,127.66,0,-1.0
2025-06-06 13:30:00+00:00,124.64,127.17,127.54,0,0.0
2025-06-09 16:30:00+00:00,131.04,127.68,127.59,1,1.0
2025-06-09 17:30:00+00:00,131.54,128.05,127.74,1,0.0


In [11]:
# Visualise the data with buy and sell points marked on the chart with the close prices
close_prices = signals_df['Close'].hvplot(color = 'lightgray')

ema20  = signals_df['EMA20_Close'].hvplot(color = 'green')

ema50  = signals_df['EMA50_Close'].hvplot(color = 'yellow')

entry = signals_df[signals_df['Entry/Exit'] == 1]['Close'].hvplot.scatter(color = 'blue',
                                                                 marker = '^',
                                                                 legend = False,
                                                                 size = 200
                                                                )

exit = signals_df[signals_df['Entry/Exit'] == -1]['Close'].hvplot.scatter(color = 'red',
                                                                 marker = 'v',
                                                                 legend = False,
                                                                 size = 200
                                                                )


plot = close_prices * ema20 * ema50 * entry * exit

plot.opts(height = 500,
         width = 1000,
         title = 'Entry Exit plot based on EMA',
         ylabel = 'Price in $')

In [12]:
# Set up values for initial capital and size of position that would be taken in the trades to simulate the trading
initial_capital = 100000
share_size = 150

In [13]:
# Create a column that shows the size of the postion when executing the trade
signals_df['Share_Size'] = abs(signals_df['Entry/Exit'] * share_size)

# View data
crossovers_df = subset_crossover(signals_df, crossover_points)
crossovers_df


Unnamed: 0_level_0,Close,EMA20_Close,EMA50_Close,Signal,Entry/Exit,Share_Size
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-06-12 14:30:00+00:00,15.38,15.59,15.60,0,0.0,0.0
2023-06-12 15:30:00+00:00,15.44,15.53,15.54,0,0.0,0.0
2023-06-12 19:30:00+00:00,15.65,15.57,15.56,1,1.0,150.0
2023-06-13 13:30:00+00:00,15.43,15.54,15.55,0,-1.0,150.0
2023-06-13 14:30:00+00:00,15.71,15.57,15.57,0,0.0,0.0
...,...,...,...,...,...,...
2025-06-05 19:30:00+00:00,119.91,127.44,127.66,0,-1.0,150.0
2025-06-06 13:30:00+00:00,124.64,127.17,127.54,0,0.0,0.0
2025-06-09 16:30:00+00:00,131.04,127.68,127.59,1,1.0,150.0
2025-06-09 17:30:00+00:00,131.54,128.05,127.74,1,0.0,0.0


In [14]:
#### Define the position taken in each trade
signals_df['Position'] = signals_df['Entry/Exit'] * signals_df['Share_Size']

crossovers_df = subset_crossover(signals_df, crossover_points)
crossovers_df.head(20)

Unnamed: 0_level_0,Close,EMA20_Close,EMA50_Close,Signal,Entry/Exit,Share_Size,Position
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-06-12 14:30:00+00:00,15.38,15.59,15.6,0,0.0,0.0,0.0
2023-06-12 15:30:00+00:00,15.44,15.53,15.54,0,0.0,0.0,0.0
2023-06-12 19:30:00+00:00,15.65,15.57,15.56,1,1.0,150.0,150.0
2023-06-13 13:30:00+00:00,15.43,15.54,15.55,0,-1.0,150.0,-150.0
2023-06-13 14:30:00+00:00,15.71,15.57,15.57,0,0.0,0.0,0.0
2023-06-13 16:30:00+00:00,15.99,15.65,15.63,1,1.0,150.0,150.0
2023-06-13 17:30:00+00:00,15.71,15.66,15.64,1,0.0,0.0,0.0
2023-06-20 19:30:00+00:00,15.8,16.02,16.03,0,-1.0,150.0,-150.0
2023-06-21 13:30:00+00:00,14.9,15.92,15.97,0,0.0,0.0,0.0
2023-06-29 13:30:00+00:00,15.27,14.82,14.79,1,1.0,150.0,150.0


In [15]:
# Calculate the portfolio holdings in each trade
signals_df['Portfolio_Holdings'] = signals_df['Position'] * signals_df['Close']

crossovers_df = subset_crossover(signals_df, crossover_points)
crossovers_df.head(20)

Unnamed: 0_level_0,Close,EMA20_Close,EMA50_Close,Signal,Entry/Exit,Share_Size,Position,Portfolio_Holdings
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-06-12 14:30:00+00:00,15.38,15.59,15.6,0,0.0,0.0,0.0,0.0
2023-06-12 15:30:00+00:00,15.44,15.53,15.54,0,0.0,0.0,0.0,0.0
2023-06-12 19:30:00+00:00,15.65,15.57,15.56,1,1.0,150.0,150.0,2347.5
2023-06-13 13:30:00+00:00,15.43,15.54,15.55,0,-1.0,150.0,-150.0,-2314.5
2023-06-13 14:30:00+00:00,15.71,15.57,15.57,0,0.0,0.0,0.0,0.0
2023-06-13 16:30:00+00:00,15.99,15.65,15.63,1,1.0,150.0,150.0,2398.5
2023-06-13 17:30:00+00:00,15.71,15.66,15.64,1,0.0,0.0,0.0,0.0
2023-06-20 19:30:00+00:00,15.8,16.02,16.03,0,-1.0,150.0,-150.0,-2370.0
2023-06-21 13:30:00+00:00,14.9,15.92,15.97,0,0.0,0.0,0.0,0.0
2023-06-29 13:30:00+00:00,15.27,14.82,14.79,1,1.0,150.0,150.0,2290.5


In [16]:
# Obtain the cash reserve of the pportfolio
signals_df['Portfolio_Cash'] = initial_capital - (signals_df['Close'] * signals_df['Position']).cumsum()

crossovers_df = subset_crossover(signals_df, crossover_points)
crossovers_df

Unnamed: 0_level_0,Close,EMA20_Close,EMA50_Close,Signal,Entry/Exit,Share_Size,Position,Portfolio_Holdings,Portfolio_Cash
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-06-12 14:30:00+00:00,15.38,15.59,15.60,0,0.0,0.0,0.0,0.0,100000.0
2023-06-12 15:30:00+00:00,15.44,15.53,15.54,0,0.0,0.0,0.0,0.0,100000.0
2023-06-12 19:30:00+00:00,15.65,15.57,15.56,1,1.0,150.0,150.0,2347.5,97652.5
2023-06-13 13:30:00+00:00,15.43,15.54,15.55,0,-1.0,150.0,-150.0,-2314.5,99967.0
2023-06-13 14:30:00+00:00,15.71,15.57,15.57,0,0.0,0.0,0.0,0.0,99967.0
...,...,...,...,...,...,...,...,...,...
2025-06-05 19:30:00+00:00,119.91,127.44,127.66,0,-1.0,150.0,-150.0,-17986.5,111193.0
2025-06-06 13:30:00+00:00,124.64,127.17,127.54,0,0.0,0.0,0.0,0.0,111193.0
2025-06-09 16:30:00+00:00,131.04,127.68,127.59,1,1.0,150.0,150.0,19656.0,91537.0
2025-06-09 17:30:00+00:00,131.54,128.05,127.74,1,0.0,0.0,0.0,0.0,91537.0


In [17]:
# Calculate the total value of the portfolio at each time
# signals_df['Portfolio_Total'] = initial_capital
# signals_df['Portfolio_Total'] = np.where(signals_df['Signal'] == 1, (signals_df['Portfolio_Cash'] + (signals_df['Close'] * share_size)), signals_df['Portfolio_Total'].shift(1))

value_list = [initial_capital]

for i in range(1, len(signals_df)):
    if signals_df.iloc[i]['Signal'] == 1 and signals_df.iloc[i]['Portfolio_Holdings'] != 0:
        value =  signals_df.iloc[i]['Portfolio_Holdings'] + signals_df.iloc[i]['Portfolio_Cash']
    
    elif signals_df.iloc[i]['Signal'] == 1 and signals_df.iloc[i]['Portfolio_Holdings'] == 0:
        value = signals_df.iloc[i]['Portfolio_Cash'] + (share_size * signals_df.iloc[i]['Close'])
        
    else:
        value = value_list[-1]
        
    value_list.append(value)
    

signals_df['Portfolio_Total'] = value_list
signals_df.head(50)


crossovers_df = subset_crossover(signals_df, crossover_points)
crossovers_df

Unnamed: 0_level_0,Close,EMA20_Close,EMA50_Close,Signal,Entry/Exit,Share_Size,Position,Portfolio_Holdings,Portfolio_Cash,Portfolio_Total
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-06-12 14:30:00+00:00,15.38,15.59,15.60,0,0.0,0.0,0.0,0.0,100000.0,100000.0
2023-06-12 15:30:00+00:00,15.44,15.53,15.54,0,0.0,0.0,0.0,0.0,100000.0,100000.0
2023-06-12 19:30:00+00:00,15.65,15.57,15.56,1,1.0,150.0,150.0,2347.5,97652.5,100000.0
2023-06-13 13:30:00+00:00,15.43,15.54,15.55,0,-1.0,150.0,-150.0,-2314.5,99967.0,100000.0
2023-06-13 14:30:00+00:00,15.71,15.57,15.57,0,0.0,0.0,0.0,0.0,99967.0,100000.0
...,...,...,...,...,...,...,...,...,...,...
2025-06-05 19:30:00+00:00,119.91,127.44,127.66,0,-1.0,150.0,-150.0,-17986.5,111193.0,111263.5
2025-06-06 13:30:00+00:00,124.64,127.17,127.54,0,0.0,0.0,0.0,0.0,111193.0,111263.5
2025-06-09 16:30:00+00:00,131.04,127.68,127.59,1,1.0,150.0,150.0,19656.0,91537.0,111193.0
2025-06-09 17:30:00+00:00,131.54,128.05,127.74,1,0.0,0.0,0.0,0.0,91537.0,111268.0


In [44]:
signals_df['Signal'].value_counts()

Signal
1    2114
0    1378
Name: count, dtype: int64

In [18]:
signals_df.head(10)

Unnamed: 0_level_0,Close,EMA20_Close,EMA50_Close,Signal,Entry/Exit,Share_Size,Position,Portfolio_Holdings,Portfolio_Cash,Portfolio_Total
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-06-12 14:30:00+00:00,15.38,15.59,15.6,0,0.0,0.0,0.0,0.0,100000.0,100000.0
2023-06-12 15:30:00+00:00,15.44,15.53,15.54,0,0.0,0.0,0.0,0.0,100000.0,100000.0
2023-06-12 16:30:00+00:00,15.52,15.53,15.54,0,0.0,0.0,0.0,0.0,100000.0,100000.0
2023-06-12 17:30:00+00:00,15.57,15.54,15.54,0,0.0,0.0,0.0,0.0,100000.0,100000.0
2023-06-12 18:30:00+00:00,15.57,15.55,15.55,0,0.0,0.0,0.0,0.0,100000.0,100000.0
2023-06-12 19:30:00+00:00,15.65,15.57,15.56,1,1.0,150.0,150.0,2347.5,97652.5,100000.0
2023-06-13 13:30:00+00:00,15.43,15.54,15.55,0,-1.0,150.0,-150.0,-2314.5,99967.0,100000.0
2023-06-13 14:30:00+00:00,15.71,15.57,15.57,0,0.0,0.0,0.0,0.0,99967.0,100000.0
2023-06-13 15:30:00+00:00,15.74,15.59,15.59,0,0.0,0.0,0.0,0.0,99967.0,100000.0
2023-06-13 16:30:00+00:00,15.99,15.65,15.63,1,1.0,150.0,150.0,2398.5,97568.5,99967.0


In [19]:
# Obtain the Daily returns of the portfolio and clean the data
signals_df['Portfolio_Daily_Returns'] = signals_df['Portfolio_Total'].pct_change()
signals_df.dropna(inplace = True)

crossovers_df = subset_crossover(signals_df, crossover_points)
crossovers_df

Unnamed: 0_level_0,Close,EMA20_Close,EMA50_Close,Signal,Entry/Exit,Share_Size,Position,Portfolio_Holdings,Portfolio_Cash,Portfolio_Total,Portfolio_Daily_Returns
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-06-12 15:30:00+00:00,15.44,15.53,15.54,0,0.0,0.0,0.0,0.0,100000.0,100000.0,0.000000
2023-06-12 16:30:00+00:00,15.52,15.53,15.54,0,0.0,0.0,0.0,0.0,100000.0,100000.0,0.000000
2023-06-13 13:30:00+00:00,15.43,15.54,15.55,0,-1.0,150.0,-150.0,-2314.5,99967.0,100000.0,0.000000
2023-06-13 14:30:00+00:00,15.71,15.57,15.57,0,0.0,0.0,0.0,0.0,99967.0,100000.0,0.000000
2023-06-13 15:30:00+00:00,15.74,15.59,15.59,0,0.0,0.0,0.0,0.0,99967.0,100000.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
2025-06-06 13:30:00+00:00,124.64,127.17,127.54,0,0.0,0.0,0.0,0.0,111193.0,111263.5,0.000000
2025-06-06 14:30:00+00:00,124.76,126.94,127.43,0,0.0,0.0,0.0,0.0,111193.0,111263.5,0.000000
2025-06-09 17:30:00+00:00,131.54,128.05,127.74,1,0.0,0.0,0.0,0.0,91537.0,111268.0,0.000675
2025-06-09 18:30:00+00:00,131.92,128.42,127.91,1,0.0,0.0,0.0,0.0,91537.0,111325.0,0.000512


In [20]:
signals_df['Cumulative_Daily_Returns'] = (1 + signals_df['Portfolio_Daily_Returns']).cumprod() - 1

crossovers_df = subset_crossover(signals_df, crossover_points)
crossovers_df

Unnamed: 0_level_0,Close,EMA20_Close,EMA50_Close,Signal,Entry/Exit,Share_Size,Position,Portfolio_Holdings,Portfolio_Cash,Portfolio_Total,Portfolio_Daily_Returns,Cumulative_Daily_Returns
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2023-06-12 15:30:00+00:00,15.44,15.53,15.54,0,0.0,0.0,0.0,0.0,100000.0,100000.0,0.000000,0.000000
2023-06-12 16:30:00+00:00,15.52,15.53,15.54,0,0.0,0.0,0.0,0.0,100000.0,100000.0,0.000000,0.000000
2023-06-13 13:30:00+00:00,15.43,15.54,15.55,0,-1.0,150.0,-150.0,-2314.5,99967.0,100000.0,0.000000,0.000000
2023-06-13 14:30:00+00:00,15.71,15.57,15.57,0,0.0,0.0,0.0,0.0,99967.0,100000.0,0.000000,0.000000
2023-06-13 15:30:00+00:00,15.74,15.59,15.59,0,0.0,0.0,0.0,0.0,99967.0,100000.0,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
2025-06-06 13:30:00+00:00,124.64,127.17,127.54,0,0.0,0.0,0.0,0.0,111193.0,111263.5,0.000000,0.112635
2025-06-06 14:30:00+00:00,124.76,126.94,127.43,0,0.0,0.0,0.0,0.0,111193.0,111263.5,0.000000,0.112635
2025-06-09 17:30:00+00:00,131.54,128.05,127.74,1,0.0,0.0,0.0,0.0,91537.0,111268.0,0.000675,0.112680
2025-06-09 18:30:00+00:00,131.92,128.42,127.91,1,0.0,0.0,0.0,0.0,91537.0,111325.0,0.000512,0.113250


In [21]:
signals_df['Cumulative_Daily_Returns'].hvplot()

In [22]:
# Plot the buys and sells on the graph
exit = signals_df[signals_df['Entry/Exit'] == -1]['Portfolio_Total'].hvplot.scatter(color = 'red',
                                       marker = 'v',
                                       legend = False,
                                       width = 1000,
                                       height = 500,
                                       size = 200, ylabel = 'Price in $')
exit

entry = signals_df[signals_df['Entry/Exit'] == 1]['Portfolio_Total'].hvplot.scatter(color = 'green',
                                       marker = '^',
                                       legend = False,
                                       width = 1000,
                                       height = 500,
                                       size = 200, ylabel = 'Price in $')

entry



portfolio_price_chart = signals_df['Portfolio_Total'].hvplot(color = 'lightgray')

entry_exit_chart = portfolio_price_chart * entry * exit 

entry_exit_chart.opts(title = 'Entry/Exit Plot on the Portfolio Cumulative Returns',
                     height = 500,
                     width = 1000)

## Calculating Metrics

In [23]:
metics = ['Annualized Returns',
          'Cumulative Returns',
          'Annualized Volatility',
          'Sharpe Ratio',
          'Sortino Ratio']

evaluation_df = pd.DataFrame(columns = ['Backtest'], index = metics)
evaluation_df

Unnamed: 0,Backtest
Annualized Returns,
Cumulative Returns,
Annualized Volatility,
Sharpe Ratio,
Sortino Ratio,


In [24]:
# Add the first four respective data to the data frame
evaluation_df.loc['Annualized Returns'] = signals_df['Portfolio_Daily_Returns'].mean() * 252
evaluation_df.loc['Cumulative Returns'] = signals_df['Cumulative_Daily_Returns'][-1]
evaluation_df.loc['Annualized Volatility'] = signals_df['Portfolio_Daily_Returns'].std() * np.sqrt(252)
evaluation_df.loc['Sharpe Ratio'] = (signals_df['Portfolio_Daily_Returns'].mean() * 252) / (signals_df['Portfolio_Daily_Returns'].std() * np.sqrt(252))

# Display the data
evaluation_df

Unnamed: 0,Backtest
Annualized Returns,0.008345
Cumulative Returns,0.119955
Annualized Volatility,0.018405
Sharpe Ratio,0.453383
Sortino Ratio,


In [25]:
# Convert the returns to numneric and drop 'NaN's for calulation
daily_returns = pd.to_numeric(signals_df['Portfolio_Daily_Returns']).dropna()


# Calculate average daily return
average_return = daily_returns.mean()

# Calculate downside returns (only negative returns)
downside_returns = daily_returns[daily_returns < 0]

# Calculate the downside standard deviation
downside_std = downside_returns.std()

# Calculate the Sortino Ratio
sortino_ratio = (average_return / downside_std) * np.sqrt(252)
sortino_ratio

0.38462303024066663

In [26]:
# Obtain the sortino ratio and add it to the evaluation dataframe
evaluation_df.loc['Sortino Ratio'] = sortino_ratio
evaluation_df

Unnamed: 0,Backtest
Annualized Returns,0.008345
Cumulative Returns,0.119955
Annualized Volatility,0.018405
Sharpe Ratio,0.453383
Sortino Ratio,0.384623


## Performance analysis of the strategy

In [27]:
# Create the dataframe specifying the features of the trades along with its profit and loss
performance_data = []

    
for index, row in signals_df.iterrows():
    if row['Entry/Exit'] == 1:
        entry_date = index
        entry_share_price = row['Close']
        share_size = abs(row['Position'])
        entry_portfolio_holdings = row['Portfolio_Holdings']

    elif row['Entry/Exit'] == -1 and entry_date is not None:
        exit_date = index
        exit_share_price = row['Close']
        share_size = abs(row['Position'])
        exit_portfolio_holdings = abs(row['Portfolio_Holdings'])
        profit_loss = exit_portfolio_holdings - entry_portfolio_holdings

        performance_data.append({
            'Stock': 'pltr',
            'Entry Date': entry_date,
            'Exit Date': exit_date,
            'Entry Price': entry_share_price,
            'Exit Price': exit_share_price,
            'Shares': share_size,
            'Entry Portfolio Holding': entry_portfolio_holdings,
            'Exit Portfolio Holding': exit_portfolio_holdings,
            'Profit/Loss': profit_loss
        })

        

performance_data_df = pd.DataFrame(performance_data)

performance_data_df

Unnamed: 0,Stock,Entry Date,Exit Date,Entry Price,Exit Price,Shares,Entry Portfolio Holding,Exit Portfolio Holding,Profit/Loss
0,pltr,2023-06-12 19:30:00+00:00,2023-06-13 13:30:00+00:00,15.65,15.43,150.0,2347.5,2314.5,-33.0
1,pltr,2023-06-13 16:30:00+00:00,2023-06-20 19:30:00+00:00,15.99,15.8,150.0,2398.5,2370.0,-28.5
2,pltr,2023-06-29 13:30:00+00:00,2023-07-21 19:30:00+00:00,15.27,16.43,150.0,2290.5,2464.5,174.0
3,pltr,2023-07-28 17:30:00+00:00,2023-08-07 16:30:00+00:00,17.69,17.66,150.0,2653.5,2649.0,-4.5
4,pltr,2023-08-29 16:30:00+00:00,2023-09-07 14:30:00+00:00,15.43,14.84,150.0,2314.5,2226.0,-88.5
5,pltr,2023-09-08 14:30:00+00:00,2023-09-18 13:30:00+00:00,15.33,15.36,150.0,2299.5,2304.0,4.5
6,pltr,2023-09-28 14:30:00+00:00,2023-10-20 13:30:00+00:00,15.55,16.29,150.0,2332.5,2443.5,111.0
7,pltr,2023-11-02 15:30:00+00:00,2023-11-24 16:30:00+00:00,17.58,19.27,150.0,2637.0,2890.5,253.5
8,pltr,2023-11-30 14:30:00+00:00,2023-12-04 15:30:00+00:00,20.08,18.56,150.0,3012.0,2784.0,-228.0
9,pltr,2023-12-15 15:30:00+00:00,2023-12-18 20:30:00+00:00,18.65,17.84,150.0,2797.5,2676.0,-121.5


## Split the data ```signals_df``` into training and testing datasets for the machine learning model

In [28]:
# Create input data for the model
X = signals_df.drop(columns = ['Entry/Exit'])


# Create the output data y for the model
y = signals_df['Entry/Exit']

In [29]:
# Select the start of the training period
training_begin = X.index.min()

# Display the training begin date
print(training_begin)

2023-06-12 15:30:00+00:00


In [30]:
# Select the end of the training period
training_end = X.index.min() + DateOffset(months = 18)

# Display the training end date
training_end

Timestamp('2024-12-12 15:30:00+0000', tz='UTC')

In [31]:
# Generate training datasets X_train and y_train dataframe
X_train = X[training_begin : training_end]
y_train = y[training_begin : training_end]

In [32]:
# Generate testing datasets X_train and y_train dataframe
X_test = X[training_end : ]
y_test = y[training_end : ]

In [33]:
# Create an object for the standard scaler
scaler = StandardScaler()

# Apply the scaler model to fit the X_train data
X_scaler = scaler.fit(X_train)

# Transform the input data with the scaler
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [34]:
# Create an object for the Graient Boosing Classifier model
gb_model = GradientBoostingClassifier(n_estimators = 300, learning_rate = 0.1, max_depth = 5, min_samples_split = 10, subsample = 0.9, max_features='log2', random_state = 42)

# Fit the training data in to the model
gb_model.fit(X_train_scaled, y_train)

In [35]:
from sklearn.ensemble import HistGradientBoostingClassifier

gb_model = HistGradientBoostingClassifier(
    max_iter=500,
    early_stopping=True,
    validation_fraction=0.1,
    random_state=42
)
gb_model.fit(X_train_scaled, y_train)

In [36]:
# Make prdictions with the model on training and testing data
gb_model_train_perdiction = gb_model.predict(X_train_scaled)
gb_model_test_perdiction = gb_model.predict(X_test_scaled)

In [37]:
# Generate the classification report
gb_model_train_report = classification_report(gb_model_train_perdiction, y_train)
gb_model_test_report = classification_report(gb_model_test_perdiction, y_test)

# Display the classification reports
print(f"Classification report of gb_model_test_perdiction and y_test\n {'-' * 58}\n")
print(gb_model_test_report)

Classification report of gb_model_test_perdiction and y_test
 ----------------------------------------------------------

              precision    recall  f1-score   support

        -1.0       1.00      1.00      1.00         7
         0.0       1.00      1.00      1.00       842
         1.0       1.00      1.00      1.00         7

    accuracy                           1.00       856
   macro avg       1.00      1.00      1.00       856
weighted avg       1.00      1.00      1.00       856



In [38]:
y_train.value_counts()

Entry/Exit
 0.0    2592
 1.0      23
-1.0      22
Name: count, dtype: int64

In [39]:
# Imnport SMOTTEEN for resampling
from imblearn.over_sampling import SMOTE

# Create an object for resmapling
resampler = SMOTE(random_state = 1, sampling_strategy = 'not majority')

# Fit the training dataset into the resampler
X_train_resampled, y_train_resampled = resampler.fit_resample(X_train_scaled, y_train)

In [40]:
y_train_resampled.value_counts()

Entry/Exit
 0.0    2592
 1.0    2592
-1.0    2592
Name: count, dtype: int64

In [41]:
gb_model.fit(X_train_resampled, y_train_resampled)

In [42]:
prediction_resampled = gb_model.predict(X_test_scaled)

In [43]:
print(classification_report(y_test, prediction_resampled))

              precision    recall  f1-score   support

        -1.0       1.00      1.00      1.00         7
         0.0       1.00      1.00      1.00       842
         1.0       1.00      1.00      1.00         7

    accuracy                           1.00       856
   macro avg       1.00      1.00      1.00       856
weighted avg       1.00      1.00      1.00       856



In [None]:

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from imblearn.over_sampling import SMOTE
from collections import Counter
import matplotlib.pyplot as plt

# Split data (replace X and y with actual variables if different)
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

# Standardize
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Oversample using SMOTE only on training data
resampler = SMOTE(random_state=42, sampling_strategy='not majority')
X_train_resampled, y_train_resampled = resampler.fit_resample(X_train_scaled, y_train)

# Print class distribution
print("Original class distribution:", Counter(y_train))
print("Resampled class distribution:", Counter(y_train_resampled))

# Gradient Boosting Classifier with regularization
gb_clf = GradientBoostingClassifier(
    n_estimators=150,
    learning_rate=0.05,
    max_depth=3,
    subsample=0.8,
    min_samples_split=5,
    random_state=42
)
gb_clf.fit(X_train_resampled, y_train_resampled)

# Evaluate
y_pred = gb_clf.predict(X_test_scaled)
print(classification_report(y_test, y_pred))

# Confusion matrix
cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot(cmap='Blues')
plt.show()

# Cross-validation performance
cv = StratifiedKFold(n_splits=5)
cv_scores = cross_val_score(gb_clf, X_train_resampled, y_train_resampled, cv=cv, scoring='f1_macro')
print(f"Cross-validated F1 Macro Score (mean): {cv_scores.mean():.4f}")
