In [116]:
# import libraries
import pandas as pd
import yfinance as yf
import hvplot.pandas
import numpy as np
from pandas.tseries.offsets import DateOffset
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report

import warnings
warnings.filterwarnings('ignore')

In [2]:
# download the data
pltr_df = yf.download(tickers = 'PLTR', period = '5Y')
pltr_df

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume
Ticker,PLTR,PLTR,PLTR,PLTR,PLTR
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2020-09-30,9.500000,11.410000,9.110000,10.000000,338584400
2020-10-01,9.460000,10.100000,9.230000,9.690000,124297600
2020-10-02,9.200000,9.280000,8.940000,9.060000,55018300
2020-10-05,9.030000,9.490000,8.920000,9.430000,36316900
2020-10-06,9.900000,10.180000,8.900000,9.040000,90864000
...,...,...,...,...,...
2025-06-02,132.039993,134.479996,128.860001,131.434998,93218700
2025-06-03,133.169998,135.279999,130.100006,133.070007,91297700
2025-06-04,130.009995,132.949997,125.580002,132.800003,97306300
2025-06-05,119.910004,132.850006,118.930000,129.270004,131055900


In [3]:
# drop the level with the ticker in the columns of the data frame
pltr_df = pltr_df.droplevel(level = 1, axis = 1)

# Remove the name 'Price' from the dataframe's columns
pltr_df.columns.name = None

# view data
pltr_df

Unnamed: 0_level_0,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-09-30,9.500000,11.410000,9.110000,10.000000,338584400
2020-10-01,9.460000,10.100000,9.230000,9.690000,124297600
2020-10-02,9.200000,9.280000,8.940000,9.060000,55018300
2020-10-05,9.030000,9.490000,8.920000,9.430000,36316900
2020-10-06,9.900000,10.180000,8.900000,9.040000,90864000
...,...,...,...,...,...
2025-06-02,132.039993,134.479996,128.860001,131.434998,93218700
2025-06-03,133.169998,135.279999,130.100006,133.070007,91297700
2025-06-04,130.009995,132.949997,125.580002,132.800003,97306300
2025-06-05,119.910004,132.850006,118.930000,129.270004,131055900


In [4]:
# Copy pltr_df to a new dataframe for further analysis
signals_df = pltr_df[['Close']]

# visulise the data
signals_df.hvplot()

In [5]:
# Set the long and short windows
short_window = 20
long_window = 50

# Obtain the Exponential Moving Average of the Close prices with short and long windows
signals_df['EMA20_Close'] =  round(signals_df['Close'].ewm(span = short_window).mean(), 2)
signals_df['EMA50_Close'] =  round(signals_df['Close'].ewm(span = long_window).mean(), 2)


# view data
signals_df.head()

Unnamed: 0_level_0,Close,EMA20_Close,EMA50_Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-09-30,9.5,9.5,9.5
2020-10-01,9.46,9.48,9.48
2020-10-02,9.2,9.38,9.38
2020-10-05,9.03,9.28,9.29
2020-10-06,9.9,9.43,9.42


In [6]:
# Obtain the points of buy and sell using the 20 and 50 day exponential moving averages
buy_points = (signals_df['EMA20_Close'] > signals_df['EMA50_Close']) & (signals_df['EMA20_Close'].shift(1) <= signals_df['EMA50_Close'].shift(1))
sell_points = (signals_df['EMA20_Close'] < signals_df['EMA50_Close']) & (signals_df['EMA20_Close'].shift(1) >= signals_df['EMA50_Close'].shift(1))

# Combine the buy and sell points to obtain the all the crossover points of the EMAs
crossover_points = buy_points | sell_points

# Obtain the first crossover point
first_buy_point = crossover_points.idxmax()

# Mark the crossover points of the EMAs with 1s 
signals_df['Signal'] = np.where((signals_df['EMA20_Close'] > signals_df['EMA50_Close']) & 
                                         (signals_df.index > first_buy_point), 1, 0)

# View data
signals_df.head()

Unnamed: 0_level_0,Close,EMA20_Close,EMA50_Close,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-09-30,9.5,9.5,9.5,0
2020-10-01,9.46,9.48,9.48,0
2020-10-02,9.2,9.38,9.38,0
2020-10-05,9.03,9.28,9.29,0
2020-10-06,9.9,9.43,9.42,1


In [7]:
# Label the exit and entry points with Buy as 1, Sell as -1 and Hold as 0
signals_df['Entry/Exit'] = signals_df['Signal'].diff()
signals_df.dropna(inplace = True)

signals_df.head()

Unnamed: 0_level_0,Close,EMA20_Close,EMA50_Close,Signal,Entry/Exit
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-10-01,9.46,9.48,9.48,0,0.0
2020-10-02,9.2,9.38,9.38,0,0.0
2020-10-05,9.03,9.28,9.29,0,0.0
2020-10-06,9.9,9.43,9.42,1,1.0
2020-10-07,10.0,9.55,9.53,1,0.0


In [8]:
# Create a function to obtain the dataframe with the dates around the trades alone
def subset_crossover(df, crossovers):
    crossindex = np.where(crossovers)[0]
    row_ranges = []
    for index in crossindex:
        start = max(index-2, 0)
        end = min(index+2, len(df))
        row_ranges.extend(range(start, end))
    
    # Add the last row of the main dataframe
    row_ranges.append(len(df) - 1)
    
    unique_rows = sorted(set(row_ranges))
    
    return df.iloc[unique_rows]


In [9]:
# Call the function to create the dataframe with only the dates around the trades
crossovers_df = subset_crossover(signals_df, crossover_points)
crossovers_df

Unnamed: 0_level_0,Close,EMA20_Close,EMA50_Close,Signal,Entry/Exit
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-10-02,9.200000,9.38,9.38,0,0.0
2020-10-05,9.030000,9.28,9.29,0,0.0
2020-10-06,9.900000,9.43,9.42,1,1.0
2020-10-07,10.000000,9.55,9.53,1,0.0
2020-10-08,10.000000,9.63,9.60,1,0.0
...,...,...,...,...,...
2025-04-11,88.550003,86.10,86.18,0,0.0
2025-04-14,92.620003,86.72,86.43,1,1.0
2025-04-15,98.400002,87.84,86.90,1,0.0
2025-04-16,92.709999,88.30,87.13,1,0.0


In [10]:
# Visualise the data with buy and sell points marked on the chart with the close prices
close_prices = signals_df['Close'].hvplot(color = 'lightgray')

ema20  = signals_df['EMA20_Close'].hvplot(color = 'green')

ema50  = signals_df['EMA50_Close'].hvplot(color = 'yellow')

entry = signals_df[signals_df['Entry/Exit'] == 1]['Close'].hvplot.scatter(color = 'blue',
                                                                 marker = '^',
                                                                 legend = False,
                                                                 size = 200
                                                                )

exit = signals_df[signals_df['Entry/Exit'] == -1]['Close'].hvplot.scatter(color = 'red',
                                                                 marker = 'v',
                                                                 legend = False,
                                                                 size = 200
                                                                )


plot = close_prices * ema20 * ema50 * entry * exit

plot.opts(height = 500,
         width = 1000,
         title = 'Entry Exit plot based on EMA',
         ylabel = 'Price in $')

In [11]:
# Set up values for initial capital and size of position that would be taken in the trades to simulate the trading
initial_capital = 100000
share_size = 300

In [12]:
# Create a column that shows the size of the postion when executing the trade
signals_df['Share_Size'] = abs(signals_df['Entry/Exit'] * share_size)

# View data
crossovers_df = subset_crossover(signals_df, crossover_points)
crossovers_df


Unnamed: 0_level_0,Close,EMA20_Close,EMA50_Close,Signal,Entry/Exit,Share_Size
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-10-02,9.200000,9.38,9.38,0,0.0,0.0
2020-10-05,9.030000,9.28,9.29,0,0.0,0.0
2020-10-06,9.900000,9.43,9.42,1,1.0,300.0
2020-10-07,10.000000,9.55,9.53,1,0.0,0.0
2020-10-08,10.000000,9.63,9.60,1,0.0,0.0
...,...,...,...,...,...,...
2025-04-11,88.550003,86.10,86.18,0,0.0,0.0
2025-04-14,92.620003,86.72,86.43,1,1.0,300.0
2025-04-15,98.400002,87.84,86.90,1,0.0,0.0
2025-04-16,92.709999,88.30,87.13,1,0.0,0.0


In [26]:
# Define the position taken in each trade
signals_df['Position'] = signals_df['Entry/Exit'] * signals_df['Share_Size']

crossovers_df = subset_crossover(signals_df, crossover_points)
crossovers_df.head(20)

Unnamed: 0_level_0,Close,EMA20_Close,EMA50_Close,Signal,Entry/Exit,Share_Size,Postion,Portfolio_Holdings,Portfolio_Cash,Portfolio_Total,Portfolio_Daily_Returns,Cumulative_Daily_Returns,Position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-10-02,9.2,9.38,9.38,0,0.0,0.0,0.0,0.0,100000.0,100000.0,0.0,0.0,0.0
2020-10-05,9.03,9.28,9.29,0,0.0,0.0,0.0,0.0,100000.0,100000.0,0.0,0.0,0.0
2020-10-06,9.9,9.43,9.42,1,1.0,300.0,300.0,2970.0,97030.0,100000.0,0.0,0.0,300.0
2020-10-07,10.0,9.55,9.53,1,0.0,0.0,0.0,0.0,97030.0,100030.0,0.0003,0.0003,0.0
2020-10-08,10.0,9.63,9.6,1,0.0,0.0,0.0,0.0,97030.0,100030.0,0.0,0.0003,0.0
2020-10-19,9.57,9.66,9.65,1,0.0,0.0,0.0,0.0,97030.0,99901.0,-0.00042,-0.00099,0.0
2020-10-20,9.27,9.61,9.62,0,-1.0,300.0,-300.0,2781.0,99811.0,102592.0,0.026937,0.02592,-300.0
2020-10-21,9.2,9.57,9.58,0,0.0,0.0,0.0,0.0,99811.0,102571.0,-0.000205,0.02571,0.0
2020-10-22,9.68,9.58,9.59,0,0.0,0.0,0.0,0.0,99811.0,102715.0,0.001404,0.02715,0.0
2020-10-26,9.95,9.61,9.61,0,0.0,0.0,0.0,0.0,99811.0,102796.0,0.001344,0.02796,0.0


In [14]:
# Calculate the portfolio holdings in each trade
signals_df['Portfolio_Holdings'] = round(signals_df['Share_Size'] * signals_df['Close'], 2)

crossovers_df = subset_crossover(signals_df, crossover_points)
crossovers_df.head(20)

Unnamed: 0_level_0,Close,EMA20_Close,EMA50_Close,Signal,Entry/Exit,Share_Size,Postion,Portfolio_Holdings
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-10-02,9.2,9.38,9.38,0,0.0,0.0,0.0,0.0
2020-10-05,9.03,9.28,9.29,0,0.0,0.0,0.0,0.0
2020-10-06,9.9,9.43,9.42,1,1.0,300.0,300.0,2970.0
2020-10-07,10.0,9.55,9.53,1,0.0,0.0,0.0,0.0
2020-10-08,10.0,9.63,9.6,1,0.0,0.0,0.0,0.0
2020-10-19,9.57,9.66,9.65,1,0.0,0.0,0.0,0.0
2020-10-20,9.27,9.61,9.62,0,-1.0,300.0,-300.0,2781.0
2020-10-21,9.2,9.57,9.58,0,0.0,0.0,0.0,0.0
2020-10-22,9.68,9.58,9.59,0,0.0,0.0,0.0,0.0
2020-10-26,9.95,9.61,9.61,0,0.0,0.0,0.0,0.0


In [27]:
# Obtain the cash reserve of the pportfolio
signals_df['Portfolio_Cash'] = round(initial_capital - (signals_df['Close'] * signals_df['Position']).cumsum() ,2)

crossovers_df = subset_crossover(signals_df, crossover_points)
crossovers_df

Unnamed: 0_level_0,Close,EMA20_Close,EMA50_Close,Signal,Entry/Exit,Share_Size,Postion,Portfolio_Holdings,Portfolio_Cash,Portfolio_Total,Portfolio_Daily_Returns,Cumulative_Daily_Returns,Position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-10-02,9.200000,9.38,9.38,0,0.0,0.0,0.0,0.0,100000.0,100000.0,0.000000,0.00000,0.0
2020-10-05,9.030000,9.28,9.29,0,0.0,0.0,0.0,0.0,100000.0,100000.0,0.000000,0.00000,0.0
2020-10-06,9.900000,9.43,9.42,1,1.0,300.0,300.0,2970.0,97030.0,100000.0,0.000000,0.00000,300.0
2020-10-07,10.000000,9.55,9.53,1,0.0,0.0,0.0,0.0,97030.0,100030.0,0.000300,0.00030,0.0
2020-10-08,10.000000,9.63,9.60,1,0.0,0.0,0.0,0.0,97030.0,100030.0,0.000000,0.00030,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-04-11,88.550003,86.10,86.18,0,0.0,0.0,0.0,0.0,116227.0,142792.0,-0.000084,0.42792,0.0
2025-04-14,92.620003,86.72,86.43,1,1.0,300.0,300.0,27786.0,88441.0,116227.0,-0.186040,0.16227,300.0
2025-04-15,98.400002,87.84,86.90,1,0.0,0.0,0.0,0.0,88441.0,117961.0,0.014919,0.17961,0.0
2025-04-16,92.709999,88.30,87.13,1,0.0,0.0,0.0,0.0,88441.0,116254.0,-0.014471,0.16254,0.0


In [28]:
# Calculate the total value of the portfolio at each time
signals_df['Portfolio_Total'] = initial_capital

signals_df['Portfolio_Total'] = np.where((signals_df.index <= first_buy_point), initial_capital,
                                    np.where(signals_df['Portfolio_Holdings'] !=0, signals_df['Portfolio_Holdings'] + signals_df['Portfolio_Cash'],
                                       signals_df['Portfolio_Cash'] + round(signals_df['Close'], 2) * 300) )

crossovers_df = subset_crossover(signals_df, crossover_points)
crossovers_df

Unnamed: 0_level_0,Close,EMA20_Close,EMA50_Close,Signal,Entry/Exit,Share_Size,Postion,Portfolio_Holdings,Portfolio_Cash,Portfolio_Total,Portfolio_Daily_Returns,Cumulative_Daily_Returns,Position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-10-02,9.200000,9.38,9.38,0,0.0,0.0,0.0,0.0,100000.0,100000.0,0.000000,0.00000,0.0
2020-10-05,9.030000,9.28,9.29,0,0.0,0.0,0.0,0.0,100000.0,100000.0,0.000000,0.00000,0.0
2020-10-06,9.900000,9.43,9.42,1,1.0,300.0,300.0,2970.0,97030.0,100000.0,0.000000,0.00000,300.0
2020-10-07,10.000000,9.55,9.53,1,0.0,0.0,0.0,0.0,97030.0,100030.0,0.000300,0.00030,0.0
2020-10-08,10.000000,9.63,9.60,1,0.0,0.0,0.0,0.0,97030.0,100030.0,0.000000,0.00030,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-04-11,88.550003,86.10,86.18,0,0.0,0.0,0.0,0.0,116227.0,142792.0,-0.000084,0.42792,0.0
2025-04-14,92.620003,86.72,86.43,1,1.0,300.0,300.0,27786.0,88441.0,116227.0,-0.186040,0.16227,300.0
2025-04-15,98.400002,87.84,86.90,1,0.0,0.0,0.0,0.0,88441.0,117961.0,0.014919,0.17961,0.0
2025-04-16,92.709999,88.30,87.13,1,0.0,0.0,0.0,0.0,88441.0,116254.0,-0.014471,0.16254,0.0


In [97]:
# Obtain the Daily returns of the portfolio and clean the data
signals_df['Portfolio_Daily_Returns'] = signals_df['Portfolio_Total'].pct_change()
signals_df.dropna(inplace = True)

crossovers_df = subset_crossover(signals_df, crossover_points)
crossovers_df

Unnamed: 0_level_0,Close,EMA20_Close,EMA50_Close,Signal,Entry/Exit,Share_Size,Postion,Portfolio_Holdings,Portfolio_Cash,Portfolio_Total,Portfolio_Daily_Returns,Cumulative_Daily_Returns,Position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-10-05,9.030000,9.28,9.29,0,0.0,0.0,0.0,0.0,100000.0,100000.0,0.000000,0.00000,0.0
2020-10-06,9.900000,9.43,9.42,1,1.0,300.0,300.0,2970.0,97030.0,100000.0,0.000000,0.00000,300.0
2020-10-07,10.000000,9.55,9.53,1,0.0,0.0,0.0,0.0,97030.0,100030.0,0.000300,0.00030,0.0
2020-10-08,10.000000,9.63,9.60,1,0.0,0.0,0.0,0.0,97030.0,100030.0,0.000000,0.00030,0.0
2020-10-09,9.950000,9.69,9.65,1,0.0,0.0,0.0,0.0,97030.0,100015.0,-0.000150,0.00015,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-04-14,92.620003,86.72,86.43,1,1.0,300.0,300.0,27786.0,88441.0,116227.0,-0.186040,0.16227,300.0
2025-04-15,98.400002,87.84,86.90,1,0.0,0.0,0.0,0.0,88441.0,117961.0,0.014919,0.17961,0.0
2025-04-16,92.709999,88.30,87.13,1,0.0,0.0,0.0,0.0,88441.0,116254.0,-0.014471,0.16254,0.0
2025-04-17,93.779999,88.82,87.39,1,0.0,0.0,0.0,0.0,88441.0,116575.0,0.002761,0.16575,0.0


In [98]:
signals_df['Cumulative_Daily_Returns'] = (1 + signals_df['Portfolio_Daily_Returns']).cumprod() - 1

crossovers_df = subset_crossover(signals_df, crossover_points)
crossovers_df

Unnamed: 0_level_0,Close,EMA20_Close,EMA50_Close,Signal,Entry/Exit,Share_Size,Postion,Portfolio_Holdings,Portfolio_Cash,Portfolio_Total,Portfolio_Daily_Returns,Cumulative_Daily_Returns,Position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-10-05,9.030000,9.28,9.29,0,0.0,0.0,0.0,0.0,100000.0,100000.0,0.000000,0.00000,0.0
2020-10-06,9.900000,9.43,9.42,1,1.0,300.0,300.0,2970.0,97030.0,100000.0,0.000000,0.00000,300.0
2020-10-07,10.000000,9.55,9.53,1,0.0,0.0,0.0,0.0,97030.0,100030.0,0.000300,0.00030,0.0
2020-10-08,10.000000,9.63,9.60,1,0.0,0.0,0.0,0.0,97030.0,100030.0,0.000000,0.00030,0.0
2020-10-09,9.950000,9.69,9.65,1,0.0,0.0,0.0,0.0,97030.0,100015.0,-0.000150,0.00015,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-04-14,92.620003,86.72,86.43,1,1.0,300.0,300.0,27786.0,88441.0,116227.0,-0.186040,0.16227,300.0
2025-04-15,98.400002,87.84,86.90,1,0.0,0.0,0.0,0.0,88441.0,117961.0,0.014919,0.17961,0.0
2025-04-16,92.709999,88.30,87.13,1,0.0,0.0,0.0,0.0,88441.0,116254.0,-0.014471,0.16254,0.0
2025-04-17,93.779999,88.82,87.39,1,0.0,0.0,0.0,0.0,88441.0,116575.0,0.002761,0.16575,0.0


In [99]:
signals_df['Cumulative_Daily_Returns'].hvplot()

In [100]:
# Plot the buys and sells on the graph
exit = signals_df[signals_df['Entry/Exit'] == -1]['Portfolio_Total'].hvplot.scatter(color = 'red',
                                       marker = 'v',
                                       legend = False,
                                       width = 1000,
                                       height = 500,
                                       size = 200, ylabel = 'Price in $')
exit

entry = signals_df[signals_df['Entry/Exit'] == 1]['Portfolio_Total'].hvplot.scatter(color = 'green',
                                       marker = '^',
                                       legend = False,
                                       width = 1000,
                                       height = 500,
                                       size = 200, ylabel = 'Price in $')

entry



portfolio_price_chart = signals_df['Portfolio_Total'].hvplot(color = 'lightgray')

entry_exit_chart = portfolio_price_chart * entry * exit 

entry_exit_chart.opts(title = 'Entry/Exit Plot on the Portfolio Cumulative Returns',
                     height = 500,
                     width = 1000)

## Calculating Metrics

In [101]:
metics = ['Annualized Returns',
          'Cumulative Returns',
          'Annualized Volatility',
          'Sharpe Ratio',
          'Sortino Ratio']

evaluation_df = pd.DataFrame(columns = ['Backtest'], index = metics)
evaluation_df

Unnamed: 0,Backtest
Annualized Returns,
Cumulative Returns,
Annualized Volatility,
Sharpe Ratio,
Sortino Ratio,


In [102]:
# Add the first four respective data to the data frame
evaluation_df.loc['Annualized Returns'] = signals_df['Portfolio_Daily_Returns'].mean() * 252
evaluation_df.loc['Cumulative Returns'] = signals_df['Cumulative_Daily_Returns'][-1]
evaluation_df.loc['Annualized Volatility'] = signals_df['Portfolio_Daily_Returns'].std() * np.sqrt(252)
evaluation_df.loc['Sharpe Ratio'] = (signals_df['Portfolio_Daily_Returns'].mean() * 252) / (signals_df['Portfolio_Daily_Returns'].std() * np.sqrt(252))

# Display the data
evaluation_df

Unnamed: 0,Backtest
Annualized Returns,0.069131
Cumulative Returns,0.26757
Annualized Volatility,0.191547
Sharpe Ratio,0.360906
Sortino Ratio,


In [103]:
# Convert the returns to numneric and drop 'NaN's for calulation
daily_returns = pd.to_numeric(signals_df['Portfolio_Daily_Returns']).dropna()


# Calculate average daily return
average_return = daily_returns.mean()

# Calculate downside returns (only negative returns)
downside_returns = daily_returns[daily_returns < 0]

# Calculate the downside standard deviation
downside_std = downside_returns.std()

# Calculate the Sortino Ratio
sortino_ratio = (average_return / downside_std) * np.sqrt(252)
sortino_ratio

0.39429324212201644

In [104]:
# Obtain the sortino ratio and add it to the evaluation dataframe
evaluation_df.loc['Sortino Ratio'] = sortino_ratio
evaluation_df

Unnamed: 0,Backtest
Annualized Returns,0.069131
Cumulative Returns,0.26757
Annualized Volatility,0.191547
Sharpe Ratio,0.360906
Sortino Ratio,0.394293


## Performance analysis of the strategy

In [105]:
# Create the dataframe specifying the features of the trades along with its profit and loss
performance_data = []

    
for index, row in signals_df.iterrows():
    if row['Entry/Exit'] == 1:
        entry_date = index
        entry_share_price = row['Close']
        share_size = abs(row['Position'])
        entry_portfolio_holdings = row['Portfolio_Holdings']

    elif row['Entry/Exit'] == -1 and entry_date is not None:
        exit_date = index
        exit_share_price = row['Close']
        share_size = abs(row['Position'])
        exit_portfolio_holdings = row['Portfolio_Holdings']
        profit_loss = exit_portfolio_holdings - entry_portfolio_holdings

        performance_data.append({
            'Stock': 'PLTR',
            'Entry Date': entry_date,
            'Exit Date': exit_date,
            'Entry Price': entry_share_price,
            'Exit Price': exit_share_price,
            'Shares': share_size,
            'Entry Portfolio Holding': entry_portfolio_holdings,
            'Exit Portfolio Holding': exit_portfolio_holdings,
            'Profit/Loss': profit_loss
        })

        

performance_data_df = pd.DataFrame(performance_data)

performance_data_df

Unnamed: 0,Stock,Entry Date,Exit Date,Entry Price,Exit Price,Shares,Entry Portfolio Holding,Exit Portfolio Holding,Profit/Loss
0,PLTR,2020-10-06,2020-10-20,9.9,9.27,300.0,2970.0,2781.0,-189.0
1,PLTR,2020-10-27,2021-03-03,10.95,23.59,300.0,3285.0,7077.0,3792.0
2,PLTR,2021-06-10,2021-07-15,24.139999,21.469999,300.0,7242.0,6441.0,-801.0
3,PLTR,2021-08-18,2021-10-11,25.280001,23.530001,300.0,7584.0,7059.0,-525.0
4,PLTR,2021-10-29,2021-11-11,25.879999,22.99,300.0,7764.0,6897.0,-867.0
5,PLTR,2022-07-28,2022-08-19,10.13,8.51,300.0,3039.0,2553.0,-486.0
6,PLTR,2023-02-01,2023-05-02,8.2,7.59,300.0,2460.0,2277.0,-183.0
7,PLTR,2023-05-10,2023-09-08,9.94,15.13,300.0,2982.0,4539.0,1557.0
8,PLTR,2023-09-11,2023-09-20,15.79,14.74,300.0,4737.0,4422.0,-315.0
9,PLTR,2023-10-06,2024-01-02,16.610001,16.58,300.0,4983.0,4974.0,-9.0


## Split the data ```signals_df``` into training and testing datasets for the machine learning model

In [106]:
# Create input data for the model
X = signals_df.drop(columns = ['Entry/Exit'])


# Create the output data y for the model
y = signals_df['Entry/Exit']

In [107]:
# Select the start of the training period
training_begin = X.index.min()

# Display the training begin date
print(training_begin)

2020-10-02 00:00:00


In [108]:
# Select the end of the training period
training_end = X.index.min() + DateOffset(years = 4)

# Display the training end date
training_end

Timestamp('2024-10-02 00:00:00')

In [109]:
# Generate training datasets X_train and y_train dataframe
X_train = X[training_begin : training_end]
y_train = y[training_begin : training_end]

In [110]:
# Generate testing datasets X_train and y_train dataframe
X_test = X[training_end : ]
y_test = y[training_end : ]

In [111]:
# Create an object for the standard scaler
scaler = StandardScaler()

# Apply the scaler model to fit the X_train data
X_scaler = scaler.fit(X_train)

# Transform the input data with the scaler
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [112]:
# Create an object for the Graient Boosing Classifier model
gb_model = GradientBoostingClassifier(n_estimators = 100, learning_rate = 0.1, max_depth = 3, random_state = 1)

# Fit the training data in to the model
gb_model.fit(X_train_scaled, y_train)

In [114]:
# Make prdictions with the model on training and testing data
gb_model_train_perdiction = gb_model.predict(X_train_scaled)
gb_model_test_perdiction = gb_model.predict(X_test_scaled)

In [131]:
# Generate the classification report
gb_model_train_report = classification_report(gb_model_train_perdiction, y_train)
gb_model_test_report = classification_report(gb_model_test_perdiction, y_test)

# Display the classification reports
print(f"Classification report of gb_model_train_perdiction and y_train\n {'-' * 59}\n")
print(gb_model_train_report)

print(f"Classification report of gb_model_test_perdiction and y_test\n {'-' * 58}\n")
print(gb_model_test_report)

Classification report of gb_model_train_perdiction and y_train
 -----------------------------------------------------------

              precision    recall  f1-score   support

        -1.0       1.00      1.00      1.00        12
         0.0       1.00      1.00      1.00       981
         1.0       1.00      1.00      1.00        13

    accuracy                           1.00      1006
   macro avg       1.00      1.00      1.00      1006
weighted avg       1.00      1.00      1.00      1006

Classification report of gb_model_test_perdiction and y_test
 ----------------------------------------------------------

              precision    recall  f1-score   support

        -1.0       1.00      1.00      1.00         1
         0.0       1.00      1.00      1.00       168
         1.0       1.00      1.00      1.00         1

    accuracy                           1.00       170
   macro avg       1.00      1.00      1.00       170
weighted avg       1.00      1.00      1.00   