In [None]:
#Uncomment below if libraries havent been installed
# !pip install backtesting
# !pip install pandas_ta
# !pip install yfinance
# !pip install scikit-learn
# !pip install tensorflow
# !pip install -U keras-tuner
# !pip install matplotlib


In [1]:
import numpy as np
import pandas as pd
import pandas_ta as ta
import os
import shutil

import warnings
warnings.filterwarnings('ignore')

from backtesting import Backtest, Strategy
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, confusion_matrix

from tensorflow import keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers
from kerastuner.tuners import RandomSearch

2023-10-21 14:42:52.779131: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
#Retrieve pair price history from Yahoo Finance using pandas_ta
df = pd.DataFrame()
df = df.ta.ticker("GBPUSD=X")


In [3]:
#Drop uneccassary columns
del df["Dividends"]
del df["Stock Splits"]
del df["Volume"]

In [4]:
#Used as a target variable to see if model correctly predicts price action
df["Tomorrow_Close"] = df["Close"].shift(-1)

In [5]:
#Used as a target variable to see if model correctly predicts price action
df["Target"] = (df["Tomorrow_Close"] > df["Close"]).astype(int)
df

Unnamed: 0_level_0,Open,High,Low,Close,Tomorrow_Close,Target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2003-12-01 00:00:00+00:00,1.723811,1.727205,1.718390,1.718597,1.730313,1
2003-12-02 00:00:00+00:00,1.719010,1.732112,1.717298,1.730313,1.728101,0
2003-12-03 00:00:00+00:00,1.730493,1.731812,1.725209,1.728101,1.720697,0
2003-12-04 00:00:00+00:00,1.727414,1.728997,1.718302,1.720697,1.733102,1
2003-12-05 00:00:00+00:00,1.720608,1.733102,1.719809,1.733102,1.734214,1
...,...,...,...,...,...,...
2023-05-02 00:00:00+01:00,1.248736,1.251220,1.243657,1.248595,1.247661,0
2023-05-03 00:00:00+01:00,1.247972,1.255209,1.246961,1.247661,1.256897,1
2023-05-04 00:00:00+01:00,1.256866,1.259684,1.255225,1.256897,1.258336,1
2023-05-05 00:00:00+01:00,1.258257,1.265102,1.256597,1.258336,1.262053,1


In [6]:
#Create ratios using the rolling averages as an indicator of market projection
time_horizons = [2,5,60,250,1000]
new_features = []

for h in time_horizons:
    rolling_averages = df.rolling(h).mean()
    
    ratio = f"Close_Ratio_{h}"
    df[ratio] = df["Close"] / rolling_averages["Close"]
    
    trend = f"Trend_{h}"
    df[trend] = df.shift(1).rolling(h).sum()["Target"]
    
    new_features+= [ratio, trend]


In [7]:
#Adding RSI column with 14 day lookback as it is the standard
df['RSI'] = ta.rsi(close = df["Close"], length= 14)
new_features.append("RSI")

In [8]:
#Adding MACD indicator columns with 12 and 26 day ema and 9 unit period ema of macd chart
macd = ta.macd(close = df['Close'], fast = 12, slow = 26, signal = 9)
df= pd.concat([df, macd], axis = 1)
new_features+= list(macd)

In [9]:
#Adding Stochastic Oscillator indicator
stoch = ta.stoch(high = df['High'], low = df['Low'], close = df['Close'], k= 14) 
df = pd.concat([df,stoch], axis = 1)
new_features+= list(stoch)

In [10]:
#Drop null values created by indicator creation
df = df.dropna()
df

Unnamed: 0_level_0,Open,High,Low,Close,Tomorrow_Close,Target,Close_Ratio_2,Trend_2,Close_Ratio_5,Trend_5,...,Close_Ratio_250,Trend_250,Close_Ratio_1000,Trend_1000,RSI,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,STOCHk_14_3_3,STOCHd_14_3_3
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2007-10-04 00:00:00+01:00,2.032603,2.042400,2.028315,2.038611,2.043694,1,1.001476,1.0,0.999151,2.0,...,1.031575,132.0,1.095111,507.0,57.885143,0.006289,0.002464,0.003825,80.072541,84.726310
2007-10-05 00:00:00+01:00,2.038487,2.043820,2.031901,2.043694,2.035416,0,1.001245,2.0,1.001888,2.0,...,1.033799,132.0,1.097656,507.0,59.962376,0.006940,0.002492,0.004448,82.107199,81.794322
2007-10-08 00:00:00+01:00,2.041900,2.042901,2.033595,2.035416,2.037697,1,0.997971,1.0,0.998702,2.0,...,1.029294,131.0,1.093030,507.0,55.188454,0.006710,0.001810,0.004900,82.568243,81.582661
2007-10-09 00:00:00+01:00,2.035499,2.038985,2.026301,2.037697,2.041816,1,1.000560,1.0,1.000046,3.0,...,1.030106,132.0,1.094069,508.0,56.222705,0.006635,0.001388,0.005247,80.884388,81.853277
2007-10-10 00:00:00+01:00,2.037697,2.047209,2.037697,2.041816,2.034215,0,1.001010,2.0,1.001162,4.0,...,1.031837,133.0,1.096098,508.0,58.103188,0.006829,0.001266,0.005564,77.813756,80.422129
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-05-01 00:00:00+01:00,1.255840,1.257000,1.248954,1.255761,1.248595,0,1.002399,2.0,1.005552,4.0,...,1.046120,132.0,0.975436,498.0,62.502240,0.006112,-0.000188,0.006300,71.463313,60.450082
2023-05-02 00:00:00+01:00,1.248736,1.251220,1.243657,1.248595,1.247661,0,0.997139,1.0,1.000039,3.0,...,1.040095,131.0,0.969882,498.0,56.169330,0.005834,-0.000373,0.006207,70.122656,64.770662
2023-05-03 00:00:00+01:00,1.247972,1.255209,1.246961,1.247661,1.256897,1,0.999626,0.0,0.998302,3.0,...,1.039319,130.0,0.969165,498.0,55.381028,0.005475,-0.000585,0.006061,67.045379,69.543783
2023-05-04 00:00:00+01:00,1.256866,1.259684,1.255225,1.256897,1.258336,1,1.003688,1.0,1.004126,3.0,...,1.046936,131.0,0.976341,499.0,61.178977,0.005868,-0.000154,0.006022,66.695143,67.954392


In [11]:
df['Target'].value_counts()

Target
0    2059
1    1994
Name: count, dtype: int64

In [12]:
X = df[new_features]  
y = df["Target"]

train_num = int(0.8*len(X))

x_train, x_test = X[:train_num], X[train_num:]
y_train, y_test = y[:train_num], y[train_num:]

x_train.shape, x_test.shape, 

((3242, 16), (811, 16))

In [13]:
def create_model(hp):
    model = keras.models.Sequential()
    model.add(keras.layers.Input(shape=(16,)))
    model.add(keras.layers.Dense(units=hp.Int('units',
                                              min_value=10,
                                              max_value=130,
                                              step=5),
                                 activation="relu"))
    model.add(keras.layers.Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer=keras.optimizers.Adam(hp.Choice('learning_rate', values=[0.001, 0.004])),
                  metrics=['accuracy'])

    return model
    

In [14]:
#Checks to see if ANN_Weights directory exists prior to parameter tuning and deletes accordingly for successive runs
folder_name = "ANN_Weights"

# Check if the folder exists
if os.path.exists(folder_name):
    # Delete the folder and its contents
    shutil.rmtree(folder_name)
    print(f"The folder '{folder_name}' has been deleted.")
else:
    print(f"The folder '{folder_name}' does not exist.")

The folder 'ANN_Weights' has been deleted.


In [15]:
tuner = RandomSearch(
    create_model,
    objective = 'val_accuracy',
    max_trials = 5,
    executions_per_trial =3,
    project_name = 'ANN_Weights')

2023-05-08 22:35:31.493803: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [16]:
tuner.search_space_summary()

Search space summary
Default search space size: 2
units (Int)
{'default': None, 'conditions': [], 'min_value': 10, 'max_value': 130, 'step': 5, 'sampling': 'linear'}
learning_rate (Choice)
{'default': 0.001, 'conditions': [], 'values': [0.001, 0.004], 'ordered': True}


In [17]:
tuner.search(x_train, y_train, epochs = 5,
             validation_data = (x_test,y_test))

Trial 5 Complete [00h 00m 08s]
val_accuracy: 0.5125359495480856

Best val_accuracy So Far: 0.5330867171287537
Total elapsed time: 00h 00m 41s
INFO:tensorflow:Oracle triggered exit


In [18]:
tuner.results_summary()

Results summary
Results in ./ANN_Weights
Showing 10 best trials
Objective(name="val_accuracy", direction="max")

Trial 2 summary
Hyperparameters:
units: 20
learning_rate: 0.001
Score: 0.5330867171287537

Trial 3 summary
Hyperparameters:
units: 85
learning_rate: 0.001
Score: 0.5252774357795715

Trial 0 summary
Hyperparameters:
units: 65
learning_rate: 0.004
Score: 0.5170571208000183

Trial 1 summary
Hyperparameters:
units: 95
learning_rate: 0.004
Score: 0.5166461070378622

Trial 4 summary
Hyperparameters:
units: 25
learning_rate: 0.004
Score: 0.5125359495480856


In [19]:
#building final model withe best hyper parameters
best_hps = tuner.get_best_hyperparameters(num_trials = 1)[0]
best_model = tuner.hypermodel.build(best_hps)

In [20]:
n_folds = 10
history = best_model.fit(x_train, y_train, epochs = n_folds, validation_split=0.2, verbose = 0)
val_acc_per_epoch = history.history['val_accuracy'] 
cv_error = np.average(val_acc_per_epoch)
print('The {}-fold cross-validation accuracy score for this classifier is {:.2f}'.format(n_folds, cv_error))

The 10-fold cross-validation accuracy score for this classifier is 0.49


In [21]:
all_predictions = []
for _ in range(10):
    predictions = best_model.predict(x_test)
    all_predictions.append(predictions)
    
average_pred = sum(all_predictions) / len(all_predictions)
final_pred = np.where(average_pred >= 0.5, 1, 0)

print(classification_report(y_test, final_pred))

              precision    recall  f1-score   support

           0       0.48      0.53      0.50       396
           1       0.51      0.47      0.49       415

    accuracy                           0.50       811
   macro avg       0.50      0.50      0.50       811
weighted avg       0.50      0.50      0.50       811



In [22]:
training_data = round(0.8*len(df))


class RandomForestStrategy(Strategy):
    delta = 0.004
    
    def init(self):
        
        #Initialise model using best hyper parameters
        self.model = tuner.hypermodel.build(best_hps)
        
        #Train model using training set
        df = self.data.df.iloc[:train_num]
        self.model.fit(df[new_features], df["Target"], verbose = 0)
        
        #Create custom indicator for tmr predictions
        self.predictions = self.I(lambda: np.repeat(np.nan, len(self.data)), name = "prediction")
        
    def next(self):
        
        #Skip the data used for training and only perform trades on testing set
        if len(self.data) < training_data:
            return
        
        #Prepare variables for take profit and stop loss calculations 
        high, low, close = self.data.High, self.data.Low, self.data.Close
        
        #Predict the trend for the next close candle (upwards or downwards)
        x = self.data.df.iloc[-1:]
        prediction = self.model.predict(x[new_features], verbose = 0)[0]
  
        #Update the predictions indicator 
        self.predictions[-1] = prediction
        
        
        #upper is the take profit which is calculated to be one price delta away from closing price
        #lower is the stop loss which is calculated to be one price delta away from the closing price 
        upper,lower = close[-1] * (1 + np.r_[1, -1] * self.delta)
        
        #If the prediction is upwards and a long position is not held, place a long order using 20% of the accounts equity 
        #if the prediction is downwards and a short position is not help, place a short order using 20% of the accounts equity
        if prediction >= 0.5 and not self.position.is_long:
            self.buy(size =0.2, tp = upper, sl=lower)
        elif prediction <=0.5  and not self.position.is_short:
            self.sell(size =0.2, tp=lower, sl=upper)
            
            
#Single backtest run using ANNStrategy
backtest = Backtest(df, RandomForestStrategy, margin= 0.1)
backtest.run()


Start                     2007-10-04 00:00...
End                       2023-05-05 00:00...
Duration                   5692 days 00:00:00
Exposure Time [%]                   20.009869
Equity Final [$]                   6092.51426
Equity Peak [$]                  10073.448733
Return [%]                         -39.074857
Buy & Hold Return [%]               -38.27482
Return (Ann.) [%]                   -3.034001
Volatility (Ann.) [%]                3.587811
Sharpe Ratio                              0.0
Sortino Ratio                             0.0
Calmar Ratio                              0.0
Max. Drawdown [%]                  -39.519082
Avg. Drawdown [%]                  -19.944483
Max. Drawdown Duration     1124 days 00:00:00
Avg. Drawdown Duration      563 days 00:00:00
# Trades                                  739
Win Rate [%]                        23.951286
Best Trade [%]                       0.704504
Worst Trade [%]                     -1.412609
Avg. Trade [%]                    

In [23]:
backtest = Backtest(df, RandomForestStrategy, margin= 0.1)

results = np.zeros((5, 6))

for i in range(5):
    stats = backtest.run()
    results[i] = [stats['Return [%]'], stats['Win Rate [%]'], stats['Max. Drawdown [%]'],
                  stats['Avg. Drawdown [%]'], stats['Equity Final [$]'], stats['Equity Peak [$]']]

# Calculate the average statistics for 5 runs
averager = np.mean(results[:, 0])
averageb = np.mean(results[:, 1])
am_dd = np.mean(results[:, 2])
aa_dd = np.mean(results[:, 3])
eqf = np.mean(results[:, 4])
eqp = np.mean(results[:, 5])

# Print the average statistics
print("Mean Return%: {}".format(averager))
print("Mean Win Rate%: {}".format(averageb))
print("Mean Final Equity$: {}".format(eqf))
print("Mean Peak Equity$: {}".format(eqp))
print("Mean Max Drawdown%: {}".format(am_dd))
print("Mean Average Drawdown%: {}".format(aa_dd))

Mean Return%: -39.977777340802234
Mean Win Rate%: 23.833707210426734
Mean Final Equity$: 6002.222265919777
Mean Peak Equity$: 10091.301417762948
Mean Max Drawdown%: -41.028882210375336
Mean Average Drawdown%: -22.19056200265594
