# Tune the Baseline Trading Algorithm

In [273]:
# Imports
import pandas as pd
import numpy as np
from pathlib import Path
import hvplot.pandas
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import classification_report

In [274]:
# Import the OHLCV dataset into a Pandas Dataframe
ohlcv_df = pd.read_csv(
    r"C:\Users\ycola\Machine-Learning-Trading-Bot\emerging_markets_ohlcv.csv", 
    index_col='date', 
    infer_datetime_format=True, 
    parse_dates=True
)

# Review the DataFrame
ohlcv_df.head()

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-01-21 09:30:00,23.83,23.83,23.83,23.83,100
2015-01-21 11:00:00,23.98,23.98,23.98,23.98,100
2015-01-22 15:00:00,24.42,24.42,24.42,24.42,100
2015-01-22 15:15:00,24.42,24.44,24.42,24.44,200
2015-01-22 15:30:00,24.46,24.46,24.46,24.46,200


In [275]:
# Filter the date index and close columns
signals_df = ohlcv_df.loc[:, ["close"]]

# Use the pct_change function to generate returns from close prices
signals_df["Actual Returns"] = signals_df["close"].pct_change()

# Drop all NaN values from the DataFrame
signals_df = signals_df.dropna()

# Review the DataFrame
display(signals_df.head())
display(signals_df.tail())

Unnamed: 0_level_0,close,Actual Returns
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-21 11:00:00,23.98,0.006295
2015-01-22 15:00:00,24.42,0.018349
2015-01-22 15:15:00,24.44,0.000819
2015-01-22 15:30:00,24.46,0.000818
2015-01-26 12:30:00,24.33,-0.005315


Unnamed: 0_level_0,close,Actual Returns
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-01-22 09:30:00,33.27,-0.006866
2021-01-22 11:30:00,33.35,0.002405
2021-01-22 13:45:00,33.42,0.002099
2021-01-22 14:30:00,33.47,0.001496
2021-01-22 15:45:00,33.44,-0.000896


In [276]:
# Set the short window and long window
short_window = 20
long_window = 100

# Generate the fast and slow simple moving averages (4 and 100 days, respectively)
signals_df['SMA_Fast'] = signals_df['close'].rolling(window=short_window).mean()
signals_df['SMA_Slow'] = signals_df['close'].rolling(window=long_window).mean()

signals_df = signals_df.dropna()

# Review the DataFrame
display(signals_df.head())
display(signals_df.tail())

Unnamed: 0_level_0,close,Actual Returns,SMA_Fast,SMA_Slow
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-04-02 14:45:00,24.92,0.0,24.868,24.3214
2015-04-02 15:00:00,24.92,0.0,24.8855,24.3308
2015-04-02 15:15:00,24.94,0.000803,24.903,24.336
2015-04-02 15:30:00,24.95,0.000401,24.9095,24.3411
2015-04-02 15:45:00,24.98,0.001202,24.916,24.3463


Unnamed: 0_level_0,close,Actual Returns,SMA_Fast,SMA_Slow
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-01-22 09:30:00,33.27,-0.006866,32.481,30.40215
2021-01-22 11:30:00,33.35,0.002405,32.5955,30.44445
2021-01-22 13:45:00,33.42,0.002099,32.69,30.48745
2021-01-22 14:30:00,33.47,0.001496,32.788,30.53085
2021-01-22 15:45:00,33.44,-0.000896,32.8785,30.57495


In [277]:
# Initialize the new Signal column
signals_df['Signal'] = 0.0

# When Actual Returns are greater than or equal to 0, generate signal to buy stock long
signals_df.loc[(signals_df['Actual Returns'] >= 0), 'Signal'] = 1

# When Actual Returns are less than 0, generate signal to sell stock short
signals_df.loc[(signals_df['Actual Returns'] < 0), 'Signal'] = -1

# Review the DataFrame
display(signals_df.head())
display(signals_df.tail())

Unnamed: 0_level_0,close,Actual Returns,SMA_Fast,SMA_Slow,Signal
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-04-02 14:45:00,24.92,0.0,24.868,24.3214,1.0
2015-04-02 15:00:00,24.92,0.0,24.8855,24.3308,1.0
2015-04-02 15:15:00,24.94,0.000803,24.903,24.336,1.0
2015-04-02 15:30:00,24.95,0.000401,24.9095,24.3411,1.0
2015-04-02 15:45:00,24.98,0.001202,24.916,24.3463,1.0


Unnamed: 0_level_0,close,Actual Returns,SMA_Fast,SMA_Slow,Signal
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-01-22 09:30:00,33.27,-0.006866,32.481,30.40215,-1.0
2021-01-22 11:30:00,33.35,0.002405,32.5955,30.44445,1.0
2021-01-22 13:45:00,33.42,0.002099,32.69,30.48745,1.0
2021-01-22 14:30:00,33.47,0.001496,32.788,30.53085,1.0
2021-01-22 15:45:00,33.44,-0.000896,32.8785,30.57495,-1.0


In [278]:
signals_df['Signal'].value_counts()

 1.0    2368
-1.0    1855
Name: Signal, dtype: int64

In [279]:
# Calculate the strategy returns and add them to the signals_df DataFrame
signals_df['Strategy Returns'] = signals_df['Actual Returns'] * signals_df['Signal'].shift()

# Review the DataFrame
display(signals_df.head())
display(signals_df.tail())

Unnamed: 0_level_0,close,Actual Returns,SMA_Fast,SMA_Slow,Signal,Strategy Returns
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-04-02 14:45:00,24.92,0.0,24.868,24.3214,1.0,
2015-04-02 15:00:00,24.92,0.0,24.8855,24.3308,1.0,0.0
2015-04-02 15:15:00,24.94,0.000803,24.903,24.336,1.0,0.000803
2015-04-02 15:30:00,24.95,0.000401,24.9095,24.3411,1.0,0.000401
2015-04-02 15:45:00,24.98,0.001202,24.916,24.3463,1.0,0.001202


Unnamed: 0_level_0,close,Actual Returns,SMA_Fast,SMA_Slow,Signal,Strategy Returns
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-22 09:30:00,33.27,-0.006866,32.481,30.40215,-1.0,-0.006866
2021-01-22 11:30:00,33.35,0.002405,32.5955,30.44445,1.0,-0.002405
2021-01-22 13:45:00,33.42,0.002099,32.69,30.48745,1.0,0.002099
2021-01-22 14:30:00,33.47,0.001496,32.788,30.53085,1.0,0.001496
2021-01-22 15:45:00,33.44,-0.000896,32.8785,30.57495,-1.0,-0.000896


In [280]:
# Assign a copy of the sma_fast and sma_slow columns to a features DataFrame called X
X = signals_df[['SMA_Fast', 'SMA_Slow']].shift().dropna()

# Review the DataFrame
X.head()

Unnamed: 0_level_0,SMA_Fast,SMA_Slow
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-04-02 15:00:00,24.868,24.3214
2015-04-02 15:15:00,24.8855,24.3308
2015-04-02 15:30:00,24.903,24.336
2015-04-02 15:45:00,24.9095,24.3411
2015-04-06 09:30:00,24.916,24.3463


In [281]:
# Create the target set selecting the Signal column and assiging it to y
y = signals_df['Signal']

# Review the value counts
y.value_counts()

 1.0    2368
-1.0    1855
Name: Signal, dtype: int64

In [282]:
# Select the start of the training period
training_begin =X.index.min()

# Display the training begin date
print(training_begin)


2015-04-02 15:00:00


In [283]:
# Select the ending period for the training data with an offset of 3 months
training_end = X.index.min() + DateOffset(months=4)

# Display the training end date
print(training_end)

2015-08-02 15:00:00


In [284]:
# Generate the X_train and y_train DataFrames
X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

# Review the X_train DataFrame
X_train.head()

Unnamed: 0_level_0,SMA_Fast,SMA_Slow
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-04-02 15:00:00,24.868,24.3214
2015-04-02 15:15:00,24.8855,24.3308
2015-04-02 15:30:00,24.903,24.336
2015-04-02 15:45:00,24.9095,24.3411
2015-04-06 09:30:00,24.916,24.3463


In [285]:
# Generate the X_test and y_test DataFrames
X_test = X.loc[training_end+DateOffset(hours=1):]
y_test = y.loc[training_end+DateOffset(hours=1):]

# Review the X_test DataFrame
X_train.head()

Unnamed: 0_level_0,SMA_Fast,SMA_Slow
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-04-02 15:00:00,24.868,24.3214
2015-04-02 15:15:00,24.8855,24.3308
2015-04-02 15:30:00,24.903,24.336
2015-04-02 15:45:00,24.9095,24.3411
2015-04-06 09:30:00,24.916,24.3463


In [286]:
# Scale the features DataFrames

# Create a StandardScaler instance
scaler = StandardScaler()

# Apply the scaler model to fit the X-train data
X_scaler = scaler.fit(X_train)

# Transform the X_train and X_test DataFrames using the X_scaler
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [287]:
# Import a new classifier from SKLearn
from sklearn.linear_model import LogisticRegression

# Initiate the model instance
logistic_regression_model = LogisticRegression()


In [288]:
# Fit the model using the training data
model = logistic_regression_model.fit(X_train_scaled, y_train)

# Use the testing dataset to generate the predictions for the new model
pred = logistic_regression_model.predict(X_test_scaled)

# Review the model's predicted values
pred


array([-1., -1., -1., ...,  1.,  1.,  1.])

In [289]:
# Use a classification report to evaluate the model using the predictions and testing data
report = classification_report(y_test, pred)

# Print the classification report
print(report)


              precision    recall  f1-score   support

        -1.0       0.45      0.16      0.23      1779
         1.0       0.56      0.85      0.68      2270

    accuracy                           0.55      4049
   macro avg       0.51      0.50      0.45      4049
weighted avg       0.51      0.55      0.48      4049



In [290]:
# Create a new empty predictions DataFrame:
pred_df = pd.DataFrame(index=X_test.index)

# Add the SVM model predictions to the DataFrame
pred_df['SVM'] =  pred

# Add the actual returns to the DataFrame
pred_df["Actual Returns"] = signals_df['Actual Returns']

# Add the strategy returns to the DataFrame
pred_df["Strategy returns"] = pred_df['Actual Returns'] * pred

# Review the DataFrame
pred_df.head()


Unnamed: 0_level_0,SVM,Actual Returns,Strategy returns
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015-08-03 09:30:00,-1.0,-0.013925,0.013925
2015-08-03 10:15:00,-1.0,-0.002648,0.002648
2015-08-03 10:30:00,-1.0,0.002212,-0.002212
2015-08-03 10:45:00,-1.0,0.000883,-0.000883
2015-08-03 11:00:00,-1.0,0.000441,-0.000441


In [291]:
# Plot the actual returns versus the strategy returns
(1+ pred_df[['Actual Returns', 'Strategy returns']]).cumprod().hvplot()