# Machine Learning Trading Bot

In [None]:
# Import libraries
import pandas as pd
import numpy as np
from pathlib import Path
import hvplot.pandas
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import classification_report
from finta import TA

# Import CSV file and create pandas DataFrame

In [None]:
# Import stocks dataset
# NOTE: '[insert_title]' is a placeholder; replace it with the real CSV file name before running.
# `infer_datetime_format` is intentionally not passed: it is deprecated since pandas 2.0,
# where `parse_dates=True` already infers the date format of the index column.
stocks_df = pd.read_csv(
    Path('../Resources/[insert_title].csv'),
    index_col='date',
    parse_dates=True,
)

# Review the DataFrame
stocks_df.head()

In [None]:
# Keep only the close column (the date index comes along) as an independent copy,
# so the column assignment below never writes into a slice of stocks_df
signals_df = stocks_df.loc[:, ['close']].copy()

# Use pct_change to generate period-over-period returns from the close prices
signals_df['Actual Returns'] = signals_df['close'].pct_change()

# Drop all NaN values (the first row has no previous close, so its return is NaN)
signals_df = signals_df.dropna()

# Review the DataFrame
display(signals_df.head())
display(signals_df.tail())

# Create a trading algorithm using technical indicators

## Generate Signals - Example using SMA with short and long window

In [None]:
# Create SMA signals DataFrame
sma_signals_df = stocks_df.loc[:, ["close"]].copy()

# Set the short and long windows (number of rows in each rolling window)
short_window = 50
long_window = 100

# Create a short window SMA and long window SMA from this frame's own close column.
# signals_df has already lost its first row, so rolling over it would start every
# window one row late and discard one otherwise valid SMA row.
sma_signals_df['SMA_Fast'] = sma_signals_df['close'].rolling(window=short_window).mean()
sma_signals_df['SMA_Slow'] = sma_signals_df['close'].rolling(window=long_window).mean()

# Drop the warm-up rows where either moving average is still NaN
sma_signals_df = sma_signals_df.dropna()

# Review the DataFrame
display(sma_signals_df.head())
display(sma_signals_df.tail())

In [None]:
# Align the actual returns with the rows that survived the SMA warm-up dropna.
# Reindexing explicitly avoids relying on pandas to align a boolean mask that
# comes from a differently indexed DataFrame.
aligned_returns = signals_df['Actual Returns'].reindex(sma_signals_df.index)

# Signal is 1.0 when the actual return is greater than or equal to 0 and 0.0 otherwise
# (a negative or missing return leaves the signal at 0.0; no short signal is produced)
sma_signals_df['Signal'] = (aligned_returns >= 0).astype(float)

# Review the DataFrame
display(sma_signals_df.head())
display(sma_signals_df.tail())

In [None]:
# Count how many rows carry each distinct Signal value
sma_signals_df.loc[:, 'Signal'].value_counts()

In [None]:
# Visualize close price for the investment.
# Both layers read from sma_signals_df so they cover exactly the same rows.
stocks_close = sma_signals_df[["close"]].hvplot(
    line_color='lightgray',
    ylabel='Price in $',
    width=1000,
    height=400
)

# Visualize moving averages.
# SMA_Fast / SMA_Slow exist only in sma_signals_df (signals_df holds just close and returns).
moving_avgs = sma_signals_df[["SMA_Fast", "SMA_Slow"]].hvplot(
    ylabel='Price in $',
    width=1000,
    height=400
)

# Overlay plots
sma_ema_plot = stocks_close * moving_avgs
sma_ema_plot

In [None]:
# sma_signals_df was built from the close column only, so bring the actual returns in
# from signals_df first (column assignment aligns the values on the date index)
sma_signals_df['Actual Returns'] = signals_df['Actual Returns']

# Calculate the strategy returns and add them to the sma_signals_df DataFrame.
# The signal is shifted one row so each return is multiplied by the previous row's signal.
sma_signals_df['Strategy Returns'] = sma_signals_df['Actual Returns'] * sma_signals_df['Signal'].shift()

# Review the DataFrame
display(sma_signals_df.head())
display(sma_signals_df.tail())

In [None]:
# Plot the cumulative SMA strategy returns to examine performance.
# The 'Strategy Returns' column is written to sma_signals_df, not signals_df.
(1 + sma_signals_df['Strategy Returns']).cumprod().hvplot(title='Strategy Returns')

## Example using EMA technical indicators

In [None]:
# Create EMA signals DataFrame
ema_signals_df = stocks_df.loc[:, ["close"]].copy()

# Set the short window and long windows
short_window = 50
long_window = 100

# Generate the short and long exponential moving averages (spans of 50 and 100 rows).
# The columns are named EMA50 / EMA100 because the signal and plotting cells below
# look them up under exactly those names.
# The EMA is computed from this frame's own close column so the first row is not left NaN
# (signals_df has already dropped its first row).
ema_signals_df["EMA50"] = ema_signals_df["close"].ewm(span=short_window).mean()
ema_signals_df["EMA100"] = ema_signals_df["close"].ewm(span=long_window).mean()

# Review the DataFrame
ema_signals_df.head()

In [None]:
# Generate the trading signal 1.0 or 0.0
# Where 1.0 is when the EMA50 is greater than (i.e. has crossed over) the EMA100
# Where 0.0 is when the EMA50 is at or under the EMA100
ema_signal = np.where(ema_signals_df['EMA50'] > ema_signals_df['EMA100'], 1.0, 0.0)

# Keep the first `short_window` rows at 0.0, as the original warm-up slice intended
ema_signal[:short_window] = 0.0

# Assign the whole column in one step. The previous chained form
# `df['Signal'][n:] = ...` writes through an intermediate Series, which triggers
# SettingWithCopyWarning and does not update the frame under pandas copy-on-write.
ema_signals_df['Signal'] = ema_signal

# Calculate the points in time at which a position should be taken:
# diff() is 1.0 where the signal turns on, -1.0 where it turns off, 0.0 when unchanged
ema_signals_df['Entry/Exit'] = ema_signals_df['Signal'].diff()

# Review DataFrame
ema_signals_df.head()

In [None]:
# Count how many rows carry each distinct Signal value
ema_signals_df.loc[:, 'Signal'].value_counts()

In [None]:
# Visualize entry position relative to close price
# (Entry/Exit is Signal.diff(): 1.0 marks the row where the signal switches from 0 to 1)
entry = ema_signals_df[ema_signals_df["Entry/Exit"] == 1.0]["close"].hvplot.scatter(
    color='purple',
    marker='^',
    size=200,
    legend=False,
    ylabel='Price in $',
    width=1000,
    height=400
)

# Visualize exit position relative to close price
# An exit is the row where the signal switches from 1 to 0, i.e. diff() == -1.0.
# Filtering on 0.0 would instead select every row where the signal did not change.
exit = ema_signals_df[ema_signals_df["Entry/Exit"] == -1.0]["close"].hvplot.scatter(
    color='orange',
    marker='v',
    size=200,
    legend=False,
    ylabel='Price in $',
    width=1000,
    height=400
)

# Visualize close price for the investment
security_close = ema_signals_df[["close"]].hvplot(
    line_color='lightgray',
    ylabel='Price in $',
    width=1000,
    height=400
)

# Visualize moving averages
moving_avgs = ema_signals_df[["EMA50", "EMA100"]].hvplot(
    ylabel='Price in $',
    width=1000,
    height=400
)

# Overlay plots
entry_exit_plot = security_close * moving_avgs * entry * exit
entry_exit_plot


In [None]:
# ema_signals_df was built from the close column only, so bring the actual returns in
# from signals_df first (column assignment aligns the values on the date index)
ema_signals_df['Actual Returns'] = signals_df['Actual Returns']

# Calculate the strategy returns and add them to the ema_signals_df DataFrame.
# The signal is shifted one row so each return is multiplied by the previous row's signal.
ema_signals_df['Strategy Returns'] = ema_signals_df['Actual Returns'] * ema_signals_df['Signal'].shift()

# Review the DataFrame
display(ema_signals_df.head())
display(ema_signals_df.tail())

In [None]:
# Plot the cumulative EMA strategy returns to examine performance.
# The 'Strategy Returns' column is written to ema_signals_df, not signals_df.
(1 + ema_signals_df['Strategy Returns']).cumprod().hvplot(title='Strategy Returns')

# Example - Using Bollinger Bands

In [None]:
# Create Bollinger Bands signals DataFrame
bb_stocks_df = TA.BBANDS(stocks_df)

# Review rows 17 through 24 by position.
# `.head` is a method and cannot be subscripted; positional slicing needs `.iloc`.
bb_stocks_df.iloc[17:25, :]

In [None]:
# Concatenate the Bollinger Bands to the stocks DataFrame.
# The bands live in bb_stocks_df (bb_signals_df does not exist yet at this point), and
# axis=1 places them next to the price columns instead of appending them as extra rows.
bb_signals_df = pd.concat([stocks_df, bb_stocks_df], axis=1)

# Review rows 17 through 24 by position (a (rows, columns) slice requires .iloc)
bb_signals_df.iloc[17:25, :]

In [None]:
# Layout options shared by every layer of the Bollinger Bands chart
bb_layout = {"ylabel": "Price in $", "width": 1000, "height": 400}

# Close price for stocks
security_close = bb_signals_df[["close"]].hvplot(line_color="lightgray", **bb_layout)

# Upper, middle and lower Bollinger Bands, one line layer each
bb_upper = bb_signals_df[["BB_UPPER"]].hvplot(line_color="purple", **bb_layout)
bb_middle = bb_signals_df[["BB_MIDDLE"]].hvplot(line_color="orange", **bb_layout)
bb_lower = bb_signals_df[["BB_LOWER"]].hvplot(line_color="blue", **bb_layout)

# Overlay the four layers into a single chart
bbands_plot = security_close * bb_upper * bb_middle * bb_lower
bbands_plot

# Create Trading Algorithm Using Bollinger Bands

In [None]:
# Create a Trading Algorithm Using Bollinger Bands
# Set the Signal column
bb_signals_df['Signal'] = 0.0

# Generate the trading signals on bb_signals_df, the frame that holds both the close
# price and the bands (bb_stocks_df only contains the BB_* columns).
# Signal is 1.0 where the close price is below BB_LOWER and 0.0 where it is above BB_UPPER.
# Boolean masks replace the row-by-row iterrows() loop; the result is the same.
below_lower_band = bb_signals_df["close"] < bb_signals_df["BB_LOWER"]
above_upper_band = bb_signals_df["close"] > bb_signals_df["BB_UPPER"]
bb_signals_df.loc[below_lower_band, "Signal"] = 1.0
bb_signals_df.loc[above_upper_band, "Signal"] = 0.0

# Review the DataFrame
bb_signals_df.tail()

# Visualize Entry/Exit Positions using Bollinger Bands

In [None]:
# Visualize entry position relative to close price.
# Rows are selected from bb_signals_df, the frame that actually has the close column
# (bb_stocks_df only contains the BB_* columns).
entry = bb_signals_df.loc[bb_signals_df["Signal"] == 1.0, "close"].hvplot.scatter(
    color="green",
    marker="^",
    size=200,
    legend=False,
    ylabel="Price in $",
    width=1000,
    height=400
)

# Visualize exit position relative to close price.
# Exits are the rows where the close is above BB_UPPER. Filtering on Signal == 0.0
# would also match every row that carries no signal at all.
exit = bb_signals_df.loc[bb_signals_df["close"] > bb_signals_df["BB_UPPER"], "close"].hvplot.scatter(
    color="red",
    marker="v",
    size=200,
    legend=False,
    ylabel="Price in $",
    width=1000,
    height=400
)

# Visualize close price for the investment
security_close = bb_signals_df[["close"]].hvplot(
    line_color="lightgray",
    ylabel="Price in $",
    width=1000,
    height=400
)

bb_upper = bb_signals_df[["BB_UPPER"]].hvplot(
    line_color="purple",
    ylabel="Price in $",
    width=1000,
    height=400
)


bb_middle = bb_signals_df[["BB_MIDDLE"]].hvplot(
    line_color="orange",
    ylabel="Price in $",
    width=1000,
    height=400
)

bb_lower = bb_signals_df[["BB_LOWER"]].hvplot(
    line_color="blue",
    ylabel="Price in $",
    width=1000,
    height=400
)


# Overlay plots
bbands_plot = security_close * bb_upper * bb_middle * bb_lower * entry * exit
bbands_plot

In [None]:
# bb_signals_df is built from stocks_df plus the Bollinger Bands and has no returns column,
# so bring the actual returns in from signals_df (column assignment aligns on the date index)
bb_signals_df['Actual Returns'] = signals_df['Actual Returns']

# Calculate the strategy returns and add them to the bb_signals_df DataFrame.
# The signal is shifted one row so each return is multiplied by the previous row's signal.
bb_signals_df['Strategy Returns'] = bb_signals_df['Actual Returns'] * bb_signals_df['Signal'].shift()

# Review the DataFrame
display(bb_signals_df.head())
display(bb_signals_df.tail())

In [None]:
# Plot the cumulative Bollinger Bands strategy returns to examine performance
# (the closing parenthesis of the hvplot call was missing, which is a SyntaxError)
(1 + bb_signals_df['Strategy Returns']).cumprod().hvplot(title='Strategy Returns')

# Split Data into Training and Testing datasets

In [None]:
# Assign a copy of the SMA_Fast and SMA_Slow columns to a features DataFrame called X.
# The columns live in sma_signals_df (signals_df only holds close and Actual Returns).
# shift() moves each feature down one row, so every row sees the previous row's averages;
# dropna() then removes the first row, which has nothing to shift from.
X = sma_signals_df[['SMA_Fast', 'SMA_Slow']].shift().dropna().copy()

# Review the DataFrame
X.head()

In [None]:
# Create the target set by selecting the Signal column and assigning it to y.
# The Signal column is created on sma_signals_df, the same frame the features come from.
y = sma_signals_df['Signal'].copy()

# Review the value counts
y.value_counts()

In [None]:
# The training period starts at the earliest timestamp present in the features
feature_index = X.index
training_begin = feature_index.min()

# Show the training start date
print(training_begin)

In [None]:
# Length of the training period in months (tune this to change the train/test split)
training_months = 48

# Select the ending period for the training data: training_begin plus training_months
training_end = training_begin + DateOffset(months=training_months)
# Display the training end date
print(training_end)

In [None]:
# Label-based window covering the training period (both endpoints inclusive under .loc)
training_window = slice(training_begin, training_end)

# Cut the features and the target with the same window
X_train = X.loc[training_window]
y_train = y.loc[training_window]

# Peek at the training features
X_train.head()

In [None]:
# Generate the X_test and y_test DataFrames from the rows strictly after training_end.
# Label slicing with .loc is inclusive on both ends, so `X.loc[training_end:]` would put a
# row stamped exactly training_end into both the training and the testing sets.
X_test = X.loc[X.index > training_end]
y_test = y.loc[y.index > training_end]

# Review the X_test DataFrame
X_test.head()

In [None]:
# Standardise the feature DataFrames

# Fit a StandardScaler on the training features only
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to the training and the testing features alike
X_train_scaled, X_test_scaled = (
    X_scaler.transform(feature_set) for feature_set in (X_train, X_test)
)

In [None]:
# Instantiate the SVC classifier and fit it on the scaled training data
# (fit() returns the fitted estimator itself)
model = svm.SVC().fit(X_train_scaled, y_train)

# Predict on the scaled testing features
predictions = model.predict(X_test_scaled)

# Show the first ten predicted values
predictions[:10]

In [None]:
# Compare the SVC predictions against the true test labels
testing_report = classification_report(y_true=y_test, y_pred=predictions)

# Show the resulting classification report
print(testing_report)

In [None]:
# Create a new empty predictions DataFrame:

# Create a predictions DataFrame indexed like the test features
predictions_df = pd.DataFrame(index=X_test.index)

# Add the SVM model predictions to the DataFrame
# (`predictions` is the output of model.predict(X_test_scaled); `svm_pred` was never defined)
predictions_df['Predicted'] = predictions

# Add the actual returns to the DataFrame (aligned on the date index)
predictions_df['Actual Returns'] = signals_df['Actual Returns']

# Add the strategy returns to the DataFrame: the actual return scaled by the predicted signal.
# signals_df has no 'Strategy Returns' column, so the value is computed here.
predictions_df['Strategy Returns'] = predictions_df['Actual Returns'] * predictions_df['Predicted']

# Review the DataFrame
display(predictions_df.head())
display(predictions_df.tail())

In [None]:
# Cumulative growth of the actual returns and of the strategy returns
svm_cumulative_returns = (1 + predictions_df[['Actual Returns', 'Strategy Returns']]).cumprod()

# Draw both curves on one chart
svm_cumulative_returns.hvplot(title='[ML Model] Strategy Returns')

# Tune the Baseline Trading Algorithm

## Make adjustments to the parameters or use a different model

### Tuning options include adjusting the SMA/EMA windows, changing the train/test split, and including or excluding indicators/signals

# Evaluate a New Machine Learning Classifier and compare models

# Import a new classifier from SKLearn
from sklearn.ensemble import RandomForestClassifier

# Initiate the model instance.
# A random forest is fitted with randomness, so the seed is fixed to make the
# predictions and the classification report reproducible across notebook runs.
ml_model = RandomForestClassifier(random_state=1)

In [None]:
# Reuse the baseline's scaled training data to fit the new classifier, then predict
# on the scaled testing data (fit() returns the fitted estimator, so the calls chain)
ml_predictions = ml_model.fit(X_train_scaled, y_train).predict(X_test_scaled)

# Show the first ten predicted values
ml_predictions[:10]


In [None]:
# Compare the new classifier's predictions against the true test labels
ml_testing_report = classification_report(y_true=y_test, y_pred=ml_predictions)

# Show the resulting classification report
print(ml_testing_report)

In [None]:
# Create a new empty predictions DataFrame:

# Create a predictions DataFrame indexed like the test features
ml_predictions_df = pd.DataFrame(index=X_test.index)

# Add the RandomForestClassifier predictions to the DataFrame.
# `ml_predictions` is the output of ml_model.predict(X_test_scaled); `ml_pred` was never defined.
# The column is named 'Predicted' (as in predictions_df) because it holds predicted signals,
# not returns.
ml_predictions_df['Predicted'] = ml_predictions

# Add the actual returns to the DataFrame (aligned on the date index)
ml_predictions_df['Actual Returns'] = signals_df['Actual Returns']

# Add the strategy returns to the DataFrame: the actual return scaled by the predicted signal
ml_predictions_df['Trading Algorithm Returns'] = (
    ml_predictions_df['Actual Returns'] * ml_predictions_df['Predicted']
)

# Review the DataFrame
display(ml_predictions_df.head())
display(ml_predictions_df.tail())

In [None]:
# Cumulative growth of the actual returns and of the trading algorithm returns
ml_cumulative_returns = (1 + ml_predictions_df[['Actual Returns', 'Trading Algorithm Returns']]).cumprod()

# Draw both curves on one chart
ml_cumulative_returns.hvplot(title='[ML Model] Strategy Returns')