# Marketwhale

* Our company focuses on researching and building accurate trading algorithms for cryptocurrency using machine learning.

In [None]:
# Initial imports
import os
import requests
import json
import yfinance as yf
import pandas as pd
from pandas.tseries.offsets import DateOffset
import numpy as np
from dotenv import load_dotenv
from pathlib import Path
import hvplot.pandas
from finta import TA
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Flatten, Conv1D, Conv2D, MaxPooling1D, MaxPooling2D
from tensorflow.keras.optimizers import Adam, schedules, SGD
from tensorflow.keras.models import model_from_json
from finta import TA
from datetime import datetime
import math
import holoviews as hv

%matplotlib inline

# Part 1: Algorithmic Trading - SVM Model
### Completed by Rodrigo Monge
**  **

In [None]:
# Load the environment variables from the .env file by calling the load_dotenv function
# NOTE(review): no cell visible in this notebook reads an environment variable afterwards;
# confirm whether this call is still needed.
load_dotenv()

In [None]:
# Free Crypto API Call endpoint URLs for the held cryptocurrency assets
btc_url = "https://api.alternative.me/v2/ticker/Bitcoin/?convert=USD"

In [None]:
# Using the Python requests library, make an API call to access the current price of BTC
btc_response = requests.get(btc_url).json()

# json.dumps function to review the response data from the API call
# Indent and sort_keys parameters to make the response object readable
print(json.dumps(
    btc_response,
    indent=4,
    sort_keys=True)
)

In [None]:
# Navigate the BTC response object to access the current price of BTC
btc_price = btc_response["data"]['1']["quotes"]["USD"]["price"]

# Print the current price of BTC
print(f"The current price for Bitcoin is ${btc_price:,}")


In [None]:
# Use yfinance to retrieve BTC close values (note that with the current API 1m data can only be done for 7 days, and 1 hour only for 730 days)
start = "2020-10-30"
end = "2021-10-14"

In [None]:
# Use yfinance to retrieve BTC (hourly data)
btc_df = yf.download(
    "BTC-USD",
    start=start,
    end=end,
    interval="1h"
)

btc_df= btc_df.rename(columns=str.lower)
btc_df= btc_df.drop(['volume','adj close'], axis=1)
btc_df

In [None]:
btc_df.loc[:,['close']].hvplot()

In [None]:
# Download the Crypto Fear & Greed index (indicator of sentiment, used for volatility).
# limit=0 requests the complete history. A fixed limit such as 350 only returns the
# most recent 350 entries counted from the day the cell is run, so it stops
# overlapping the fixed start/end window above when the notebook is re-run later.
fear_and_greed_url = "https://api.alternative.me/fng/?limit=0"

# Timeout + status check: fail fast and with a clear error on network/HTTP problems
fear_and_greed_http_response = requests.get(fear_and_greed_url, timeout=30)
fear_and_greed_http_response.raise_for_status()
fear_and_greed_response = fear_and_greed_http_response.json()

fear_greed_df = pd.DataFrame(fear_and_greed_response["data"])

# The API appears to deliver both fields as strings (TODO confirm); convert them
# explicitly so the epoch seconds parse without relying on string handling in
# to_datetime, and so the index value is numeric when used as a model feature.
fear_greed_df['timestamp'] = pd.to_datetime(pd.to_numeric(fear_greed_df['timestamp']), unit='s')
fear_greed_df['value'] = pd.to_numeric(fear_greed_df['value'])

# Index by timestamp and keep only the numeric index value
fear_greed_df = fear_greed_df.set_index('timestamp').drop(['time_until_update', 'value_classification'], axis=1)

display(fear_greed_df)

In [None]:
# Merge the daily fear & greed value onto every hourly BTC candle.
#
# The join key is the calendar day, expressed on both sides as a tz-naive
# midnight timestamp. This avoids joining datetime.date objects against a
# DatetimeIndex and does not depend on the auto-generated 'index' / 'level_0'
# column names produced by reset_index().

# Calendar day of each hourly candle (tz_localize(None) drops a timezone if present)
candle_days = pd.DatetimeIndex(btc_df.index).tz_localize(None).normalize()

# Sentiment value keyed by calendar day; duplicates are removed so that the
# reindex below is well defined
daily_fear_greed = fear_greed_df['value'].copy()
daily_fear_greed.index = pd.DatetimeIndex(daily_fear_greed.index).normalize()
daily_fear_greed = daily_fear_greed[~daily_fear_greed.index.duplicated(keep='first')]

# Look up the day's value for every candle; the hourly index of btc_df is kept
btc_df_new = btc_df.copy()
btc_df_new['fear_greed'] = daily_fear_greed.reindex(candle_days).to_numpy()

# Inner-join semantics: candles on days without a sentiment value are dropped
btc_df_new = btc_df_new.dropna(subset=['fear_greed'])
btc_df = btc_df_new

display(btc_df)

In [None]:
# Add a short and a long simple moving average (finta TA.SMA) to the data.
# The window lengths are still to be verified.
sma_short, sma_long = 20, 100

# Compute both averages first, then attach them as columns
SMA20, SMA100 = TA.SMA(btc_df, sma_short), TA.SMA(btc_df, sma_long)

btc_df["SMA20"], btc_df["SMA100"] = SMA20, SMA100

btc_df

In [None]:
# Create a column to hold the trading signal
btc_df["SMA Signal"] = 0.0

# Generate the trading signal 0 or 1,
# where 1 is the short-window (SMA20) greater than the long-window (SMA100)
# and 0 is when the condition is not met
btc_df['SMA Signal'] = np.where((btc_df['SMA20'] < btc_df['SMA100']),
                            1.0, 0.0)
# Review the DataFrame
btc_df

In [None]:
# Calculate the points in time when the Signal value changes
# Identify trade entry (1) and exit (-1) points
btc_df["SMA Entry/Exit"] = btc_df["SMA Signal"].diff()

# Review the DataFrame
btc_df

In [None]:
# Visualize moving averages
moving_avgs = btc_df[['SMA20', 'SMA100', 'close']].hvplot(
    ylabel='Price in $',
    width=1000,
    height=400)

# Show the plot
moving_avgs

In [None]:
# Visualize exit position relative to close price
exit = btc_df[btc_df['SMA Entry/Exit'] == -1.0]['close'].hvplot.scatter(
    color='blue',
    marker='v',
    legend=False,
    ylabel='Price in $',
    width=1000,
    height=400)


In [None]:
# Visualize entry position relative to close price
entry = btc_df[btc_df['SMA Entry/Exit'] == 1.0]['close'].hvplot.scatter(
    color='limegreen',
     marker='^',
    legend=False,
    ylabel='Price in $',
    width=1000,
    height=400)

In [None]:
# Visualize close price for the investment
security_close = btc_df[['close']].hvplot(
    line_color='lightgray',
    ylabel='Price in $',
    width=1000,
    height=400)

In [None]:
# Visualize moving averages
moving_avgs = btc_df[['SMA20', 'SMA100']].hvplot(
    ylabel='Price in $',
    width=1000,
    height=400)

In [None]:
# Create the overlay plot
entry_exit_plot = security_close * moving_avgs * entry * exit

# Show the plot
entry_exit_plot.opts(
    title="BTC - SMA20, SMA100, Entry and Exit Points"
)

In [None]:
# We add Bollinger Bands Width to our data

BBWIDTH = TA.BBWIDTH(btc_df)

btc_df["BBWIDTH"]=BBWIDTH
btc_df

In [None]:
# We add WMA to our data

WMA = TA.WMA(btc_df)

btc_df["WMA"]=WMA
btc_df

In [None]:
# Calculate the daily returns using the closing prices and the pct_change function
btc_df["actual_returns"] = btc_df["close"].pct_change()
btc_df = btc_df.dropna()

# Display sample data
btc_df

In [None]:
# Create a new column for actual returns signals
btc_df['actual returns signal'] = 0.0

# Create the signal to buy
btc_df['actual returns signal'] = np.where((btc_df['actual_returns'] < 0),
                            -1.0, 1.0)

btc_df

In [None]:
# Calculate the points in time when the Signal value changes
# Identify trade entry (1) and exit (-1) points
btc_df["actual returns Entry/Exit"] = btc_df["actual returns signal"].diff()

# Review the DataFrame
btc_df

In [None]:
# Assign a copy of the `sma_fast` and `sma_slow` columns to a new DataFrame called `X`
X_btc = btc_df[['close','fear_greed','WMA','BBWIDTH','SMA20','SMA100',"WMA"]].shift().dropna().copy()

# Display sample data
display(X_btc.head())
display(X_btc.tail())

In [None]:
# Copy the new "signal" column to a new Series called `y`.

y_btc = btc_df['actual returns signal'].dropna().copy()

display(y_btc.head())
display(y_btc.tail())

In [None]:
# Select the start of the training period
training_begin = X_btc.index.min()

# Display the training begin date
print(training_begin)

# Select the ending period for the training data with an offset of 7 months
training_end = X_btc.index.min() + DateOffset(months=7)

# Display the training end date
print(training_end)

In [None]:
# Generate the X_train and y_train DataFrames
X_btc_train = X_btc.loc[training_begin:training_end]
y_btc_train = y_btc.loc[training_begin:training_end]

# Display sample data
display(X_btc_train.head())
display(y_btc_train.head())

In [None]:
# Generate the X_test and y_test DataFrames
X_btc_test = X_btc.loc[training_end:]
y_btc_test = y_btc.loc[training_end:]

# Display sample data
display(X_btc_test.head())
display(y_btc_test.head())

In [None]:
# Standardize the features. The scaler is fitted on the training rows only and
# the same fitted scaler is then applied to both the training and the test rows.
scaler = StandardScaler()
X_btc_scaler = scaler.fit(X_btc_train)

X_btc_train_scaled, X_btc_test_scaled = (
    X_btc_scaler.transform(features) for features in (X_btc_train, X_btc_test)
)

In [None]:
# Create the classifier model
svm_model = svm.SVC()

# Fit the model to the data using X_train_scaled and y_train
svm_model = svm_model.fit(X_btc_train_scaled, y_btc_train)

# Use the trained model to predict the trading signals for the training data
training_signal_predictions = svm_model.predict(X_btc_train_scaled)

# Display the sample predictions
training_signal_predictions[:10]

In [None]:
# Evaluate the model using a classification report
training_report = classification_report(y_btc_train, training_signal_predictions)
print(training_report)

In [None]:
# Use the trained model to predict the trading signals for the testing data.
testing_signal_predictions = svm_model.predict(X_btc_test_scaled)

In [None]:
# Evaluate the model's ability to predict the trading signal for the testing data
testing_report = classification_report(y_btc_test, testing_signal_predictions)
print(testing_report)

In [None]:
# Create a predictions DataFrame
predictions_df = pd.DataFrame(index=X_btc_test.index)

predictions_df["predicted_signal"] = testing_signal_predictions

predictions_df["actual_returns"] = btc_df["actual_returns"]

predictions_df["trading_algorithm_returns"] = (
    predictions_df["actual_returns"] * predictions_df["predicted_signal"]
)

# Review the DataFrame
predictions_df.head(30)

In [None]:
# Calculate and plot the cumulative returns for the `actual_returns` and the `trading_algorithm_returns`
(1 + predictions_df[["actual_returns", "trading_algorithm_returns"]]).cumprod().hvplot()

# Part 2: Neural Network - LSTM Model
### Completed by Jonathan Woolsey
** **

In [None]:
#model is without fear and greed index 
#Use yfinance to retrieve BTC and ETH close values (note that with the current API 1m data can only be done for 7 days, and 1 hour only for 730 days)

start = "2020-10-30"
end = "2021-10-14"

In [None]:
# Use yfinance to retrieve BTC and ETH close values
btc_df = yf.download(
    "BTC-USD",
    start=start,
    end=end,
    interval="1h"
)

btc_df= btc_df.rename(columns=str.lower)
btc_df= btc_df.drop(['volume','adj close'], axis=1)

btc_df

In [None]:
# Add technical-analysis indicators (finta): two simple moving averages and a WMA
sma_short, sma_long = 20, 100

for sma_label, sma_window in (("SMA20", sma_short), ("SMA100", sma_long)):
    btc_df[sma_label] = TA.SMA(btc_df, sma_window)

btc_df["WMA"] = TA.WMA(btc_df)

btc_df.head()

In [None]:
# Calculate the daily returns using the closing prices and the pct_change function
btc_df["actual_returns"] = btc_df["close"].pct_change()
btc_df.dropna(inplace=True)

# Display sample data
display(btc_df.head())
display(btc_df.tail())

In [None]:
# Create a new column for actual returns signals
btc_df['actual returns signal'] = 0.0

# When Actual Returns are greater than or equal to 0, generate signal to buy stock long
btc_df.loc[(btc_df['actual_returns'] >= 0), 'actual returns signal'] = 1

# When Actual Returns are less than 0, generate signal to sell stock short
btc_df.loc[(btc_df['actual_returns'] < 0), 'actual returns signal'] = -1

btc_df

In [None]:
# Target (y): the returns signal with -1 recoded to 0, giving a binary 0/1 label
raw_signal = btc_df['actual returns signal'].dropna().copy()
y = raw_signal.replace(-1, 0)
display(y.value_counts())

# Features (X): price and indicator columns shifted down one row, so every row
# carries the previous candle's values; the leading NaN row is dropped
shifted_features = btc_df[['close','WMA','SMA20','SMA100']].shift()
X = shifted_features.dropna().copy()
display(X.head())

In [None]:
# Select the start of the training period
training_begin = X.index.min()
training_period_months = 6
training_end = X.index.min() + DateOffset(months=training_period_months)

# Display the training begin / end dates
print(training_begin)
print(training_end)

In [None]:
# Generate the X_train and y_train DataFrames
X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

# Review the X_train DataFrame
X_train.head()
X_train.tail()

In [None]:
# Generate the X_test and y_test DataFrames
X_test = X.loc[training_end+DateOffset(hours=1):]
y_test = y.loc[training_end+DateOffset(hours=1):]

# Review the X_test DataFrame
X_test.head()

In [None]:
# Standardize the feature DataFrames. The scaler is fitted on the training rows
# only and then applied to both the training and the test rows.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [None]:
# Model dimensions
number_output_neurons = 1
number_input_features = X.shape[1]

# Each hidden layer size is the floor of the mean of the sizes on either side of it
hidden_nodes_layer1 = (number_input_features + number_output_neurons) // 2
hidden_nodes_layer2 = (hidden_nodes_layer1 + number_output_neurons) // 2

# Insert a length-1 time axis: (samples, features) -> (samples, 1, features)
train_rows, train_cols = X_train_scaled.shape
X_train_reshaped = X_train_scaled.reshape(train_rows, 1, train_cols)

test_rows, test_cols = X_test_scaled.shape
X_test_reshaped = X_test_scaled.reshape(test_rows, 1, test_cols)

In [None]:
nn = Sequential()

# add LSTM layer
nn.add(LSTM(10, input_shape=(1, number_input_features), return_sequences=True))
nn.add(Flatten())

# First hidden layer
nn.add(Dense(units=hidden_nodes_layer1, activation="relu"))

# Output layer
nn.add(Dense(units=number_output_neurons, activation="sigmoid"))

In [None]:
# Compile model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Fit the model
model = nn.fit(X_train_reshaped, y_train, epochs=100)

In [None]:
# Evaluate the model loss and accuracy metrics using the evaluate method and the test data
model_loss, model_accuracy = nn.evaluate(X_test_reshaped, y_test, verbose=2)

# Display the model loss and accuracy results
print()
print(f"Loss: {model_loss}")
print(f"Accuracy: {model_accuracy}")

In [None]:
# Save model as JSON - model will be backtested
# exist_ok=True creates the folder only when it is missing, without the
# check-then-create race of a separate os.path.exists() test
os.makedirs('Models', exist_ok=True)

def save_model_weights(nn, name):
    """Save a model as ``Models/<name>.json`` plus ``Models/<name>.h5``.

    The JSON file receives the string returned by ``nn.to_json()``; the weights
    are written by ``nn.save_weights()``.

    Args:
        nn: model object providing ``to_json()`` and ``save_weights(path)``.
        name: file stem used for both files inside the ``Models`` folder.
    """
    models_dir = Path("Models")
    # Create the target folder here as well, so the function also works when
    # the folder-creating statement of the notebook has not been run
    models_dir.mkdir(parents=True, exist_ok=True)

    nn_json = nn.to_json()
    with open(models_dir / f"{name}.json", "w") as json_file:
        json_file.write(nn_json)
    nn.save_weights(f"Models/{name}.h5")

def load_model_weights(name):
    """Rebuild a model from ``Models/<name>.json`` and load ``Models/<name>.h5`` into it.

    Args:
        name: file stem shared by the JSON file and the weights file.

    Returns:
        The model created by ``model_from_json`` with the stored weights loaded.
    """
    architecture_path = Path(f"Models/{name}.json")
    with open(architecture_path, "r") as architecture_file:
        architecture = architecture_file.read()

    restored = model_from_json(architecture)
    restored.load_weights(f"Models/{name}.h5")
    return restored

# Write the trained LSTM network to Models/model_lstm.json and Models/model_lstm.h5
save_model_weights(nn, 'model_lstm')

# Read the model back from those files; the prediction cell uses this reloaded copy
nn_loaded = load_model_weights('model_lstm')

In [None]:
# Create a predictions DataFrame
predictions = nn_loaded.predict(X_test_reshaped)

predictions_df = pd.DataFrame(index=X_test.index)

predictions_df["predicted_signal"] = np.where(predictions > 0.5, 1, -1 )

predictions_df["actual_returns"] = btc_df["actual_returns"]

predictions_df["trading_algorithm_returns"] = (
    predictions_df["actual_returns"] * predictions_df["predicted_signal"]
)

# Review the DataFrame
display(predictions_df["predicted_signal"].value_counts())
predictions_df.head()

In [None]:
hv.extension('bokeh')
# Calculate and plot the cumulative returns for the `actual_returns` and the `trading_algorithm_returns`
(1 + predictions_df[["actual_returns", "trading_algorithm_returns"]]).cumprod().hvplot()

# Part 3: Neural Network - CNN Model
### Completed by Jonathan Woolsey
** **

In [None]:
# This model is built without the fear/greed index.
# Split the data into fixed-length samples (sliding windows).
# Code below adapted from https://machinelearningmastery.com/how-to-develop-convolutional-neural-network-models-for-time-series-forecasting/

def split_data(df, n_steps, feature_cols=None, target_col='actual returns signal'):
    """Cut a DataFrame into overlapping windows for a sequence model.

    Sample ``i`` consists of the feature rows ``i .. i + n_steps - 1``; its
    target is the value of ``target_col`` in row ``i + n_steps``, the row
    directly after the window. Rows are addressed purely by position, so the
    kind of index ``df`` carries does not matter.

    Args:
        df: DataFrame containing the feature columns and the target column.
        n_steps: number of consecutive rows per window.
        feature_cols: feature column names; defaults to
            ``['close', 'WMA', 'SMA20', 'SMA100']``.
        target_col: name of the target column.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. With at least one sample, ``X`` has
        shape ``(samples, n_steps, len(feature_cols))`` and ``y`` has shape
        ``(samples,)``. Both are empty arrays when ``df`` has no more than
        ``n_steps`` rows.
    """
    if feature_cols is None:
        feature_cols = ['close', 'WMA', 'SMA20', 'SMA100']

    # Convert once to plain arrays: positional slicing is then unambiguous
    # (integer keys on a label-indexed Series are not) and no DataFrame has
    # to be built per window.
    features = df[list(feature_cols)].to_numpy()
    targets = df[target_col].to_numpy()

    # A window starting at `start` needs its target row start + n_steps to exist
    n_samples = max(len(df) - n_steps, 0)

    X = [features[start:start + n_steps] for start in range(n_samples)]
    y = [targets[start + n_steps] for start in range(n_samples)]
    return np.array(X), np.array(y)
 

In [None]:
# Build the frame the CNN samples are cut from
feature_cols = ['close','WMA','SMA20','SMA100']
prepped_df = btc_df[[*feature_cols, 'actual returns signal']].dropna().copy()

# Target: the returns signal with -1 recoded to 0 (binary 0/1 label)
binary_signal = btc_df['actual returns signal'].dropna().copy().replace(-1, 0)
prepped_df['actual returns signal'] = binary_signal

# Features: shifted down one row, so each row carries the previous candle's values
prepped_df[feature_cols] = btc_df[feature_cols].shift()

# The shift leaves NaNs in the first row; remove it in place
prepped_df.dropna(inplace=True)

display(prepped_df.head())

# Window length and number of feature columns per time step
n_steps, n_features = 6, 4

X, y = split_data(prepped_df, n_steps)
for sample_count in (len(X), len(y)):
    display(sample_count)

# Make the layout explicit: [samples, timesteps, features]
X = X.reshape(X.shape[0], X.shape[1], n_features)

In [None]:
# Chronological split: the first `train_percent` of the samples train the model.
# The test set starts at sample train_samples + 1, so the single sample at
# position train_samples belongs to neither set.
train_percent = .5
train_samples = math.floor(len(X) * train_percent)
test_start = train_samples + 1

X_train, y_train = X[:train_samples], y[:train_samples]
X_test, y_test = X[test_start:], y[test_start:]
X_train

In [None]:
# Scale the feature arrays.
# StandardScaler works on 2-D input, so one scaler is fitted and stored per
# position along axis 1 of X (the time step inside the window), each on the
# (samples, features) slice for that position.
# https://stackoverflow.com/questions/50125844/how-to-standard-scale-a-3d-matrix

scalers = {}
for step in range(X_train.shape[1]):
    step_scaler = StandardScaler()
    # Fit on the training slice and overwrite it in place with the scaled values
    X_train[:, step, :] = step_scaler.fit_transform(X_train[:, step, :])
    scalers[step] = step_scaler

# Apply the scalers fitted on the training data to the test slices
for step in range(X_test.shape[1]):
    X_test[:, step, :] = scalers[step].transform(X_test[:, step, :])

In [None]:
# Define the model
number_output_neurons = 1
number_input_features = n_features

hidden_nodes_layer1 = math.floor((number_input_features + number_output_neurons) / 2)

In [None]:
nn = Sequential()

nn.add(Conv1D(filters=10, kernel_size=3, activation='relu', input_shape=(n_steps, n_features)))
nn.add(MaxPooling1D(pool_size=2))
nn.add(Flatten())

# First hidden layer
nn.add(Dense(units=hidden_nodes_layer1, activation="relu"))

# Output layer
nn.add(Dense(units=1, activation="sigmoid"))

In [None]:
# Compile model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Fit the model
model = nn.fit(X_train, y_train, epochs=100)

In [None]:
# Evaluate the model loss and accuracy metrics using the evaluate method and the test data
model_loss, model_accuracy = nn.evaluate(X_test, y_test, verbose=2)

# Display the model loss and accuracy results
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Save model as JSON - model will be backtested
# exist_ok=True creates the folder only when it is missing, without the
# check-then-create race of a separate os.path.exists() test
os.makedirs('Models', exist_ok=True)

def save_model_weights(nn, name):
    """Save a model as ``Models/<name>.json`` plus ``Models/<name>.h5``.

    The JSON file receives the string returned by ``nn.to_json()``; the weights
    are written by ``nn.save_weights()``.

    Args:
        nn: model object providing ``to_json()`` and ``save_weights(path)``.
        name: file stem used for both files inside the ``Models`` folder.
    """
    models_dir = Path("Models")
    # Create the target folder here as well, so the function also works when
    # the folder-creating statement of the notebook has not been run
    models_dir.mkdir(parents=True, exist_ok=True)

    nn_json = nn.to_json()
    with open(models_dir / f"{name}.json", "w") as json_file:
        json_file.write(nn_json)
    nn.save_weights(f"Models/{name}.h5")

def load_model_weights(name):
    """Rebuild a model from ``Models/<name>.json`` and load ``Models/<name>.h5`` into it.

    Args:
        name: file stem shared by the JSON file and the weights file.

    Returns:
        The model created by ``model_from_json`` with the stored weights loaded.
    """
    architecture_path = Path(f"Models/{name}.json")
    with open(architecture_path, "r") as architecture_file:
        architecture = architecture_file.read()

    restored = model_from_json(architecture)
    restored.load_weights(f"Models/{name}.h5")
    return restored

# Write the trained CNN to Models/model_cnn.json and Models/model_cnn.h5
save_model_weights(nn, 'model_cnn')

# Read the model back from those files; the prediction cell uses this reloaded copy
nn_loaded = load_model_weights('model_cnn')

In [None]:
# Create a predictions DataFrame
predictions = nn_loaded.predict(X_test)
predictions = predictions.flatten()
predictions
predictions_df = pd.DataFrame(index=prepped_df.iloc[(train_samples + n_steps) + 1:].index)

predictions_df["predicted_signal"] = np.where(predictions > 0.5, 1, -1 )

predictions_df["actual_returns"] = btc_df["actual_returns"]

predictions_df["trading_algorithm_returns"] = (
    predictions_df["actual_returns"] * predictions_df["predicted_signal"]
)

# Review the DataFrame
display(predictions_df["predicted_signal"].value_counts())
predictions_df.head()

In [None]:
hv.extension('bokeh')
# Calculate and plot the cumulative returns for the `actual_returns` and the `trading_algorithm_returns`
(1 + predictions_df[["actual_returns", "trading_algorithm_returns"]]).cumprod().hvplot()