In [3]:
# Coinbase Sandbox Demo
#
# this demo uses the Coinbase Pro client for Python: 
# https://github.com/danpaquin/coinbasepro-python
#
# it's recommended to run cbpro in its own Python environment:
# conda create --name environment_name
#
# pip install cbpro
# or
# pip install git+git://github.com/danpaquin/coinbasepro-python.git
#
# there are a number of libraries that need to be in the python environment for this demo
# to run.  please review the import statements below and install the necessary dependencies
# before running this notebook.

import os

import cbpro

import json, hmac, hashlib, time, requests, base64
from requests.auth import AuthBase

import time
import numpy as np
import pandas as pd
import datetime as dt
from pylab import mpl, plt

# the Coinbase Pro Sandbox api key, secret, and passphrase have been stored locally as environment variables.
# in order to implement this project, setup a Coinbase Pro account and generate a Coinbase Pro Sandbox
# api key.  For more information go to the Coinbase Pro Sandbox Documentation url below--
# https://docs.pro.coinbase.com/#sandbox

# get current directory
dir_path = os.getcwd()

# Authentication credentials
api_key = os.environ.get('coinbaseSandboxKey')
api_secret = os.environ.get('coinbaseSandboxSecret')
passphrase = os.environ.get('coinbaseSandboxPassphrase')

# sandbox authenticated client
auth_client = cbpro.AuthenticatedClient(api_key, api_secret, passphrase, \
                                        api_url='https://api-public.sandbox.pro.coinbase.com')
# live account authenticated client
# uses a different set of API access credentials (api_key, api_secret, passphrase)
# auth_client = cbpro.AuthenticatedCliet(api_key, api_secret, passphrase)

# get accounts listed for api key
# account_info = auth_client.get_accounts()

# df = pd.DataFrame(account_info)

# df.to_csv('account_info.csv') # save data locally for development use

In [4]:
# get list of products

import cbpro

import pandas as pd

import json, hmac, hashlib, time, requests, base64
from requests.auth import AuthBase

# Define Public Client
public_client = cbpro.PublicClient()

# Get Products
products = pd.DataFrame(public_client.get_products())

products.sort_values('max_market_funds')

print(products[:10])

products.to_csv('products.csv')

         id base_currency quote_currency base_min_size    base_max_size  \
0  WBTC-BTC          WBTC            BTC    0.00010000      10.00000000   
1   XTZ-EUR           XTZ            EUR    1.00000000  100000.00000000   
2   BAL-BTC           BAL            BTC    0.10000000    6700.00000000   
3   BNT-EUR           BNT            EUR    1.00000000   95000.00000000   
4   XTZ-USD           XTZ            USD    1.00000000  100000.00000000   
5   REN-BTC           REN            BTC    1.00000000  460000.00000000   
6   ZRX-EUR           ZRX            EUR    1.00000000  600000.00000000   
7  AAVE-USD          AAVE            USD    0.01000000    1200.00000000   
8   XLM-BTC           XLM            BTC    1.00000000  600000.00000000   
9  CGLD-GBP          CGLD            GBP    0.10000000   34000.00000000   

  quote_increment base_increment display_name min_market_funds  \
0      0.00010000     0.00000001     WBTC/BTC           0.0001   
1      0.00001000     0.01000000      XTZ/

In [5]:
# Get Historic Rates
# the max number of data points for a single request is 300 candles
# if start/end time and granularity results in more than 300 data points,
# the request will be rejected.
# Make multiple requests if fine granularity results in > 300 data points.

import cbpro

import json, hmac, hashlib, time, requests, base64
from requests.auth import AuthBase

# Define Public Client
public_client = cbpro.PublicClient()

# Define ticker symbol to retrieve
symbol = 'BTC-USD'

# Issue: start and end dates results in error
# Default returns 300 data points at specified granularity
# Optional parameters: start and end dates need to be in ISO 8601 format (YYYYMMDD)
#start = 20201215
#end = 20201231

# granularity is the time between ticks and must be one of these values:
# {60, 300, 900, 3600, 21600, 86400} which corresponds to
# one minute (60 sec), five minutes (300 sec), fifteen minutes (900 sec),
# one hour (3600 sec), six hours (21600 sec), and one day (86400 sec)

granularity = 86400

try:
    tick_data = pd.DataFrame(public_client.get_product_historic_rates(symbol, granularity=granularity))
except: print("Request is invalid: verify start-end dates and granularity results in less than 300 data points")

tick_data.rename(columns={0:'time', 1:'low', 2:'high', 3:'open', 4:'close', 5:'volume'}, inplace=True)

tick_data.to_csv('BTC-USD.csv')

In [6]:
# work with saved historic data to minimize the number of api requests

import pandas as pd

tick_data = pd.read_csv('BTC-USD.csv', index_col=0, infer_datetime_format=True, parse_dates=True)

tick_data.iloc[:, 0] = pd.to_datetime(tick_data.iloc[:, 0], infer_datetime_format=True, unit='s')

tick_data.set_index('time', inplace=True)

tick_data.sort_values(by='time', inplace=True)

data = tick_data

data.head

<bound method NDFrame.head of                  low      high      open     close        volume
time                                                            
2020-03-27   6260.00   6880.00   6760.00   6372.36  20458.244021
2020-03-28   6030.00   6372.36   6372.36   6251.82  20353.874846
2020-03-29   5870.46   6279.96   6251.45   5877.21  16111.236378
2020-03-30   5853.00   6631.23   5878.98   6406.40  23659.802642
2020-03-31   6333.91   6524.79   6406.40   6424.35  11939.025331
...              ...       ...       ...       ...           ...
2021-01-16  35372.59  37948.00  36754.60  36006.94  20861.425452
2021-01-17  33850.03  36860.00  36004.80  35820.00  19182.049347
2021-01-18  34736.46  37402.00  35820.01  36624.23  16609.641084
2021-01-19  35895.11  37857.00  36624.23  35917.28  18904.779106
2021-01-20  35641.00  36415.34  35921.91  35937.07   2296.983325

[300 rows x 5 columns]>

In [7]:
# Create the features data by lagging the log returns

# Adapted from Python For Finance, 2nd ed., Hilpisch, Yves.
# Chapter 16 - Automated Trading, ML-Based Trading Strategy: Vectorized Backtesting

import numpy as np
import pandas as pd

data['mid'] = (data['high']+data['low'])/2

data['returns'] = np.log(data['mid']/data['mid'].shift(1))

lags = 5

def create_lags(data):
    global cols
    cols = []
    for lag in range(1, lags + 1):
        col = 'lag_{}'.format(lag)
        data[col] = data['returns'].shift(lag)
        cols.append(col)

create_lags(data)

data.dropna(inplace=True)

data[cols] = np.where(data[cols] > 0, 1, 0)

data['direction'] = np.where(data['returns'] > 0, 1, -1)

data[cols + ['direction']].head

<bound method NDFrame.head of             lag_1  lag_2  lag_3  lag_4  lag_5  direction
time                                                    
2020-04-02    1.0    1.0    1.0    0.0    0.0          1
2020-04-03    1.0    1.0    1.0    1.0    0.0         -1
2020-04-04    0.0    1.0    1.0    1.0    1.0          1
2020-04-05    1.0    0.0    1.0    1.0    1.0         -1
2020-04-06    0.0    1.0    0.0    1.0    1.0          1
...           ...    ...    ...    ...    ...        ...
2021-01-16    0.0    1.0    1.0    1.0    0.0         -1
2021-01-17    0.0    0.0    1.0    1.0    1.0         -1
2021-01-18    0.0    0.0    0.0    1.0    1.0          1
2021-01-19    1.0    0.0    0.0    0.0    1.0          1
2021-01-20    1.0    1.0    0.0    0.0    0.0         -1

[294 rows x 6 columns]>

In [8]:
# A support vector machine algorithm for classification is used.
# The code trains and tests the algorithmic trading strategy based on a sequential train-test split.

# Adapted from Python For Finance, 2nd ed., Hilpisch, Yves.
# Chapter 16 - Automated Trading, ML-Based Trading Strategy: Vectorized Backtesting

from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

model = SVC(C=1, kernel='linear', gamma = 'auto')

split = int(len(data) * 0.80)

train = data.iloc[:split].copy() # training dataset

model.fit(train[cols], train['direction'])

accuracy_score(train['direction'], model.predict(train[cols]))

0.5957446808510638

In [9]:
test = data.iloc[split:].copy()  # test dataset

test['position'] = model.predict(test[cols])

accuracy_score(test['direction'], test['position'])

0.6610169491525424

In [10]:
# Backtesting with Proportional Transaction Costs (ptc)

# Coinbase uses a maker-taker fee model.  However, for amounts < $10k the maker-taker fee is 0.50%
# The maker-taker fee model uses a pricing tier model where larger transaction volumes, in USD,
# are charged a lower percentage: 
#                                  < $10k   0.50%  (taker = maker)
#                            $50M - $100M   0.10% (taker) 0.00% (maker)

# https://help.coinbase.com/en/pro/trading-and-funding/trading-rules-and-fees/fees.html

ptc = 0.005  # 0.50%

test['strategy'] = test['position'] * test['returns']

sum(test['position'].diff() !=0)

1

In [14]:
test['strategy_tc'] = np.where(test['position'].diff() != 0,
                              test['strategy'] - ptc,
                              test['strategy'])

test[['returns', 'strategy', 'strategy_tc']].sum().apply(np.exp)

returns        1.980422
strategy       1.980422
strategy_tc    1.970545
dtype: float64

In [13]:
# plot results

from pylab import mpl, plt

test[['returns', 'strategy', 'strategy_tc']].cumsum().apply(np.exp).plot(figsize=(10,6))
plt.savefig('10-BTC-USD_SVM.png')
plt.show()