# CUSUM trading techniques for Cryptos

## 1. Lam and Yam (1997)
CUSUM based filter trading strategy.

### Load data
Watch list: top nine cryptocurrencies (ranked by market capitalization) plus Dogecoin, as of November 2021
+ Bitcoin, Ethereum, Solana, Ripple, Binance Coin, Tether, Cardano, Polkadot, Terra, Dogecoin

In [1]:
# install pmdarima library
# !pip install pmdarima

In [2]:
%matplotlib inline

import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import timedelta
from pmdarima.arima import auto_arima
from statsmodels.tsa.arima.model import ARIMA
from pandas.plotting import autocorrelation_plot


# Seed NumPy's global random number generator so any random draws are repeatable.
np.random.seed(107)

# Default size (in inches) for every matplotlib figure in this notebook.
plt.rcParams['figure.figsize'] = (16, 9)
# Silence every warning category for the whole session; note that this also
# hides pandas / statsmodels warnings raised by the cells below.
warnings.filterwarnings('ignore')

In [3]:
# Load the price table: the "time" column becomes the index and
# parse_dates=True asks pandas to parse that index as datetimes.
# The sample output below shows one row per hour and one column per coin.
cryptos = pd.read_csv("../data/crypto_1y.csv", parse_dates=True, index_col="time")
cryptos.head()

time,BTC,ETH,SOL,XRP,BNB,USDT,ADA,DOT,LUNA,DOGE
2020-11-01 01:00:00,13770.78,389.16,1.556,0.2403,28.59,1.002,0.09378,4.194,0.303,0.002549
2020-11-01 02:00:00,13721.73,387.91,1.553,0.2393,28.39,1.002,0.09374,4.193,0.3011,0.00257
2020-11-01 03:00:00,13700.1,386.75,1.534,0.2392,28.4,1.002,0.09355,4.177,0.3013,0.002574
2020-11-01 04:00:00,13683.91,386.57,1.529,0.2391,28.23,1.002,0.09338,4.157,0.3002,0.002567
2020-11-01 05:00:00,13705.51,386.08,1.531,0.2391,28.27,1.002,0.09315,4.143,0.3006,0.002611


### Generalized CUSUM filter rule

For simplicity, short selling is not allowed, so we always start by looking for a buy signal; after each buy signal we look for the next sell signal.

To do later:
1. Find optimal k, h value
2. Optimize plots
3. Backtest

In [4]:
# CUSUM tuning parameters read by the signal detectors and by plotCUSUM.
# These are placeholder values: the optimal k, h still have to be searched for.
#   k - allowance added to / subtracted from each observation in the recursion
#   h - decision threshold; here fixed at ten times k
k = 5e-4
h = k * 10

def trainTestSplit(df, splitDate='2021-08-01'):
    """Split a time-indexed frame chronologically into (train, test).

    Parameters
    ----------
    df : DataFrame whose index can be compared with ``splitDate``.
    splitDate : first timestamp that belongs to the test set; rows strictly
        before it form the training set. Defaults to the date the notebook
        originally hard-coded.

    Returns
    -------
    (train, test) : two independent copies, so that adding columns to one of
        them never touches ``df`` or the other half.
    """
    beforeSplit = df.index < splitDate
    fromSplit = df.index >= splitDate
    return df.loc[beforeSplit, :].copy(), df.loc[fromSplit, :].copy()


def calculateReturn(df, coin: str):
    """Build the price / log-return table for one coin.

    Parameters
    ----------
    df : DataFrame holding one price column per coin.
    coin : name of the column to use (e.g. ``"BTC"``).

    Returns
    -------
    A new DataFrame, indexed like ``df``, with the columns
    ``price``, ``logPrice``, ``return`` (first difference of ``logPrice``)
    and ``signal``. The first row has ``return`` and ``signal`` set to 0;
    every other ``signal`` entry is NaN until a detector fills it in.

    The input frame is left untouched, so the same price table can be reused
    for other coins or other pipelines.
    """
    price = df[coin]
    logPrice = np.log(price)

    out = pd.DataFrame({
        'price': price,
        'logPrice': logPrice,
        'return': logPrice.diff(),
    })
    out['signal'] = np.nan

    # The first observation has no predecessor: define its return and signal as 0.
    if len(out) > 0:
        out.iloc[0, out.columns.get_loc('return')] = 0
        out.iloc[0, out.columns.get_loc('signal')] = 0
    return out


def _runCusumFilter(df, start, feature, method, seekBuy, kValue=None, hValue=None):
    """Scan ``df[feature]`` from row ``start`` and mark alternating trade signals.

    The scan is a two-state loop: while looking for a buy it updates the upper
    chart ``cPlus``; while looking for a sell it updates the lower chart
    ``cMinus``. Each time a chart crosses its threshold the signal is recorded,
    the opposite chart is restarted at 0 on that same row, and the scan
    continues in the other state. Using a loop instead of mutual recursion
    keeps the call stack flat no matter how many signals occur.

    ``kValue`` / ``hValue`` override the notebook-level ``k`` / ``h``.
    The columns ``cPlus``, ``cMinus`` and ``signal`` are written into ``df``
    (only those that were actually updated) and ``df`` itself is returned.
    """
    nRows = len(df)
    if nRows == 0:
        return df

    allowance = k if kValue is None else kValue
    limit = h if hValue is None else hValue

    x = df[feature].to_numpy(dtype=float)

    def _column(name):
        # Start from the existing column (if any) so earlier values are kept.
        if name in df.columns:
            return df[name].to_numpy(dtype=float, copy=True)
        return np.full(nRows, np.nan)

    charts = {'cPlus': _column('cPlus'), 'cMinus': _column('cMinus')}
    signal = _column('signal')
    touched = set()

    # Sell side: "montgomery" lets the statistic drift back towards 0 by +k;
    # any other method value subtracts k instead.
    sellShift = allowance if method == "montgomery" else -allowance

    anchor = 'cPlus' if seekBuy else 'cMinus'
    charts[anchor][start] = 0.0
    touched.add(anchor)

    for i in range(start + 1, nRows):
        if seekBuy:
            stat = np.maximum(0.0, charts['cPlus'][i - 1] + x[i] - allowance)
            charts['cPlus'][i] = stat
            fired = stat >= limit
        else:
            # The previous statistic is added (not negated) so that
            # consecutive negative moves accumulate.
            stat = np.minimum(0.0, charts['cMinus'][i - 1] + x[i] + sellShift)
            charts['cMinus'][i] = stat
            fired = stat <= -limit

        touched.add('signal')
        if fired:
            signal[i] = 1 if seekBuy else -1
            # Switch state and restart the opposite chart on the signal row.
            seekBuy = not seekBuy
            anchor = 'cPlus' if seekBuy else 'cMinus'
            charts[anchor][i] = 0.0
            touched.add(anchor)
        else:
            signal[i] = 0

    for name, values in (('cPlus', charts['cPlus']),
                         ('cMinus', charts['cMinus']),
                         ('signal', signal)):
        if name in touched:
            df[name] = values
    return df


def detectSellSignal(df, start, feature, method, kValue=None, hValue=None):
    """Look for a sell signal after row ``start``, then keep alternating.

    The lower chart is restarted at 0 on row ``start`` and updated as
    ``cMinus[i] = min(0, cMinus[i-1] + x[i] + k)`` when ``method`` is
    ``"montgomery"`` and ``min(0, cMinus[i-1] + x[i] - k)`` otherwise.
    A sell (``signal = -1``) is recorded when ``cMinus <= -h``; the scan then
    continues looking for the next buy. Rows without a signal get ``signal = 0``.

    Parameters
    ----------
    df : DataFrame containing the ``feature`` column; modified in place.
    start : positional index of the row where the scan is anchored.
    feature : name of the column fed into the CUSUM recursion.
    method : ``"montgomery"`` or any other value for the alternative recursion.
    kValue, hValue : optional overrides for the notebook-level ``k`` and ``h``.

    Returns
    -------
    The same ``df`` object.
    """
    return _runCusumFilter(df, start, feature, method, False, kValue, hValue)


def detectBuySignal(df, start, feature, method, kValue=None, hValue=None):
    """Look for a buy signal after row ``start``, then keep alternating.

    The upper chart is restarted at 0 on row ``start`` and updated as
    ``cPlus[i] = max(0, cPlus[i-1] + x[i] - k)``. A buy (``signal = 1``) is
    recorded when ``cPlus >= h``; the scan then continues looking for the next
    sell. Rows without a signal get ``signal = 0``.

    Parameters and return value are the same as for ``detectSellSignal``.
    """
    return _runCusumFilter(df, start, feature, method, True, kValue, hValue)


def createTradeSignal(df, feature='return', method="montgomery", kValue=None, hValue=None):
    """Run the long-only CUSUM filter over the whole frame.

    Starts at the first row looking for a buy signal and alternates between
    buy and sell detection until the data ends.

    Parameters
    ----------
    df : DataFrame containing the ``feature`` column; modified in place.
    feature : column fed into the CUSUM recursion (default ``'return'``).
    method : sell-side recursion variant, see ``detectSellSignal``.
    kValue, hValue : optional overrides for the notebook-level ``k`` and ``h``
        (handy for a parameter search).

    Returns
    -------
    The same ``df`` with ``cPlus``, ``cMinus`` and ``signal`` filled in.
    """
    return detectBuySignal(df, start=0, feature=feature, method=method,
                           kValue=kValue, hValue=hValue)

In [None]:
# Chronological split of the full price table.
traindf, testdf = trainTestSplit(cryptos)

coin = "BTC"

# Training data -> price / log-return table -> CUSUM trade signals.
# createTradeSignal runs with its defaults (feature='return',
# method="montgomery"); passing another method value such as "46"
# selects the alternative sell-side recursion.
df = (traindf
      .pipe(calculateReturn, coin)
      .pipe(createTradeSignal)) # method="46"

In [None]:
def plotPriceWithSignal(df):
    """Plot the price line and overlay buy / sell markers.

    Expects the columns ``price`` and ``signal``; rows with ``signal == 1``
    are drawn as green up-triangles, rows with ``signal == -1`` as red
    down-triangles.
    """
    # TODO: improve the look of this chart later
    prices = df['price']
    prices.plot()

    markerSpecs = (
        (1, "g", "^", "Buy signal"),
        (-1, "r", "v", "Sell signal"),
    )
    for flag, colour, shape, caption in markerSpecs:
        hits = prices[df['signal'] == flag]
        plt.scatter(hits.index, hits, c=colour, marker=shape, label=caption)

    plt.legend(loc='best')
    plt.show()


plotPriceWithSignal(df.head(100))

In [None]:
def plotCUSUM(df):
    """Plot the two CUSUM statistics together with the +h / -h thresholds.

    ``cPlus`` is drawn in blue, ``cMinus`` in red, and the notebook-level
    threshold ``h`` as two green horizontal lines.
    """
    timeline = df.index
    for column, colour in (('cPlus', 'b'), ('cMinus', 'r')):
        plt.plot(timeline, df[column], color=colour, marker='h', markersize=6)
    for level in (h, -h):
        plt.plot(timeline, np.full(len(timeline), level), color='g')
    plt.show()


plotCUSUM(df.head(100))

In [None]:
### to delete
# Quick profit check: sum of the simple returns of every completed buy -> sell cycle.
isBuy = df['signal'] == 1
isSell = df['signal'] == -1
buyPrice = df.loc[isBuy, 'price']
sellPrice = df.loc[isSell, 'price']
# Drop a trailing buy that has no matching sell yet.
buyPriceEven = buyPrice.iloc[:len(sellPrice)]
((sellPrice.values - buyPriceEven.values) / buyPriceEven.values).sum()

## 2. Yi *et al* (2006)

Take the transaction fee of each buy and each sell into account for every trading cycle.

In [None]:
def calculateTransactionFee(df, feeRate=0.005):
    """Compute compounded trading profit with and without transaction fees.

    A trading cycle is one buy (``signal == 1``) paired with the next sell
    (``signal == -1``); a trailing buy without a sell is ignored.

    Parameters
    ----------
    df : DataFrame with ``price`` and ``signal`` columns and a datetime index.
    feeRate : proportional fee charged on every buy and every sell
        (default 0.005).

    Returns
    -------
    (totalProfit, totalProfitWithFee, averageHourlyProfit,
     averageHourlyProfitWithFee)
        ``totalProfit`` is the product of sell/buy price ratios over all
        cycles; the "WithFee" value multiplies it by ``(1 - feeRate)`` twice
        per cycle. The hourly averages divide ``profit - 1`` by the total
        number of hours spent inside cycles. With no completed cycle the
        neutral result ``(1.0, 1.0, 0.0, 0.0)`` is returned.
    """
    isBuy = df['signal'] == 1
    isSell = df['signal'] == -1
    nCycles = min(int(isBuy.sum()), int(isSell.sum()))
    if nCycles == 0:
        # Nothing was traded: capital unchanged, no time in the market.
        return (1.0, 1.0, 0.0, 0.0)

    # Pair the i-th buy with the i-th sell.
    buys = df.loc[isBuy, 'price'].iloc[:nCycles]
    sells = df.loc[isSell, 'price'].iloc[:nCycles]

    totalProfit = np.prod(sells.to_numpy() / buys.to_numpy())

    # Total holding time across all cycles, expressed in hours.
    totalCycleLength = (sells.index - buys.index).sum() / timedelta(hours=1)

    # average profit without transaction fee
    averageHourlyProfit = (totalProfit - 1) / totalCycleLength

    # Two fee-bearing transactions (buy and sell) per cycle.
    totalProfitWithFee = totalProfit * (1 - feeRate) ** (2 * nCycles)
    # average profit with transaction fee
    averageHourlyProfitWithFee = (totalProfitWithFee - 1) / totalCycleLength

    return (totalProfit, totalProfitWithFee,
            averageHourlyProfit, averageHourlyProfitWithFee)

In [None]:
# Tuple of (totalProfit, totalProfitWithFee,
#           averageHourlyProfit, averageHourlyProfitWithFee) for the signals in df.
profitMetrics = calculateTransactionFee(df)
profitMetrics

## 3. Žmuk (2016)

Deal with autocorrelation in our data.

### ARIMA(p,d,q)

In [None]:
def applyARIMA(df, coin):
    """Fit an ARIMA model to one coin's price and store the residuals.

    The coin's column is copied into ``price``, ``auto_arima`` picks the
    (p, d, q) order within the bounds below, an ``ARIMA`` model of that order
    is fitted, and its residuals are written to ``residual``.

    ``df`` is modified in place and also returned.
    """
    df.loc[:, "price"] = df.loc[:, coin]
    prices = df['price']

    # NOTE(review): d=0 is passed together with max_d=2; presumably the
    # explicit d fixes the differencing order - confirm against pmdarima docs.
    searchBounds = dict(start_p=0, max_p=5,
                        d=0, max_d=2,
                        start_q=0, max_q=5)
    bestOrder = auto_arima(prices, **searchBounds).order
    print("Auto ARIMA parameters: ", bestOrder)

    df.loc[:, 'residual'] = ARIMA(prices, order=bestOrder).fit().resid
    return df


def plotBeforeAndAfterARIMA(df):
    """Overlay the autocorrelation plots of ``price`` and ``residual``."""
    for column, caption in (('price', "Price"), ('residual', "ARIMA")):
        autocorrelation_plot(df[column], label=caption)
    plt.legend(loc='best')
    plt.show()

To do:

1. Residual-based CUSUM
2. Grid search
3. Backtest
4. Risk-adjusted

## 4. Chen and Huang (2012)

### Residual based CUSUM chart

In [None]:
def calculateResidualFeature(df):
    """Add ``residualFeature``: the ``residual`` column divided by its standard deviation.

    The standard deviation comes from ``np.std``. ``df`` is modified in place
    and returned.
    """
    residual = df['residual']
    scale = np.std(residual)
    df['residualFeature'] = residual / scale
    return df

In [None]:
coin = "BTC"

# Same signal pipeline as before, but the CUSUM recursion is driven by the
# standardised ARIMA residuals instead of the log returns.
# NOTE(review): createTradeSignal still reads the notebook-level k and h,
# which were set on the log-return scale; residualFeature is divided by its
# own standard deviation - confirm these thresholds are appropriate here.
df = (traindf
      .pipe(applyARIMA, coin)
      .pipe(calculateResidualFeature)
      .pipe(createTradeSignal, feature='residualFeature')
     )

# Inspect the first 100 rows: autocorrelation, price with signals, CUSUM charts.
plotBeforeAndAfterARIMA(df.head(100))
plotPriceWithSignal(df.head(100))
plotCUSUM(df.head(100))

## 5.  Backtest

## 6. GridSearchCV for k, h