In [165]:
# %%file run_2thre.py
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime
from scipy.stats.mstats import winsorize
from random import seed
from random import random
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
pd.options.mode.chained_assignment = None


def run_2thre(q, n_fee = 3):
    """Backtest the two-threshold spot/perpetual strategy for one parameter set.

    For each of the five currencies the function reads spot, perpetual and
    funding-rate CSVs from ``data/``, computes
    ``cip = -(1/T) * (log(perp) - log(spot))`` and walks through the rows
    with the following rules:

    * ``cip < -threshold``: be long spot / short perp (open or flip).
    * ``cip > threshold``: be short spot / long perp (open or flip).
    * ``-threshold2 < cip < threshold2``: close any open position.
    * between the two thresholds: never open; only flip a position that sits
      on the opposite side of the current sign of ``cip``.

    Parameters
    ----------
    q : int
        1-based index into the 210 ``(threshold, threshold2)`` pairs built
        from the grid 0.00, 0.05, ..., 0.95 with ``threshold2 <= threshold``.
        ``q = 1`` is ``(0.0, 0.0)``, ``q = 2`` is ``(0.05, 0.0)``, and so on.
    n_fee : int, default 3
        1-based fee tier: 1 = no, 2 = low, 3 = medium-low, 4 = medium-high.

    Returns
    -------
    list of pandas.DataFrame
        One frame per currency (eth, btc, doge, bnb, ada), each with the
        columns ``timestamp`` and ``return`` (row-over-row simple return of
        the adjusted equity; the first row is measured against the notional).

    Raises
    ------
    IndexError
        If ``q`` or ``n_fee`` is outside its 1-based range. Values <= 0 used
        to wrap around silently through negative indexing.

    Side effects
    ------------
    Pickles the returned list to ``validate_output/output_{n_fee}_{q}.pkl``
    (the directory is created when missing).
    """
    import os  # local import: only needed to make sure the output folder exists

    def load_data(load):
        """Read the spot, perp and funding-rate CSVs of ticker ``load``.

        Spot and perp frames are returned sorted by ``epoch`` with a fresh
        0..n-1 index; the funding-rate frame is returned as read.
        """
        spot = pd.read_csv(f'data/spot_{load}_usdt_1h.csv', parse_dates=['timestamp'])
        perp = pd.read_csv(f'data/perp_{load}_usdt_1h.csv', parse_dates=['timestamp'])
        perp = perp.sort_values(by='epoch', ascending=True).reset_index(drop=True)
        spot = spot.sort_values(by='epoch', ascending=True).reset_index(drop=True)
        funding_rate = pd.read_csv(f'data/rate_{load}_usdt.csv', parse_dates=['timestamp'])
        return spot, perp, funding_rate

    def create_panel(spot, perp, funding_rate):
        """Left-join spot closes/volumes and funding rates onto the perp rows by ``epoch``."""
        # .copy() so that adding 'day' never writes into a view of ``perp``.
        panel = perp[['epoch', 'timestamp', 'close', 'usd_volume']].copy()
        panel['day'] = panel['timestamp'].dt.round('D')
        panel = panel.rename(columns={'close': 'perp', 'usd_volume': 'perp_usd_volume'})
        panel = pd.merge(panel, spot[['epoch', 'close', 'usd_volume']], on='epoch', how='left')
        panel = panel.rename(columns={'close': 'spot', 'usd_volume': 'spot_usd_volume'})
        panel = pd.merge(panel, funding_rate[['epoch', 'funding_rate']], on='epoch', how='left')
        panel = panel.sort_values(by='epoch', ascending=True).reset_index(drop=True)
        return panel

    # NOTE: the position helpers below read ``fee_spot``, ``fee_perp``,
    # ``share_spot``, ``share_perp``, ``buyin_spot`` and ``buyin_perp`` from the
    # enclosing scope at call time; ``calculate_equity`` additionally reads
    # ``spot``, ``ethusdt_spot`` and ``ethusdt_perp``. Those names must stay
    # assigned in ``run_2thre`` before the helpers are called.

    def open_long_position(ethusdt_spot, ethusdt_perp, cash_acc):
        """Spend all cash on long spot / short perp; returns (spot units, perp units, 0, fee)."""
        position_spot = share_spot*(cash_acc/ethusdt_spot)*(1-fee_spot)
        position_perp = share_perp*(cash_acc/ethusdt_perp)*(1-fee_perp)*-1  # negative = short
        fee = cash_acc*(share_spot*fee_spot+share_perp*fee_perp)
        cash_acc = 0

        return position_spot, position_perp, cash_acc, fee

    def close_long_position(position_spot, position_perp, ethusdt_spot, ethusdt_perp, cash_acc):
        """Liquidate a long-spot / short-perp position into cash, net of fees."""
        spot_value = position_spot*ethusdt_spot
        # Short leg: size * (2*entry - current) = entry value plus the short's gain/loss.
        perp_value = (position_perp*-1)*(buyin_perp*2-ethusdt_perp)

        fee = spot_value*fee_spot+perp_value*fee_perp
        cash_acc = cash_acc+spot_value*(1-fee_spot)+perp_value*(1-fee_perp)

        position_spot = 0
        position_perp = 0

        return position_spot, position_perp, cash_acc, fee

    def open_short_position(ethusdt_spot, ethusdt_perp, cash_acc):
        """Spend all cash on short spot / long perp; returns (spot units, perp units, 0, fee)."""
        position_spot = share_spot*(cash_acc/ethusdt_spot)*(1-fee_spot)*-1  # negative = short
        position_perp = share_perp*(cash_acc/ethusdt_perp)*(1-fee_perp)
        fee = cash_acc*(share_spot*fee_spot+share_perp*fee_perp)
        cash_acc = 0

        return position_spot, position_perp, cash_acc, fee

    def close_short_position(position_spot, position_perp, ethusdt_spot, ethusdt_perp, cash_acc):
        """Liquidate a short-spot / long-perp position into cash, net of fees."""
        # Short leg: size * (2*entry - current) = entry value plus the short's gain/loss.
        spot_value = (position_spot*-1)*(buyin_spot*2-ethusdt_spot)
        perp_value = position_perp*ethusdt_perp

        fee = spot_value*fee_spot+perp_value*fee_perp
        cash_acc = cash_acc+spot_value*(1-fee_spot)+perp_value*(1-fee_perp)

        position_spot = 0
        position_perp = 0

        return position_spot, position_perp, cash_acc, fee

    def calculate_equity(position_spot, position_perp, cash_acc, equity):
        """Mark the book to the current row's prices.

        Returns ``(equity, pnl, spot_value, perp_value)`` where ``pnl`` is the
        change relative to the ``equity`` passed in.
        """
        equity_old = equity

        if spot == 'long':
            spot_value = position_spot*ethusdt_spot
            perp_value = (position_perp*-1)*(buyin_perp*2-ethusdt_perp)

        elif spot == 'short':
            spot_value = (position_spot*-1)*(buyin_spot*2-ethusdt_spot)
            perp_value = position_perp*ethusdt_perp

        else:  # no position
            spot_value = 0.0
            perp_value = 0.0

        equity = cash_acc + spot_value + perp_value
        pnl = equity - equity_old
        return equity, pnl, spot_value, perp_value

    def calculate_payment(position_perp, ethusdt_perp, funding_rate):
        """Funding cash flow for the current row (positive = received)."""
        # *-1: with a negative funding rate short perps pay long perps, and
        # vice versa for a positive one.
        fr_facevalue = position_perp*ethusdt_perp*-1
        payment = fr_facevalue * funding_rate
        return payment

    open_position = {'long': open_long_position, 'short': open_short_position}
    close_position = {'long': close_long_position, 'short': close_short_position}

    currencies = ['eth', 'btc', 'doge', 'bnb', 'ada']
    # Fee tiers in order: no, low, medium-low, medium-high.
    fee_spot_lst = [0.0, 0.00015, 0.000375, 0.000525]
    fee_perp_lst = [0.0, 0.0, 0.000054, 0.000108]

    if not 1 <= n_fee <= len(fee_spot_lst):
        raise IndexError(f'n_fee must be in 1..{len(fee_spot_lst)}, got {n_fee}')
    fee_spot = fee_spot_lst[n_fee-1]
    fee_perp = fee_perp_lst[n_fee-1]

    # All (threshold, threshold2) pairs on the 0.05 grid with threshold2 <= threshold.
    threshold_lst = list(np.arange(.0,1.0,0.05))
    thre_all = []
    for i in threshold_lst:
        for j in threshold_lst:
            if i <= j:
                thre_all.append((j, i))

    if not 1 <= q <= len(thre_all):
        raise IndexError(f'q must be in 1..{len(thre_all)}, got {q}')
    threshold, threshold2 = thre_all[q-1]

    notional = 1000000.0
    wins = False  # set to True to winsorize cip at 2.5% in each tail
    T = 1/(3*365)  # presumably one 8-hour funding interval in years -- TODO confirm
    cutoff = '2022-08-30 12:00:00'  # common end date, such that all data is equally long

    panel_all = []
    for currency in currencies:
        spot_df, perp_df, rate_df = load_data(currency)
        # Fix: the panel was never built (an undefined ``panel_temp`` was appended).
        panel = create_panel(spot_df, perp_df, rate_df)

        # cip is computed before the spot forward-fill, so rows without a spot
        # quote get NaN and therefore trigger no trading decision below.
        panel['cip'] = -(1/T)*(np.log(panel['perp']) - np.log(panel['spot']))
        if wins:
            panel['cip'] = winsorize(panel['cip'], limits = (0.025, 0.025))

        panel = panel[panel['timestamp'] <= cutoff].copy()
        panel['spot'] = panel['spot'].ffill()
        panel['spot_usd_volume'] = panel['spot_usd_volume'].ffill()
        panel['funding_rate'] = panel['funding_rate'].fillna(0)
        panel = panel.sort_values(by='epoch', ascending=True).reset_index(drop=True)
        panel_all.append(panel)

    # Cash split chosen so that both legs carry the same after-fee notional.
    share_spot = (1-fee_perp)/(2-fee_spot-fee_perp)
    share_perp = 1-share_spot

    df_all = []

    for panel in panel_all:
        # starting variables
        lst = []
        position_open = False
        position_spot = 0.0
        position_perp = 0.0
        cash_acc = notional

        spot = 'none'  # side of the spot leg: 'long', 'short' or 'none'

        equity = cash_acc
        turnover = 0.0
        buyin_spot = 0.0
        buyin_perp = 0.0

        # Pull the columns out once; per-row Series lookups dominate the runtime.
        epochs = panel['epoch'].to_numpy()
        timestamps = list(panel['timestamp'])
        cips = panel['cip'].to_numpy()
        spot_prices = panel['spot'].to_numpy()
        perp_prices = panel['perp'].to_numpy()
        funding_rates = panel['funding_rate'].to_numpy()

        for i in range(len(panel)):
            action = False
            fee = 0.0

            cip = cips[i]
            ethusdt_spot = spot_prices[i]
            ethusdt_perp = perp_prices[i]

            # Step 1: which side does this row ask for, and may it open a new
            # position? The order of the tests matters on the band edges.
            target = None  # stays None when cip is NaN
            may_open = False
            if cip < -threshold:  # short perp, long spot
                target, may_open = 'long', True
            elif cip > threshold:  # short spot, long perp
                target, may_open = 'short', True
            elif (cip > -threshold2) and (cip < threshold2):  # no-action zone
                target = 'none'
            elif (cip >= -threshold) and (cip <= -threshold2):
                target = 'long'
            elif (cip >= threshold2) and (cip <= threshold):
                target = 'short'

            # Step 2: act on it.
            if target == 'none':
                if position_open:
                    position_spot, position_perp, cash_acc, fee = close_position[spot](position_spot, position_perp, ethusdt_spot, ethusdt_perp, cash_acc)
                    turnover = turnover + cash_acc

                    buyin_spot = 0.0
                    buyin_perp = 0.0

                    position_open = False
                    action = True
                    spot = 'none'

            elif target is not None:
                if not position_open:
                    if may_open:
                        turnover = turnover + cash_acc
                        position_spot, position_perp, cash_acc, fee = open_position[target](ethusdt_spot, ethusdt_perp, cash_acc)
                        buyin_spot = ethusdt_spot
                        buyin_perp = ethusdt_perp

                        position_open = True
                        action = True
                        spot = target

                elif spot != target:
                    # Flip: close the old side (uses the old buy-in prices),
                    # then reopen on the other side at the current prices.
                    position_spot, position_perp, cash_acc, fee_temp1 = close_position[spot](position_spot, position_perp, ethusdt_spot, ethusdt_perp, cash_acc)
                    turnover = turnover + cash_acc*2
                    position_spot, position_perp, cash_acc, fee_temp2 = open_position[target](ethusdt_spot, ethusdt_perp, cash_acc)
                    buyin_spot = ethusdt_spot
                    buyin_perp = ethusdt_perp

                    fee = fee_temp1 + fee_temp2

                    action = True
                    spot = target

            payment = calculate_payment(position_perp, ethusdt_perp, funding_rates[i])
            cash_acc = cash_acc + payment
            equity, pnl, spot_value, perp_value = calculate_equity(position_spot, position_perp, cash_acc, equity)
            lst.append([epochs[i], timestamps[i], position_open, cash_acc, equity, payment, position_spot, position_perp, fee, cip, funding_rates[i],  buyin_spot, buyin_perp, ethusdt_spot, ethusdt_perp, pnl, action, turnover, spot, spot_value, perp_value])

        df = pd.DataFrame(lst, columns=['epoch', 'timestamp', 'position_open', 'cash_acc', 'equity', 'funding_payment', 'position_spot', 'position_perp', 'fee', 'cip', 'funding_rate', 'buyin_spot', 'buyin_perp', 'ethusdt_spot', 'ethusdt_perp', 'pnl_position', 'action','turnover', 'spot', 'spot_value', 'perp_value'])
        for col in ('cash_acc', 'equity', 'funding_payment', 'pnl_position'):
            df[col] = df[col].astype('int')

        # We adjust the equity by incorporating negative balances from the cash
        # account: if the position has been closed, the equity is already up to
        # date; if it is open, there might be some margin balance.
        open_cash = df['cash_acc'].where(df['position_open'] == True, 0)
        df['equity_adj'] = (open_cash + df['equity']).astype('int')

        # Row-over-row simple return; the first row is relative to the notional.
        # Built as one array instead of chained .iloc/slice assignment, which
        # does not write through on copy-on-write pandas.
        equity_adj = df['equity_adj'].to_numpy(dtype=float)
        previous = np.concatenate(([notional], equity_adj[:-1]))
        df['return'] = equity_adj/previous - 1

        df_all.append(df)

    dff = [df[['timestamp', 'return']] for df in df_all]
    os.makedirs('validate_output', exist_ok=True)
    pd.to_pickle(dff, f'validate_output/output_{n_fee}_{q}.pkl')

    return dff

Overwriting run_2thre.py


In [None]:
# Run the backtest for q = 1..210, and for each q under fee tiers 1..4,
# visiting the combinations in exactly that order.
for q, n_fee in ((pair_no, tier) for pair_no in range(1, 211) for tier in range(1, 5)):
    run_2thre(q, n_fee = n_fee)

In [166]:
# for n_fee in [1, 2, 3, 4]:
#     with open(f'run_2thre_{n_fee}.sbatch', 'w') as fb:
#         fb.write(f"""#!/bin/bash

# #SBATCH --partition=standard
# #SBATCH --account=pi-dachxiu
# #SBATCH --job-name=data_pre
# #SBATCH --output=JOBLOG/Job_%A_%a.txt
# #SBATCH --ntasks=1
# #SBATCH --time=7-00:00:00
# #SBATCH --mem-per-cpu=4G
# #SBATCH --array=1-210

# module load anaconda/2021.05
# python -c "import run_2thre; run_2thre.run_2thre(${{SLURM_ARRAY_TASK_ID}}, {n_fee})"
# """)

In [252]:
# import subprocess 
# subprocess.run(['sbatch', 'run_2thre_1.sbatch'])

CompletedProcess(args=['sbatch', 'run_2thre_1.sbatch'], returncode=0)

In [1]:
import pandas as pd
import glob
import numpy as np

curr_ls = ['eth', 'btc', 'doge', 'bnb', 'ada']

N = 210            # number of q parameter sets stored per fee tier
rolling_month = 6  # look-back window (calendar months) used to pick q

fee_banner = {
    4: 'Transaction cost: medium-high',
    3: 'Transaction cost: medium-low',
}


def get_best_q(df_):
    """Return the 0-based column position of the q with the highest mean/std.

    ``df_`` holds ``timestamp``, ``yyyymm`` and one return column per q.
    """
    v = df_.drop(columns = ['timestamp', 'yyyymm']).values
    # NOTE(review): a q whose returns are constant in the window has std 0 and
    # yields NaN/inf here; np.argmax returns the first NaN it meets. Kept as in
    # the original so the published numbers do not change -- confirm intent.
    score = v.mean(axis = 0) / v.std(axis = 0)
    return np.argmax(score)


for n_fee in [4,3]:
    if n_fee in fee_banner:
        print('########################################')
        print(fee_banner[n_fee])
        print('########################################')

    # Load every q once per fee tier (this used to be repeated for each
    # currency). The q = 1 file fixes the row counts and supplies timestamps.
    template = pd.read_pickle(f'validate_output/output_{n_fee}_1.pkl')
    ret_all = [np.zeros((len(frame), N)) for frame in template]
    for q in range(1, N + 1):
        s = pd.read_pickle(f'validate_output/output_{n_fee}_{q}.pkl')
        for i in range(len(curr_ls)):
            ret_all[i][:, q - 1] = s[i]['return']

    q_cols = [f'q{n}' for n in range(N)]
    summaries = []

    for curr_i in range(len(curr_ls)):
        s0 = template[curr_i].drop(columns = ['return'])
        s0['timestamp'] = pd.to_datetime(s0['timestamp'])
        s0['yyyymm'] = s0['timestamp'].dt.year * 100 + s0['timestamp'].dt.month

        # Attach all q columns in one go; 210 single inserts fragment the frame.
        q_frame = pd.DataFrame(ret_all[curr_i], columns = q_cols, index = s0.index)
        s0 = pd.concat([s0, q_frame], axis = 1)

        ym = sorted(set(s0['yyyymm']))

        # For month ym[i + rolling_month], pick q on the months ym[i] .. just before it.
        q_ls = []
        for i, ym_i in enumerate(ym[rolling_month:]):
            window = s0[(s0['yyyymm'] < ym_i) & (s0['yyyymm'] >= ym[i])]
            q_ls.append(get_best_q(window))

        q_df = pd.DataFrame()
        q_df['yyyymm'] = ym[rolling_month:]
        q_df['q'] = q_ls

        # Inner merge: the first ``rolling_month`` months have no selected q and drop out.
        s0 = pd.merge(s0, q_df, on = 'yyyymm')

        # Pick, per row, the return of that month's selected q.
        chosen = s0['q'].to_numpy().astype(int)
        s0['ret'] = s0[q_cols].to_numpy()[np.arange(len(s0)), chosen]

        s0 = s0[['timestamp', 'ret']].copy()
        s0['year'] = s0['timestamp'].dt.year

        # Aggregate the 'ret' column only: newer pandas would otherwise also
        # average the timestamp column and break the column renaming.
        ret_by_year = s0.groupby('year')['ret']
        sum_df = ret_by_year.mean().to_frame('ann_return')
        sum_df['N'] = ret_by_year.count()
        sum_df['ann_std'] = ret_by_year.std()
        sum_df['ann_return'] = sum_df['ann_return'] * 24 * 365
        sum_df['ann_std'] = sum_df['ann_std'] * np.sqrt(24 * 365)
        sum_df['ann_sharpe']  = sum_df['ann_return'] / sum_df['ann_std']
        sum_df = sum_df[['N', 'ann_return', 'ann_std', 'ann_sharpe']]
        sum_df.loc['mean', 'N'] = len(s0)
        sum_df.loc['mean', 'ann_return'] = s0['ret'].mean() * 24 * 365
        sum_df.loc['mean', 'ann_std'] = s0['ret'].std() * np.sqrt(24 * 365)
        sum_df.loc['mean', 'ann_sharpe'] = sum_df.loc['mean', 'ann_return'] / sum_df.loc['mean', 'ann_std']

        sum_df = sum_df.round(3)
        sum_df['crypto'] = curr_ls[curr_i]

        sum_df = sum_df.rename_axis('year').reset_index()
        summaries.append(sum_df)

    # DataFrame.append was removed in pandas 2.0; concatenate the parts instead.
    sum_df_all = pd.concat(summaries)
    display(sum_df_all.set_index(['crypto', 'year']))

########################################
Transaction cost: medium-high
########################################


Unnamed: 0_level_0,Unnamed: 1_level_0,N,ann_return,ann_std,ann_sharpe
crypto,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
eth,2020,5880.0,0.092,0.038,2.405
eth,2021,8760.0,0.155,0.049,3.147
eth,2022,5793.0,0.013,0.011,1.192
eth,mean,20433.0,0.097,0.039,2.499
btc,2020,7344.0,0.07,0.029,2.385
btc,2021,8760.0,0.147,0.063,2.352
btc,2022,5793.0,-0.005,0.005,-1.025
btc,mean,21897.0,0.081,0.043,1.875
doge,2021,8760.0,0.481,0.074,6.488
doge,2022,5793.0,-0.052,0.017,-3.022


########################################
Transaction cost: medium-low
########################################


Unnamed: 0_level_0,Unnamed: 1_level_0,N,ann_return,ann_std,ann_sharpe
crypto,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
eth,2020,5880.0,0.085,0.038,2.215
eth,2021,8760.0,0.177,0.046,3.832
eth,2022,5793.0,0.012,0.008,1.427
eth,mean,20433.0,0.104,0.037,2.814
btc,2020,7344.0,0.083,0.028,2.951
btc,2021,8760.0,0.138,0.058,2.36
btc,2022,5793.0,0.001,0.004,0.147
btc,mean,21897.0,0.083,0.04,2.058
doge,2021,8760.0,0.59,0.074,7.979
doge,2022,5793.0,0.112,0.017,6.752
