Dynamic triple determination for "pairs trading"

In [1]:
# Candidate universe: coin tickers taken from the CoinMarketCap historical snapshot
# of December 01, 2019 (https://coinmarketcap.com/historical/20191201/), each written
# as a USDT-quoted symbol. The snapshot predates the period of analysis, so picking
# the universe from it avoids look-ahead bias.
# Symbols the exchange cannot serve are reported and skipped by
# get_price_data_for_universe, so this list is a superset of what gets loaded.
# NOTE(review): 'DOTUSDT' and 'SOLUSDT' sit at the very end, apparently appended
# after the snapshot-ordered entries — confirm both were in the 2019-12-01 snapshot;
# if they were added by hand they reintroduce look-ahead bias.

strat_univ = [
    'BTCUSDT', 'ETHUSDT', 'XRPUSDT', 'BCHUSDT', 'LTCUSDT', 'EOSUSDT', 'BNBUSDT',
    'XLMUSDT', 'TRXUSDT', 'ADAUSDT', 'XTZUSDT', 'LINKUSDT', 'ATOMUSDT',
    'NEOUSDT', 'MKRUSDT', 'DASHUSDT', 'ETCUSDT', 'USDCUSDT', 'ONTUSDT', 'VETUSDT',
    'DOGEUSDT', 'BATUSDT', 'ZECUSDT',
    'SNXUSDT', 'QTUMUSDT', 'TUSDUSDT', 'ZRXUSDT',
    'THXUSDT', 'THRUSDT', 'ALGOUSDT', 'REPUSDT', 'NANOUSDT', 'KBCUSDT', 'BTGUSDT', 'RVNUSDT', 'OMGUSDT',
    'CNXUSDT', 'ABBCUSDT', 'XINUSDT', 'VSYSUSDT', 'SEELEUSDT', 'EONUSDT', 'ZBUSDT', 'EKTUSDT', 'DGBUSDT',
    'BTMUSDT', 'LSKUSDT', 'KMDUSDT', 'SAIUSDT', 'LUNAUSDT', 'KCSUSDT', 'FTTUSDT', 'QNTUSDT', 'SXPUSDT',
    'BDXUSDT', 'GAPUSDT', 'BCDUSDT', 'THETAUSDT', 'ICXUSDT', 'FSTUSDT', 'MATICUSDT', 'SCUSDT', 'EVRUSDT',
    'BTTUSDT', 'MOFUSDT', 'IOSTUSDT', 'MCOUSDT', 'WAVESUSDT', 'XVGUSDT', 'MONAUSDT', 'BTSUSDT', 'BCNUSDT',
    'HCUSDT', 'MAIDUSDT', 'NEXOUSDT', 'ARDRUSDT', 'DXUSDT', 'OKBUSDT', 'FXCUSDT', 'RLCUSDT', 'MBUSDT',
    'BXKUSDT', 'AEUSDT', 'ENJUSDT', 'STEEMUSDT', 'SLVUSDT', 'BRZEUSDT', 'ZILUSDT', 'VESTUSDT', 'ZENUSDT',
    'SOLVEUSDT', 'CHZUSDT', 'NOAHUSDT', 'LAUSDT', 'BTMXUSDT', 'ETNUSDT', 'ENGUSDT', 'ILCUSDT', 'NPXSUSDT',
    'CRPTUSDT', 'GNTUSDT', 'SNTUSDT', 'ELFUSDT', 'JWLUSDT', 'FETUSDT', 'BOTXUSDT', 'NRGUSDT', 'DGDUSDT',
    'EXMRUSDT', 'EURSUSDT', 'AOAUSDT', 'RIFUSDT', 'CIX100USDT', 'BFUSDT', 'XZCUSDT', 'FABUSDT', 'GRINUSDT',
    'NETUSDT', 'VERIUSDT', 'DGTXUSDT', 'KNCUSDT', 'RENUSDT', 'STRATUSDT', 'ETPUSDT', 'NEXUSDT', 'NEWUSDT',
    'BCZEROUSDT', 'GXCUSDT', 'TNTUSDT', 'BTC2USDT', 'PPTUSDT', 'USDKUSDT', 'ELAUSDT', 'IGNISUSDT', 'PLCUSDT',
    'BNKUSDT', 'DTRUSDT', 'RCNUSDT', 'HPTUSDT', 'LAMBUSDT', 'MANAUSDT', 'EDCUSDT', 'BEAMUSDT', 'TTUSDT',
    'AIONUSDT', 'BZUSDT', 'WTCUSDT', 'WICCUSDT', 'LRCUSDT', 'BRDUSDT', 'FCTUSDT', 'NULSUSDT', 'FTMUSDT',
    'IOTXUSDT', 'QBITUSDT', 'XMXUSDT', 'YOUUSDT', 'NASUSDT', 'WAXPUSDT', 'ARKUSDT', 'RDDUSDT', 'GNYUSDT',
    'AGVCUSDT', 'HYNUSDT', 'CVCUSDT', 'WANUSDT', 'WINUSDT', 'LINAUSDT', 'RUSDT', 'PAIUSDT', 'FSNUSDT',
    'FUNUSDT', 'DPTUSDT', 'BHDUSDT', 'LOOMUSDT', 'XACUSDT', 'BUSDUSDT', 'BHPUSDT', 'TRUEUSDT', 'LOKIUSDT',
    'QASHUSDT', 'BNTUSDT', 'DOTUSDT', 'SOLUSDT']

## Load And Structure Data

In [2]:
from binance.client import Client as bnb_client
from binance.client import BinanceAPIException
from datetime import datetime
import pandas as pd

# Module-level exchange client used by the download helpers below. No API key or
# secret is passed, so it is created unauthenticated.
# NOTE(review): tld='US' presumably points the client at Binance.US rather than
# binance.com — confirm against the python-binance documentation.
client = bnb_client(tld='US')

def get_price_data_for_coin(coin, freq, start_date):
    """Download the historical klines (candles) of one symbol as a DataFrame.

    Parameters
    ----------
    coin : str
        Exchange symbol to request, e.g. ``'BTCUSDT'``.
    freq : str
        Kline interval, forwarded unchanged to the client (e.g. ``'1d'``).
    start_date : str
        Start of the requested history, forwarded unchanged to the client.

    Returns
    -------
    pandas.DataFrame
        One row per kline with the twelve columns listed in ``columns``.
        ``open_time`` and ``close_time`` are converted from epoch milliseconds
        to timezone-naive UTC timestamps; every other column is left exactly as
        the client returned it (no numeric conversion happens here).

    Raises
    ------
    Any exception raised by ``client.get_historical_klines`` propagates to the
    caller unchanged.
    """
    columns = ['open_time', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_volume',
               'num_trades', 'taker_base_volume', 'taker_quote_volume', 'ignore']

    klines = client.get_historical_klines(coin, freq, start_date)
    bn_data = pd.DataFrame(klines, columns=columns)

    # Vectorised epoch-ms -> timestamp conversion. This replaces a per-row
    # datetime.utcfromtimestamp(x / 1000) call, which is deprecated since
    # Python 3.12 and goes through a float division.
    for time_col in ('open_time', 'close_time'):
        bn_data[time_col] = pd.to_datetime(bn_data[time_col], unit='ms')

    return bn_data


def get_price_data_for_universe(freq, start_date, universe=None):
    """Download kline data for a whole set of symbols.

    Parameters
    ----------
    freq : str
        Kline interval, forwarded to ``get_price_data_for_coin``.
    start_date : str
        Start of the requested history, forwarded to ``get_price_data_for_coin``.
    universe : iterable of str, optional
        Symbols to download. Defaults to the module-level ``strat_univ`` list,
        which keeps existing two-argument calls working as before.

    Returns
    -------
    dict
        Maps each successfully downloaded symbol to its kline DataFrame indexed
        by ``open_time``. Symbols whose request raised ``BinanceAPIException``
        are reported on stdout and left out of the dict.
    """
    symbols = strat_univ if universe is None else universe
    px_data = {}

    for coin in symbols:
        try:
            px_data[coin] = get_price_data_for_coin(coin, freq, start_date).set_index('open_time')
        except BinanceAPIException:
            # Best effort: a symbol the exchange rejects must not abort the run.
            print(f"Couldn't load data for {coin}.")
        else:
            print(f"Downloaded data for {coin}.")

    return px_data



In [3]:
# Kline interval requested from the exchange.
data_freq = "1d"

# Look-back window length and the minimum number of observations it requires.
look_back_window_size, look_back_min_periods = 30, 10

# First date to download, followed by the in-sample / out-of-sample boundaries.
start_datetime = "2020-01-01"
in_sample_end = pd.Timestamp("2022-12-31")
out_sample_start = pd.Timestamp("2023-01-01")

In [4]:
# Download klines for every symbol in strat_univ at data_freq, starting from
# start_datetime. The result is a dict keyed by symbol; symbols the exchange
# cannot serve are reported in the output below and omitted from the dict.
px_data = get_price_data_for_universe(data_freq, start_datetime)

Downloaded data for BTCUSDT.
Downloaded data for ETHUSDT.
Downloaded data for XRPUSDT.
Downloaded data for BCHUSDT.
Downloaded data for LTCUSDT.
Downloaded data for EOSUSDT.
Downloaded data for BNBUSDT.
Downloaded data for XLMUSDT.
Downloaded data for TRXUSDT.
Downloaded data for ADAUSDT.
Downloaded data for XTZUSDT.
Downloaded data for LINKUSDT.
Downloaded data for ATOMUSDT.
Downloaded data for NEOUSDT.
Downloaded data for MKRUSDT.
Downloaded data for DASHUSDT.
Downloaded data for ETCUSDT.
Downloaded data for USDCUSDT.
Downloaded data for ONTUSDT.
Downloaded data for VETUSDT.
Downloaded data for DOGEUSDT.
Downloaded data for BATUSDT.
Downloaded data for ZECUSDT.
Downloaded data for SNXUSDT.
Downloaded data for QTUMUSDT.
Downloaded data for TUSDUSDT.
Downloaded data for ZRXUSDT.
Couldn't load data for THXUSDT.
Couldn't load data for THRUSDT.
Downloaded data for ALGOUSDT.
Couldn't load data for REPUSDT.
Couldn't load data for NANOUSDT.
Couldn't load data for KBCUSDT.
Couldn't load data 

In [19]:
# Wide table of closing prices: one column per downloaded symbol, one row per
# kline open time. Prices arrive as strings, hence the cast to float.
px_close = (
    pd.DataFrame({symbol: klines["close"] for symbol, klines in px_data.items()})
    .astype(float)
    .rename_axis("datetime")
)

px_close

Unnamed: 0_level_0,BTCUSDT,ETHUSDT,XRPUSDT,BCHUSDT,LTCUSDT,EOSUSDT,BNBUSDT,XLMUSDT,TRXUSDT,ADAUSDT,...,MANAUSDT,LRCUSDT,FTMUSDT,IOTXUSDT,WAXPUSDT,LOOMUSDT,BUSDUSDT,BNTUSDT,DOTUSDT,SOLUSDT
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-01,7195.65,130.52,0.19301,204.31,41.53,,13.6954,0.04525,,0.03346,...,,,,,,,,,,
2020-01-02,6961.80,127.22,0.18762,195.41,39.44,,13.0225,0.04363,,0.03272,...,,,,,,,,,,
2020-01-03,7345.65,133.95,0.19320,222.70,42.05,,13.6231,0.04566,,0.03403,...,,,,,,,,,,
2020-01-04,7349.45,134.02,0.19277,224.25,42.58,,13.7944,0.04574,,0.03441,...,,,,,,,,,,
2020-01-05,7364.19,135.23,0.19462,223.29,43.26,,14.0368,0.04543,,0.03455,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-01-21,105790.01,3319.65,3.17090,452.30,118.84,0.8560,691.1900,0.44230,,1.00510,...,0.4944,0.1807,0.5961,0.04447,0.03852,0.05436,,0.719,6.616,250.56
2025-01-22,103876.07,3240.00,3.17110,437.50,115.23,0.8058,693.4500,0.42720,,0.98290,...,0.4767,0.1763,0.5925,0.04176,0.03878,0.05826,,0.687,6.417,257.82
2025-01-23,103973.76,3332.01,3.12230,431.80,116.57,0.7682,686.7600,0.43030,,0.98720,...,0.4748,0.1677,0.5636,0.03834,0.03878,0.05737,,0.667,6.365,253.78
2025-01-24,104829.35,3312.50,3.10390,426.00,118.79,0.7987,679.4000,0.43040,,0.96990,...,0.4693,0.1678,0.5330,0.03843,0.03902,0.05315,,0.667,6.310,253.65


## Determine Dynamic Triples Of Correlated Coins

In [24]:
# Draft of the per-date triple selection (not implemented yet): start from
#     dynamic_triples = {px_close.index[0]: []}
# and, for every row position, look up the coins available at that point with a
# get_available_coins(...) helper.

# Interim static choice: the eight highest-priced coins on the row at position 50.
price_snapshot = px_close.iloc[50]
coins = price_snapshot.sort_values(ascending=False).iloc[:8].index
coins

Index(['BTCUSDT', 'BCHUSDT', 'ETHUSDT', 'LTCUSDT', 'BNBUSDT', 'NEOUSDT',
       'ETCUSDT', 'ATOMUSDT'],
      dtype='object')

In [27]:
# SOLUSDT, DOTUSDT
# Extend the price-ranked selection with two hand-picked symbols.
all_coins = [*coins, 'SOLUSDT', 'DOTUSDT']
all_coins

['BTCUSDT',
 'BCHUSDT',
 'ETHUSDT',
 'LTCUSDT',
 'BNBUSDT',
 'NEOUSDT',
 'ETCUSDT',
 'ATOMUSDT',
 'SOLUSDT',
 'DOTUSDT']

In [28]:
# Closing prices restricted to the selected symbols (display only).
px_close.loc[:, all_coins]

Unnamed: 0_level_0,BTCUSDT,BCHUSDT,ETHUSDT,LTCUSDT,BNBUSDT,NEOUSDT,ETCUSDT,ATOMUSDT,SOLUSDT,DOTUSDT
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-01-01,7195.65,204.31,130.52,41.53,13.6954,8.864,4.5350,4.452,,
2020-01-02,6961.80,195.41,127.22,39.44,13.0225,8.474,4.2373,4.168,,
2020-01-03,7345.65,222.70,133.95,42.05,13.6231,8.894,4.5539,4.238,,
2020-01-04,7349.45,224.25,134.02,42.58,13.7944,8.996,4.7631,4.265,,
2020-01-05,7364.19,223.29,135.23,43.26,14.0368,9.006,4.8606,4.245,,
...,...,...,...,...,...,...,...,...,...,...
2025-01-21,105790.01,452.30,3319.65,118.84,691.1900,14.620,27.0100,6.216,250.56,6.616
2025-01-22,103876.07,437.50,3240.00,115.23,693.4500,14.130,26.2400,6.189,257.82,6.417
2025-01-23,103973.76,431.80,3332.01,116.57,686.7600,13.570,28.9900,6.115,253.78,6.365
2025-01-24,104829.35,426.00,3312.50,118.79,679.4000,13.940,27.1100,6.216,253.65,6.310


In [29]:
# Every selected symbol except BTCUSDT; copy first so all_coins stays intact.
dependent_coins = list(all_coins)
dependent_coins.remove('BTCUSDT')
dependent_coins

['BCHUSDT',
 'ETHUSDT',
 'LTCUSDT',
 'BNBUSDT',
 'NEOUSDT',
 'ETCUSDT',
 'ATOMUSDT',
 'SOLUSDT',
 'DOTUSDT']

In [32]:
# Stand-alone price table for the non-BTC symbols.
dependent_data = px_close.loc[:, dependent_coins].copy()
dependent_data

Unnamed: 0_level_0,BCHUSDT,ETHUSDT,LTCUSDT,BNBUSDT,NEOUSDT,ETCUSDT,ATOMUSDT,SOLUSDT,DOTUSDT
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-01,204.31,130.52,41.53,13.6954,8.864,4.5350,4.452,,
2020-01-02,195.41,127.22,39.44,13.0225,8.474,4.2373,4.168,,
2020-01-03,222.70,133.95,42.05,13.6231,8.894,4.5539,4.238,,
2020-01-04,224.25,134.02,42.58,13.7944,8.996,4.7631,4.265,,
2020-01-05,223.29,135.23,43.26,14.0368,9.006,4.8606,4.245,,
...,...,...,...,...,...,...,...,...,...
2025-01-21,452.30,3319.65,118.84,691.1900,14.620,27.0100,6.216,250.56,6.616
2025-01-22,437.50,3240.00,115.23,693.4500,14.130,26.2400,6.189,257.82,6.417
2025-01-23,431.80,3332.01,116.57,686.7600,13.570,28.9900,6.115,253.78,6.365
2025-01-24,426.00,3312.50,118.79,679.4000,13.940,27.1100,6.216,253.65,6.310


In [33]:
import statsmodels.api as sm

# Add a constant (intercept) column to the price table via statsmodels.
# NOTE(review): this rebinds dependent_data to the augmented frame, so the name no
# longer refers to the plain price table displayed by the previous cell.
dependent_data = sm.add_constant(dependent_data)