# Summary

All strategies:
  - Strategy 1: Correlation to one asset based on returns, one period behind, train up to 2024-01-01, correlation for all assets, fixed correlation in test period to calculate weights
  - Strategy 2: Correlation based on prices
  - Strategy 3: Rolling correlation of returns based on last N periods, dynamically selected at each time step
  - Strategy 4: Rolling correlation of prices based on last N periods, dynamically selected at each time step
  - Strategy 5: Correlation to one asset based on returns, one period behind, trained up to 2024-01-01, correlation for only assets with correlation above threshold x
  - Strategy 6: Correlation to one asset based on prices, one period behind, trained up to 2024-01-01, correlation for only assets with correlation above threshold x
  - Strategy 7: Correlation to one asset based on returns, one period behind, train up to 2024-01-01, correlation for all assets, dynamic trailing correlation in test period to calculate weights

# Get Historical Data For Cryptocurrencies

In [2]:
from binance.client import Client as bnb_client
from datetime import datetime
import pandas as pd

# The universe is based on this snapshot from December 20, 2020: https://coinmarketcap.com/historical/20201220/

univ = [
    "BTCUSDT", "ETHUSDT", "ADAUSDT", "BNBUSDT", "XRPUSDT", "DOTUSDT", "MATICUSDT", "LTCUSDT", "BCHUSDT",
    "LINKUSDT", "XLMUSDT", "USDCUSDT", "EOSUSDT", "TRXUSDT", "XTZUSDT", "FILUSDT", "NEOUSDT", "DAIUSDT",
    "DASHUSDT", "VETUSDT", "ATOMUSDT", "AAVEUSDT", "UNIUSDT", "GRTUSDT", "THETAUSDT", "IOTAUSDT", "BUSDUSDT",
    "ZECUSDT", "YFIUSDT", "ETCUSDT", "WAVESUSDT", "COMPUSDT", "SNXUSDT", "DOGEUSDT", "MKRUSDT", "ZILUSDT",
    "SUSHIUSDT", "KSMUSDT", "OMGUSDT", "ONTUSDT", "ALGOUSDT", "EGLDUSDT", "BATUSDT", "DGBUSDT", "ZRXUSDT",
    "TUSDUSDT", "QTUMUSDT", "ICXUSDT", "AVAXUSDT", "RENUSDT", "HBARUSDT", "NEARUSDT", "LRCUSDT", "CELOUSDT",
    "KNCUSDT", "LSKUSDT", "OCEANUSDT", "QNTUSDT", "USTUSDT", "BANDUSDT", "MANAUSDT", "ENJUSDT", "ANTUSDT",
    "BNTUSDT", "ZENUSDT", "NMRUSDT", "RVNUSDT", "IOSTUSDT", "OXTUSDT", "CRVUSDT", "MATICUSDT", "HNTUSDT",
    "BALUSDT", "CHZUSDT"
]

should_download_stock_data = False



In [3]:
client = bnb_client(tld='US')


def get_binance_px(symbol, freq, start_ts = '2020-12-20'):
    data = client.get_historical_klines(symbol, freq, start_ts)
    columns = ['open_time', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_volume',
               'num_trades', 'taker_base_volume', 'taker_quote_volume', 'ignore']
    data = pd.DataFrame(data, columns = columns)
    
    # Convert from POSIX timestamp (number of millisecond since jan 1, 1970)
    data['open_time'] = data['open_time'].map(lambda x: datetime.utcfromtimestamp(x/1000))
    data['close_time'] = data['close_time'].map(lambda x: datetime.utcfromtimestamp(x/1000))
    return data 


if should_download_stock_data:
    freq = '4h'
    px = {}
    for x in univ:
        print(f"Downloading data for symbol {x}")
        data = get_binance_px(x, freq)
        px[x] = data.set_index('open_time')['close']

    px = pd.DataFrame(px).astype(float)
    px.to_csv('./class_project_input_prices.csv')
else:
    px = pd.read_csv('./class_project_input_prices.csv')
    date_format = "%Y-%m-%d %H:%M:%S"
    px['open_time'] = px['open_time'].apply(lambda t:  datetime.strptime(t, date_format))
    px.set_index('open_time', inplace=True)

# Data up to 2025-08-31 08:00:00
px

Unnamed: 0_level_0,BTCUSDT,ETHUSDT,ADAUSDT,BNBUSDT,XRPUSDT,DOTUSDT,MATICUSDT,LTCUSDT,BCHUSDT,LINKUSDT,...,BNTUSDT,ZENUSDT,NMRUSDT,RVNUSDT,IOSTUSDT,OXTUSDT,CRVUSDT,HNTUSDT,BALUSDT,CHZUSDT
open_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-12-20 00:00:00,23353.97,646.62,0.16284,32.9681,0.56944,,,117.34,313.46,,...,,12.056,,,,0.2492,,1.41095,,
2020-12-20 04:00:00,23604.24,655.23,0.16638,33.6559,0.57916,,,121.30,340.00,,...,,12.107,,,,0.2477,,1.43156,,
2020-12-20 08:00:00,23549.50,652.88,0.16463,34.8228,0.57948,,,118.09,349.70,,...,,12.237,,,,0.2477,,1.44273,,
2020-12-20 12:00:00,23880.85,653.24,0.16542,35.0120,0.57798,,,119.10,361.21,,...,,12.074,,,,0.2533,,1.47130,,
2020-12-20 16:00:00,23932.71,649.82,0.16502,34.7042,0.57306,,,116.60,357.09,,...,,12.008,,,,0.2503,,1.43083,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-08-30 16:00:00,108921.64,4352.10,0.81950,856.7600,2.80720,3.816,,110.23,542.00,23.36,...,0.725,7.240,16.62,0.01323,0.00342,0.0535,0.7646,,,0.03830
2025-08-30 20:00:00,108569.75,4374.56,0.82170,862.6200,2.81940,3.773,,110.77,552.90,23.47,...,0.725,7.240,15.16,0.01323,0.00342,0.0535,0.7646,,,0.03830
2025-08-31 00:00:00,109155.73,4486.79,0.83590,863.4900,2.85180,3.867,,111.67,551.30,23.90,...,0.764,7.240,15.21,0.01353,0.00342,0.0535,0.7936,,,0.04045
2025-08-31 04:00:00,108660.63,4451.64,0.82760,858.7700,2.82920,3.823,,111.23,549.80,23.76,...,0.764,7.240,14.87,0.01331,0.00342,0.0535,0.7838,,,0.04045


# Utility Functions

In [6]:
# Using pd.corrwith() yields a large number of hardware-related warnings so I implemented by own version.
def calculate_correlation_directly(ser_1, ser_2):
    available_1 = ser_1.notna()
    available_2 = ser_2.notna()
    
    common_1 = ser_1[available_1][available_2]
    common_2 = ser_2[available_1][available_2]
    
    mean_1 = common_1.mean()
    demeaned_1 = common_1 - mean_1
    
    mean_2 = common_2.mean()
    demeaned_2 = common_2 - mean_2
    
    return (demeaned_1 * demeaned_2).sum() / (demeaned_1.shape[0] - 1) / (common_1.std() * common_2.std())


# time_point must be in the index of df
def get_train_test_data(df, time_point):
    train_data = df.loc[:time_point].iloc[:-1]
    test_data = df.loc[t:]
    return train_data, test_data

# Strategy 1: Correlation to one asset based on returns, one period behind, train up to 2024-01-01, correlation for all assets, fixed correlation in test period to calculate weights

In [17]:
from datetime import datetime

ret = px / px.shift() - 1

t = datetime(2024, 1, 1, 0, 0)
train_data, test_data = get_train_test_data(ret, t)

coins_to_top_corr_coins_by_ret = dict()

for col in train_data.columns:
    corr_with_coin = ret.shift().apply(lambda coin_col: calculate_correlation_directly(coin_col, ret[col]))
    sorted_corr_with_coin = corr_with_coin.sort_values()
    non_coin_corr = sorted_corr_with_coin[sorted_corr_with_coin.index != col]
    #train_data.iloc[-10:]

    coins_not_ending_null = non_coin_corr[train_data.iloc[-10:].notna().any()]
    coins_to_top_corr_coins_by_ret[col] = coins_not_ending_null[
        coins_not_ending_null.abs() >= coins_not_ending_null.abs().max()].index[0]


coins_to_top_corr_coins_by_ret
# test_data
# ret.loc[]

# corr_with_BTC = ret.shift().apply(lambda coin_col: calculate_correlation_directly(coin_col, ret['BTCUSDT']))

{'BTCUSDT': 'TUSDUSDT',
 'ETHUSDT': 'TUSDUSDT',
 'ADAUSDT': 'TUSDUSDT',
 'BNBUSDT': 'TUSDUSDT',
 'XRPUSDT': 'LTCUSDT',
 'DOTUSDT': 'TUSDUSDT',
 'MATICUSDT': 'TUSDUSDT',
 'LTCUSDT': 'TUSDUSDT',
 'BCHUSDT': 'TUSDUSDT',
 'LINKUSDT': 'TUSDUSDT',
 'XLMUSDT': 'TUSDUSDT',
 'USDCUSDT': 'BNTUSDT',
 'EOSUSDT': 'VETUSDT',
 'TRXUSDT': 'NMRUSDT',
 'XTZUSDT': 'ADAUSDT',
 'FILUSDT': 'MATICUSDT',
 'NEOUSDT': 'TUSDUSDT',
 'DAIUSDT': 'IOTAUSDT',
 'DASHUSDT': 'LTCUSDT',
 'VETUSDT': 'TUSDUSDT',
 'ATOMUSDT': 'TUSDUSDT',
 'AAVEUSDT': 'TUSDUSDT',
 'UNIUSDT': 'TUSDUSDT',
 'GRTUSDT': 'TUSDUSDT',
 'THETAUSDT': 'TUSDUSDT',
 'IOTAUSDT': 'ICXUSDT',
 'BUSDUSDT': 'TUSDUSDT',
 'ZECUSDT': 'LINKUSDT',
 'YFIUSDT': 'TUSDUSDT',
 'ETCUSDT': 'TUSDUSDT',
 'WAVESUSDT': 'DASHUSDT',
 'COMPUSDT': 'TUSDUSDT',
 'SNXUSDT': 'IOTAUSDT',
 'DOGEUSDT': 'TUSDUSDT',
 'MKRUSDT': 'NEOUSDT',
 'ZILUSDT': 'DAIUSDT',
 'SUSHIUSDT': 'TUSDUSDT',
 'KSMUSDT': 'TUSDUSDT',
 'OMGUSDT': 'TUSDUSDT',
 'ONTUSDT': 'DOTUSDT',
 'ALGOUSDT': 'TUSDUSDT',
 'EGLDU

In [24]:
import numpy as np

corr_coin = coins_to_top_corr_coins_by_ret['BTCUSDT']


px[corr_coin].iloc[-40:]

# # train_data['BTCUSDT'] ~ train_data[corr_coin].shift()

# beta = np.cov(train_data['BTCUSDT'], train_data[corr_coin].shift())
# beta


open_time
2025-08-24 20:00:00   NaN
2025-08-25 00:00:00   NaN
2025-08-25 04:00:00   NaN
2025-08-25 08:00:00   NaN
2025-08-25 12:00:00   NaN
2025-08-25 16:00:00   NaN
2025-08-25 20:00:00   NaN
2025-08-26 00:00:00   NaN
2025-08-26 04:00:00   NaN
2025-08-26 08:00:00   NaN
2025-08-26 12:00:00   NaN
2025-08-26 16:00:00   NaN
2025-08-26 20:00:00   NaN
2025-08-27 00:00:00   NaN
2025-08-27 04:00:00   NaN
2025-08-27 08:00:00   NaN
2025-08-27 12:00:00   NaN
2025-08-27 16:00:00   NaN
2025-08-27 20:00:00   NaN
2025-08-28 00:00:00   NaN
2025-08-28 04:00:00   NaN
2025-08-28 08:00:00   NaN
2025-08-28 12:00:00   NaN
2025-08-28 16:00:00   NaN
2025-08-28 20:00:00   NaN
2025-08-29 00:00:00   NaN
2025-08-29 04:00:00   NaN
2025-08-29 08:00:00   NaN
2025-08-29 12:00:00   NaN
2025-08-29 16:00:00   NaN
2025-08-29 20:00:00   NaN
2025-08-30 00:00:00   NaN
2025-08-30 04:00:00   NaN
2025-08-30 08:00:00   NaN
2025-08-30 12:00:00   NaN
2025-08-30 16:00:00   NaN
2025-08-30 20:00:00   NaN
2025-08-31 00:00:00   NaN
20

In [11]:
train_data[corr_coin].notna().astype(int).sum()

402

In [12]:
test_data[corr_coin].notna().astype(int).sum()

0

In [None]:
coins_to_top_corr_coins_by_ret

In [3]:
ret = px / px.shift() - 1
ret

Unnamed: 0_level_0,BTCUSDT,ETHUSDT,ADAUSDT,BNBUSDT,XRPUSDT,DOTUSDT,MATICUSDT,LTCUSDT,BCHUSDT,LINKUSDT,...,BNTUSDT,ZENUSDT,NMRUSDT,RVNUSDT,IOSTUSDT,OXTUSDT,CRVUSDT,HNTUSDT,BALUSDT,CHZUSDT
open_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-12-20 00:00:00,,,,,,,,,,,...,,,,,,,,,,
2020-12-20 04:00:00,0.010716,0.013315,0.021739,0.020863,0.017069,,,0.033748,0.084668,,...,,0.004230,,,,-0.006019,,0.014607,,
2020-12-20 08:00:00,-0.002319,-0.003587,-0.010518,0.034671,0.000553,,,-0.026463,0.028529,,...,,0.010738,,,,0.000000,,0.007803,,
2020-12-20 12:00:00,0.014070,0.000551,0.004799,0.005433,-0.002589,,,0.008553,0.032914,,...,,-0.013320,,,,0.022608,,0.019803,,
2020-12-20 16:00:00,0.002172,-0.005235,-0.002418,-0.008791,-0.008512,,,-0.020991,-0.011406,,...,,-0.005466,,,,-0.011844,,-0.027506,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-08-30 16:00:00,0.000563,-0.005439,-0.003769,-0.002863,-0.000178,0.007924,,-0.003526,-0.005505,-0.002136,...,-0.069320,0.000000,0.003623,0.000000,0.0,0.000000,0.000000,,,0.000000
2025-08-30 20:00:00,-0.003231,0.005161,0.002685,0.006840,0.004346,-0.011268,,0.004899,0.020111,0.004709,...,0.000000,0.000000,-0.087846,0.000000,0.0,0.000000,0.000000,,,0.000000
2025-08-31 00:00:00,0.005397,0.025655,0.017281,0.001009,0.011492,0.024914,,0.008125,-0.002894,0.018321,...,0.053793,0.000000,0.003298,0.022676,0.0,0.000000,0.037928,,,0.056136
2025-08-31 04:00:00,-0.004536,-0.007834,-0.009929,-0.005466,-0.007925,-0.011378,,-0.003940,-0.002721,-0.005858,...,0.000000,0.000000,-0.022354,-0.016260,0.0,0.000000,-0.012349,,,0.000000
