In [7]:
strat_univ = ['BTCUSDT', 'ETHUSDT', 'ADAUSDT', 'BNBUSDT', 'XRPUSDT', 'DOTUSDT', 'MATICUSDT']

# Load And Structure Data

In [8]:
from binance.client import Client as bnb_client
from binance.client import BinanceAPIException
from datetime import datetime
import pandas as pd

client = bnb_client(tld='US')

def get_price_data_for_coin(coin, freq, start_date):
    bn_data = client.get_historical_klines(coin, freq, start_date)
    columns = ['open_time','open','high','low','close','volume','close_time','quote_volume',
        'num_trades','taker_base_volume','taker_quote_volume','ignore']

    bn_data = pd.DataFrame(bn_data, columns=columns)
    bn_data['open_time'] = bn_data['open_time'].map(lambda x: datetime.utcfromtimestamp(x / 1000))
    bn_data['close_time'] = bn_data['close_time'].map(lambda x: datetime.utcfromtimestamp(x / 1000))
    return bn_data


def get_price_data_for_universe(freq, start_date):
    px_data = {}

    for coin in strat_univ:
        try:
            px_data[coin] = get_price_data_for_coin(coin, freq, start_date).set_index('open_time')
            print(f"Downloaded data for {coin}.")
        except BinanceAPIException:
            print(f"Couldn't load data for {coin}.")
    
    return px_data

In [9]:
data_freq = '4h'
look_back_window_size = 30 * 6
look_back_min_periods = 10 * 6
start_datetime = '2020-01-01 00:00:00'

In [10]:
px_data = get_price_data_for_universe(data_freq, start_datetime)

Downloaded data for BTCUSDT.
Downloaded data for ETHUSDT.
Downloaded data for ADAUSDT.
Downloaded data for BNBUSDT.
Downloaded data for XRPUSDT.
Downloaded data for DOTUSDT.
Downloaded data for MATICUSDT.


In [11]:
px_data

{'BTCUSDT':                                open            high             low  \
 open_time                                                             
 2020-01-01 00:00:00   7180.46000000   7238.03000000   7175.62000000   
 2020-01-01 04:00:00   7226.19000000   7231.70000000   7196.20000000   
 2020-01-01 08:00:00   7209.50000000   7231.17000000   7181.17000000   
 2020-01-01 12:00:00   7193.32000000   7244.43000000   7193.32000000   
 2020-01-01 16:00:00   7237.54000000   7245.54000000   7215.51000000   
 ...                             ...             ...             ...   
 2025-03-26 16:00:00  86624.17000000  86988.40000000  85897.93000000   
 2025-03-26 20:00:00  86567.36000000  87297.57000000  86492.30000000   
 2025-03-27 00:00:00  86942.42000000  87705.46000000  86841.92000000   
 2025-03-27 04:00:00  87549.17000000  87614.95000000  87197.56000000   
 2025-03-27 08:00:00  87408.00000000  87613.36000000  87236.67000000   
 
                               close       volume  

In [12]:
px_close = {coin: px_data[coin]["close"] for coin in px_data}
px_close = pd.DataFrame(px_close).astype(float)
px_close.index.rename("datetime", inplace=True)

px_close

Unnamed: 0_level_0,BTCUSDT,ETHUSDT,ADAUSDT,BNBUSDT,XRPUSDT,DOTUSDT,MATICUSDT
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-01-01 00:00:00,7230.71,130.18,0.03308,13.8159,0.19406,,
2020-01-01 04:00:00,7205.50,130.52,0.03320,13.7648,0.19518,,
2020-01-01 08:00:00,7195.80,130.84,0.03321,13.7162,0.19358,,
2020-01-01 12:00:00,7233.02,131.84,0.03357,13.7958,0.19428,,
2020-01-01 16:00:00,7223.72,131.98,0.03361,13.7270,0.19474,,
...,...,...,...,...,...,...,...
2025-03-26 16:00:00,86492.30,2002.42,0.72810,618.9600,2.37350,4.654,
2025-03-26 20:00:00,86907.83,2007.52,0.72730,617.1000,2.34840,4.684,
2025-03-27 00:00:00,87584.39,2032.07,0.74180,626.5900,2.37840,4.740,
2025-03-27 04:00:00,87421.21,2026.08,0.73890,633.9100,2.35590,4.673,


In [14]:
look_back_df = px_close.iloc[-30:]

In [34]:
import numpy as np
import statsmodels.api as sm

def update_correlated_coins_by_simple_regression(
    coin,
    current_correlated_triples,
    price_indicator_window_df,
    secondary_coin_min_corr, residual_min_corr):
    
    corrs_with_coin = price_indicator_window_df.corrwith(price_indicator_window_df[coin])
    
    high_corr_coins = []
    for other_coin, other_coin_value in corrs_with_coin[corrs_with_coin > secondary_coin_min_corr].items():
        if other_coin == coin:
            continue
        if np.isnan(price_indicator_window_df[other_coin]).any():
            continue
        high_corr_coins.append(other_coin)
    
    if len(high_corr_coins) == 0:
        return
    
    for other_coin in high_corr_coins:
        coin_look_back = price_indicator_window_df[coin]
        other_coin_look_back = price_indicator_window_df[other_coin]
        
        beta = np.cov(coin_look_back, other_coin_look_back)[0][1] / other_coin_look_back.var()
        alpha = coin_look_back.mean() - beta * other_coin_look_back.mean()
        
        spreads = coin_look_back - (beta * other_coin_look_back + alpha)
        
        #model = sm.OLS(coin_look_back, sm.add_constant(other_coin_look_back))
        #result = model.fit()
        #beta = result.params[other_coin]
        #alpha = result.params['const']
        #estimate = model.predict(result.params, sm.add_constant(other_coin_look_back))
        #spreads = coin_look_back - estimate
        
        corrs_with_spread = price_indicator_window_df.corrwith(spreads)
        
        for third_coin, third_coin_value in corrs_with_spread[corrs_with_spread > residual_min_corr].items():
            key = (coin, min(other_coin, third_coin), max(other_coin, third_coin))
            if key not in current_correlated_triples:
                current_correlaed_triples[key] = pd.DataFrame(
                    0.0, columns=[
                        min(other_coin, third_coin), max(other_coin, third_coin), 'alpha', 'residual'], index=[])

In [35]:
update_correlated_coins_by_simple_regression("BTCUSDT", dict(), px_close.iloc[-30:], 0.7)

datetime
2025-03-22 12:00:00    -771.399656
2025-03-22 16:00:00    -889.361629
2025-03-22 20:00:00    -821.809798
2025-03-23 00:00:00   -1223.468707
2025-03-23 04:00:00   -1311.913999
2025-03-23 08:00:00    -856.255196
2025-03-23 12:00:00    -401.883794
2025-03-23 16:00:00    -186.412860
2025-03-23 20:00:00     556.642003
2025-03-24 00:00:00     803.960911
2025-03-24 04:00:00    -622.149584
2025-03-24 08:00:00    -710.781309
2025-03-24 12:00:00      17.648028
2025-03-24 16:00:00      27.333117
2025-03-24 20:00:00    -750.948253
2025-03-25 00:00:00    -249.890138
2025-03-25 04:00:00    -649.099152
2025-03-25 08:00:00    -281.703272
2025-03-25 12:00:00     257.321995
2025-03-25 16:00:00     265.793875
2025-03-25 20:00:00     -94.524469
2025-03-26 00:00:00     177.066676
2025-03-26 04:00:00     496.826735
2025-03-26 08:00:00     286.374799
2025-03-26 12:00:00     895.163182
2025-03-26 16:00:00    1100.194292
2025-03-26 20:00:00    1345.992686
2025-03-27 00:00:00    1205.511323
2025-03-27 

In [3]:
import pandas as pd

X = pd.Series([1, 2, 3])
Y = pd.DataFrame({'A': [2, 4, 6], 'B': [6, 4, 2], 'C': [1, 1, 1]})

In [6]:
Y.corrwith(X)



  c /= stddev[:, None]
  c /= stddev[None, :]


A    1.0
B   -1.0
C    NaN
dtype: float64