In [None]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before each
# cell is executed, so edits to the project's .py files are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [None]:
from procedures import create_binance_bot, create_binance_bot_spot, make_get_filepath
from pure_funcs import get_template_live_config, ts_to_date, get_daily_from_income
from njit_funcs import round_dynamic
from time import time, sleep
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Default figure size (width, height in inches) for every plot in this notebook.
plt.rcParams["figure.figsize"] = [21, 13]
# Use the fully qualified option name. The bare pattern "precision" is matched as a
# regex against all option keys; on recent pandas versions it also matches
# "styler.format.precision", which makes set_option raise OptionError.
pd.set_option("display.precision", 10)

In [None]:
config = get_template_live_config()
config["symbol"] = "XMRUSDT"  # dummy symbol
config["exchange"] = "binance"
config["market_type"] = "futures"
config["user"] = "binance_01"
try:
    await bot.session.close()
except:
    pass
if config["market_type"] == "spot":
    bot = await create_binance_bot_spot(config)
else:
    bot = await create_binance_bot(config)

In [None]:
info = await bot.public_get(bot.endpoints["exchange_info"])
symbols = [e["symbol"] for e in info["symbols"] if e["symbol"].endswith("USDT")]

interval = "5m"

fpath = f"logs/binance/ohlcvs_{interval}/"
if os.path.exists(fpath):
    print("loading cache")
    csvs = {}
    for f in [x for x in os.listdir(fpath) if x.endswith(".csv")]:
        csvs[f[:-4]] = pd.read_csv(fpath + f)
else:
    fpath = make_get_filepath(f"logs/binance/ohlcvs_{interval}/")

    data = {}
    for s in sorted(symbols):
        print(s, end=" ")
        ohlcvs = await bot.fetch_ohlcvs(symbol=s, interval=interval)
        data[s] = ohlcvs
        sleep(0.5)
    # cache as csv for later use
    csvs = {}
    fpath = make_get_filepath(f"logs/binance/ohlcvs_{interval}/")
    for s in data:
        csvs[s] = pd.DataFrame(data[s])
        csvs[s].to_csv(f"{fpath}{s}.csv")
n_days = (
    (csvs[symbols[0]].timestamp.iloc[-1] - csvs[symbols[0]].timestamp.iloc[0]) / 1000 / 60 / 60 / 24
)
print("n days", n_days)

In [None]:
def correlation(x, y):
    """Return the Pearson correlation coefficient of ``x`` and ``y``.

    Both arguments must support ``.mean()``, ``.sum()`` and elementwise arithmetic
    (e.g. numpy arrays or pandas Series). The result is the sum of the products of
    the mean-centered values divided by the square root of the product of their
    summed squares.
    """
    dx = x - x.mean()
    dy = y - y.mean()
    cross_sum = (dx * dy).sum()
    norm = ((dx ** 2).sum() * (dy ** 2).sum()) ** 0.5
    return cross_sum / norm

In [None]:
n_symbols = 30

# Traded value per symbol (sum of volume * close over all candles), computed once.
traded_value = {s: (csvs[s].volume * csvs[s].close).sum() for s in csvs}
vols = list(traded_value.values())

# The threshold is the n_symbols-th largest traded value. The rank is clamped so that
# having fewer than n_symbols symbols selects all of them instead of raising IndexError.
minvol = sorted(vols)[-min(n_symbols, len(vols))] if vols else 0.0
ss = [s for s in csvs if traded_value[s] >= minvol]

# Correlation of close prices for every unordered pair of selected symbols, visited
# exactly once in sorted order; each entry is (corr, symbol_a, symbol_b) with a < b.
corrs = []
ss_sorted = sorted(ss)
for i, s0 in enumerate(ss_sorted):
    for s1 in ss_sorted[i + 1 :]:
        corr = correlation(csvs[s0].close, csvs[s1].close)
        print(f"\r{s0} {s1} {corr:.4f}      ", end=" ")
        corrs.append((corr, s0, s1))

# Symmetric lookup tables built from the (corr, symbol_a, symbol_b) entries in `corrs`:
# corrs_dict[a][b] and corrs_dict[b][a] both hold the pair's correlation, and
# corrs_dict_abs holds the corresponding absolute values.
corrs_dict = {}
corrs_dict_abs = {}
for corr, sym_a, sym_b in corrs:
    for src, dst in ((sym_a, sym_b), (sym_b, sym_a)):
        corrs_dict.setdefault(src, {})[dst] = corr
        corrs_dict_abs.setdefault(src, {})[dst] = abs(corr)

In [None]:
# For each symbol, total its correlations with all other symbols; show in ascending order.
sorted((sum(peers.values()), s) for s, peers in corrs_dict.items())

In [None]:
# For each symbol, total its absolute correlations with all other symbols; ascending order.
sorted((sum(corrs_dict_abs[s].values()), s) for s in corrs_dict.keys())

In [None]:
# Rebase every symbol's close to its first value, then average across symbols per row.
rebased_closes = [df.close / df.close.iloc[0] for df in csvs.values()]
mean_prices = pd.concat(rebased_closes, axis=1).mean(axis=1)

# Plot one symbol's rebased close against that cross-symbol mean.
symbol = "SOLUSDT"
symbol_close = csvs[symbol].close
mean_prices.rename("mean_prices").plot()
(symbol_close / symbol_close.iloc[0]).rename(symbol).plot()
plt.legend()

In [None]:
# Plot two symbols' close prices, each rebased to its own first value, on the same axes.
symbol0 = "BNBUSDT"
symbol1 = "LINKUSDT"
for sym in (symbol0, symbol1):
    close = csvs[sym].close
    (close / close.iloc[0]).rename(sym).plot()
plt.legend()