In [1]:
import os
from dotenv import load_dotenv
from binance.client import Client

# Load the keys from the invisible .env file
load_dotenv()

api_key = os.getenv("BINANCE_API_KEY")
api_secret = os.getenv("BINANCE_SECRET")

# Safety Check: Print a warning if keys are missing (Optional)
if not api_key:
    print("CRITICAL ERROR: API Key not found!")

client = Client(api_key, api_secret)

In [2]:
import requests
import pandas as pd

url = "https://api.coingecko.com/api/v3/coins/markets"

params = {
    "vs_currency": "usd",
    "order": "market_cap_desc",
    "per_page": 50,   # pull more so filtering still leaves ~20
    "page": 1,
    "sparkline": False
}

response = requests.get(url, params=params)
data = response.json()

df = pd.DataFrame(data)[
    ["id", "market_cap_rank", "symbol", "name", "market_cap", "current_price"]
]

# -----------------------------
# Filters
# -----------------------------

stablecoins = {
    "usdt", "usdc", "busd", "dai", "tusd",
    "usdp", "fdusd", "gusd", "lusd",
    "ust", "ustc", "usdd", "eurc",'usds','usde', 'usd1', 'bsc-usd'
}

# normalize text
df["symbol"] = df["symbol"].str.lower()
df["name"] = df["name"].str.lower()

# remove stablecoins
df = df[~df["symbol"].isin(stablecoins)]

# remove wrapped coins
df = df[
    ~df["name"].str.contains("wrapped") &
    ~df["symbol"].str.startswith("w")
]

# take top 20 after filtering
df = df.sort_values("market_cap", ascending=False).head(20)

print(df)

                id  market_cap_rank      symbol               name  \
0          bitcoin                1         btc            bitcoin   
1         ethereum                2         eth           ethereum   
3      binancecoin                4         bnb                bnb   
4           ripple                5         xrp                xrp   
6           solana                7         sol             solana   
7             tron                8         trx               tron   
8     staked-ether                9       steth  lido staked ether   
9         dogecoin               10        doge           dogecoin   
10    figure-heloc               11  figr_heloc       figure heloc   
11         cardano               12         ada            cardano   
13    bitcoin-cash               14         bch       bitcoin cash   
20       leo-token               21         leo          leo token   
21          monero               22         xmr             monero   
22       chainlink  

In [7]:
# Binance symbols are uppercase
df["binance_symbol"] = df["symbol"].str.upper() + "USDT"

# Check which ones actually trade on Binance
exchange_info = client.get_exchange_info()
binance_pairs = {s["symbol"] for s in exchange_info["symbols"]}

df = df[df["binance_symbol"].isin(binance_pairs)]

print(df[["symbol", "binance_symbol"]])

   symbol binance_symbol
0     btc        BTCUSDT
1     eth        ETHUSDT
3     bnb        BNBUSDT
4     xrp        XRPUSDT
6     sol        SOLUSDT
7     trx        TRXUSDT
9    doge       DOGEUSDT
11    ada        ADAUSDT
13    bch        BCHUSDT
21    xmr        XMRUSDT
22   link       LINKUSDT
27    xlm        XLMUSDT
29    zec        ZECUSDT
31    ltc        LTCUSDT
32    sui        SUIUSDT


In [8]:
import numpy as np
from statsmodels.tsa.stattools import coint

##Fetch historical klines from Binance

def get_binance_klines(symbol, interval=Client.KLINE_INTERVAL_1DAY, limit=365):
    klines = client.get_klines(
        symbol=symbol,
        interval=interval,
        limit=limit
    )

    df = pd.DataFrame(klines, columns=[
        "open_time", "open", "high", "low", "close", "volume",
        "close_time", "qav", "num_trades", "taker_base_vol",
        "taker_quote_vol", "ignore"
    ])

    df["open_time"] = pd.to_datetime(df["open_time"], unit="ms")
    df.set_index("open_time", inplace=True)

    df["close"] = df["close"].astype(float)

    return df[["close"]]


In [9]:
## Set BTC price series (benchmark)

btc = get_binance_klines("BTCUSDT", limit=365)
btc_log = np.log(btc["close"])

##Run Engle-Granger test vs BTC

results = []

for _, row in df.iterrows():
    symbol = row["binance_symbol"]

    try:
        alt = get_binance_klines(symbol, limit=365)

        merged = btc.join(alt, how="inner", lsuffix="_btc", rsuffix="_alt")

        if len(merged) < 200:
            continue  # insufficient data

        btc_log = np.log(merged["close_btc"])
        alt_log = np.log(merged["close_alt"])

        test_stat, p_value, crit_vals = coint(btc_log, alt_log)

        results.append({
            "coin": symbol.replace("USDT", ""),
            "binance_pair": symbol,
            "p_value": p_value,
            "cointegrated_5pct": p_value < 0.05
        })

    except Exception as e:
        print(f"Skipping {symbol}: {e}")

results_df = (
    pd.DataFrame(results)
    .sort_values("p_value")
    .reset_index(drop=True)
)

print(results_df)


  test_stat, p_value, crit_vals = coint(btc_log, alt_log)


    coin binance_pair   p_value  cointegrated_5pct
0    BTC      BTCUSDT  0.000000               True
1    SOL      SOLUSDT  0.200220              False
2   DOGE     DOGEUSDT  0.489865              False
3   LINK     LINKUSDT  0.511178              False
4    XLM      XLMUSDT  0.515501              False
5    XRP      XRPUSDT  0.520286              False
6    LTC      LTCUSDT  0.615719              False
7    SUI      SUIUSDT  0.677186              False
8    ZEC      ZECUSDT  0.753339              False
9    TRX      TRXUSDT  0.764864              False
10   ADA      ADAUSDT  0.798475              False
11   ETH      ETHUSDT  0.921771              False
12   BCH      BCHUSDT  0.932052              False
13   BNB      BNBUSDT  0.937043              False
