In [None]:
import numpy as np
import pandas as pd

# Load the merged daily dataset (path is relative to the notebook's working directory).
master = pd.read_csv("../Idea_data/master_data.csv")

# --- 1) order rows by date and keep only the first row seen for each date
master = (
    master
    .sort_values('date')
    .drop_duplicates('date')
    .reset_index(drop=True)
)

# Source column -> working alias, grouped by data family. The groups are merged
# in the order spot, perp/futures, funding, which fixes the column order of `df`.
_spot_aliases = {
    'close': 'spot_close',
    'spot_volume': 'spot_volume',
    'spot_z30d_price': 'z_close_30d',          # alias to a clearer name
    'spot_z30d_volume': 'z_spot_volume_30d',   # optional; skipped here when the source column is absent
    'ema200': 'ema200',
    'above_ema200': 'above_ema200',
}
_perp_aliases = {
    'buy_share': 'buy_share',
    'OFI': 'OFI',
    'z_prep_OFI': 'z_OFI_3d',                  # presumably a short-window z-score, treated as 3d - TODO confirm
    'prep_volume': 'perp_volume',              # optional
}
_funding_aliases = {
    'funding_mean': 'funding_mean',
    'funding_first': 'funding_first',
    'funding_last': 'funding_last',
    'z7_funding': 'z7_funding',
    'sign_changed': 'funding_sign_changed',    # boolean
}
col_map = {**_spot_aliases, **_perp_aliases, **_funding_aliases}

# Only source columns that actually exist in `master` are carried over and renamed.
cols_present = {src: alias for src, alias in col_map.items() if src in master.columns}
df = master.loc[:, ['date', *cols_present]].rename(columns=cols_present).copy()

print('Working columns:', df.columns.tolist())
# NOTE(review): if this is a single cell, only its last expression is rendered,
# so this preview shows nothing and the full `master` frame is displayed instead.
df.head(3)


master

In [5]:
k = 3                  # half-width of the swing window: each row is compared with t-k..t+k
horizons = [1,3,5]     # forward horizons, in rows

# Swing labels: a row is a top/bottom when its close equals the max/min of the
# centered (2k+1)-row window. The window contains the k rows AFTER the row, so
# the labels use future data and are suitable for evaluation only. Rows whose
# window is incomplete (first/last k rows) get NaN extremes and are never flagged.
roll_max = df['spot_close'].rolling(window=2*k + 1, center=True).max()
roll_min = df['spot_close'].rolling(window=2*k + 1, center=True).min()
df['is_top']    = df['spot_close'].eq(roll_max)
df['is_bottom'] = df['spot_close'].eq(roll_min)

# Forward simple return over H rows: close[t+H] / close[t] - 1 (NaN for the last H rows).
for H in horizons:
    df[f'fwd_ret_{H}'] = df['spot_close'].shift(-H).div(df['spot_close']).sub(1)

print("tops:", int(df['is_top'].sum()), "bottoms:", int(df['is_bottom'].sum()))
df.loc[df[['is_top', 'is_bottom']].any(axis=1), ['date','spot_close','is_top','is_bottom']].head(30)


tops: 14 bottoms: 10


Unnamed: 0,date,spot_close,is_top,is_bottom
8,2025-05-10,104788.8,True,False
10,2025-05-12,102739.3,False,True
15,2025-05-17,103079.6,False,True
20,2025-05-22,111662.7,True,False
24,2025-05-26,109386.8,True,False
28,2025-05-30,103950.0,False,True
30,2025-06-02,105814.3,True,False
33,2025-06-05,101458.6,False,True
38,2025-06-10,110235.4,True,False
44,2025-06-16,106750.1,True,False


In [6]:
k = 5                  # half-width of the swing window: each row is compared with t-5..t+5
horizons = [1,3,5]     # forward horizons, in rows

# Recompute the swing labels with the wider window; this overwrites the
# `is_top` / `is_bottom` columns already present on `df`. The centered window
# reads the k rows after each row, so these labels are look-ahead by design
# and must only be used as evaluation targets.
roll_max = df['spot_close'].rolling(window=2*k + 1, center=True).max()
roll_min = df['spot_close'].rolling(window=2*k + 1, center=True).min()
df['is_top']    = df['spot_close'].eq(roll_max)
df['is_bottom'] = df['spot_close'].eq(roll_min)

# Forward simple return over H rows: close[t+H] / close[t] - 1 (NaN for the last H rows).
for H in horizons:
    df[f'fwd_ret_{H}'] = df['spot_close'].shift(-H).div(df['spot_close']).sub(1)

print("tops:", int(df['is_top'].sum()), "bottoms:", int(df['is_bottom'].sum()))
df.loc[df[['is_top', 'is_bottom']].any(axis=1), ['date','spot_close','is_top','is_bottom']].head(30)

tops: 7 bottoms: 4


Unnamed: 0,date,spot_close,is_top,is_bottom
8,2025-05-10,104788.8,True,False
20,2025-05-22,111662.7,True,False
33,2025-06-05,101458.6,False,True
38,2025-06-10,110235.4,True,False
50,2025-06-22,100904.7,False,True
60,2025-07-03,109545.2,True,False
71,2025-07-14,119816.5,True,False
77,2025-07-20,117216.0,False,True
79,2025-07-22,119993.7,True,False
89,2025-08-02,112508.8,False,True


In [10]:


# --- helpers
def rolling_z(s, win=30, minp=None):
    """Rolling z-score of a series against its own trailing window.

    Parameters
    ----------
    s : pandas.Series
        Input values; cast to float before any computation.
    win : int, default 30
        Number of rows in the trailing window (the current row included).
    minp : int or None, default None
        Minimum observations required to produce a value. When None it is
        set to ``int(win * 0.8)``.

    Returns
    -------
    pandas.Series
        ``(s - rolling mean) / rolling std``, using the population standard
        deviation (``ddof=0``). Rows with fewer than ``minp`` observations
        in the window are NaN.
    """
    values = s.astype(float)
    min_obs = int(win * 0.8) if minp is None else minp
    window = values.rolling(win, min_periods=min_obs)
    return (values - window.mean()) / window.std(ddof=0)

# Ensure sorted dates
df = df.sort_values('date').reset_index(drop=True)

# 1) Perp/Spot volume ratio.
# A zero spot volume makes the ratio undefined, so the denominator is mapped to
# NaN and the ratio becomes NaN for that row. Adding a tiny epsilon instead
# would yield a huge finite ratio (perp / 1e-9) that distorts the rolling
# mean/std of every window containing it.
eps = 1e-9  # no longer used for the ratio; kept defined in case other cells reference it
spot_vol_nonzero = df['spot_volume'].replace(0, np.nan)
df['ps_vol_ratio'] = (df['perp_volume'] / spot_vol_nonzero).replace([np.inf, -np.inf], np.nan)

# 30d z-scores for the things we care about
df['z_ps_vol_ratio_30d'] = rolling_z(df['ps_vol_ratio'], 30)
# Build the spot-volume z-score only when it was not supplied by the source data.
if 'z_spot_volume_30d' not in df.columns:
    df['z_spot_volume_30d'] = rolling_z(df['spot_volume'].replace(0, np.nan), 30)

# 2) Funding "level" (30d z) and "spike" (acceleration)
df['z_funding_30d'] = rolling_z(df['funding_mean'], 30)

# Acceleration = row-over-row change of the mean funding rate, z-scored over 7 rows.
df['funding_accel'] = df['funding_mean'] - df['funding_mean'].shift(1)
df['z_funding_accel_7d'] = rolling_z(df['funding_accel'], 7)

# 3) Boolean spike flags (tunable thresholds; start conservative).
# These are derived from the unclipped z-scores; clipping at +/-5 happens below
# and cannot change a comparison against 1.5 or 1.0.
df['funding_level_high'] = df['z_funding_30d'] >= 1.5
df['funding_spike_up']   = df['z_funding_accel_7d'] >= 1.0
# Intraday flip: first and last funding values of the row have opposite signs.
df['fund_flip_intraday'] = (df['funding_first'] * df['funding_last'] < 0).fillna(False)

# 4) Clamp z's to avoid domination when adding them together later
for c in ['z_ps_vol_ratio_30d', 'z_spot_volume_30d', 'z_funding_30d', 'z_funding_accel_7d', 'z_close_30d']:
    if c in df.columns:
        df[c] = df[c].clip(-5, 5)

df[['date','z_close_30d','z_spot_volume_30d','ps_vol_ratio','z_ps_vol_ratio_30d','z_funding_30d','z_funding_accel_7d','funding_level_high','funding_spike_up','fund_flip_intraday']].tail(10)


Unnamed: 0,date,z_close_30d,z_spot_volume_30d,ps_vol_ratio,z_ps_vol_ratio_30d,z_funding_30d,z_funding_accel_7d,funding_level_high,funding_spike_up,fund_flip_intraday
109,2025-08-22,0.003121,1.139275,8.623625,0.152278,0.081833,0.637907,False,False,False
110,2025-08-23,-0.580264,-0.498912,6.921786,-1.846806,0.972633,0.782786,False,False,False
111,2025-08-24,-1.298889,0.841048,6.299022,-2.282592,1.011886,-0.3129,False,False,False
112,2025-08-25,-2.328088,1.731182,8.134117,-0.298394,0.799892,-0.925486,False,False,False
113,2025-08-26,-1.575878,0.576503,7.860286,-0.605328,-0.511304,-1.693663,False,False,False
114,2025-08-27,-1.616247,-0.300806,8.527474,0.162403,-1.454774,-1.206046,False,False,False
115,2025-08-28,-1.088079,-0.669886,8.792132,0.432494,0.179083,1.46701,False,True,False
116,2025-08-29,-2.21421,1.198969,7.959376,-0.574724,-1.361527,-1.236511,False,False,False
117,2025-08-30,-1.87301,-0.724993,5.664425,-2.779879,0.453228,1.485709,False,True,False
118,2025-08-31,-1.845579,-0.872516,7.392321,-0.966803,-0.913237,-0.790636,False,False,True


In [13]:
# Top-hypothesis conditions, all evaluated on the same row t (D0).
# Comparisons against NaN evaluate to False, so rows without enough history
# simply do not vote.
cond_top = [
    (df['z_close_30d'] >= 1.0),            # price "hot"
    (df['z_spot_volume_30d'] >= 0.5),      # spot volume elevated
    (df['z_ps_vol_ratio_30d'] >= 1.0),     # leverage activity high vs history
    (df['funding_level_high'] & df['funding_spike_up']) | df['fund_flip_intraday']  # funding high & spiking or intraday flip
]

# K-of-N voting (start with K=3): fire when at least K conditions hold.
K = 3
df['sig_top_hyp'] = (np.nan_to_num(np.column_stack(cond_top)).sum(axis=1) >= K)

# Optional 3-day cooldown to avoid clustering: keep a signal only when the raw
# signal did not fire on any of the previous `cooldown` rows.
# The look-back uses a non-circular shift padded with False. A circular shift
# (np.roll) would wrap the last rows of the series around to the front and let
# the most recent signals suppress the earliest ones.
cooldown = 3
raw_sig = df['sig_top_hyp'].astype(bool)
on = raw_sig.to_numpy(dtype=bool).copy()
for i in range(1, cooldown+1):
    on &= ~raw_sig.shift(i, fill_value=False).to_numpy(dtype=bool)
df['sig_top_hyp'] = on

# Forward-return horizons (in rows) used to score the signal.
horizons = [1,3,5]
def evaluate_top(block, eval_horizons=None):
    """Score the top signal of ``block`` against its forward returns.

    Parameters
    ----------
    block : pandas.DataFrame
        Must contain a boolean-like ``sig_top_hyp`` column and one
        ``fwd_ret_{H}`` column for every horizon H that is evaluated.
    eval_horizons : iterable of int, optional
        Horizons to evaluate. When omitted, the module-level ``horizons``
        list is used.

    Returns
    -------
    pandas.Series
        For every horizon H:
        ``prec@H``      share of signals followed by a negative forward return,
        ``avg_ret@H``   mean forward return after a signal,
        ``n_signals@H`` total number of signals in ``block``.
        ``prec@H`` and ``avg_ret@H`` are computed only over signals whose
        forward return is known, and are NaN when there is none.
    """
    hs = horizons if eval_horizons is None else eval_horizons
    sig = block['sig_top_hyp'].astype(bool)
    n_signals = int(sig.sum())

    out = {}
    for H in hs:
        # The last H rows have no forward return yet. Drop them so an
        # unrealised outcome is not counted as a miss (NaN < 0 is False),
        # which keeps precision consistent with the NaN-skipping mean.
        realized = block.loc[sig, f'fwd_ret_{H}'].dropna()
        has_outcome = len(realized) > 0
        # top -> expect negative forward return
        out[f'prec@{H}'] = (realized < 0).mean() if has_outcome else np.nan
        out[f'avg_ret@{H}'] = realized.mean() if has_outcome else np.nan
        out[f'n_signals@{H}'] = n_signals
    return pd.Series(out)

# Normalise `date` to tz-naive midnight timestamps: parse as UTC (unparseable
# values become NaT), drop the timezone, then truncate to the day.
df['date'] = (
    pd.to_datetime(df['date'], errors='coerce', utc=True)
    .dt.tz_convert('UTC')
    .dt.tz_localize(None)
    .dt.normalize()
)

# Chronological split; the tz-naive timestamp is directly comparable with df['date'].
# Rows with a NaT date satisfy neither comparison and fall into neither part.
split_date = pd.Timestamp('2025-08-01')
dev  = df.loc[df['date'].lt(split_date)].copy()
hold = df.loc[df['date'].ge(split_date)].copy()

# One column of metrics per sample, side by side.
summary = pd.concat(
    {label: evaluate_top(part) for label, part in (('DEV_topHyp', dev), ('HOLD_topHyp', hold))},
    axis=1,
)

summary


Unnamed: 0,DEV_topHyp,HOLD_topHyp
prec@1,0.0,
avg_ret@1,0.013113,
n_signals@1,1.0,0.0
prec@3,0.0,
avg_ret@3,0.026376,
n_signals@3,1.0,0.0
prec@5,0.0,
avg_ret@5,0.015169,
n_signals@5,1.0,0.0
