In [1]:
import numpy as np
import pandas as pd

df = pd.read_csv('fundingtesting.csv')

df

Unnamed: 0.1,Unnamed: 0,date,close_spot,volume_spot,close_fut,volume_fut,funding_mean,daily_funding_sum,funding_last,funding_first
0,0,2025-05-01 00:00:00+00:00,,,96423.3,184526.755,0.000004,0.000012,-0.000080,0.000035
1,1,2025-05-02 00:00:00+00:00,96887.14,14905.74811,96831.2,139197.526,-0.000101,-0.000303,-0.000087,-0.000122
2,2,2025-05-03 00:00:00+00:00,95856.42,9723.34838,95803.3,71420.629,-0.000014,-0.000042,-0.000015,-0.000060
3,3,2025-05-04 00:00:00+00:00,94277.62,11036.38342,94230.2,94260.877,0.000024,0.000073,0.000037,0.000004
4,4,2025-05-05 00:00:00+00:00,94733.68,17251.18189,94696.7,154902.375,-0.000003,-0.000010,-0.000022,-0.000024
...,...,...,...,...,...,...,...,...,...,...
118,118,2025-08-27 00:00:00+00:00,111262.01,13392.60875,111194.9,114205.122,0.000036,0.000108,0.000035,0.000018
119,119,2025-08-28 00:00:00+00:00,112566.90,11104.27744,112507.3,97630.277,0.000075,0.000225,0.000075,0.000053
120,120,2025-08-29 00:00:00+00:00,108377.40,22580.31045,108332.4,179725.172,0.000036,0.000108,0.000006,0.000043
121,121,2025-08-30 00:00:00+00:00,108816.33,10708.39159,108757.9,60656.883,0.000078,0.000235,0.000080,0.000075


In [6]:
#identifying high and low

k = 5                  # swing window on each side (t-3..t+3)
horizons = [1,3,5]     # forward horizons

# swing labels (centered rolling – no look-ahead for labels)
roll_max = df['close_fut'].rolling(2*k+1, center=True).max()
roll_min = df['close_fut'].rolling(2*k+1, center=True).min()
df['is_top']    = (df['close_fut'] == roll_max)
df['is_bottom'] = (df['close_fut'] == roll_min)

# forward returns for evaluation
for H in horizons:
    df[f'fwd_ret_{H}'] = df['close_fut'].shift(-H) / df['close_fut'] - 1

print("tops:", int(df['is_top'].sum()), "bottoms:", int(df['is_bottom'].sum()))
df.loc[df['is_top'] | df['is_bottom'], ['date','close_fut','is_top','is_bottom']].head(30)

tops: 7 bottoms: 6


Unnamed: 0,date,close_fut,is_top,is_bottom
9,2025-05-10 00:00:00+00:00,104788.8,True,False
21,2025-05-22 00:00:00+00:00,111662.7,True,False
29,2025-05-30 00:00:00+00:00,103950.0,False,True
35,2025-06-05 00:00:00+00:00,101458.6,False,True
40,2025-06-10 00:00:00+00:00,110235.4,True,False
52,2025-06-22 00:00:00+00:00,100904.7,False,True
61,2025-07-01 00:00:00+00:00,105637.9,False,True
63,2025-07-03 00:00:00+00:00,109545.2,True,False
74,2025-07-14 00:00:00+00:00,119816.5,True,False
80,2025-07-20 00:00:00+00:00,117216.0,False,True


In [11]:
price = 'close_fut'    # or 'perp_close'
df['ret1']  = df[price].pct_change(1)
df['ret3']  = df[price].pct_change(3)
df['absret1'] = df['ret1'].abs()

# funding features
df['fund']    = df['funding_mean']        # alias for readability
df['dFund1']  = df['fund'] - df['fund'].shift(1)
df['dFund3']  = df['fund'] - df['fund'].shift(3)
df['flip']    = np.sign(df['fund']).ne(np.sign(df['fund'].shift(1)))  # True when funding flips sign today

# make an event column for convenience
df['is_event'] = df['is_top'] | df['is_bottom']


df


Unnamed: 0.1,Unnamed: 0,date,close_spot,volume_spot,close_fut,volume_fut,funding_mean,daily_funding_sum,funding_last,funding_first,...,fwd_ret_3,fwd_ret_5,ret1,ret3,absret1,fund,dFund1,dFund3,flip,is_event
0,0,2025-05-01 00:00:00+00:00,,,96423.3,184526.755,0.000004,0.000012,-0.000080,0.000035,...,-0.022745,0.003895,,,,0.000004,,,True,False
1,1,2025-05-02 00:00:00+00:00,96887.14,14905.74811,96831.2,139197.526,-0.000101,-0.000303,-0.000087,-0.000122,...,-0.022044,0.001623,0.004230,,0.004230,-0.000101,-0.000105,,True,False
2,2,2025-05-03 00:00:00+00:00,95856.42,9723.34838,95803.3,71420.629,-0.000014,-0.000042,-0.000015,-0.000060,...,0.010392,0.077500,-0.010615,,0.010615,-0.000014,0.000087,,False,False
3,3,2025-05-04 00:00:00+00:00,94277.62,11036.38342,94230.2,94260.877,0.000024,0.000073,0.000037,0.000004,...,0.029271,0.092451,-0.016420,-0.022745,0.016420,0.000024,0.000039,0.000020,True,False
4,4,2025-05-05 00:00:00+00:00,94733.68,17251.18189,94696.7,154902.375,-0.000003,-0.000010,-0.000022,-0.000024,...,0.090092,0.106573,0.004951,-0.022044,0.004951,-0.000003,-0.000028,0.000098,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118,118,2025-08-27 00:00:00+00:00,111262.01,13392.60875,111194.9,114205.122,0.000036,0.000108,0.000035,0.000018,...,-0.021916,,-0.004610,-0.019846,0.004610,0.000036,-0.000025,-0.000064,False,False
119,119,2025-08-28 00:00:00+00:00,112566.90,11104.27744,112507.3,97630.277,0.000075,0.000225,0.000075,0.000053,...,-0.038210,,0.011803,0.022199,0.011803,0.000075,0.000039,-0.000019,False,False
120,120,2025-08-29 00:00:00+00:00,108377.40,22580.31045,108332.4,179725.172,0.000036,0.000108,0.000006,0.000043,...,,,-0.037108,-0.030235,0.037108,0.000036,-0.000039,-0.000026,False,False
121,121,2025-08-30 00:00:00+00:00,108816.33,10708.39159,108757.9,60656.883,0.000078,0.000235,0.000080,0.000075,...,,,0.003928,-0.021916,0.003928,0.000078,0.000043,0.000042,False,False


In [28]:
def event_study(series, mask_events, window=5):
    idx = np.where(mask_events)[0]
    mats = []
    for i in idx:
        lo, hi = i - window, i + window
        if lo < 0 or hi >= len(series): 
            continue
        mats.append(series.iloc[lo:hi+1].to_numpy())
    if not mats: 
        return None
    M = np.vstack(mats)                 # n_events x (2w+1)
    avg = np.nanmean(M, axis=0)
    med = np.nanmedian(M, axis=0)
    x = np.arange(-window, window+1)
    return pd.DataFrame({'t':x, 'avg':avg, 'med':med})

# funding level and change around events
top_fund    = event_study(df['fund'],   df['is_top'])
top_dFund3  = event_study(df['dFund3'], df['is_top'])
bot_fund    = event_study(df['fund'],   df['is_bottom'])
bot_dFund3  = event_study(df['dFund3'], df['is_bottom'])

print("TOPS — fund (avg):\n", top_fund.head(11))
print("TOPS — dFund3 (avg):\n", top_dFund3.head(11))
print("BOTTOMS — fund (avg):\n", bot_fund.head(30))
print("BOTTOMS — dFund3 (avg):\n", bot_dFund3.head(30))


TOPS — fund (avg):
     t       avg       med
0  -5  0.000036  0.000031
1  -4  0.000053  0.000068
2  -3  0.000052  0.000047
3  -2  0.000063  0.000056
4  -1  0.000070  0.000081
5   0  0.000084  0.000090
6   1  0.000083  0.000084
7   2  0.000059  0.000053
8   3  0.000064  0.000080
9   4  0.000066  0.000069
10  5  0.000052  0.000033
TOPS — dFund3 (avg):
     t       avg       med
0  -5  0.000011  0.000006
1  -4  0.000010  0.000035
2  -3  0.000008  0.000000
3  -2  0.000027  0.000033
4  -1  0.000017  0.000012
5   0  0.000032  0.000015
6   1  0.000019  0.000018
7   2 -0.000010 -0.000004
8   3 -0.000020 -0.000003
9   4 -0.000016 -0.000007
10  5 -0.000008  0.000000
BOTTOMS — fund (avg):
     t       avg       med
0  -5  0.000063  0.000066
1  -4  0.000060  0.000060
2  -3  0.000056  0.000045
3  -2  0.000062  0.000054
4  -1  0.000042  0.000037
5   0  0.000048  0.000040
6   1  0.000038  0.000045
7   2  0.000034  0.000032
8   3  0.000044  0.000036
9   4  0.000048  0.000039
10  5  0.000049  0.000042

In [27]:
# mark large funding moves using May–Jul percentiles (discovery range)
# your column
df['date'] = pd.to_datetime(df['date'], utc=True).dt.tz_localize(None).dt.normalize()

start = pd.Timestamp('2025-05-01')   # naive
end   = pd.Timestamp('2025-08-31')   # naive

df_slice = df[(df['date'] >= start) & (df['date'] <= end)]
train_mask = (df['date'] > pd.Timestamp('2025-05-01')) & (df['date']< pd.Timestamp('2025-07-31'))
P80 = df.loc[train_mask, 'dFund3'].abs().quantile(0.80)

df['large_dFund3'] = df['dFund3'].abs() >= P80

tab = pd.crosstab(df['is_event'], df['large_dFund3'], normalize='index')
print("Event vs Large |dFund3| (row %):\n", tab)

Event vs Large |dFund3| (row %):
 large_dFund3     False     True 
is_event                        
False         0.809091  0.190909
True          0.615385  0.384615


In [25]:
df.dtypes

Unnamed: 0                         int64
date                 datetime64[ns, UTC]
close_spot                       float64
volume_spot                      float64
close_fut                        float64
volume_fut                       float64
funding_mean                     float64
daily_funding_sum                float64
funding_last                     float64
funding_first                    float64
is_top                              bool
is_bottom                           bool
fwd_ret_1                        float64
fwd_ret_3                        float64
fwd_ret_5                        float64
ret1                             float64
ret3                             float64
absret1                          float64
fund                             float64
dFund1                           float64
dFund3                           float64
flip                                bool
is_event                            bool
dtype: object