<a href="https://colab.research.google.com/github/viktor-mashalov/Same-Day-SPX-S-R-Directional-Prediction/blob/main/Same_Day_SPX_S_R_%26_Directional_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import yfinance as yf
import pandas as pd
import numpy as np

# ───────────────────────────────────────────────────────────────────────────────
# 1. DOWNLOAD MARKET & VOLATILITY DATA
#    - ^GSPC: S&P 500 index OHLCV (whole frame is kept)
#    - ^VIX : 1-month implied volatility ('Close' only)
#    - ^VXV : 3-month implied volatility ('Close' only)
#    All three requests share one date window and suppress the progress bar.
# ───────────────────────────────────────────────────────────────────────────────
symbol, start, end = '^GSPC', '2014-01-01', '2026-05-18'
download_window = {'start': start, 'end': end, 'progress': False}

# Price & volume history for the index itself
spx = yf.download(symbol, **download_window)
# Closing values of the 1-month volatility index
vix = yf.download('^VIX', **download_window)['Close']
# Closing values of the 3-month volatility index
# NOTE(review): confirm the '^VXV' ticker still returns data from the provider
vix3m = yf.download('^VXV', **download_window)['Close']

# ───────────────────────────────────────────────────────────────────────────────
# 2. BUILD PRIMARY DATAFRAME
#    - Copy the five OHLCV columns from SPX and lower-case their names
#    - Attach VIX & 3m VIX closes as extra columns
#    - Coerce the index to datetime
# ───────────────────────────────────────────────────────────────────────────────
ohlcv_fields = ['Open', 'High', 'Low', 'Close', 'Volume']
df = spx[ohlcv_fields].copy()
df.columns = [field.lower() for field in ohlcv_fields]

# Both volatility series are attached as downloaded; no shift/lag is applied
# in this step, so each row carries the value for its own date.
df['vix'] = vix
df['vix3m'] = vix3m

df.index = pd.to_datetime(df.index)

# ───────────────────────────────────────────────────────────────────────────────
# 3. BASIC SERIES CALCULATIONS
#    - prev_close : close of the preceding row
#    - ret_open   : fractional return from the row's open to its close
#    - gap        : fractional move from prev_close to the row's open
# ───────────────────────────────────────────────────────────────────────────────
df['prev_close'] = df['close'].shift(1)
df['ret_open'] = df['close'].div(df['open']).sub(1)
df['gap'] = df['open'].div(df['prev_close']).sub(1)

# ───────────────────────────────────────────────────────────────────────────────
# 4. ATR & VOLATILITY ESTIMATES
#    - True range of the PRIOR day (so nothing from the current row is used)
#    - 14-day ATR (simple rolling mean of that true range)
#    - ATR%: ATR normalized by prior close
# ───────────────────────────────────────────────────────────────────────────────
prior_high = df['high'].shift(1)
prior_low = df['low'].shift(1)
# The true range of a day is measured against the close of the day BEFORE it.
# For the prior day that reference is two rows back. Using the prior day's own
# close here would make both gap terms <= high-low and the max degenerate to
# the plain intraday range.
close_before_prior = df['close'].shift(2)

high_low = prior_high - prior_low
high_pc = (prior_high - close_before_prior).abs()
low_pc = (prior_low - close_before_prior).abs()
# True range = max of the three range measures (NaN components are skipped,
# so the first usable row falls back to high-low alone)
df['tr'] = pd.concat([high_low, high_pc, low_pc], axis=1).max(axis=1)
df['atr14'] = df['tr'].rolling(14).mean()             # average true range
df['atr_pct'] = df['atr14'] / df['prev_close']        # ATR as fraction of price

# ───────────────────────────────────────────────────────────────────────────────
# 5. IMPLIED VOLATILITY ESTIMATE
#    - Lag VIX by one row, then convert the annualised % figure to a
#      one-day σ: (VIX/100) / √252
# ───────────────────────────────────────────────────────────────────────────────
lagged_vix = df['vix'].shift(1)
df['imp_vol_vix'] = lagged_vix.div(100).div(np.sqrt(252))

# ───────────────────────────────────────────────────────────────────────────────
# 6. DYNAMIC Z-SCORE BASED ON GAP MAGNITUDE
#    - base_z = 1.645 (~95% confidence)
#    - gap_factor rises linearly from 1 at |gap| = 0 to 2 at |gap| = 0.5%,
#      and is held at 2 beyond that
# ───────────────────────────────────────────────────────────────────────────────
base_z = 1.645
uncapped_factor = 1 + df['gap'].abs() / 0.005
df['gap_factor'] = uncapped_factor.clip(upper=2)
df['z_score'] = base_z * df['gap_factor']

# ───────────────────────────────────────────────────────────────────────────────
# 7. COMBINE VOLATILITY ESTIMATES
#    - Row-wise larger of implied (VIX) vs. realised (ATR%);
#      a missing value in either input leaves the result missing
# ───────────────────────────────────────────────────────────────────────────────
df['vol_est'] = df[['imp_vol_vix', 'atr_pct']].max(axis=1, skipna=False)

# ───────────────────────────────────────────────────────────────────────────────
# 8. CALCULATE DAILY LEVEL PCTs & SIGNALS
#    - max_pct: absolute cap on intraday level (0.4%)
#    - w_vol, w_gap: weights for vol estimate vs. gap size
#    - Determine support vs. resistance based on gap sign
#    - held: did the close remain on the correct side?
#
#    Outputs (written to df):
#      level_pct : signed offset from the open (negative = support below the
#                  open, positive = resistance above it); NaN when no signal
#      side      : 'support' / 'resistance'; None when no signal
#      held      : True / False; NaN when no signal
#
#    Computed column-wise instead of row-by-row with iterrows().
# ───────────────────────────────────────────────────────────────────────────────
max_pct = 0.004
w_vol, w_gap = 0.7, 0.3

# A row yields a signal only when every input is present. ret_open is part of
# the check so that a missing open→close return produces "no signal" rather
# than being scored as a failed level (NaN comparisons evaluate to False).
signal_inputs = ['prev_close', 'gap', 'z_score', 'vol_est', 'ret_open']
has_signal = df[signal_inputs].notna().all(axis=1)

# Gap up (or flat) → look for support below the open; gap down → resistance above
gap_up = df['gap'] >= 0

raw_pct = df['z_score'] * df['vol_est']                    # z × vol estimate
combined_pct = w_vol * raw_pct + w_gap * df['gap'].abs()   # blend with gap size
capped_pct = combined_pct.clip(upper=max_pct)              # cap magnitude
# NOTE(review): every level in the captured output sits exactly at ±0.4000%,
# i.e. the cap appears to bind on all of those days — confirm that is intended.
signed_pct = capped_pct.where(~gap_up, -capped_pct)

# Level "held" if the open→close return finished on the safe side of it:
# at or above a support level, at or below a resistance level.
closed_safe = (df['ret_open'] >= signed_pct).where(
    gap_up, df['ret_open'] <= signed_pct
)

df['level_pct'] = signed_pct.where(has_signal)
df['side'] = np.where(gap_up, 'support', 'resistance')
df.loc[~has_signal, 'side'] = None
# Object dtype keeps True/False alongside NaN so later notna()/astype(int)
# filtering and the printed values behave as before.
df['held'] = closed_safe.astype(object).where(has_signal, np.nan)

# ───────────────────────────────────────────────────────────────────────────────
# 9. INTRADAY LEVEL PRICE & FILL LOGIC
#    - level_price: open × (1 + level_pct)
#    - filled: the day's low reached a support level, or the day's high
#      reached a resistance level
#    - success: filled AND the close finished on the safe side of the level
# ───────────────────────────────────────────────────────────────────────────────
df['level_price'] = df['open'] * (1 + df['level_pct'])

on_support = df['side'] == 'support'
on_resistance = df['side'] == 'resistance'

touched_support = on_support & (df['low'] <= df['level_price'])
touched_resistance = on_resistance & (df['high'] >= df['level_price'])
df['filled'] = touched_support | touched_resistance

closed_above_support = on_support & (df['close'] >= df['level_price'])
closed_below_resistance = on_resistance & (df['close'] <= df['level_price'])
df['success'] = df['filled'] & (closed_above_support | closed_below_resistance)

# ───────────────────────────────────────────────────────────────────────────────
# 10. ACCURACY & PERFORMANCE METRICS
#    - overall accuracy
#    - accuracy when |level_pct|≤0.5%
#    - fill accuracy & rate
#    - baseline comparison (|ret_open|≤0.4%)
#    - directional correctness on support/resistance days
#    Each ratio is taken as the mean of a 0/1 (or boolean) selection.
# ───────────────────────────────────────────────────────────────────────────────
# rows that actually carry a prediction (held is True/False, not NaN)
mask_pred = df['held'].notna()
valid = df.loc[mask_pred, 'held'].astype(int)
accuracy = valid.mean()

# narrow-level subset
# NOTE(review): the captured output reports the same figure here as for the
# overall accuracy — check whether the 0.5% threshold filters anything out.
small_mask = df['level_pct'].abs() <= 0.005
acc_small = df.loc[small_mask, 'held'].astype(int).mean()

# fill metrics: share of filled days that succeeded, and share of
# prediction days that filled at all
fill_mask = df['filled']
fill_acc = df.loc[fill_mask, 'success'].astype(int).mean()
fill_rate = df.loc[mask_pred, 'filled'].mean()

# baseline (±0.4% range): prediction days whose open→close move stayed inside it
baseline_hits = (df['ret_open'].abs() <= 0.004) & mask_pred
baseline_acc = baseline_hits[mask_pred].mean()
edge = accuracy - baseline_acc

# directional correctness: close above open on support days,
# close below open on resistance days
support_mask = mask_pred & (df['side'] == 'support')
resistance_mask = mask_pred & (df['side'] == 'resistance')
closed_up = df['close'] > df['open']
closed_down = df['close'] < df['open']
support_corr = closed_up[support_mask].mean()
resistance_corr = closed_down[resistance_mask].mean()

# ───────────────────────────────────────────────────────────────────────────────
# 11. PRINT RESULTS
#     One line per metric, formatted as a percentage with two decimals;
#     a leading "\n" in a line starts a new group.
# ───────────────────────────────────────────────────────────────────────────────
summary_lines = [
    f"Model accuracy:            {accuracy:.2%}",
    f"Accuracy |pct|≤0.5%:        {acc_small:.2%}",
    f"Fill accuracy:             {fill_acc:.2%}",
    f"Fill rate:                 {fill_rate:.2%}",
    f"\nBaseline accuracy (|ret_open|≤0.4%): {baseline_acc:.2%}",
    f"Edge over baseline:                {edge:.2%}",
    f"\nDirectional on support days (close>open):    {support_corr:.2%}",
    f"Directional on resistance days (close<open): {resistance_corr:.2%}",
]
print(*summary_lines, sep="\n")

# ───────────────────────────────────────────────────────────────────────────────
# 12. SHOW MOST RECENT SIGNAL & LAST TRADES
#     - df_trades: rows that carry a prediction (held is not NaN)
#     - prints the latest such row, then a table of the last ten
# ───────────────────────────────────────────────────────────────────────────────
df_trades = df.dropna(subset=['held'])

if df_trades.empty:
    # Nothing to index into; iloc[-1] would raise IndexError on an empty frame
    print("\nNo valid signals to display.")
else:
    recent = df_trades.iloc[-1]
    print("\nMost recent trade:")
    print(f"Date:         {recent.name.date()}")
    print(f" Side:        {recent['side']}")
    print(f" Level %:     {recent['level_pct']:+.4%}")
    print(f" Level price: {recent['level_price']:.2f}")
    print(f" Open:        {recent['open']:.2f}")
    print(f" Close:       {recent['close']:.2f}")
    print(f" Held:        {recent['held']}")

    # The Side column must fit the longer label ('resistance', 10 characters);
    # a narrower width pushes every later column out of line on those rows.
    side_width = len('resistance')

    print("\nLast 10 days:")
    print(f"Date       | {'Side':<{side_width}} | Level %  | LevelPrice |"
          f"   Open   |  Close  | Held")
    print(f"-----------|{'-' * (side_width + 2)}|----------|------------|"
          f"----------|---------|-----")
    for idx, r in df_trades.tail(10).iterrows():
        print(f"{idx.date()} | {r['side']:<{side_width}} | {r['level_pct']:+8.4%} |"
              f" {r['level_price']:10.2f} | {r['open']:8.2f} | {r['close']:7.2f} | {r['held']}")

Model accuracy:            81.19%
Accuracy |pct|≤0.5%:        81.19%
Fill accuracy:             49.53%
Fill rate:                 37.26%

Baseline accuracy (|ret_open|≤0.4%): 49.09%
Edge over baseline:                32.10%

Directional on support days (close>open):    60.02%
Directional on resistance days (close<open): 54.48%

Most recent trade:
Date:         2025-06-13
 Side:        resistance
 Level %:     +0.4000%
 Level price: 6024.56
 Open:        6000.56
 Close:       5976.97
 Held:        True

Last 10 days:
Date       | Side     | Level %  | LevelPrice |   Open   |  Close  | Held
-----------|----------|----------|------------|----------|---------|-----
2025-06-02 | resistance | +0.4000% |    5920.27 |  5896.68 | 5935.94 | False
2025-06-03 | support  | -0.4000% |    5914.81 |  5938.56 | 5970.37 | True
2025-06-04 | support  | -0.4000% |    5955.02 |  5978.94 | 5970.81 | True
2025-06-05 | support  | -0.4000% |    5961.73 |  5985.67 | 5939.30 | False
2025-06-06 | support  | -0.400