# 05 - Multiscale Detection

A single timeframe can give misleading signals. By running indicators at
**multiple scales** (daily, 3-day, weekly) and measuring their agreement,
we obtain a more robust crash probability.

This notebook:
1. Computes LPPLS and Hill indicators at daily, 3-day, and weekly scales (the EVT indicator is imported but not computed in this notebook)
2. Measures multiscale agreement
3. Shows how agreement strengthens before major crashes

## Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

from fatcrash.data.ingest import from_yahoo
from fatcrash.data.transforms import (
    log_returns, log_prices, time_index, resample_ohlcv, negative_returns,
)
from fatcrash.indicators.lppls_indicator import compute_confidence
from fatcrash.indicators.tail_indicator import rolling_tail_index, rolling_kappa
from fatcrash.indicators.evt_indicator import rolling_var_es
from fatcrash.aggregator.signals import multiscale_signals

# Notebook-wide plotting defaults: seaborn whitegrid theme and a wide figure
# size for any figure that does not pass its own ``figsize``.
DEFAULT_FIGSIZE = (14, 5)

plt.style.use("seaborn-v0_8-whitegrid")
plt.rcParams["figure.figsize"] = DEFAULT_FIGSIZE

## 1. Load and resample data

In [None]:
def _add_log_columns(frame):
    """Attach ``log_return`` and ``log_price`` columns computed from ``close``.

    The frame is modified in place and also returned so the call can be nested.
    """
    frame["log_return"] = log_returns(frame["close"].values)
    frame["log_price"] = log_prices(frame["close"].values)
    return frame


# Download BTC-USD from Yahoo and pass it through ``time_index``.
df = time_index(from_yahoo("BTC-USD", start="2015-01-01", end="2025-12-31"))

# Three views of the same data: a daily copy (so ``df`` itself keeps its
# original columns), plus 3-day and weekly resamples.
df_daily = _add_log_columns(df.copy())
df_3day = _add_log_columns(resample_ohlcv(df, freq="3D"))
df_weekly = _add_log_columns(resample_ohlcv(df, freq="W"))

# Label -> frame lookup that the per-scale loops below iterate over.
timeframes = {"daily": df_daily, "3-day": df_3day, "weekly": df_weekly}

for label, data in timeframes.items():
    print(f"{label:>8s}: {len(data)} observations")

## 2. Compute rolling Hill tail index at each scale

In [None]:
# Rolling Hill tail-index estimate per timeframe, stored as a date-indexed Series.
hill_results = {}

# Rolling-window length per timeframe, in observations. The values cover a
# similar calendar span: 500 days, 170 * 3 = 510 days, 75 * 7 = 525 days.
window_map = {"daily": 500, "3-day": 170, "weekly": 75}

for name, frame in timeframes.items():
    # Keep the NaN-free returns as a Series so each value stays attached to its
    # own date. Looking positions up in ``frame.index`` instead would shift
    # every estimate whenever ``dropna()`` removed a row (e.g. a leading NaN
    # return), because the positions refer to the shortened array.
    rets = frame["log_return"].dropna()
    window = window_map[name]
    result = rolling_tail_index(returns=rets.values, window=window, k_fraction=0.05, step=3)
    # assumes ``result.indices`` are integer positions into the array passed
    # as ``returns`` -- TODO confirm against rolling_tail_index
    hill_results[name] = pd.Series(
        result.values,
        index=rets.index[result.indices][:len(result.values)],
        name=f"alpha_{name}",
    )
    print(f"{name}: {len(hill_results[name])} rolling estimates")

In [None]:
# Two stacked panels sharing the time axis: price on top, tail index below.
fig, (price_ax, alpha_ax) = plt.subplots(2, 1, figsize=(14, 8), sharex=True)

price_ax.plot(df.index, df["close"], color="steelblue", linewidth=0.8)
price_ax.set(yscale="log", ylabel="Price (USD)", title="BTC/USD Price")

# One colour per timeframe; the LPPLS plot further down reads this mapping too.
colors = {"daily": "blue", "3-day": "orange", "weekly": "green"}
for scale, alpha_series in hill_results.items():
    alpha_ax.plot(
        alpha_series.index,
        alpha_series.values,
        color=colors[scale],
        linewidth=0.8,
        alpha=0.8,
        label=scale,
    )

# Dashed reference line at alpha = 2.
alpha_ax.axhline(2, color="red", linestyle="--", alpha=0.5, label="alpha=2")
alpha_ax.set(
    ylabel="Tail Index (alpha)",
    title="Rolling Hill Estimator: Multiscale Comparison",
    ylim=(0, 6),
)
alpha_ax.legend()

plt.tight_layout()
plt.show()

## 3. Compute LPPLS confidence at each scale

In [None]:
# LPPLS positive-confidence series per timeframe, stored as a date-indexed Series.
lppls_results = {}

# Window sizes for LPPLS at each scale (in data points, not calendar days)
lppls_window_map = {
    "daily": [120, 180, 250, 365],
    "3-day": [40, 60, 85, 120],
    "weekly": [17, 26, 36, 52],
}

for name, frame in timeframes.items():
    # Keep the NaN-free log prices as a Series so the dates of the surviving
    # rows travel with the values. Dating the output with ``frame.index``
    # would misalign it whenever ``dropna()`` removed a row.
    lp = frame["log_price"].dropna()
    # Time axis is simply the observation number 0..len(lp)-1.
    t = np.arange(len(lp), dtype=np.float64)
    conf = compute_confidence(
        t, lp.values,
        window_sizes=lppls_window_map[name],
        step=5,
    )
    # assumes ``conf.positive[i]`` belongs to observation ``i`` of the input
    # -- TODO confirm this still holds with ``step=5``
    n_points = min(len(conf.positive), len(lp))
    lppls_results[name] = pd.Series(
        conf.positive[:n_points],
        index=lp.index[:n_points],
        name=f"lppls_{name}",
    )
    print(f"{name}: max LPPLS confidence = {lppls_results[name].max():.3f}")

In [None]:
# Two stacked panels sharing the time axis: price on top, LPPLS confidence below.
fig, (price_ax, conf_ax) = plt.subplots(2, 1, figsize=(14, 8), sharex=True)

price_ax.plot(df.index, df["close"], color="steelblue", linewidth=0.8)
price_ax.set(yscale="log", ylabel="Price (USD)", title="BTC/USD Price")

# One line per timeframe, coloured with the ``colors`` mapping defined with
# the Hill plot.
for scale, conf_series in lppls_results.items():
    conf_ax.plot(
        conf_series.index,
        conf_series.values,
        color=colors[scale],
        linewidth=0.8,
        alpha=0.7,
        label=scale,
    )

# Dashed reference line at a confidence of 0.5.
conf_ax.axhline(0.5, color="red", linestyle="--", alpha=0.5)
conf_ax.set(
    ylabel="LPPLS Confidence",
    title="LPPLS Positive Confidence: Multiscale",
    ylim=(0, 1),
)
conf_ax.legend()

plt.tight_layout()
plt.show()

## 4. Multiscale agreement

When multiple timeframes simultaneously show elevated risk signals, a genuine crash
is more likely than when only a single scale signals risk.

In [None]:
# Inputs for the fatcrash aggregator, all taken from the daily frame.
# NOTE(review): if ``log_return`` starts with a NaN, the dropna'd returns array
# is one element shorter than ``daily_dates`` -- confirm ``multiscale_signals``
# expects that.
daily_inputs = {
    "daily_returns": df_daily["log_return"].dropna().values,
    "daily_log_prices": df_daily["log_price"].dropna().values,
    "daily_dates": df_daily.index,
}
ms_signals = multiscale_signals(resample_freqs=["3D", "W"], **daily_inputs)

# Put the agreement score into a frame indexed by the dates that came back
# with the result.
ms_table = pd.DataFrame({
    "date": ms_signals.dates,
    "agreement": ms_signals.agreement,
})
ms_df = ms_table.set_index("date")

agreement_min = ms_df["agreement"].min()
agreement_max = ms_df["agreement"].max()
print(f"Multiscale agreement range: [{agreement_min:.3f}, {agreement_max:.3f}]")

In [None]:
# Two stacked panels sharing the time axis: price on top, agreement score below.
fig, (price_ax, agree_ax) = plt.subplots(2, 1, figsize=(14, 8), sharex=True)

price_ax.plot(df.index, df["close"], color="steelblue", linewidth=0.8)
price_ax.set(yscale="log", ylabel="Price (USD)", title="BTC/USD Price")

# Shade the area under the agreement curve: red where the score exceeds 0.5,
# gray elsewhere. Both masks are written out explicitly so that NaN scores are
# excluded from both bands.
agreement = ms_df["agreement"]
shaded_bands = (
    (agreement > 0.5, "red", 0.4, "High agreement (> 0.5)"),
    (agreement <= 0.5, "gray", 0.3, "Low agreement"),
)
for mask, band_color, band_alpha, band_label in shaded_bands:
    agree_ax.fill_between(
        ms_df.index, 0, agreement,
        where=mask,
        color=band_color, alpha=band_alpha, label=band_label,
    )

agree_ax.set(
    ylabel="Agreement Score",
    title="Multiscale Signal Agreement",
    ylim=(0, 1),
)
agree_ax.legend()

plt.tight_layout()
plt.show()

## 5. Manual cross-scale comparison table

Align all indicators on a common weekly grid and inspect periods of high agreement.

In [None]:
# Build one table on the weekly index, with a column per (indicator, scale).
# Each series is reduced to its last value per week before being assigned, and
# assignment aligns it to the weekly index of ``comparison``.
comparison = pd.DataFrame(index=df_weekly.index)

for prefix, per_scale in (("hill", hill_results), ("lppls", lppls_results)):
    for scale, values in per_scale.items():
        comparison[f"{prefix}_{scale}"] = values.resample("W").last()

# Discard weeks for which no indicator has a value.
comparison = comparison.dropna(how="all")

# Keep weeks where at least 2 LPPLS timeframes show confidence > 0.3.
lppls_cols = [col for col in comparison.columns if col.startswith("lppls_")]
n_scales_alerting = (comparison[lppls_cols] > 0.3).sum(axis=1)
high_alert = comparison[n_scales_alerting >= 2]

print(f"Weeks with multi-timeframe LPPLS alert: {len(high_alert)}")
high_alert[lppls_cols].tail(20)

## Summary

- Indicators computed at multiple scales (daily, 3-day, weekly) provide complementary
  views of crash risk, although they are derived from the same price series and so are
  not statistically independent.
- **Cross-scale agreement** is a powerful filter: when all scales show elevated risk,
  the signal is much more reliable.
- The `multiscale_signals` function from the aggregator automates this alignment.
- This multiscale agreement feeds into the composite crash signal (notebook 06).