# 09 — Methods Comparison

Compare all fatcrash methods (Hill, Kappa, EVT, LPPLS) against each other across:
- **Assets**: BTC, SPY, Gold, GBP/USD
- **Time periods**: Crisis vs calm
- **Edge cases**: Gaussian vs Cauchy vs real data

Goal: understand which methods detect what, and where they agree or disagree.

In [None]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from fatcrash.data.ingest import from_sample, from_csv
from fatcrash.data.transforms import log_returns, log_prices, time_index, block_maxima
from fatcrash._core import (
    hill_estimator, hill_rolling,
    kappa_metric, kappa_rolling,
    gpd_fit, gpd_var_es, gev_fit,
    lppls_fit,
)

## Load all assets

In [None]:
# Three bundled sample series plus GBP/USD read from a CSV file.
# NOTE(review): the CSV path is relative -- presumably resolved against the
# notebook's working directory; confirm before running from elsewhere.
btc, spy, gold = (from_sample(key) for key in ('btc', 'spy', 'gold'))
gbpusd = from_csv('data/sample/gbpusd_daily.csv')

# Display label -> price frame; later cells iterate over this mapping.
assets = dict(zip(('BTC', 'SPY', 'Gold', 'GBP/USD'), (btc, spy, gold, gbpusd)))

# Quick sanity check: row count and covered date range for every series.
for label, frame in assets.items():
    first_day = frame.index[0].date()
    last_day = frame.index[-1].date()
    print(f'{label}: {len(frame)} days, {first_day} to {last_day}')

## 1. Hill Tail Index Comparison

Lower alpha = fatter tails = more extreme events.

In [None]:
# Whole-sample Hill tail index for each asset, computed on its log returns.
returns_by_asset = {label: log_returns(frame) for label, frame in assets.items()}
results = [
    {'Asset': label, 'Hill alpha': hill_estimator(series), 'N': len(series)}
    for label, series in returns_by_asset.items()
]

# Fattest tails (smallest alpha) first.
hill_df = pd.DataFrame(results).sort_values('Hill alpha')
print(hill_df.to_string(index=False))

# Traffic-light colouring: alpha below 3 is red, below 4 orange, otherwise green.
bar_colors = []
for a in hill_df['Hill alpha']:
    if a < 3:
        bar_colors.append('red')
    elif a < 4:
        bar_colors.append('orange')
    else:
        bar_colors.append('green')

fig = go.Figure(go.Bar(x=hill_df['Asset'], y=hill_df['Hill alpha'],
                       marker_color=bar_colors))

# Dashed reference levels drawn across the chart.
reference_levels = (
    (2, 'red', 'alpha=2 (infinite variance)'),
    (4, 'green', 'alpha=4 (thin tail boundary)'),
)
for level, colour, caption in reference_levels:
    fig.add_hline(y=level, line_dash='dash', line_color=colour, annotation_text=caption)

fig.update_layout(template='plotly_dark', title='Hill Tail Index by Asset',
                  yaxis_title='alpha (lower = fatter tails)')
fig.show()

## 2. Kappa Metric Comparison

Kappa < Gaussian benchmark = fatter tails than normal distribution.

In [None]:
# Kappa of each asset's log returns next to the Gaussian benchmark that
# kappa_metric returns alongside it.
kappa_results = []
for label, frame in assets.items():
    k_value, k_gauss = kappa_metric(log_returns(frame), n_subsamples=10, n_sims=500)
    row = {'Asset': label, 'Kappa': k_value, 'Gaussian Benchmark': k_gauss}
    row['Ratio (kappa/bench)'] = k_value / k_gauss
    # This notebook treats a kappa below the benchmark as evidence of fat tails.
    row['Fat tails?'] = 'YES' if k_value < k_gauss else 'no'
    kappa_results.append(row)

kappa_df = pd.DataFrame(kappa_results)
print(kappa_df.to_string(index=False))

# Grouped bars: one pair (kappa, benchmark) per asset.
fig = go.Figure()
for column in ('Kappa', 'Gaussian Benchmark'):
    fig.add_trace(go.Bar(name=column, x=kappa_df['Asset'], y=kappa_df[column]))
fig.update_layout(template='plotly_dark', title='Kappa vs Gaussian Benchmark',
                  barmode='group', yaxis_title='Kappa')
fig.show()

## 3. EVT: VaR and Expected Shortfall Comparison

In [None]:
# GPD fit above the 95% quantile plus VaR / ES at two confidence levels.
# Each asset contributes exactly one row: either the formatted estimates or,
# if anything in the fit or formatting raises, an 'Error' entry instead.
evt_results = []
for label, frame in assets.items():
    series = log_returns(frame)
    try:
        sigma, xi, threshold, n_exc = gpd_fit(series, quantile=0.95)
        var99, es99 = gpd_var_es(series, p=0.99, quantile=0.95)
        var95, es95 = gpd_var_es(series, p=0.95, quantile=0.95)
        # Values are stored as fixed-precision strings for the printed table.
        row = {'Asset': label}
        row.update({
            'GPD sigma': format(sigma, '.5f'),
            'GPD xi': format(xi, '.3f'),
            'VaR 95%': format(var95, '.4f'),
            'VaR 99%': format(var99, '.4f'),
            'ES 99%': format(es99, '.4f'),
            'Exceedances': n_exc,
        })
    except Exception as err:
        row = {'Asset': label, 'Error': str(err)}
    evt_results.append(row)

evt_df = pd.DataFrame(evt_results)
print(evt_df.to_string(index=False))

In [None]:
# VaR comparison bar chart
# evt_results stores the estimates as formatted strings, so they are parsed
# back to floats here. Rows without a 'VaR 99%' key are skipped.
var_data = []
for entry in evt_results:
    if 'VaR 99%' not in entry:
        continue
    var_data.append((entry['Asset'], float(entry['VaR 99%']), float(entry['ES 99%'])))
var_df = pd.DataFrame(var_data, columns=['Asset', 'VaR 99%', 'ES 99%'])

# One bar series per risk measure, grouped by asset.
fig = go.Figure()
for measure in ('VaR 99%', 'ES 99%'):
    fig.add_trace(go.Bar(name=measure, x=var_df['Asset'], y=var_df[measure]))
fig.update_layout(template='plotly_dark', title='Tail Risk: VaR and ES by Asset',
                  barmode='group', yaxis_title='Loss (log returns)')
fig.show()

## 4. GEV: Block Maxima Shape Parameter

xi > 0 = Frechet (fat tail), xi ~ 0 = Gumbel, xi < 0 = bounded tail.

In [None]:
# GEV fit on block maxima (block_size=21) of each asset's log returns.
gev_results = []
for label, frame in assets.items():
    maxima = block_maxima(log_returns(frame), block_size=21)
    mu, sigma, xi = gev_fit(maxima)

    # Classify by shape parameter, with a +/-0.05 band around zero for Gumbel.
    if xi > 0.05:
        tail_type = 'Frechet (fat)'
    elif xi > -0.05:
        tail_type = 'Gumbel'
    else:
        tail_type = 'Weibull (bounded)'

    gev_results.append({'Asset': label, 'mu': mu, 'sigma': sigma, 'xi': xi,
                        'Tail type': tail_type})

gev_df = pd.DataFrame(gev_results)
print(gev_df.to_string(index=False))

## 5. Rolling Hill Alpha: How Tails Evolve Over Time

In [None]:
# One panel per asset on a two-column grid. Panel titles and the number of
# rows are derived from `assets`, so the labels cannot drift out of sync with
# the traces if assets are added, removed or reordered.
n_cols = 2
n_rows = max(1, -(-len(assets) // n_cols))  # ceiling division, at least one row
fig = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=list(assets),
                    shared_yaxes=True)

for i, (name, df) in enumerate(assets.items()):
    ret = log_returns(df)
    alpha_rolling = np.asarray(hill_rolling(ret, window=252))
    # NOTE(review): assumes hill_rolling yields one value per return so that
    # it lines up with the index below -- confirm against the fatcrash API.
    dates = df.index[1:]  # returns are 1 shorter
    row, col = divmod(i, n_cols)  # zero-based grid position; plotly is one-based
    fig.add_trace(go.Scatter(x=dates, y=alpha_rolling, name=name,
                             line=dict(width=1)), row=row+1, col=col+1)
    # Same alpha=2 reference line that the static Hill chart labels
    # "infinite variance".
    fig.add_hline(y=2, line_dash='dash', line_color='red', row=row+1, col=col+1)

# 300 px per row keeps the original 600 px height for the four-asset layout.
fig.update_layout(template='plotly_dark', title='Rolling Hill Alpha (252-day window)',
                  height=300 * n_rows, showlegend=False)
fig.update_yaxes(title_text='alpha', range=[0, 8])
fig.show()

## 6. Crisis Period Deep Dive: 2017 BTC Bubble

Compare all indicators on the same period.

In [None]:
# Restrict BTC to the 2017 run-up and its aftermath.
btc_2017 = btc.loc['2017-01-01':'2018-03-01']
ret_2017 = log_returns(btc_2017)
dates_2017 = btc_2017.index[1:]  # drop the first date: returns are 1 shorter

# Rolling indicators (60-observation window)
hill_2017 = np.asarray(hill_rolling(ret_2017, window=60))
kappa_raw, kbench = kappa_rolling(ret_2017, window=60, n_subsamples=5, n_sims=50)
kappa_2017 = np.asarray(kappa_raw)

panel_titles = ['BTC Price', 'Rolling Hill Alpha (60d)', 'Rolling Kappa (60d)']
fig = make_subplots(rows=3, cols=1, shared_xaxes=True,
                    subplot_titles=panel_titles,
                    vertical_spacing=0.08)

# (x, y, legend name, line colour) for rows 1..3, top to bottom.
panels = [
    (btc_2017.index, btc_2017['close'], 'Price', 'white'),
    (dates_2017, hill_2017, 'Hill alpha', 'cyan'),
    (dates_2017, kappa_2017, 'Kappa', 'magenta'),
]
for row_no, (xs, ys, legend, colour) in enumerate(panels, start=1):
    fig.add_trace(go.Scatter(x=xs, y=ys, name=legend, line=dict(color=colour)),
                  row=row_no, col=1)

# Reference levels: alpha=2 on the Hill panel, the benchmark returned by
# kappa_rolling on the Kappa panel.
fig.add_hline(y=2, line_dash='dash', line_color='red', row=2, col=1)
fig.add_hline(y=kbench, line_dash='dash', line_color='yellow', row=3, col=1,
              annotation_text='Gaussian')

# Mark the crash on every panel
for row_no in (1, 2, 3):
    fig.add_vline(x='2017-12-17', line_dash='dash', line_color='red', row=row_no, col=1)

fig.update_layout(template='plotly_dark', title='2017 BTC Bubble: All Indicators',
                  height=700)
fig.show()

## 7. Method Agreement Matrix

For each asset, do all methods agree on the tail characterization?

In [None]:
# For every asset, collect each method's fat/thin verdict and report how many
# of the methods that produced a verdict share the majority view.
agreement = []
for name, df in assets.items():
    ret = log_returns(df)

    # Hill: fat tail if alpha < 4
    alpha = hill_estimator(ret)
    hill_fat = alpha < 4

    # Kappa: fat tail if below benchmark
    kappa, bench = kappa_metric(ret, n_subsamples=10, n_sims=200)
    kappa_fat = kappa < bench

    # GPD xi: fat tail if xi > 0. A failed fit yields no verdict (None).
    # `except Exception` rather than a bare `except`, so that interrupting the
    # kernel (KeyboardInterrupt) is not silently swallowed.
    try:
        _, xi, _, _ = gpd_fit(ret, quantile=0.95)
        gpd_fat = xi > 0
    except Exception:
        gpd_fat = None

    # GEV xi: fat tail if xi > 0
    bm = block_maxima(ret, block_size=21)
    _, _, gev_xi = gev_fit(bm)
    gev_fat = gev_xi > 0

    # Agreement is the size of the larger camp (all-FAT and all-thin are both
    # unanimous), measured only over methods that actually returned a verdict.
    verdicts = [v for v in (hill_fat, kappa_fat, gpd_fat, gev_fat) if v is not None]
    fat_votes = sum(bool(v) for v in verdicts)
    methods_agree = max(fat_votes, len(verdicts) - fat_votes)

    agreement.append({
        'Asset': name,
        'Hill (alpha<4)': 'FAT' if hill_fat else 'thin',
        'Kappa (<bench)': 'FAT' if kappa_fat else 'thin',
        'GPD (xi>0)': 'N/A' if gpd_fat is None else ('FAT' if gpd_fat else 'thin'),
        'GEV (xi>0)': 'FAT' if gev_fat else 'thin',
        'Agreement': f'{methods_agree}/{len(verdicts)}',
    })

agree_df = pd.DataFrame(agreement)
print(agree_df.to_string(index=False))

## 8. Edge Case Comparison: Known Distributions

In [None]:
# Synthetic samples with known tail indices, used to sanity-check the
# estimators. The generator is seeded so the draws are reproducible.
rng = np.random.default_rng(42)
n_samples = 5000

distributions = {
    'Gaussian (alpha=inf)': rng.standard_normal(n_samples),
    'Student-t df=3 (alpha=3)': rng.standard_t(3, n_samples),
    'Cauchy (alpha=1)': rng.standard_cauchy(n_samples),
    # Generator.pareto draws from the Lomax (Pareto II) distribution, whose
    # support starts at 0. Adding 1 gives the classical Pareto with scale 1
    # that the label refers to.
    'Pareto (alpha=2)': 1 + rng.pareto(2, n_samples),
}

dist_results = []
for name, data in distributions.items():
    alpha = hill_estimator(data)
    kappa, bench = kappa_metric(data, n_subsamples=10, n_sims=200)
    dist_results.append({
        'Distribution': name,
        'Hill alpha': f'{alpha:.2f}',
        'Kappa': f'{kappa:.3f}',
        'Benchmark': f'{bench:.3f}',
        'Kappa/Bench': f'{kappa/bench:.2f}',
    })

dist_df = pd.DataFrame(dist_results)
print(dist_df.to_string(index=False))

## 9. Summary: Which method detects what?

| Method | Best for | Limitation |
|--------|----------|------------|
| **Hill alpha** | Measuring tail heaviness | Sensitive to k choice |
| **Kappa** | Comparing to Gaussian benchmark | Needs enough data |
| **GPD/EVT** | VaR and ES estimation | Threshold selection matters |
| **GEV** | Block maxima characterization | Block size affects results |
| **LPPLS** | Bubble timing (predicting tc) | Only works in bubble regime |