# 07 - Deep LPPLS (P-LNN)

The **Physics-Informed LPPLS Neural Network (P-LNN)** enhances the classical LPPLS
model by embedding the LPPLS equation into a neural network loss function. This
allows the model to:

- Learn nonlinear corrections to the LPPLS template
- Better handle noise and regime changes
- Produce calibrated confidence scores

This notebook is **optional** -- it requires PyTorch and more compute time than
the classical approach.

We will:
1. Prepare training data from historical bubble episodes
2. Train the P-LNN model
3. Generate predictions and compare to classical LPPLS
4. Evaluate on held-out bubble episodes

## Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from fatcrash.data.ingest import from_yahoo
from fatcrash.data.transforms import log_prices, log_returns, time_index
from fatcrash.indicators.lppls_indicator import fit_lppls, compute_confidence
from fatcrash.indicators.deep_lppls import fit_deep_lppls

# Notebook-wide plotting defaults: white-grid seaborn style, 14x6 inch figures.
plt.style.use("seaborn-v0_8-whitegrid")
plt.rcParams.update({"figure.figsize": (14, 6)})

## 1. Load data and define bubble episodes

In [None]:
# Fetch BTC-USD bars and pass them through the project's time-index helper.
df = time_index(from_yahoo("BTC-USD", start="2015-01-01", end="2025-12-31"))

# Derived columns used by the rest of the notebook.
# NOTE(review): both helpers must return one value per row of `df` for the
# column assignment to succeed -- confirm for `log_returns` in particular.
df["log_price"] = log_prices(df["close"].values)
df["log_return"] = log_returns(df["close"].values)

# Known bubble episodes for training/testing, as (name, start, peak, end).
episodes = {
    name: {"start": start, "peak": peak, "end": end}
    for name, start, peak, end in (
        ("2017_bubble", "2017-01-01", "2017-12-17", "2018-02-06"),
        ("2019_rally", "2019-01-01", "2019-06-26", "2019-12-18"),
        ("2021_spring", "2020-10-01", "2021-04-14", "2021-07-20"),
        ("2021_autumn", "2021-07-20", "2021-11-10", "2022-01-24"),
    )
}

# Sanity check: how many rows of `df` fall inside each episode (inclusive).
for name, ep in episodes.items():
    after_start = df.index >= ep["start"]
    before_end = df.index <= ep["end"]
    mask = after_start & before_end
    print(f"{name}: {mask.sum()} days, peak at {ep['peak']}")

## 2. Prepare training windows

We create overlapping windows of log-price data, each labeled with:
- 1 if the window ends in the 30 days up to and including a known peak (bubble)
- 0 otherwise

In [None]:
# Build overlapping, individually normalised log-price windows and label each
# one by whether it ends in the 30 days leading up to a known peak.
window_size = 250  # number of daily observations per window
step = 5  # stride (in rows) between consecutive window end points
label_horizon = pd.Timedelta(days=30)  # loop-invariant, so built once here

peak_dates = pd.to_datetime([ep["peak"] for ep in episodes.values()])

X_windows = []
y_labels = []
window_dates = []

log_p = df["log_price"].values

# `i` is the *exclusive* end of the slice, so it has to be allowed to reach
# len(log_p). Stopping the range at len(log_p) silently dropped the window
# that ends on the most recent observation (and produced no window at all
# when the series is exactly `window_size` long).
for i in range(window_size, len(log_p) + 1, step):
    window = log_p[i - window_size:i]
    if np.any(np.isnan(window)):
        # Skip windows with missing prices rather than imputing them.
        continue

    # Normalize the window (zero mean, unit std); the epsilon guards against
    # division by zero on a perfectly flat window.
    window_norm = (window - window.mean()) / (window.std() + 1e-10)
    X_windows.append(window_norm)

    # Label: does this window end in [peak - 30 days, peak] for any known peak?
    # NOTE(review): this comparison needs df.index and peak_dates to agree on
    # timezone-awareness -- confirm what `time_index` returns.
    end_date = df.index[i - 1]
    is_bubble = any(
        peak - label_horizon <= end_date <= peak
        for peak in peak_dates
    )
    y_labels.append(1.0 if is_bubble else 0.0)
    window_dates.append(end_date)

X_windows = np.array(X_windows)
y_labels = np.array(y_labels)
window_dates = pd.DatetimeIndex(window_dates)

print(f"Total windows: {len(X_windows)}")
print(f"Bubble windows: {int(y_labels.sum())} ({y_labels.mean()*100:.1f}%)")
print(f"Non-bubble windows: {int((1-y_labels).sum())}")

## 3. Train/test split

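We use a temporal split at 2021-06-01: windows ending before that date are used for training, and windows ending on or after it (a period that contains only the 2021 autumn peak) are used for testing.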

In [None]:
# Temporal hold-out: a window belongs to the training set when it ends before
# the cut-off date, and to the test set when it ends on or after it.
split_date = pd.Timestamp("2021-06-01")

train_mask = window_dates < split_date
test_mask = window_dates >= split_date

# Apply the same boolean masks to features, labels and end dates so the three
# stay aligned row-for-row.
X_train, X_test = X_windows[train_mask], X_windows[test_mask]
y_train, y_test = y_labels[train_mask], y_labels[test_mask]
dates_train, dates_test = window_dates[train_mask], window_dates[test_mask]

print(f"Train: {len(X_train)} windows ({y_train.sum():.0f} bubble)")
print(f"Test:  {len(X_test)} windows ({y_test.sum():.0f} bubble)")

## 4. Train the Deep LPPLS model

The `fit_deep_lppls` function handles model construction, physics-informed loss,
and training. It returns a trained model that can produce confidence scores.

In [None]:
# Hyper-parameters for the P-LNN fit, gathered in one place for easy tweaking.
train_config = dict(
    epochs=100,
    batch_size=32,
    learning_rate=1e-3,
    physics_weight=0.1,  # weight of LPPLS physics loss term
    verbose=True,
)

# Fit on the training windows only; the test windows are never passed in here.
model = fit_deep_lppls(
    X_train=X_train,
    y_train=y_train,
    window_size=window_size,
    **train_config,
)

print("Training complete.")

## 5. Generate predictions

In [None]:
# Score both splits with the fitted model (train first, then test).
train_scores, test_scores = [
    model.predict(features) for features in (X_train, X_test)
]

# Quick summary of the score distribution on each split.
print(f"Train scores: mean={train_scores.mean():.3f}, max={train_scores.max():.3f}")
print(f"Test scores:  mean={test_scores.mean():.3f}, max={test_scores.max():.3f}")

In [None]:
# Model score (line) against the ground-truth bubble label (shaded band),
# one panel per split. The x-axes are independent because the two splits
# cover different date ranges.
fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=False)

panels = (
    (axes[0], dates_train, train_scores, y_train,
     "Training Set: P-LNN Predictions"),
    (axes[1], dates_test, test_scores, y_test,
     "Test Set: P-LNN Predictions (2021 Autumn Bubble)"),
)

for ax, dates, scores, labels, title in panels:
    ax.plot(dates, scores, color="blue", linewidth=0.8, label="P-LNN score")
    ax.fill_between(dates, 0, labels, color="red", alpha=0.2, label="Bubble label")
    ax.set_title(title)
    ax.set_ylim(-0.05, 1.05)  # small margin so 0/1 values are not clipped
    ax.legend()

plt.tight_layout()
plt.show()

## 6. Compare with classical LPPLS

In [None]:
# Classical LPPLS confidence for comparison with the P-LNN scores.
# Despite the `test_` prefixes, the indicator is run over the *full* log-price
# history; the result is only restricted to the test dates by the reindex at
# the end of this cell.
test_start_idx = np.searchsorted(df.index, split_date)  # NOTE(review): not used below
test_log_p = df["log_price"].values
test_t = np.arange(len(test_log_p), dtype=np.float64)  # 0, 1, 2, ... as float time axis

classical_conf = compute_confidence(
    test_t,
    test_log_p,
    window_sizes=[120, 180, 250, 365],
    step=5,
)

# Align classical confidence to test dates.
# NOTE(review): this pairs the k-th confidence value with the k-th row of `df`,
# i.e. it assumes `classical_conf.positive` holds one value per row in row
# order -- confirm against `compute_confidence` (especially given step=5).
positive_conf = classical_conf.positive
classical_by_date = pd.Series(
    positive_conf[:len(df)],
    index=df.index[:len(positive_conf)],
)
# Dates in `dates_test` without a matching entry come back as NaN.
classical_series = classical_by_date.reindex(dates_test).values

In [None]:
# Two stacked panels sharing the date axis: price on top, both confidence
# series underneath.
fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True)
ax_price, ax_conf = axes

# Price on test period (label-based slice, inclusive of both end dates).
first_day, last_day = dates_test[0], dates_test[-1]
test_prices = df.loc[first_day:last_day, "close"]
ax_price.plot(test_prices.index, test_prices.values, color="steelblue", linewidth=0.8)
ax_price.set_ylabel("Price (USD)")
ax_price.set_title("BTC/USD Test Period")

# Compare models, with the ground-truth label as a faint background band.
ax_conf.plot(dates_test, test_scores, color="blue", linewidth=1, label="Deep LPPLS (P-LNN)")
ax_conf.plot(
    dates_test,
    classical_series,
    color="orange",
    linewidth=1,
    alpha=0.7,
    label="Classical LPPLS",
)
ax_conf.fill_between(dates_test, 0, y_test, color="red", alpha=0.1, label="Bubble label")
ax_conf.set_ylabel("Confidence")
ax_conf.set_title("Deep LPPLS vs Classical LPPLS")
ax_conf.set_ylim(0, 1)
ax_conf.legend()

plt.tight_layout()
plt.show()

## 7. Evaluate classification performance

In [None]:
from sklearn.metrics import roc_auc_score, precision_recall_curve, average_precision_score

# The classical series is NaN on test dates where the reindex found no value;
# score those dates as 0 so both models are evaluated on the same rows.
classical_clean = np.nan_to_num(classical_series, nan=0.0)

# Threshold-free ranking metrics on the held-out windows.
# Deep LPPLS
auc_deep, ap_deep = (
    roc_auc_score(y_test, test_scores),
    average_precision_score(y_test, test_scores),
)
# Classical LPPLS
auc_classical, ap_classical = (
    roc_auc_score(y_test, classical_clean),
    average_precision_score(y_test, classical_clean),
)

# Side-by-side results table.
print(f"{'Metric':<25s} {'Deep LPPLS':>12s} {'Classical':>12s}")
print("-" * 50)
for metric, deep, classical in (
    ("ROC AUC", auc_deep, auc_classical),
    ("Average Precision", ap_deep, ap_classical),
):
    print(f"{metric:<25s} {deep:>12.3f} {classical:>12.3f}")

In [None]:
# Precision-recall curves for both models; the thresholds array (third return
# value of precision_recall_curve) is not needed for plotting.
prec_deep, rec_deep = precision_recall_curve(y_test, test_scores)[:2]
prec_class, rec_class = precision_recall_curve(y_test, classical_clean)[:2]

fig, ax = plt.subplots(figsize=(8, 6))

# One line per model, with its average precision shown in the legend.
for recall, precision, colour, legend in (
    (rec_deep, prec_deep, "blue", f"Deep LPPLS (AP={ap_deep:.3f})"),
    (rec_class, prec_class, "orange", f"Classical (AP={ap_classical:.3f})"),
):
    ax.plot(recall, precision, color=colour, label=legend)

ax.set_xlabel("Recall")
ax.set_ylabel("Precision")
ax.set_title("Precision-Recall: Bubble Detection")
ax.legend()
plt.tight_layout()
plt.show()

## Summary

- The Deep LPPLS (P-LNN) model embeds physics-based LPPLS structure into a neural
  network, combining interpretability with flexibility.
- On held-out bubble episodes, it typically achieves comparable or better AUC/AP
  than the classical LPPLS indicator.
- The main advantage is smoother confidence outputs and better handling of
  noisy or ambiguous bubble periods.
- Deep LPPLS scores can replace or supplement classical LPPLS in the signal
  aggregation pipeline (notebook 06).