In [117]:
# --- Imports & versions (PyTorch edition) ---
import os, json, math, pathlib, hashlib, random, warnings, re
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd

# sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    roc_auc_score, brier_score_loss, log_loss,
    accuracy_score, precision_score, recall_score, roc_curve
)
from sklearn.calibration import calibration_curve

# plotting
import matplotlib.pyplot as plt

# persistence
import joblib

# PyTorch
try:
    import torch
    import torch.nn as nn
    from torch.utils.data import Dataset, DataLoader
    TORCH_OK = True
except Exception as e:
    TORCH_OK = False
    print("PyTorch not available in this env. Install torch and restart the kernel.\n", e)

print("Versions:")
print(" - python:",  "{}.{}.{}".format(*__import__('sys').version_info[:3]))
print(" - numpy:", np.__version__)
print(" - pandas:", pd.__version__)
print(" - sklearn:", __import__('sklearn').__version__)
print(" - matplotlib:", plt.matplotlib.__version__)
if TORCH_OK:
    print(" - torch:", torch.__version__)


Versions:
 - python: 3.13.7
 - numpy: 2.3.3
 - pandas: 2.3.2
 - sklearn: 1.7.2
 - matplotlib: 3.10.6
 - torch: 2.8.0+cpu


In [118]:
# --- Config, folders, seeds, device ---
SEED = 42
LOOKBACK = 60     # timesteps per window
HORIZON  = 1      # predict t+1 direction
STRIDE   = 1

TRAIN_FRAC = 0.70
VAL_FRAC   = 0.15   # TEST_FRAC = 0.15

BATCH_SIZE = 256
EPOCHS     = 100
LR         = 1e-3
PATIENCE   = 10      # early stopping patience
EPS_IMPROVE = 1e-5   # minimum AUC improvement to count

DATA_DIR = Path("data")
ART_DIR  = Path("artifacts")
FIG_DIR  = Path("reports/figures")
for p in [DATA_DIR, ART_DIR, FIG_DIR]:
    p.mkdir(parents=True, exist_ok=True)

random.seed(SEED)
np.random.seed(SEED)
if TORCH_OK:
    torch.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    try:
        torch.use_deterministic_algorithms(True)
    except Exception:
        pass
    import torch.backends.cudnn as cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

device = torch.device("cuda" if TORCH_OK and torch.cuda.is_available() else "cpu")
print(f"SEED={SEED}, LOOKBACK={LOOKBACK}, HORIZON={HORIZON}, STRIDE={STRIDE}, "
      f"BATCH_SIZE={BATCH_SIZE}, EPOCHS={EPOCHS}, LR={LR}, device={device}")


SEED=42, LOOKBACK=60, HORIZON=1, STRIDE=1, BATCH_SIZE=256, EPOCHS=100, LR=0.001, device=cpu


In [119]:
# --- Load Phase-2 dataset & feature list (auto-bootstrap if missing) ---
df_path_parquet = DATA_DIR / "df_nb02.parquet"
df_path_csv     = DATA_DIR / "df_nb02.csv"
features_path   = DATA_DIR / "feature_list.json"

# 1) Load dataset
if df_path_parquet.exists():
    df = pd.read_parquet(df_path_parquet); source = "parquet"
elif df_path_csv.exists():
    df = pd.read_csv(df_path_csv); source = "csv"
else:
    raise FileNotFoundError("Missing data/df_nb02.parquet or data/df_nb02.csv (run Phase-2 first).")

# 2) Date column
dt_col = next((c for c in ["date","Date","timestamp","Timestamp","ts","datetime","Datetime"] if c in df.columns), None)
if dt_col is None:
    for c in df.columns:
        try:
            pd.to_datetime(df[c].head(50), errors="raise"); dt_col = c; break
        except Exception:
            pass
if dt_col is None:
    raise KeyError("No datetime column found; please rename your time column to 'date'.")

df[dt_col] = pd.to_datetime(df[dt_col], errors="coerce")
if df[dt_col].isna().all():
    raise ValueError(f"Could not parse datetime column '{dt_col}'.")
df = df.sort_values(dt_col).reset_index(drop=True)

# 3) Label must exist
if "y" not in df.columns:
    raise KeyError("Label column 'y' not found in dataset.")

# 4) Features: load or create (numeric & non-leaky)
if features_path.exists():
    with open(features_path, "r") as f:
        features = json.load(f)
    features = [c for c in features if c in df.columns]
else:
    num_cols = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
    leak_re = re.compile(r"(next|t\+?1|lead|future|target|label)", re.IGNORECASE)
    features = [c for c in num_cols if c not in {dt_col, "y"} and not leak_re.search(c)]
    features.sort()
    with open(features_path, "w") as f:
        json.dump(features, f, indent=2)
    print(f"[created] {features_path} with {len(features)} features")

if not features:
    raise RuntimeError("Feature list is empty after reconciliation.")

# 5) Report
print(f"Loaded df_nb02 from {source}: shape={df.shape}, "
      f"date_range=[{df[dt_col].min().date()} → {df[dt_col].max().date()}]")
print("NA total:", int(df.isna().sum().sum()))
print("Duplicate dates:", int(df.duplicated(dt_col).sum()))
print("First 5 features:", features[:5], f"(total: {len(features)})")


Loaded df_nb02 from parquet: shape=(2686, 22), date_range=[2015-02-06 → 2025-10-10]
NA total: 0
Duplicate dates: 0
First 5 features: ['close', 'high', 'low', 'macd', 'macd_signal'] (total: 15)


In [120]:
# --- Chronological splits ---
N = len(df)
train_end = int(N * TRAIN_FRAC)
val_end   = int(N * (TRAIN_FRAC + VAL_FRAC))

df_train = df.iloc[:train_end].copy()
df_val   = df.iloc[train_end:val_end].copy()
df_test  = df.iloc[val_end:].copy()

print("Split rows (pre-window):",
      "train", len(df_train), "val", len(df_val), "test", len(df_test))
print("Date ranges:")
print("  train:", df_train[dt_col].min().date(), "→", df_train[dt_col].max().date())
print("  val  :", df_val[dt_col].min().date(),   "→", df_val[dt_col].max().date())
print("  test :", df_test[dt_col].min().date(),  "→", df_test[dt_col].max().date())

assert df_train[dt_col].max() < df_val[dt_col].min(), "Train/Val dates overlap"
assert df_val[dt_col].max()   < df_test[dt_col].min(), "Val/Test dates overlap"

split_info = {
    "SEED": SEED,
    "LOOKBACK": LOOKBACK, "HORIZON": HORIZON, "STRIDE": STRIDE,
    "TRAIN_FRAC": TRAIN_FRAC, "VAL_FRAC": VAL_FRAC,
    "train_end_idx": train_end, "val_end_idx": val_end,
    "train_date_range": [str(df_train[dt_col].min().date()), str(df_train[dt_col].max().date())],
    "val_date_range":   [str(df_val[dt_col].min().date()),   str(df_val[dt_col].max().date())],
    "test_date_range":  [str(df_test[dt_col].min().date()),  str(df_test[dt_col].max().date())],
}
with open(ART_DIR / "nb3_split_info.json", "w") as f:
    json.dump(split_info, f, indent=2)
print("Saved:", ART_DIR / "nb3_split_info.json")


Split rows (pre-window): train 1880 val 403 test 403
Date ranges:
  train: 2015-02-06 → 2022-07-26
  val  : 2022-07-27 → 2024-03-04
  test : 2024-03-05 → 2025-10-10
Saved: artifacts\nb3_split_info.json


In [121]:
# --- Scaling (train only) ---
X_train_raw = df_train[features].values.astype(np.float32)
X_val_raw   = df_val[features].values.astype(np.float32)
X_test_raw  = df_test[features].values.astype(np.float32)

y_train_all = df_train["y"].astype(int).values
y_val_all   = df_val["y"].astype(int).values
y_test_all  = df_test["y"].astype(int).values

scaler = StandardScaler().fit(X_train_raw)  # fit ONLY on train
X_train_s = scaler.transform(X_train_raw)
X_val_s   = scaler.transform(X_val_raw)
X_test_s  = scaler.transform(X_test_raw)

joblib.dump(scaler, ART_DIR / "lstm_scaler.joblib")
print("Scaler saved →", ART_DIR / "lstm_scaler.joblib")
print("scaler.mean_ (first 5):", scaler.mean_[:5])


Scaler saved → artifacts\lstm_scaler.joblib
scaler.mean_ (first 5): [66.44055386 67.14243458 65.67384116  0.43824461  0.43467065]


In [122]:
# --- Windowing ---
def make_windows(X, y, dates, L, H, stride=1):
    """
    Sliding windows of length L predicting label at t+H.
    Returns Xw (N,L,F), yw (N,), window end dates, and target dates.
    """
    Xw, yw, end_dates, target_dates = [], [], [], []
    N = len(X)
    for start in range(0, N - L - (H - 1), stride):
        end = start + L
        tgt = end + (H - 1)
        if tgt >= N:
            break
        Xw.append(X[start:end])
        yw.append(y[tgt])
        end_dates.append(dates[end - 1])
        target_dates.append(dates[tgt])
    return np.array(Xw, np.float32), np.array(yw, np.int32), np.array(end_dates), np.array(target_dates)

d_tr, d_va, d_te = df_train[dt_col].values, df_val[dt_col].values, df_test[dt_col].values
X_tr_w, y_tr_w, end_tr, tgt_tr = make_windows(X_train_s, y_train_all, d_tr, LOOKBACK, HORIZON, STRIDE)
X_va_w, y_va_w, end_va, tgt_va = make_windows(X_val_s,   y_val_all,   d_va, LOOKBACK, HORIZON, STRIDE)
X_te_w, y_te_w, end_te, tgt_te = make_windows(X_test_s,  y_test_all,  d_te, LOOKBACK, HORIZON, STRIDE)

print(f"Windows created (L={LOOKBACK}, H={HORIZON}, stride={STRIDE})")
print("  train:", X_tr_w.shape, y_tr_w.shape)
print("  val  :", X_va_w.shape, y_va_w.shape)
print("  test :", X_te_w.shape, y_te_w.shape)

def show_alignment(end_dates, target_dates, k=3, label="train"):
    k = min(k, len(end_dates))
    print(f"Alignment samples ({label}):")
    for i in np.linspace(0, len(end_dates)-1, k, dtype=int):
        print(f"  window_end={pd.to_datetime(str(end_dates[i])).date()} → "
              f"target={pd.to_datetime(str(target_dates[i])).date()} (OK)")

show_alignment(end_tr, tgt_tr, 3, "train")
show_alignment(end_va, tgt_va, 3, "val")
show_alignment(end_te, tgt_te, 3, "test")

print("Target date overlaps (should be 0): "
      "train∩val=", np.isin(tgt_tr, tgt_va).sum(),
      " val∩test=", np.isin(tgt_va, tgt_te).sum(),
      " train∩test=", np.isin(tgt_tr, tgt_te).sum())

np.save(ART_DIR / "train_target_dates.npy", tgt_tr)
np.save(ART_DIR / "val_target_dates.npy",   tgt_va)
np.save(ART_DIR / "test_target_dates.npy",  tgt_te)
print("Saved target dates to artifacts/.")


Windows created (L=60, H=1, stride=1)
  train: (1820, 60, 15) (1820,)
  val  : (343, 60, 15) (343,)
  test : (343, 60, 15) (343,)
Alignment samples (train):
  window_end=2015-05-04 → target=2015-05-05 (OK)
  window_end=2018-12-11 → target=2018-12-12 (OK)
  window_end=2022-07-25 → target=2022-07-26 (OK)
Alignment samples (val):
  window_end=2022-10-19 → target=2022-10-20 (OK)
  window_end=2023-06-27 → target=2023-06-28 (OK)
  window_end=2024-03-01 → target=2024-03-04 (OK)
Alignment samples (test):
  window_end=2024-05-29 → target=2024-05-30 (OK)
  window_end=2025-02-04 → target=2025-02-05 (OK)
  window_end=2025-10-09 → target=2025-10-10 (OK)
Target date overlaps (should be 0): train∩val= 0  val∩test= 0  train∩test= 0
Saved target dates to artifacts/.


In [123]:
# --- PyTorch Dataset & DataLoaders ---
if not TORCH_OK:
    raise RuntimeError("PyTorch not available; install torch and restart the kernel.")

class SeqDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X).float()
        self.y = torch.from_numpy(y.astype(np.float32))
    def __len__(self):
        return self.X.shape[0]
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

ds_tr = SeqDataset(X_tr_w, y_tr_w)
ds_va = SeqDataset(X_va_w, y_va_w)
ds_te = SeqDataset(X_te_w, y_te_w)

dl_tr = DataLoader(ds_tr, batch_size=BATCH_SIZE, shuffle=True,  drop_last=False)
dl_va = DataLoader(ds_va, batch_size=BATCH_SIZE, shuffle=False, drop_last=False)
dl_te = DataLoader(ds_te, batch_size=BATCH_SIZE, shuffle=False, drop_last=False)

F = X_tr_w.shape[-1]
L = X_tr_w.shape[1]
print(f"DataLoaders ready: L={L}, F={F}, train batches={len(dl_tr)}")


DataLoaders ready: L=60, F=15, train batches=8


In [124]:
# --- Model (PyTorch) ---
class LSTMClassifier(nn.Module):
    def __init__(self, input_dim, hidden1=64, hidden2=32, dropout=0.2):
        super().__init__()
        self.lstm1 = nn.LSTM(input_dim, hidden1, batch_first=True)
        self.lstm2 = nn.LSTM(hidden1, hidden2, batch_first=True)
        self.do1 = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden2, 1)  # logits
    def forward(self, x):
        # x: (B, L, F)
        out, _ = self.lstm1(x)
        out, _ = self.lstm2(out)
        out = out[:, -1, :]         # last timestep
        out = self.do1(out)
        logit = self.fc(out).squeeze(-1)  # (B,)
        return logit

model = LSTMClassifier(input_dim=F, hidden1=64, hidden2=32, dropout=0.2).to(device)
n_params = sum(p.numel() for p in model.parameters())
print(model)
print("Total parameters:", n_params)

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)


LSTMClassifier(
  (lstm1): LSTM(15, 64, batch_first=True)
  (lstm2): LSTM(64, 32, batch_first=True)
  (do1): Dropout(p=0.2, inplace=False)
  (fc): Linear(in_features=32, out_features=1, bias=True)
)
Total parameters: 33313


In [125]:
# --- Training loop with EarlyStopping on val AUC ---
def sigmoid_np(x):
    return 1 / (1 + np.exp(-x))

best_val_auc = -np.inf
best_epoch = None
epochs_no_improve = 0
train_auc_hist, val_auc_hist = [], []

for epoch in range(1, EPOCHS + 1):
    # Train
    model.train()
    train_logits = []
    train_targets = []
    train_loss_accum = 0.0

    for Xb, yb in dl_tr:
        Xb = Xb.to(device)
        yb = yb.to(device)

        optimizer.zero_grad()
        logits = model(Xb)
        loss = criterion(logits, yb)
        loss.backward()
        optimizer.step()

        train_loss_accum += loss.item() * Xb.size(0)
        train_logits.append(logits.detach().cpu().numpy())
        train_targets.append(yb.detach().cpu().numpy())

    train_logits = np.concatenate(train_logits) if train_logits else np.array([])
    train_targets = np.concatenate(train_targets) if train_targets else np.array([])
    if train_logits.size > 0:
        train_probs = sigmoid_np(train_logits)
        train_auc = roc_auc_score(train_targets, train_probs)
    else:
        train_auc = np.nan

    # Validate
    model.eval()
    val_logits = []
    val_targets = []
    with torch.no_grad():
        for Xb, yb in dl_va:
            Xb = Xb.to(device); yb = yb.to(device)
            logits = model(Xb)
            val_logits.append(logits.detach().cpu().numpy())
            val_targets.append(yb.detach().cpu().numpy())
    val_logits = np.concatenate(val_logits)
    val_targets = np.concatenate(val_targets)
    val_probs = sigmoid_np(val_logits)
    val_auc = roc_auc_score(val_targets, val_probs)

    train_auc_hist.append(train_auc)
    val_auc_hist.append(val_auc)

    print(f"Epoch {epoch:3d} | train AUC {train_auc:.4f} | val AUC {val_auc:.4f}")

    # Early stopping on val AUC
    if val_auc > best_val_auc + EPS_IMPROVE:
        best_val_auc = val_auc
        best_epoch = epoch
        epochs_no_improve = 0
        torch.save(model.state_dict(), ART_DIR / "lstm_nb3.pt")
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= PATIENCE:
            print(f"Early stopping at epoch {epoch} (best val AUC {best_val_auc:.4f} @ epoch {best_epoch})")
            break

print(f"Best epoch: {best_epoch}  best val_AUC: {best_val_auc:.4f}")

# Training curve figure
plt.figure()
plt.plot(train_auc_hist, label="train AUC")
plt.plot(val_auc_hist, label="val AUC")
plt.xlabel("epoch"); plt.ylabel("AUC"); plt.legend(); plt.title("LSTM NB3 (PyTorch) — AUC vs epoch")
fig_path = FIG_DIR / "lstm_nb3_training_curve.png"
plt.savefig(fig_path, bbox_inches="tight", dpi=150); plt.close()
print("Saved training curve:", fig_path)
print("Saved best model →", ART_DIR / "lstm_nb3.pt")


Epoch   1 | train AUC 0.4833 | val AUC 0.5537
Epoch   2 | train AUC 0.4984 | val AUC 0.5527
Epoch   3 | train AUC 0.5149 | val AUC 0.5578
Epoch   4 | train AUC 0.5074 | val AUC 0.5530
Epoch   5 | train AUC 0.5130 | val AUC 0.5436
Epoch   6 | train AUC 0.5183 | val AUC 0.5429
Epoch   7 | train AUC 0.5448 | val AUC 0.5410
Epoch   8 | train AUC 0.5244 | val AUC 0.5363
Epoch   9 | train AUC 0.5451 | val AUC 0.5355
Epoch  10 | train AUC 0.5420 | val AUC 0.5361
Epoch  11 | train AUC 0.5442 | val AUC 0.5383
Epoch  12 | train AUC 0.5556 | val AUC 0.5438
Epoch  13 | train AUC 0.5514 | val AUC 0.5454
Early stopping at epoch 13 (best val AUC 0.5578 @ epoch 3)
Best epoch: 3  best val_AUC: 0.5578
Saved training curve: reports\figures\lstm_nb3_training_curve.png
Saved best model → artifacts\lstm_nb3.pt


In [126]:
# --- Evaluation on test ---
# Reload best model
reload_model = LSTMClassifier(input_dim=F, hidden1=64, hidden2=32, dropout=0.2).to(device)
reload_model.load_state_dict(torch.load(ART_DIR / "lstm_nb3.pt", map_location=device))
reload_model.eval()

# Get test logits
test_logits = []
with torch.no_grad():
    for Xb, yb in dl_te:
        Xb = Xb.to(device)
        logits = reload_model(Xb)
        test_logits.append(logits.detach().cpu().numpy())
test_logits = np.concatenate(test_logits)
p_test = 1.0 / (1.0 + np.exp(-test_logits))   # sigmoid
y_true = y_te_w.astype(int)

# Metrics
auc   = roc_auc_score(y_true, p_test)
brier = brier_score_loss(y_true, p_test)
ll    = log_loss(y_true, p_test, labels=[0,1])
acc   = accuracy_score(y_true, (p_test >= 0.5).astype(int))
prec  = precision_score(y_true, (p_test >= 0.5).astype(int), zero_division=0)
rec   = recall_score(y_true, (p_test >= 0.5).astype(int), zero_division=0)

metrics_df = pd.DataFrame([{
    "model": "lstm_nb3_pt",
    "AUC_test": round(auc, 4),
    "Brier_test": round(brier, 4),
    "LogLoss_test": round(ll, 4),
    "Acc@0.5": round(acc, 4),
    "Prec@0.5": round(prec, 4),
    "Rec@0.5": round(rec, 4),
}])
print(metrics_df)

# ROC curve
fpr, tpr, _ = roc_curve(y_true, p_test)
plt.figure()
plt.plot(fpr, tpr, label=f"AUC={auc:.3f}")
plt.plot([0,1],[0,1],'--')
plt.xlabel("FPR"); plt.ylabel("TPR"); plt.title("LSTM NB3 (PyTorch) — ROC (test)"); plt.legend()
roc_path = FIG_DIR / "lstm_nb3_roc.png"
plt.savefig(roc_path, bbox_inches="tight", dpi=150); plt.close()

# Calibration plot
prob_true, prob_pred = calibration_curve(y_true, p_test, n_bins=10, strategy="uniform")
plt.figure()
plt.plot(prob_pred, prob_true, marker="o", label="Calibrated")
plt.plot([0,1],[0,1],'--', label="Perfect")
plt.xlabel("Predicted probability"); plt.ylabel("Observed frequency"); plt.title("LSTM NB3 (PyTorch) — Calibration")
plt.legend()
calib_path = FIG_DIR / "lstm_nb3_calib.png"
plt.savefig(calib_path, bbox_inches="tight", dpi=150); plt.close()

print("Saved figures:", roc_path, "and", calib_path)


         model  AUC_test  Brier_test  LogLoss_test  Acc@0.5  Prec@0.5  Rec@0.5
0  lstm_nb3_pt    0.5439      0.2503        0.6937   0.4577    0.5455   0.1875
Saved figures: reports\figures\lstm_nb3_roc.png and reports\figures\lstm_nb3_calib.png


In [127]:
# --- Save run config & metrics summary ---
feat_hash = hashlib.md5("\n".join(map(str, sorted(features))).encode()).hexdigest()
config = {
    "framework": "pytorch",
    "seed": SEED,
    "lookback": LOOKBACK,
    "horizon": HORIZON,
    "stride": STRIDE,
    "batch_size": BATCH_SIZE,
    "epochs": EPOCHS,
    "learning_rate": LR,
    "train_frac": TRAIN_FRAC,
    "val_frac": VAL_FRAC,
    "feature_hash": feat_hash,
    "n_features": len(features),
    "best_val_auc": float(best_val_auc) if not np.isnan(best_val_auc) else None,
    "generated_at": datetime.now().isoformat(timespec="seconds")
}
cfg_path = ART_DIR / "nb3_lstm_config.json"
with open(cfg_path, "w") as f:
    json.dump(config, f, indent=2)
print("Saved config:", cfg_path)

summary_path = DATA_DIR / "explainability_summary.csv"
try:
    old = pd.read_csv(summary_path)
except Exception:
    old = pd.DataFrame(columns=list(metrics_df.columns))
new = pd.concat([old, metrics_df], ignore_index=True)
new.to_csv(summary_path, index=False)
print("Updated metrics summary:", summary_path)


Saved config: artifacts\nb3_lstm_config.json
Updated metrics summary: data\explainability_summary.csv


In [128]:
# --- Reload parity test ---
# Compute predictions again with a freshly reloaded model
reloaded2 = LSTMClassifier(input_dim=F, hidden1=64, hidden2=32, dropout=0.2).to(device)
reloaded2.load_state_dict(torch.load(ART_DIR / "lstm_nb3.pt", map_location=device))
reloaded2.eval()

test_logits_reload = []
with torch.no_grad():
    for Xb, yb in dl_te:
        Xb = Xb.to(device)
        logits = reloaded2(Xb)
        test_logits_reload.append(logits.detach().cpu().numpy())
test_logits_reload = np.concatenate(test_logits_reload)
p_test_reload = 1.0 / (1.0 + np.exp(-test_logits_reload))

max_abs_diff = float(np.max(np.abs(p_test - p_test_reload)))
print("max|p_test - p_test_reload| =", max_abs_diff)
print("Reload parity OK" if max_abs_diff <= 1e-7 else "Reload parity WARNING")


max|p_test - p_test_reload| = 0.0
Reload parity OK


In [129]:
# --- Artifact checklist ---
paths = [
    ART_DIR / "lstm_nb3.pt",               # PyTorch weights
    ART_DIR / "lstm_scaler.joblib",
    ART_DIR / "nb3_lstm_config.json",
    ART_DIR / "nb3_split_info.json",
    ART_DIR / "train_target_dates.npy",
    ART_DIR / "val_target_dates.npy",
    ART_DIR / "test_target_dates.npy",
    DATA_DIR / "explainability_summary.csv",
    FIG_DIR / "lstm_nb3_roc.png",
    FIG_DIR / "lstm_nb3_calib.png",
    FIG_DIR / "lstm_nb3_training_curve.png",
]
for p in paths:
    print(f"{p} → exists: {p.exists()}")


artifacts\lstm_nb3.pt → exists: True
artifacts\lstm_scaler.joblib → exists: True
artifacts\nb3_lstm_config.json → exists: True
artifacts\nb3_split_info.json → exists: True
artifacts\train_target_dates.npy → exists: True
artifacts\val_target_dates.npy → exists: True
artifacts\test_target_dates.npy → exists: True
data\explainability_summary.csv → exists: True
reports\figures\lstm_nb3_roc.png → exists: True
reports\figures\lstm_nb3_calib.png → exists: True
reports\figures\lstm_nb3_training_curve.png → exists: True


In [130]:
# --- Align returns to TEST target dates and run a practical long/flat band ---

# 1) Build next-day returns and align to test TARGET dates
df_bt = df[[dt_col, "close"]].copy()
df_bt["ret1"] = df_bt["close"].pct_change().shift(-1)
df_bt = df_bt.set_index(dt_col)

# target dates already computed during windowing as `tgt_te`
target_dates_test = pd.to_datetime(pd.Index(tgt_te))
ret_tgt = df_bt.loc[target_dates_test, "ret1"].values  # same length as p_tst

# Drop any NaNs that may arise at the tail
mask = ~np.isnan(ret_tgt)
ret_tgt = ret_tgt[mask]
p_tst_aligned = p_tst[mask]
print("Aligned shapes → p_tst:", p_tst_aligned.shape, "ret_tgt:", ret_tgt.shape)

# 2) Choose a practical long-only threshold grid using validation percentiles
#    (ensures we actually take some trades)
q_grid = [0.50, 0.55, 0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90]
thr_candidates = np.unique(np.quantile(p_val, q_grid))  # from validation probs
print("Candidate long thresholds (from val quantiles):", np.round(thr_candidates, 3).tolist())

def band_metrics(y_true, p, thr_long):
    take = (p >= thr_long).astype(int)
    take_rate = float(take.mean())
    if take.sum() == 0:
        return {"thr_long": float(thr_long), "take_rate": 0.0, "prec_pos": np.nan, "rec_pos": np.nan, "f1_pos": np.nan}
    y_sel = y_true[take==1]
    p_sel = p[take==1]
    yhat_sel = (p_sel >= 0.5).astype(int)  # hit-rate among taken trades
    from sklearn.metrics import precision_recall_fscore_support
    prec, rec, f1, _ = precision_recall_fscore_support(y_sel, yhat_sel, average="binary", zero_division=0)
    return {"thr_long": float(thr_long), "take_rate": take_rate, "prec_pos": float(prec), "rec_pos": float(rec), "f1_pos": float(f1)}

# Tune on validation
val_band = pd.DataFrame([band_metrics(yv, p_val, t) for t in thr_candidates])\
             .sort_values(["f1_pos","prec_pos","take_rate"], ascending=[False, False, True])
print("\nValidation band (top 5):")
print(val_band.head(5).to_string(index=False))

best_t_long = float(val_band.iloc[0]["thr_long"])
print("Chosen long-only threshold:", best_t_long)

# 3) Backtest on test (aligned) with simple transaction costs
TC_BPS = 5  # 0.05% per position change
pos = (p_tst_aligned >= best_t_long).astype(int)

# Costs when position changes (enter/exit)
pos_shift = np.r_[0, pos[:-1]]
turnover = (pos != pos_shift).astype(int)

net_ret = pos * ret_tgt - turnover * (TC_BPS / 10000.0)

def sharpe(x):
    x = np.asarray(x)
    mu, sd = np.nanmean(x), np.nanstd(x)
    return 0.0 if sd == 0 else (mu / sd) * np.sqrt(252)

bt = {
    "thr_long": best_t_long,
    "days": int(len(net_ret)),
    "take_rate": float(pos.mean()),
    "avg_daily_ret_%": float(np.nanmean(net_ret) * 100),
    "ann_sharpe": float(sharpe(net_ret)),
    "total_ret_%": float(np.nansum(net_ret) * 100),
}
print("\nBacktest summary (test):")
print(pd.DataFrame([bt]).to_string(index=False))


Aligned shapes → p_tst: (342,) ret_tgt: (342,)
Candidate long thresholds (from val quantiles): [0.507, 0.508, 0.509, 0.509, 0.51, 0.511, 0.511, 0.513, 0.516]

Validation band (top 5):
 thr_long  take_rate  prec_pos  rec_pos   f1_pos
 0.513480   0.151603  0.692308      1.0 0.818182
 0.515743   0.102041  0.657143      1.0 0.793103
 0.511335   0.201166  0.608696      1.0 0.756757
 0.510702   0.250729  0.569767      1.0 0.725926
 0.509197   0.349854  0.558333      1.0 0.716578
Chosen long-only threshold: 0.5134802281856536

Backtest summary (test):
 thr_long  days  take_rate  avg_daily_ret_%  ann_sharpe  total_ret_%
  0.51348   342   0.026316         0.013162    0.591059     4.501466
