## Step 0

In [50]:
# ЛОГИЧЕСКИЙ БЛОК: imports + reproducibility + GLOBAL config
# ИСПОЛНЕНИЕ БЛОКА:

import os, math, random
import numpy as np
import pandas as pd
from pathlib import Path

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from sklearn.preprocessing import RobustScaler
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, roc_auc_score

def seed_everything(seed=1234):
    """Seed every random number generator used in this notebook.

    Calls ``random.seed``, ``np.random.seed``, ``torch.manual_seed`` and
    ``torch.cuda.manual_seed_all`` with ``seed``, then sets the cuDNN
    ``deterministic`` flag and clears its ``benchmark`` flag.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Seed all RNGs once, before any data or model code runs.
seed_everything(100)

# Use CUDA when it is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("DEVICE:", DEVICE)

# -------------------------------
# GLOBAL CONFIG (everything lives here)
# -------------------------------
CFG = {
    # data
    "freq": "5min",
    "data_dir": Path("../dataset"),
    # NEW: final hold-out test split (time-ordered, in sample space)
    "final_test_frac": 0.10,

    "book_levels": 15,         # number of order-book levels to load
    "top_levels": 5,           # DI_L0..DI_L4
    "near_levels": 5,          # near=0..4, far=5..14

    # walk-forward windows (in sample space)
    "train_min_frac": 0.50,
    "val_window_frac": 0.10,
    "test_window_frac": 0.10,
    "step_window_frac": 0.10,

    # scaling
    "max_abs_feat": 10.0,

    # correlations
    "corr_windows": [6, 12, 24, 48, 84],  # 30m,1h,2h,4h,7h
    "edges": [("ADA","BTC"), ("ADA","ETH"), ("ETH","BTC")],

    # triple-barrier (labels)
    "tb_horizon": 1*12,       # 1h     # needed for sample_t (so the TB exit never runs past the end of the data)
    "lookback": 7*12,
    "tb_pt_mult": 1.2,
    "tb_sl_mult": 1.1,
    "tb_min_barrier": 0.001,
    "tb_max_barrier": 0.006,

    # training (shared)
    "batch_size": 64,
    "epochs": 30,
    "lr": 2e-4,
    "weight_decay": 1e-3,
    "grad_clip": 1.0,
    "dropout": 0.2,
    "hidden": 64,
    "gnn_layers": 2,

    # --- old Attn params kept for compatibility: now used as ATTENTION dims ---
    # Attn_hidden -> d_model (size of the temporal representation)
    # Attn_layers -> n_layers (number of attention/transformer layers, if the mode uses them)
    "Attn_hidden": 64,
    "Attn_layers": 1,

    "use_amp": True,

    # trading eval
    "cost_bps": 2.0,

    # confidence thresholds (for threshold-based PnL)
    "thr_trade_grid": [0.50, 0.55, 0.60, 0.65, 0.70],
    "thr_dir_grid":   [0.50, 0.55, 0.60, 0.65, 0.70],

    # ---- PnL proxy during DIR training (grid selector)
    "proxy_thr_trade_grid": None,  # None -> use thr_trade_grid
    "proxy_thr_dir_grid":   None,  # None -> use thr_dir_grid
    "proxy_min_trades": 30,

    # -------------------------------
    # NEW: attention-based temporal encoder config
    # -------------------------------
    # TWO modes (as requested):
    # 1) "xformer_cls"  -> TransformerEncoder + [CLS] token, the CLS output is used as the summary
    # 2) "attn_pool"    -> learnable query + MultiHeadAttention pooling (fast and stable)
    "temporal_mode": "xformer_cls",   # "xformer_cls" | "attn_pool"
    "attn_heads": 4,                  # automatically reduced to a divisor of d_model
    "attn_ff_mult": 4,                # FFN dim = ff_mult * d_model (for xformer_cls)
    "attn_dropout": None,             # None -> use CFG["dropout"]
    "attn_causal": False,             # optional for xformer (usually False, since the window is past-only)
    "attn_use_pos_emb": True,         # learned positional embeddings
}

# Graph nodes: one per asset; the list order defines the node index.
ASSETS = ["ADA", "BTC", "ETH"]
ASSET2IDX = {a:i for i,a in enumerate(ASSETS)}
# Asset whose node representation is read out by the model.
TARGET_ASSET = "ETH"
TARGET_NODE = ASSET2IDX[TARGET_ASSET]

# Directed edges as (source, target) asset names, converted to node indices.
EDGES = CFG["edges"]
EDGE_INDEX = torch.tensor([[ASSET2IDX[s], ASSET2IDX[t]] for (s,t) in EDGES], dtype=torch.long)  # [E,2]
print("EDGE_INDEX:", EDGE_INDEX.tolist())

print("TEMPORAL:", CFG["temporal_mode"], "| heads=", CFG["attn_heads"], "| d_model=", CFG["Attn_hidden"])


DEVICE: cpu
EDGE_INDEX: [[0, 1], [0, 2], [2, 1]]
TEMPORAL: xformer_cls | heads= 4 | d_model= 64


## 1. load data + basic returns

In [51]:
# ЛОГИЧЕСКИЙ БЛОК: load data + log returns (без target) + все уровни стакана
# ИСПОЛНЕНИЕ БЛОКА:

def load_asset(asset: str, freq: str, data_dir: Path, book_levels: int, part=(0, 100)) -> pd.DataFrame:
    """Load one asset's CSV and return only the columns used downstream.

    Reads ``data_dir / f"{asset}_{freq}.csv"``, keeps the row range selected by
    ``part`` (by row position in the file), builds a ``timestamp`` index from
    the ``system_time`` column rounded to the minute, and sorts by it.

    Args:
        asset: Asset ticker used in the file name.
        freq: Bar frequency used in the file name.
        data_dir: Directory that contains the CSV files.
        book_levels: Number of order-book levels required on each side.
        part: Two-element ``(start_percent, end_percent)`` sequence selecting
            the fraction of rows to keep. Any indexable pair is accepted.

    Returns:
        DataFrame indexed by ``timestamp`` with the columns ``midpoint``,
        ``spread``, ``buys``, ``sells`` followed by ``bids_notional_i`` and
        ``asks_notional_i`` for ``i`` in ``range(book_levels)``.

    Raises:
        ValueError: If any required column is missing from the CSV.
    """
    path = data_dir / f"{asset}_{freq}.csv"
    df = pd.read_csv(path)

    n_rows = len(df)
    row_lo = int(n_rows * part[0] / 100)
    row_hi = int(n_rows * part[1] / 100)
    # Copy the slice so the column assignment below writes to an independent
    # frame instead of a view of the frame returned by read_csv.
    df = df.iloc[row_lo:row_hi].copy()

    df["timestamp"] = pd.to_datetime(df["system_time"]).dt.round("min")
    df = df.sort_values("timestamp").set_index("timestamp")

    bid_cols = [f"bids_notional_{i}" for i in range(book_levels)]
    ask_cols = [f"asks_notional_{i}" for i in range(book_levels)]

    needed = ["midpoint", "spread", "buys", "sells"] + bid_cols + ask_cols
    missing = [c for c in needed if c not in df.columns]
    if missing:
        raise ValueError(f"{asset}: missing columns in CSV: {missing[:10]}{'...' if len(missing) > 10 else ''}")

    return df[needed]


def load_all_assets() -> pd.DataFrame:
    """Load ADA, BTC and ETH and merge them into one wide frame.

    Each asset is read with ``load_asset`` (first 80% of its rows), its columns
    are renamed to carry the asset name, and the three frames are combined with
    ``DataFrame.join`` on the timestamp index (BTC and ETH are joined onto ADA).
    The index is then turned back into a regular ``timestamp`` column.
    """
    n_levels = CFG["book_levels"]

    def rename_asset_cols(df_one: pd.DataFrame, asset: str) -> pd.DataFrame:
        # Per-asset column names: the midpoint column becomes the asset ticker.
        new_names = {
            "midpoint": asset,
            "buys": f"buys_{asset}",
            "sells": f"sells_{asset}",
            "spread": f"spread_{asset}",
        }
        new_names.update({f"bids_notional_{i}": f"bids_vol_{asset}_{i}" for i in range(n_levels)})
        new_names.update({f"asks_notional_{i}": f"asks_vol_{asset}_{i}" for i in range(n_levels)})
        return df_one.rename(columns=new_names)

    ada, btc, eth = (
        rename_asset_cols(
            load_asset(ticker, CFG["freq"], CFG["data_dir"], n_levels, part=[0, 80]),
            ticker,
        )
        for ticker in ("ADA", "BTC", "ETH")
    )

    merged = ada.join(btc).join(eth)
    return merged.reset_index()  # timestamp column remains


# Merged wide frame for all assets; T is the number of time steps.
df = load_all_assets()
T = len(df)

# Log returns of each asset's midpoint; the first row has no previous value
# and is filled with 0.0.
for a in ASSETS:
    df[f"lr_{a}"] = np.log(df[a]).diff().fillna(0.0)

print("Loaded df:", df.shape)
print("Example columns:", df.columns[:25].tolist())


Loaded df: (2693, 106)
Example columns: ['timestamp', 'ADA', 'spread_ADA', 'buys_ADA', 'sells_ADA', 'bids_vol_ADA_0', 'bids_vol_ADA_1', 'bids_vol_ADA_2', 'bids_vol_ADA_3', 'bids_vol_ADA_4', 'bids_vol_ADA_5', 'bids_vol_ADA_6', 'bids_vol_ADA_7', 'bids_vol_ADA_8', 'bids_vol_ADA_9', 'bids_vol_ADA_10', 'bids_vol_ADA_11', 'bids_vol_ADA_12', 'bids_vol_ADA_13', 'bids_vol_ADA_14', 'asks_vol_ADA_0', 'asks_vol_ADA_1', 'asks_vol_ADA_2', 'asks_vol_ADA_3', 'asks_vol_ADA_4']


## 2. multi-window correlations → edge features (T,E,W)

In [52]:
# ЛОГИЧЕСКИЙ БЛОК: multi-window correlations -> corr_array (T,E,W)
# ИСПОЛНЕНИЕ БЛОКА:

# Rolling correlations of log returns for every edge and every window.
# corr_array[t, e, w] is the correlation for edge e over window w ending at t.
candidate_windows = CFG["corr_windows"]
edges = EDGES

n_w = len(candidate_windows)
n_edges = len(edges)
T = len(df)

corr_array = np.zeros((T, n_edges, n_w), dtype=np.float32)

# Iterate over `edges` so the edge axis always follows the same order as
# EDGE_INDEX, whatever CFG["edges"] contains.
for wi, w in enumerate(candidate_windows):
    for ei, (src, dst) in enumerate(edges):
        rolling_corr = df[f"lr_{src}"].rolling(w, min_periods=1).corr(df[f"lr_{dst}"])
        # Undefined correlations (too few points / zero variance) become 0.
        corr_array[:, ei, wi] = np.nan_to_num(rolling_corr)

print("corr_array shape:", corr_array.shape)  # (T,E,W)


corr_array shape: (2693, 3, 5)


## 3. triple-barrier → y_tb + exit_ret → two-stage labels

In [53]:
# ЛОГИЧЕСКИЙ БЛОК: triple-barrier labels -> y_tb + exit_ret + two-stage labels
# ИСПОЛНЕНИЕ БЛОКА:

def triple_barrier_labels_from_lr(
    lr: pd.Series,
    horizon: int,
    vol_window: int,
    pt_mult: float,
    sl_mult: float,
    min_barrier: float,
    max_barrier: float,
):
    """Compute triple-barrier labels from a series of log returns.

    For each entry time ``t`` the cumulative log return over the next
    ``horizon`` steps is tracked. The first step at which it reaches
    ``+pt_mult * thr[t]`` (take-profit) or ``-sl_mult * thr[t]`` (stop-loss)
    ends the trade; otherwise the trade exits at ``t + horizon``.

    The barrier ``thr[t]`` is the rolling standard deviation of ``lr`` over
    ``vol_window`` steps, shifted by one step (no leakage), scaled by
    ``sqrt(horizon)`` and clipped to ``[min_barrier, max_barrier]``. Where the
    volatility is not yet available, ``min_barrier`` is used.

    Args:
        lr: Log returns, one value per time step.
        horizon: Maximum holding period in steps.
        vol_window: Window length of the rolling volatility estimate.
        pt_mult: Take-profit multiplier applied to the barrier.
        sl_mult: Stop-loss multiplier applied to the barrier.
        min_barrier: Lower clip value for the barrier.
        max_barrier: Upper clip value for the barrier.

    Returns:
        Tuple ``(y_tb, exit_ret, exit_t, thr)``:
          y_tb: int64 array, {0=down, 1=flat/no-trade, 2=up}
          exit_ret: float32 array, realized log return to exit (tp/sl/timeout)
          exit_t: int64 array, exit index
          thr: float64 array, barrier per t
        The last ``horizon + 1`` positions are not labelled and keep their
        defaults (``y_tb=1``, ``exit_ret=0``, ``exit_t=t``).
    """
    lr = lr.astype(float).copy()
    T = len(lr)

    # Cap min_periods at the window length: pandas rejects min_periods > window,
    # which the uncapped max(10, ...) would request for any vol_window < 10.
    min_periods = min(vol_window, max(10, vol_window // 10))
    vol = lr.rolling(vol_window, min_periods=min_periods).std().shift(1)
    thr = (vol * np.sqrt(horizon)).clip(lower=min_barrier, upper=max_barrier)

    y = np.ones(T, dtype=np.int64)
    exit_ret = np.zeros(T, dtype=np.float32)
    exit_t = np.arange(T, dtype=np.int64)

    lr_np = lr.fillna(0.0).to_numpy(dtype=np.float64)
    thr_np = thr.fillna(min_barrier).to_numpy(dtype=np.float64)

    for t in range(T - horizon - 1):
        up = pt_mult * thr_np[t]
        dn = -sl_mult * thr_np[t]

        cum = 0.0
        hit = 1
        et = t + horizon
        er = 0.0

        for dt in range(1, horizon + 1):
            cum += lr_np[t + dt]
            if cum >= up:
                hit = 2
                et = t + dt
                er = cum
                break
            if cum <= dn:
                hit = 0
                et = t + dt
                er = cum
                break
        else:
            # Neither barrier was touched: exit at the horizon with the
            # total return over the whole holding period.
            er = float(np.sum(lr_np[t + 1:t + horizon + 1]))

        y[t] = hit
        exit_ret[t] = er
        exit_t[t] = et

    return y, exit_ret, exit_t, thr_np

# --- build triple-barrier labels on the target asset ---
# All parameters are read from CFG so that the labels stay consistent with
# sample_t (which uses CFG["tb_horizon"]) and with any config change.
# "tb_vol_window" is optional; its fallback is the previously hard-coded 7*12.
y_tb, exit_ret, exit_t, thr = triple_barrier_labels_from_lr(
    df[f"lr_{TARGET_ASSET}"],
    horizon=CFG["tb_horizon"],
    vol_window=CFG.get("tb_vol_window", 7 * 12),
    pt_mult=CFG["tb_pt_mult"],
    sl_mult=CFG["tb_sl_mult"],
    min_barrier=CFG["tb_min_barrier"],
    max_barrier=CFG["tb_max_barrier"],
)

# two-stage labels
y_trade = (y_tb != 1).astype(np.int64)      # 1=trade, 0=no-trade
y_dir   = (y_tb == 2).astype(np.int64)      # 1=up, 0=down (meaningful for trade samples only)

print("TB dist [down,flat,up]:", np.bincount(y_tb, minlength=3))
print("Trade ratio:", y_trade.mean())


TB dist [down,flat,up]: [ 655 1311  727]
Trade ratio: 0.5131823245451169


## 4. build node tensor + edge tensor + sample_t

In [54]:
# ЛОГИЧЕСКИЙ БЛОК: build node features (T,N,F) + edge features (T,E,W) + sample_t
# ИСПОЛНЕНИЕ БЛОКА:

# Small constant added to the denominators of the ratio features below.
EPS = 1e-6

def safe_log1p(x: np.ndarray) -> np.ndarray:
    """Return ``log(1 + x)`` element-wise after flooring ``x`` at zero."""
    non_negative = np.maximum(x, 0.0)
    return np.log1p(non_negative)

def build_node_tensor(df: pd.DataFrame):
    """Build the per-asset node feature tensor.

    Features per asset, in column order:
      lr, spread,
      log_buys, log_sells, ofi,
      DI_15 (depth imbalance over all loaded book levels),
      DI_L0..DI_L{top_levels-1} (per-level depth imbalance),
      near_ratio_bid, near_ratio_ask,
      di_near, di_far

    The number of per-level DI features follows ``CFG["top_levels"]``; the
    near/far split follows ``CFG["near_levels"]``.

    Args:
        df: Wide frame with ``lr_{a}``, ``spread_{a}``, ``buys_{a}``,
            ``sells_{a}``, ``bids_vol_{a}_{i}`` and ``asks_vol_{a}_{i}``
            columns for every asset ``a`` in ``ASSETS``.

    Returns:
        ``(X, feat_names)`` where ``X`` is a float32 array of shape (T, N, F)
        and ``feat_names`` lists the F feature names.

    Raises:
        ValueError: If ``near_levels`` is not smaller than ``book_levels`` or
            ``top_levels`` is outside ``1..book_levels``.
    """
    book_levels = CFG["book_levels"]
    top_k = CFG["top_levels"]
    near_k = CFG["near_levels"]
    far_k = book_levels - near_k
    if far_k <= 0:
        raise ValueError("CFG['near_levels'] must be < CFG['book_levels']")
    if not (0 < top_k <= book_levels):
        raise ValueError("CFG['top_levels'] must be in 1..CFG['book_levels']")

    # Names are derived from top_k so they always match the stacked columns.
    feat_names = [
        "lr", "spread",
        "log_buys", "log_sells", "ofi",
        "DI_15",
        *[f"DI_L{i}" for i in range(top_k)],
        "near_ratio_bid", "near_ratio_ask",
        "di_near", "di_far",
    ]

    feats = []
    for a in ASSETS:
        lr = df[f"lr_{a}"].values.astype(np.float32)
        spread = df[f"spread_{a}"].values.astype(np.float32)

        buys = df[f"buys_{a}"].values.astype(np.float32)
        sells = df[f"sells_{a}"].values.astype(np.float32)

        log_buys = safe_log1p(buys).astype(np.float32)
        log_sells = safe_log1p(sells).astype(np.float32)

        # Signed trade-flow imbalance in [-1, 1].
        ofi = ((buys - sells) / (buys + sells + EPS)).astype(np.float32)

        # Order-book levels, shape (T, book_levels) per side.
        bids_lvls = np.stack([df[f"bids_vol_{a}_{i}"].values.astype(np.float32) for i in range(book_levels)], axis=1)
        asks_lvls = np.stack([df[f"asks_vol_{a}_{i}"].values.astype(np.float32) for i in range(book_levels)], axis=1)

        bid_sum_all = bids_lvls.sum(axis=1)
        ask_sum_all = asks_lvls.sum(axis=1)
        DI_15 = ((bid_sum_all - ask_sum_all) / (bid_sum_all + ask_sum_all + EPS)).astype(np.float32)

        # Per-level depth imbalance for the first top_k levels, shape (T, top_k).
        top_bids = bids_lvls[:, :top_k]
        top_asks = asks_lvls[:, :top_k]
        DI_top = ((top_bids - top_asks) / (top_bids + top_asks + EPS)).astype(np.float32)

        # near vs far
        bid_near = bids_lvls[:, :near_k].sum(axis=1)
        ask_near = asks_lvls[:, :near_k].sum(axis=1)
        bid_far = bids_lvls[:, near_k:].sum(axis=1)
        ask_far = asks_lvls[:, near_k:].sum(axis=1)

        near_ratio_bid = (bid_near / (bid_far + EPS)).astype(np.float32)
        near_ratio_ask = (ask_near / (ask_far + EPS)).astype(np.float32)

        di_near = ((bid_near - ask_near) / (bid_near + ask_near + EPS)).astype(np.float32)
        di_far = ((bid_far - ask_far) / (bid_far + ask_far + EPS)).astype(np.float32)

        # column_stack keeps the 2-D DI_top block as top_k consecutive columns.
        Xa = np.column_stack([
            lr, spread,
            log_buys, log_sells, ofi,
            DI_15,
            DI_top,
            near_ratio_bid, near_ratio_ask,
            di_near, di_far,
        ]).astype(np.float32)

        feats.append(Xa)

    X = np.stack(feats, axis=1).astype(np.float32)  # (T,N,F)
    return X, feat_names


# Raw (unscaled) node features and sanitised edge features (NaN/inf -> 0).
X_node_raw, node_feat_names = build_node_tensor(df)
edge_feat = np.nan_to_num(corr_array.astype(np.float32), nan=0.0, posinf=0.0, neginf=0.0)

T = len(df)
L = CFG["lookback"]
H = CFG["tb_horizon"]

# sample_t: times t for which the window [t-L+1 ... t] exists and the future
# TB exit does not run past the end of the data
t_min = L - 1
t_max = T - H - 2
sample_t = np.arange(t_min, t_max + 1)
n_samples = len(sample_t)

print("X_node_raw:", X_node_raw.shape, "edge_feat:", edge_feat.shape)
print("node_feat_names:", node_feat_names)
print("n_samples:", n_samples, "t range:", sample_t[0], sample_t[-1])


X_node_raw: (2693, 3, 15) edge_feat: (2693, 3, 5)
node_feat_names: ['lr', 'spread', 'log_buys', 'log_sells', 'ofi', 'DI_15', 'DI_L0', 'DI_L1', 'DI_L2', 'DI_L3', 'DI_L4', 'near_ratio_bid', 'near_ratio_ask', 'di_near', 'di_far']
n_samples: 2597 t range: 83 2679


## Train (folds) - Test split

In [55]:
# ЛОГИЧЕСКИЙ БЛОК: final holdout split (90% CV + 10% final test), time-ordered
# ИСПОЛНЕНИЕ БЛОКА:

def make_final_holdout_split(n_samples: int, final_test_frac: float):
    """Split sample indices into a leading CV part and a trailing hold-out part.

    Args:
        n_samples: Total number of (time-ordered) samples.
        final_test_frac: Fraction reserved for the final test, in (0, 0.5).

    Returns:
        ``(idx_cv, idx_final, n_cv, n_final)``: int64 index arrays for the CV
        and hold-out parts, followed by their sizes.

    Raises:
        ValueError: If ``final_test_frac`` is outside (0, 0.5) or 10 or fewer
            samples would remain for CV.
    """
    frac_ok = 0.0 < final_test_frac < 0.5
    if not frac_ok:
        raise ValueError("final_test_frac should be in (0, 0.5)")

    # At least one sample always goes to the hold-out set.
    holdout_size = max(1, int(round(final_test_frac * n_samples)))
    cv_size = n_samples - holdout_size
    if cv_size <= 10:
        raise ValueError("Too few samples left for CV after holdout split.")

    every_idx = np.arange(n_samples, dtype=np.int64)
    return every_idx[:cv_size], every_idx[cv_size:], cv_size, holdout_size

# Reserve the trailing CFG["final_test_frac"] of samples as the final hold-out;
# everything before it is used for walk-forward cross-validation.
idx_cv_all, idx_final_test, n_samples_cv, n_samples_final = make_final_holdout_split(
    n_samples=n_samples,
    final_test_frac=CFG["final_test_frac"],
)

print("Holdout split:")
print("  n_samples total:", n_samples)
print("  n_samples CV   :", n_samples_cv, f"({100*(n_samples_cv/n_samples):.1f}%)")
print("  n_samples FINAL:", n_samples_final, f"({100*(n_samples_final/n_samples):.1f}%)")
print("  CV range   :", idx_cv_all[0], idx_cv_all[-1])
print("  FINAL range:", idx_final_test[0], idx_final_test[-1])


Holdout split:
  n_samples total: 2597
  n_samples CV   : 2337 (90.0%)
  n_samples FINAL: 260 (10.0%)
  CV range   : 0 2336
  FINAL range: 2337 2596



## 5. walk-forward splits (с глобальными окнами)

In [56]:
# ЛОГИЧЕСКИЙ БЛОК: walk-forward splits (expanding train + fixed val/test) on CV-part only
# ИСПОЛНЕНИЕ БЛОКА:

def make_walk_forward_splits(n_samples: int,
                             train_min_frac: float,
                             val_window_frac: float,
                             test_window_frac: float,
                             step_window_frac: float):
    """Build expanding-train walk-forward folds over ``range(n_samples)``.

    Every fold trains on ``[0, train_end)``, validates on the next
    ``val`` window and tests on the ``test`` window after that. ``train_end``
    starts at ``int(train_min_frac * n_samples)`` and advances by the step
    window until the test window would run past ``n_samples``. Window sizes
    are the given fractions of ``n_samples``, each at least 1.

    Returns:
        List of ``(idx_train, idx_val, idx_test)`` int64 index arrays.
    """
    first_train_end = int(train_min_frac * n_samples)
    val_len, test_len, stride = (
        max(1, int(frac * n_samples))
        for frac in (val_window_frac, test_window_frac, step_window_frac)
    )

    # Largest train_end for which val + test still fit inside the data.
    last_train_end = n_samples - val_len - test_len

    folds = []
    for train_end in range(first_train_end, last_train_end + 1, stride):
        val_end = train_end + val_len
        test_end = val_end + test_len
        folds.append((
            np.arange(0, train_end, dtype=np.int64),
            np.arange(train_end, val_end, dtype=np.int64),
            np.arange(val_end, test_end, dtype=np.int64),
        ))
    return folds

# IMPORTANT: the folds are built on the CV part only (the leading 90%), so the
# final hold-out samples never appear in any train/val/test fold.
walk_splits = make_walk_forward_splits(
    n_samples=n_samples_cv,
    train_min_frac=CFG["train_min_frac"],
    val_window_frac=CFG["val_window_frac"],
    test_window_frac=CFG["test_window_frac"],
    step_window_frac=CFG["step_window_frac"],
)

print("n_folds:", len(walk_splits))
for i, (a, b, c) in enumerate(walk_splits):
    print(f" fold {i+1}: train {len(a)} | val {len(b)} | test {len(c)}")

print("\nFINAL HOLDOUT:")
print(" final_test size:", len(idx_final_test))


n_folds: 4
 fold 1: train 1168 | val 233 | test 233
 fold 2: train 1401 | val 233 | test 233
 fold 3: train 1634 | val 233 | test 233
 fold 4: train 1867 | val 233 | test 233

FINAL HOLDOUT:
 final_test size: 260


## 6. Dataset + scaling 

In [57]:
# =========================
# Step 8. Dataset + Scaling
# =========================
# ЛОГИЧЕСКИЙ БЛОК: подготовка y для sample_t + scaler fit on train only + Dataset/Dataloader
# ИСПОЛНЕНИЕ БЛОКА:

from dataclasses import dataclass

def fit_robust_scaler_past_only(X_node_raw: np.ndarray, max_t_fit: int):
    """Fit a RobustScaler on the time steps ``[0..max_t_fit]`` only.

    All nodes are pooled: the (T_fit, N, F) slice is flattened to
    (T_fit * N, F), so one set of per-feature statistics is shared by every
    node.

    Args:
        X_node_raw: Raw node features of shape (T, N, F).
        max_t_fit: Last time index (inclusive) used for fitting.

    Returns:
        The fitted ``RobustScaler``.
    """
    past = X_node_raw[:max_t_fit + 1]  # (T_fit,N,F)
    _, _, n_feat = past.shape
    robust = RobustScaler(with_centering=True, with_scaling=True, quantile_range=(25.0, 75.0))
    robust.fit(past.reshape(-1, n_feat))
    return robust

def transform_and_clip(X_node_raw: np.ndarray, scaler: RobustScaler, max_abs_feat: float):
    """Scale the whole (T, N, F) timeline with ``scaler`` and clip the result.

    The array is flattened to (T * N, F) for ``scaler.transform``, reshaped
    back and cast to float32. When ``max_abs_feat`` is not ``None`` the values
    are clipped to ``[-max_abs_feat, max_abs_feat]``.
    """
    n_t, n_nodes, n_feat = X_node_raw.shape
    flat = X_node_raw.reshape(-1, n_feat)
    scaled = scaler.transform(flat).reshape(n_t, n_nodes, n_feat).astype(np.float32)
    if max_abs_feat is None:
        return scaled
    return np.clip(scaled, -max_abs_feat, max_abs_feat)

@dataclass
class Batch:
    """Named container for the tensors of one mini-batch.

    B = batch size, L = lookback length, N = nodes, F = node features,
    E = edges, W = edge features (correlation windows).
    """
    x: torch.Tensor        # (B,L,N,F) node-feature windows
    e: torch.Tensor        # (B,L,E,W) edge-feature windows
    y_trade: torch.Tensor  # (B,) trade / no-trade label
    y_dir: torch.Tensor    # (B,) valid only if y_trade==1
    y_tb: torch.Tensor     # (B,) {0,1,2}
    exit_ret: torch.Tensor # (B,) realized return at the triple-barrier exit

class WindowGraphDataset(Dataset):
    """Dataset of fixed-length look-back windows over node and edge features.

    Item ``idx`` corresponds to time ``t = sample_t_list[idx]`` and yields the
    window ``[t - lookback + 1 ... t]`` of both feature tensors together with
    the labels stored at position ``idx``.
    """

    def __init__(self,
                 X_node: torch.Tensor,      # (T,N,F) scaled
                 E_feat: torch.Tensor,      # (T,E,W)
                 sample_t_list: np.ndarray, # (n_samples,)
                 y_trade_t: np.ndarray,
                 y_dir_t: np.ndarray,
                 y_tb_t: np.ndarray,
                 exit_ret_t: np.ndarray,
                 lookback: int):
        """Store the tensors and validate that every window is well defined.

        Args:
            X_node: Node features indexed by time along dim 0.
            E_feat: Edge features indexed by time along dim 0.
            sample_t_list: Window end times, one per sample.
            y_trade_t, y_dir_t, y_tb_t, exit_ret_t: Per-sample labels, aligned
                with ``sample_t_list``.
            lookback: Window length.

        Raises:
            ValueError: If a label array's length differs from
                ``sample_t_list``, or if any window would start before index 0
                or end past the available time steps.
        """
        self.X_node = X_node
        self.E_feat = E_feat
        self.sample_t = sample_t_list.astype(np.int64)
        self.y_trade = y_trade_t.astype(np.int64)
        self.y_dir = y_dir_t.astype(np.int64)
        self.y_tb = y_tb_t.astype(np.int64)
        self.exit_ret = exit_ret_t.astype(np.float32)
        self.L = int(lookback)

        n = len(self.sample_t)
        label_arrays = (
            ("y_trade_t", self.y_trade),
            ("y_dir_t", self.y_dir),
            ("y_tb_t", self.y_tb),
            ("exit_ret_t", self.exit_ret),
        )
        for label_name, values in label_arrays:
            if len(values) != n:
                raise ValueError(
                    f"{label_name} has length {len(values)}, expected {n} (len(sample_t_list))"
                )

        if n:
            t_lo = int(self.sample_t.min())
            t_hi = int(self.sample_t.max())
            # A negative slice start would silently wrap around and return a
            # wrong (usually empty) window, so reject it up front.
            if t_lo < self.L - 1:
                raise ValueError(
                    f"sample t={t_lo} is too early for lookback={self.L}: window would start before 0"
                )
            n_steps = min(len(self.X_node), len(self.E_feat))
            if t_hi >= n_steps:
                raise ValueError(
                    f"sample t={t_hi} is outside the available time range [0, {n_steps - 1}]"
                )

    def __len__(self):
        return len(self.sample_t)

    def __getitem__(self, idx: int):
        """Return ``(x_win, e_win, y_trade, y_dir, y_tb, exit_ret)`` for sample ``idx``."""
        t = int(self.sample_t[idx])
        L = self.L
        s = t - L + 1
        x_win = self.X_node[s:t+1]   # (L,N,F)
        e_win = self.E_feat[s:t+1]   # (L,E,W)

        return (
            x_win,
            e_win,
            torch.tensor(self.y_trade[idx], dtype=torch.long),
            torch.tensor(self.y_dir[idx], dtype=torch.long),
            torch.tensor(self.y_tb[idx], dtype=torch.long),
            torch.tensor(self.exit_ret[idx], dtype=torch.float32),
        )

def make_loaders_for_fold(
    X_node_raw: np.ndarray,
    edge_feat: np.ndarray,
    sample_t: np.ndarray,
    y_tb: np.ndarray,
    y_trade: np.ndarray,
    y_dir: np.ndarray,
    exit_ret: np.ndarray,
    idx_train: np.ndarray,
    idx_val: np.ndarray,
    idx_test: np.ndarray,
    batch_size: int,
):
    """Build train/val/test DataLoaders for one walk-forward fold.

    The scaler is fitted on raw features up to the latest training time only,
    then applied to the whole timeline (clipped to ``CFG["max_abs_feat"]``).

    Note: ``idx_*`` are indices into ``sample_t`` (the CV sample space); the
    label arrays ``y_tb``, ``y_trade``, ``y_dir`` and ``exit_ret`` are indexed
    by time and are aligned here via ``sample_t``.

    Returns:
        ``(dl_train, dl_val, dl_test, scaler)``; only the train loader shuffles.

    Raises:
        ValueError: If ``idx_train`` is empty.
    """
    if len(idx_train) == 0:
        raise ValueError("idx_train is empty: cannot fit the scaler")

    # fit scaler on past only; max() rather than [-1] so the result does not
    # depend on idx_train being sorted
    max_train_t = int(sample_t[idx_train].max())
    scaler = fit_robust_scaler_past_only(X_node_raw, max_t_fit=max_train_t)
    X_scaled = transform_and_clip(X_node_raw, scaler, CFG["max_abs_feat"])

    # torch tensors (cpu)
    X_t = torch.from_numpy(X_scaled)                         # (T,N,F)
    E_t = torch.from_numpy(edge_feat.astype(np.float32))     # (T,E,W)

    # labels aligned to dataset index-space (same length as sample_t)
    y_tb_t    = y_tb[sample_t]
    y_trade_t = y_trade[sample_t]
    y_dir_t   = y_dir[sample_t]
    exit_t    = exit_ret[sample_t]

    def build_dataset(idx: np.ndarray) -> WindowGraphDataset:
        return WindowGraphDataset(
            X_t, E_t, sample_t[idx],
            y_trade_t[idx], y_dir_t[idx], y_tb_t[idx], exit_t[idx],
            CFG["lookback"],
        )

    ds_train = build_dataset(idx_train)
    ds_val = build_dataset(idx_val)
    ds_test = build_dataset(idx_test)

    pin = (DEVICE.type == "cuda")
    dl_train = DataLoader(ds_train, batch_size=batch_size, shuffle=True, drop_last=False, num_workers=0, pin_memory=pin)
    dl_val   = DataLoader(ds_val,   batch_size=batch_size, shuffle=False, drop_last=False, num_workers=0, pin_memory=pin)
    dl_test  = DataLoader(ds_test,  batch_size=batch_size, shuffle=False, drop_last=False, num_workers=0, pin_memory=pin)

    return dl_train, dl_val, dl_test, scaler

# --- quick sanity (no training; only checks that the fold-1 loaders can be built) ---
sample_t_cv = sample_t[:n_samples_cv]  # CV-part only (time-ordered)
(idx_train0, idx_val0, idx_test0) = walk_splits[0]
dl_tr0, dl_va0, dl_te0, _ = make_loaders_for_fold(
    X_node_raw=X_node_raw,
    edge_feat=edge_feat,
    sample_t=sample_t_cv,
    y_tb=y_tb,
    y_trade=y_trade,
    y_dir=y_dir,
    exit_ret=exit_ret,
    idx_train=idx_train0,
    idx_val=idx_val0,
    idx_test=idx_test0,
    batch_size=CFG["batch_size"],
)
# Pull one training batch and print the shape of each returned tensor.
xb, eb, ytr, ydr, ytb_b, exb = next(iter(dl_tr0))
print("SANITY batch shapes:",
      xb.shape, eb.shape, ytr.shape, ydr.shape, ytb_b.shape, exb.shape)


SANITY batch shapes: torch.Size([64, 84, 3, 15]) torch.Size([64, 84, 3, 5]) torch.Size([64]) torch.Size([64]) torch.Size([64]) torch.Size([64])


## 7. Model (один класс, 3-class head: down/flat/up) + EdgeGatedMP

In [58]:
# ============================================
# Step 9 (REPLACE). GNN + Temporal Attention + 3-class head
# ============================================
# ЛОГИЧЕСКИЙ БЛОК: spatial GNN + temporal attention + single 3-class logits
# ИСПОЛНЕНИЕ БЛОКА:

import torch
import torch.nn as nn
import torch.nn.functional as F

def _fix_heads(d_model: int, heads: int) -> int:
    for h in range(heads, 0, -1):
        if d_model % h == 0:
            return h
    return 1

class EdgeGatedMPLayer(nn.Module):
    """One edge-gated message-passing layer followed by a feed-forward block.

    For every directed edge (src -> dst) a linear projection of the source
    state is multiplied by a sigmoid gate computed from the source state, the
    destination state and the edge features. Messages are summed per
    destination node and added to the node state (residual + LayerNorm); a
    position-wise feed-forward block with its own residual + LayerNorm follows.
    """

    def __init__(self, hidden: int, edge_dim: int, dropout: float):
        super().__init__()
        self.msg_lin = nn.Linear(hidden, hidden, bias=False)
        self.gate_mlp = nn.Sequential(
            nn.Linear(2 * hidden + edge_dim, hidden),
            nn.GELU(),
            nn.Linear(hidden, hidden),
        )
        self.ln1 = nn.LayerNorm(hidden)
        self.ff = nn.Sequential(
            nn.Linear(hidden, 4 * hidden),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(4 * hidden, hidden),
        )
        self.ln2 = nn.LayerNorm(hidden)
        self.drop = nn.Dropout(dropout)

    def forward(self, h: torch.Tensor, edge_index: torch.Tensor, e: torch.Tensor):
        """Update node states.

        Args:
            h: Node states, (BL, N, H).
            edge_index: (E, 2) long tensor of [src, dst] node indices.
            e: Edge features, (BL, E, W).

        Returns:
            Updated node states, (BL, N, H).
        """
        BL, N, H = h.shape
        src = edge_index[:, 0]
        dst = edge_index[:, 1]

        h_src = h[:, src, :]  # (BL,E,H)
        h_dst = h[:, dst, :]  # (BL,E,H)

        gate = torch.sigmoid(self.gate_mlp(torch.cat([h_src, h_dst, e], dim=-1)))  # (BL,E,H)
        msg = self.msg_lin(h_src) * gate  # (BL,E,H)

        agg = torch.zeros(BL, N, H, device=h.device, dtype=h.dtype)
        # index_add_ needs source and destination dtypes to match. Under
        # autocast the Linear outputs (msg) can be half precision while h is
        # float32, so cast msg to the accumulator dtype first.
        agg.index_add_(1, dst, msg.to(agg.dtype))

        h2 = self.ln1(h + self.drop(agg))
        h3 = self.ln2(h2 + self.drop(self.ff(h2)))
        return h3

class SpatialGNN(nn.Module):
    """Applies a stack of EdgeGatedMPLayer independently at every time step."""

    def __init__(self, in_dim: int, hidden: int, edge_dim: int, layers: int, dropout: float):
        super().__init__()
        self.in_proj = nn.Linear(in_dim, hidden)
        self.layers = nn.ModuleList(
            EdgeGatedMPLayer(hidden, edge_dim, dropout) for _ in range(layers)
        )
        self.drop = nn.Dropout(dropout)

    def forward(self, x: torch.Tensor, edge_index: torch.Tensor, e: torch.Tensor):
        """Map node features (B,L,N,F) and edge features (B,L,E,W) to states (B,L,N,H)."""
        batch, steps, nodes, _ = x.shape

        states = self.drop(self.in_proj(x))  # (B,L,N,H)
        width = states.shape[-1]

        # Merge batch and time so each (sample, time step) is one graph.
        merged = batch * steps
        states = states.reshape(merged, nodes, width)
        edge_attr = e.reshape(merged, e.shape[2], e.shape[3])

        for mp_layer in self.layers:
            states = mp_layer(states, edge_index, edge_attr)

        return states.reshape(batch, steps, nodes, width)

class TemporalEncoderXformerCLS(nn.Module):
    """Transformer encoder that summarises a sequence through a [CLS] token.

    A learnable CLS vector is prepended to the sequence, optional learned
    positional embeddings are added, and the encoder output at the CLS
    position is returned as the sequence summary.
    """

    def __init__(self, d_model: int, n_layers: int, heads: int, dropout: float, ff_mult: int,
                 use_pos_emb: bool, causal: bool, max_len: int):
        """
        Args:
            d_model: Token width.
            n_layers: Number of encoder layers.
            heads: Requested head count; reduced by ``_fix_heads`` to a
                divisor of ``d_model``.
            dropout: Dropout used inside the encoder layers.
            ff_mult: Feed-forward width is ``ff_mult * d_model``.
            use_pos_emb: Whether to add learned positional embeddings.
            causal: Whether sequence tokens may attend only to earlier tokens.
            max_len: Maximum sequence length (excluding the CLS token).
        """
        super().__init__()
        self.d_model = d_model
        self.use_pos = use_pos_emb
        self.causal = causal

        self.cls = nn.Parameter(torch.zeros(1, 1, d_model))
        self.pos = nn.Parameter(torch.zeros(1, max_len + 1, d_model)) if use_pos_emb else None  # +1 for CLS

        heads = _fix_heads(d_model, heads)
        enc_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=heads,
            dim_feedforward=ff_mult * d_model,
            dropout=dropout,
            activation="gelu",
            batch_first=True,
            norm_first=True,
        )
        self.enc = nn.TransformerEncoder(enc_layer, num_layers=n_layers)

    def forward(self, x: torch.Tensor):
        """Encode ``x`` of shape (B,L,H) and return the CLS summary (B,H)."""
        B, L, H = x.shape
        cls = self.cls.expand(B, 1, H)
        z = torch.cat([cls, x], dim=1)  # (B,L+1,H)

        if self.pos is not None:
            z = z + self.pos[:, :L + 1, :]

        attn_mask = None
        if self.causal:
            sz = L + 1
            # Boolean mask, True = attention not allowed (row = query, col = key).
            attn_mask = torch.triu(torch.ones(sz, sz, device=z.device, dtype=torch.bool), diagonal=1)
            # CLS sits at position 0, so a plain causal mask would let it see
            # only itself and the returned summary would ignore the input.
            # Let the CLS query attend to the whole window instead ...
            attn_mask[0, :] = False
            # ... and stop sequence tokens from reading CLS, which would
            # otherwise leak later steps back into earlier ones in deeper layers.
            attn_mask[1:, 0] = True

        z = self.enc(z, mask=attn_mask)
        return z[:, 0, :]

class TemporalEncoderAttnPool(nn.Module):
    """Pools a sequence into one vector with a single learnable attention query."""

    def __init__(self, d_model: int, heads: int, dropout: float, use_pos_emb: bool, max_len: int):
        super().__init__()
        self.use_pos = use_pos_emb
        self.q = nn.Parameter(torch.zeros(1, 1, d_model))
        if use_pos_emb:
            self.pos = nn.Parameter(torch.zeros(1, max_len, d_model))
        else:
            self.pos = None

        n_heads = _fix_heads(d_model, heads)
        self.mha = nn.MultiheadAttention(d_model, n_heads, dropout=dropout, batch_first=True)
        self.ln = nn.LayerNorm(d_model)

    def forward(self, x: torch.Tensor):
        """Reduce ``x`` of shape (B,L,H) to (B,H)."""
        batch, steps, width = x.shape
        tokens = x if self.pos is None else x + self.pos[:, :steps, :]

        # One query per sample attends over all time steps.
        query = self.q.expand(batch, 1, width)
        pooled, _ = self.mha(query, tokens, tokens, need_weights=False)
        return self.ln(pooled.squeeze(1))

class GNNTemporal3Class(nn.Module):
    """Spatial GNN + temporal attention encoder with a 3-class head.

    The GNN is applied at every time step, the target node's state sequence is
    summarised by the temporal encoder selected via ``CFG["temporal_mode"]``,
    and a linear head produces logits for down / flat / up.
    """

    def __init__(self, node_in: int, edge_in: int):
        """
        Args:
            node_in: Number of input features per node.
            edge_in: Number of input features per edge.

        Raises:
            ValueError: If ``CFG["hidden"] != CFG["Attn_hidden"]`` or
                ``CFG["temporal_mode"]`` is unknown.
        """
        super().__init__()
        dropout = CFG["dropout"] if CFG["attn_dropout"] is None else CFG["attn_dropout"]
        hidden = CFG["hidden"]
        d_model = CFG["Attn_hidden"]
        # Explicit raise instead of assert: the GNN output feeds the temporal
        # encoder directly, and asserts are stripped under `python -O`.
        if hidden != d_model:
            raise ValueError("Ожидаю CFG['hidden'] == CFG['Attn_hidden']")

        self.gnn = SpatialGNN(node_in, hidden, edge_in, CFG["gnn_layers"], CFG["dropout"])

        max_len = CFG["lookback"]
        if CFG["temporal_mode"] == "xformer_cls":
            self.temporal = TemporalEncoderXformerCLS(
                d_model=d_model,
                n_layers=CFG["Attn_layers"],
                heads=CFG["attn_heads"],
                dropout=dropout,
                ff_mult=CFG["attn_ff_mult"],
                use_pos_emb=CFG["attn_use_pos_emb"],
                causal=CFG["attn_causal"],
                max_len=max_len,
            )
        elif CFG["temporal_mode"] == "attn_pool":
            self.temporal = TemporalEncoderAttnPool(
                d_model=d_model,
                heads=CFG["attn_heads"],
                dropout=dropout,
                use_pos_emb=CFG["attn_use_pos_emb"],
                max_len=max_len,
            )
        else:
            raise ValueError(f"Unknown temporal_mode={CFG['temporal_mode']}")

        self.head = nn.Sequential(
            nn.LayerNorm(d_model),
            nn.Dropout(CFG["dropout"]),
            nn.Linear(d_model, 3),  # down / flat / up
        )

    def forward(self, x: torch.Tensor, e: torch.Tensor, edge_index: torch.Tensor, target_node: int):
        """Return logits (B,3) for node features x (B,L,N,F) and edge features e (B,L,E,W)."""
        h = self.gnn(x, edge_index, e)              # (B,L,N,H)
        h_tgt = h[:, :, target_node, :]             # (B,L,H)
        z = self.temporal(h_tgt)                    # (B,H)
        return self.head(z)

# --- sanity forward: build the model and run one batch through it ---
node_in_dim = X_node_raw.shape[-1]
edge_in_dim = edge_feat.shape[-1]
model = GNNTemporal3Class(node_in=node_in_dim, edge_in=edge_in_dim).to(DEVICE)

xb, eb, ytr, ydr, ytb_b, exb = next(iter(dl_tr0))
# Shape check only, so no gradients are needed.
with torch.no_grad():
    logits3 = model(xb.to(DEVICE), eb.to(DEVICE), EDGE_INDEX.to(DEVICE), TARGET_NODE)
print("SANITY logits3:", logits3.shape)  # (B,3)


SANITY logits3: torch.Size([64, 3])




## 8. Training/Eval: Stage A (trade) и Stage B (direction)

In [59]:
# ==========================
# Step 10 (REPLACE v2). Metrics + PnL stats for 3-class head
# ==========================
# ЛОГИЧЕСКИЙ БЛОК: f1 (3-class), AUC_trade, AUC_OVR_3c, PnL grid + mean/std/sharpe-proxy
# ИСПОЛНЕНИЕ БЛОКА:

import numpy as np
import math
from sklearn.metrics import f1_score, roc_auc_score

# Threshold below which p_up + p_down is treated as zero in
# derive_p_trade_and_p_up_cond.
# NOTE(review): this rebinds the module-level EPS that was set to 1e-6 for the
# feature code; build_node_tensor reads EPS at call time, so calling it after
# this line uses 1e-9 instead.
EPS = 1e-9

def safe_auc_binary(y_true: np.ndarray, y_score: np.ndarray):
    """Return the binary ROC AUC, or NaN when it cannot be computed.

    NaN is returned when ``y_true`` contains fewer than two distinct values or
    when the computation raises.
    """
    nan = float("nan")
    try:
        has_two_classes = len(np.unique(y_true)) >= 2
        return float(roc_auc_score(y_true, y_score)) if has_two_classes else nan
    except Exception:
        # Best-effort metric: any failure is reported as NaN.
        return nan

def safe_auc_ovr_multiclass(y_true: np.ndarray, y_prob_3: np.ndarray):
    """Return the macro-averaged one-vs-rest ROC AUC, or NaN on failure.

    NaN is returned when ``y_true`` contains fewer than two distinct classes
    or when the computation raises.
    """
    nan = float("nan")
    try:
        n_present = len(np.unique(y_true))
        if n_present >= 2:
            return float(roc_auc_score(y_true, y_prob_3, multi_class="ovr", average="macro"))
        return nan
    except Exception:
        # Best-effort metric: any failure is reported as NaN.
        return nan

def probs_from_logits3(logits3: torch.Tensor):
    """Convert logits to probabilities with a softmax over the last dimension."""
    probs = torch.softmax(logits3, dim=-1)
    return probs

def derive_p_trade_and_p_up_cond(p3: np.ndarray):
    """Derive trade and conditional-up probabilities from 3-class probabilities.

    Args:
        p3: Array whose columns 0, 1, 2 are P(down), P(flat), P(up).

    Returns:
        ``(p_trade, p_up_cond)`` where ``p_trade = 1 - P(flat)`` and
        ``p_up_cond = P(up) / (P(up) + P(down))``, set to 0.5 where the
        denominator is not greater than ``EPS``.
    """
    p_down = p3[:, 0]
    p_flat = p3[:, 1]
    p_up   = p3[:, 2]

    p_trade = 1.0 - p_flat
    denom = p_up + p_down
    # np.where evaluates both branches, so the division runs for every row,
    # including those with a zero denominator. Silence the resulting
    # divide/invalid warnings; those rows are replaced by 0.5 anyway.
    with np.errstate(divide="ignore", invalid="ignore"):
        ratio = p_up / denom
    p_up_cond = np.where(denom > EPS, ratio, 0.5)
    return p_trade, p_up_cond

def derive_pred_tb_from_two_thresholds(p3: np.ndarray, thr_trade: float=0.5, thr_dir: float=0.5):
    """Turn 3-class probabilities into {0=down, 1=flat, 2=up} predictions.

    A sample is traded only when ``p_trade >= thr_trade`` and the direction
    confidence ``max(p_up_cond, 1 - p_up_cond) >= thr_dir``; a traded sample is
    labelled up when ``p_up_cond >= 0.5`` and down otherwise. Everything else
    stays flat.
    """
    p_trade, p_up_cond = derive_p_trade_and_p_up_cond(p3)
    direction_conf = np.maximum(p_up_cond, 1.0 - p_up_cond)

    confident = (p_trade >= thr_trade) & (direction_conf >= thr_dir)
    go_long = confident & (p_up_cond >= 0.5)
    go_short = confident & (p_up_cond < 0.5)

    labels = np.full(len(p_trade), 1, dtype=np.int64)  # flat
    labels[go_long] = 2
    labels[go_short] = 0
    return labels

def pnl_from_preds(pred_tb: np.ndarray, exit_ret: np.ndarray, cost_bps: float):
    """Return the net PnL per sample for the predicted positions.

    Predictions of 2 are long (+1), 0 are short (-1) and 1 are flat (0). The
    gross PnL is ``position * exit_ret``; every non-flat prediction pays
    ``cost_bps`` basis points.
    """
    is_long = (pred_tb == 2).astype(np.float32)
    is_short = (pred_tb == 0).astype(np.float32)
    position = is_long - is_short

    fee_per_trade = cost_bps / 1e4
    traded = (pred_tb != 1).astype(np.float32)

    net = position * exit_ret - fee_per_trade * traded
    return net  # net pnl per sample

def pnl_stats(net: np.ndarray, pred_tb: np.ndarray):
    """Summarise net PnL over executed trades only (samples whose label is not 1).

    Returns:
        dict with ``trades``, ``sum``, ``mean``, ``std`` (sample std, ddof=1)
        and ``sharpe``, where sharpe-proxy = mean / std * sqrt(n_trades).
        With no trades, ``sum`` is 0.0 and the other statistics are NaN.
        ``std`` is NaN for a single trade; ``sharpe`` is NaN unless ``std``
        is finite and positive.
    """
    nan = float("nan")
    executed = (pred_tb != 1)
    n_trades = int(executed.sum())
    if n_trades == 0:
        return {"trades": 0, "sum": 0.0, "mean": nan, "std": nan, "sharpe": nan}

    pnl = net[executed].astype(np.float64)
    mean = float(pnl.mean())
    std = nan if n_trades <= 1 else float(pnl.std(ddof=1))

    if np.isfinite(std) and std > 0:
        sharpe = float(mean / std * math.sqrt(n_trades))
    else:
        sharpe = nan

    return {"trades": n_trades, "sum": float(pnl.sum()), "mean": mean, "std": std, "sharpe": sharpe}

def pnl_grid_search_from_p3(p3: np.ndarray, y_tb_true: np.ndarray, exit_ret: np.ndarray,
                            cost_bps: float, thr_trade_grid, thr_dir_grid, min_trades: int=50):
    """Grid-search (thr_trade, thr_dir) for the highest summed net PnL.

    Every threshold pair is turned into labels with
    ``derive_pred_tb_from_two_thresholds``; pairs producing fewer than
    ``min_trades`` non-flat labels are skipped. Ties keep the first pair
    encountered, and a NaN PnL sum never replaces the current best.

    Args:
        p3: per-class probabilities passed to the label derivation.
        y_tb_true: true labels, used only for the macro-F1 of the winning pair.
        exit_ret: realised returns passed to ``pnl_from_preds``.
        cost_bps: per-trade cost in basis points.
        thr_trade_grid, thr_dir_grid: iterables of candidate thresholds.
        min_trades: minimum number of non-flat predictions for a pair to count.

    Returns:
        dict with ``pnl_sum``, ``thr_trade``, ``thr_dir``, ``trades``,
        ``pnl_mean``, ``pnl_std``, ``pnl_sharpe`` and ``f1m``. When no pair
        qualifies, the thresholds are None and ``pnl_sum`` stays at -1e18.
    """
    best = {
        "pnl_sum": -1e18,
        "thr_trade": None,
        "thr_dir": None,
        "trades": 0,
        "pnl_mean": float("nan"),
        "pnl_std": float("nan"),
        "pnl_sharpe": float("nan"),
        "f1m": float("nan"),
    }
    for tt in thr_trade_grid:
        for td in thr_dir_grid:
            pred = derive_pred_tb_from_two_thresholds(p3, tt, td)
            trades = int(np.sum(pred != 1))
            if trades < min_trades:
                continue

            net = pnl_from_preds(pred, exit_ret, cost_bps)
            st = pnl_stats(net, pred)
            pnl_sum = st["sum"]

            # Written as "not >" so that a NaN sum is skipped as well.
            if not pnl_sum > best["pnl_sum"]:
                continue

            # Macro-F1 is only reported for the winning pair, so compute it
            # for improving candidates only instead of at every grid point.
            best = {
                "pnl_sum": pnl_sum,
                "thr_trade": float(tt),
                "thr_dir": float(td),
                "trades": trades,
                "pnl_mean": st["mean"],
                "pnl_std": st["std"],
                "pnl_sharpe": st["sharpe"],
                "f1m": float(f1_score(y_tb_true, pred, average="macro")),
            }
    return best

# Confirmation message emitted once the metric/PnL helpers of this cell are defined.
print("OK: metrics+pnl v2 ready.")


OK: metrics+pnl v2 ready.


## 9. Two-stage PnL by confidence thresholds

In [60]:
# ==========================
# Step 11 (REPLACE v2). Train / Eval for 3-class head (select best epoch by PnL)
# ==========================
# ЛОГИЧЕСКИЙ БЛОК: CE(3-class) + лог tr_loss, va_loss, f1, va_auc, pnl_sum/mean/std/sharpe
# ИСПОЛНЕНИЕ БЛОКА:

def train_one_epoch_3c(model, dl, optimizer, edge_index, target_node):
    """Run one training pass over ``dl`` using a 3-class cross-entropy loss.

    Each batch is a 6-tuple; elements 0, 1 and 4 are used as node features,
    edge features and class labels, the remaining three are ignored here.
    Mixed precision is enabled only when CFG["use_amp"] is truthy and DEVICE
    is CUDA. Gradients are clipped to CFG["grad_clip"] unless it is None.

    Returns:
        Sample-weighted mean training loss (0.0 if ``dl`` yields no batches).
    """
    model.train()

    amp_enabled = bool(CFG["use_amp"] and DEVICE.type == "cuda")
    scaler = torch.amp.GradScaler('cuda', enabled=amp_enabled)

    loss_sum = 0.0
    seen = 0
    for batch in dl:
        node_x, edge_x, _, _, labels_cpu, _ = batch
        node_x = node_x.to(DEVICE)
        edge_x = edge_x.to(DEVICE)
        labels = labels_cpu.to(DEVICE)

        optimizer.zero_grad(set_to_none=True)
        with torch.amp.autocast('cuda', enabled=amp_enabled):
            logits3 = model(node_x, edge_x, edge_index, target_node)
            loss = F.cross_entropy(logits3, labels)

        scaler.scale(loss).backward()
        if CFG["grad_clip"] is not None:
            # Gradients must be unscaled before their norm is clipped.
            scaler.unscale_(optimizer)
            nn.utils.clip_grad_norm_(model.parameters(), CFG["grad_clip"])
        scaler.step(optimizer)
        scaler.update()

        batch_n = node_x.size(0)
        loss_sum += float(loss.item()) * batch_n
        seen += batch_n

    return loss_sum / max(1, seen)

@torch.no_grad()
def eval_one_epoch_3c(model, dl, edge_index, target_node, thr_trade=None, thr_dir=None):
    """Evaluate the 3-class model on ``dl``: loss, classification metrics and PnL.

    Args:
        model, edge_index, target_node: used as ``model(xb, eb, edge_index, target_node)``.
        dl: iterable of 6-tuples; elements 0, 1, 4 and 5 are used as node
            features, edge features, class labels and exit returns (elements
            2 and 3 are ignored).
        thr_trade, thr_dir: optional fixed thresholds. When BOTH are given,
            PnL is computed at exactly these thresholds and no grid search is
            run on ``dl`` -- use this to apply validation-selected thresholds
            to a test loader. When either is None, the thresholds are
            grid-searched on ``dl`` itself (previous behaviour), which makes
            the PnL figures in-sample with respect to ``dl``.

    Returns:
        dict with ``va_loss``, ``f1m_3c`` (argmax macro-F1), ``va_auc``
        (flat-vs-trade AUC), ``auc_ovr_3c``, the ``pnl_best_*`` statistics and
        the thresholds they were computed at (``best_thr_trade``,
        ``best_thr_dir``; both None when the grid search found no pair with
        enough trades, in which case the PnL statistics are NaN).
    """
    model.eval()
    total_loss, n = 0.0, 0

    all_logits, all_y, all_ex = [], [], []
    for xb, eb, _ytr, _ydr, ytb_b, exb in dl:
        xb = xb.to(DEVICE)
        eb = eb.to(DEVICE)
        ytb = ytb_b.to(DEVICE)

        logits3 = model(xb, eb, edge_index, target_node)
        loss = F.cross_entropy(logits3, ytb)

        bs = xb.size(0)
        total_loss += float(loss.item()) * bs
        n += bs

        all_logits.append(logits3.detach().cpu())
        all_y.append(ytb_b.numpy())
        all_ex.append(exb.numpy())

    va_loss = total_loss / max(1, n)

    logits = torch.cat(all_logits, dim=0)             # (N,3)
    p3 = probs_from_logits3(logits).numpy()           # (N,3)
    y = np.concatenate(all_y).astype(np.int64)        # (N,)
    ex = np.concatenate(all_ex).astype(np.float32)    # (N,)

    # plain classification quality: argmax over the three classes
    pred_argmax = np.argmax(p3, axis=1)
    f1m = float(f1_score(y, pred_argmax, average="macro"))

    # AUC trade (flat vs trade): p_trade = 1 - p_flat
    y_trade_true = (y != 1).astype(np.int64)
    p_trade, _ = derive_p_trade_and_p_up_cond(p3)
    auc_trade = safe_auc_binary(y_trade_true, p_trade)

    # optional: multiclass ovr auc
    auc_ovr_3c = safe_auc_ovr_multiclass(y, p3)

    if thr_trade is not None and thr_dir is not None:
        # Fixed thresholds supplied by the caller: no tuning on this loader.
        used_thr_trade, used_thr_dir = float(thr_trade), float(thr_dir)
    else:
        # best PnL by thresholds, searched on this loader
        thr_trade_grid = CFG["thr_trade_grid"] if CFG["proxy_thr_trade_grid"] is None else CFG["proxy_thr_trade_grid"]
        thr_dir_grid   = CFG["thr_dir_grid"]   if CFG["proxy_thr_dir_grid"]   is None else CFG["proxy_thr_dir_grid"]

        best = pnl_grid_search_from_p3(
            p3, y, ex,
            cost_bps=CFG["cost_bps"],
            thr_trade_grid=thr_trade_grid,
            thr_dir_grid=thr_dir_grid,
            min_trades=CFG["proxy_min_trades"],
        )
        used_thr_trade, used_thr_dir = best["thr_trade"], best["thr_dir"]

    if used_thr_trade is not None:
        pred_best = derive_pred_tb_from_two_thresholds(p3, used_thr_trade, used_thr_dir)
        net = pnl_from_preds(pred_best, ex, CFG["cost_bps"])
        st = pnl_stats(net, pred_best)
    else:
        st = {"sum": float("nan"), "mean": float("nan"), "std": float("nan"), "sharpe": float("nan"), "trades": 0}

    return {
        "va_loss": va_loss,
        "f1m_3c": f1m,
        "va_auc": auc_trade,
        "auc_ovr_3c": auc_ovr_3c,

        "pnl_best_sum": st["sum"],
        "pnl_best_mean": st["mean"],
        "pnl_best_std": st["std"],
        "pnl_best_sharpe": st["sharpe"],
        "pnl_best_trades": st["trades"],

        "best_thr_trade": used_thr_trade,
        "best_thr_dir": used_thr_dir,
    }

def run_fold_training_3c(fold_id: int, dl_train, dl_val, dl_test):
    """Train one fold, keep the epoch with the best validation PnL, score the test loader.

    The epoch is selected by ``pnl_best_sum`` on ``dl_val``. The test PnL is
    then computed at the thresholds chosen on validation for that epoch, so
    the test loader is never used for threshold tuning. If no epoch produced
    a non-NaN validation PnL, the last-epoch weights are kept and the
    thresholds are grid-searched on the test loader as before; a note is
    printed because those test figures are then optimistically biased.

    Returns:
        ``{"test": <metrics dict from eval_one_epoch_3c on dl_test>}``.
    """
    model = GNNTemporal3Class(node_in=node_in_dim, edge_in=edge_in_dim).to(DEVICE)
    opt = torch.optim.AdamW(model.parameters(), lr=CFG["lr"], weight_decay=CFG["weight_decay"])
    edge_index = EDGE_INDEX.to(DEVICE)

    # pick the best epoch by PnL on validation
    best_key = "pnl_best_sum"
    best_val = -1e18
    best_state = None
    best_thr_trade, best_thr_dir = None, None

    for ep in range(1, CFG["epochs"] + 1):
        tr_loss = train_one_epoch_3c(model, dl_train, opt, edge_index, TARGET_NODE)
        va = eval_one_epoch_3c(model, dl_val, edge_index, TARGET_NODE)

        key = va[best_key]
        if not math.isnan(key) and key > best_val:
            best_val = key
            best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
            # remember the thresholds that produced this validation PnL
            best_thr_trade, best_thr_dir = va["best_thr_trade"], va["best_thr_dir"]

        print(f"[FOLD {fold_id}] ep {ep:02d} lr={CFG['lr']:.2e} "
              f"tr_loss={tr_loss:.4f} va_loss={va['va_loss']:.4f} "
              f"f1={va['f1m_3c']:.3f} va_auc={va['va_auc']:.3f} "
              f"pnl_sum={va['pnl_best_sum']:.4f} pnl_mean={va['pnl_best_mean']:.6f} "
              f"pnl_std={va['pnl_best_std']:.6f} sharpe~={va['pnl_best_sharpe']:.3f} "
              f"trades={va['pnl_best_trades']} thr=({va['best_thr_trade']},{va['best_thr_dir']}) "
              f"auc_ovr3c={va['auc_ovr_3c']:.3f}")

    if best_state is not None:
        model.load_state_dict(best_state)

    if best_thr_trade is None or best_thr_dir is None:
        print(f"[FOLD {fold_id}] NOTE: no validation-selected thresholds; "
              f"test thresholds are tuned on the test set (optimistic).")

    te = eval_one_epoch_3c(model, dl_test, edge_index, TARGET_NODE,
                           thr_trade=best_thr_trade, thr_dir=best_thr_dir)
    print(f"[FOLD {fold_id}] TEST: loss={te['va_loss']:.4f} f1={te['f1m_3c']:.3f} "
          f"va_auc={te['va_auc']:.3f} pnl_sum={te['pnl_best_sum']:.4f} "
          f"pnl_mean={te['pnl_best_mean']:.6f} pnl_std={te['pnl_best_std']:.6f} "
          f"sharpe~={te['pnl_best_sharpe']:.3f} trades={te['pnl_best_trades']} "
          f"thr=({te['best_thr_trade']},{te['best_thr_dir']}) auc_ovr3c={te['auc_ovr_3c']:.3f}")

    return {"test": te}

# Confirmation message emitted once the train/eval helpers of this cell are defined.
print("OK: 3-class train/eval v2 (best by PnL) ready.")


OK: 3-class train/eval v2 (best by PnL) ready.


## 10. Run folds: scale once → train trade → filter trades → train dir → PnL sweep

In [61]:
# ==================================
# Step 12 (REPLACE v2). Walk-forward CV training (3-class) + extended summary
# ==================================
# ЛОГИЧЕСКИЙ БЛОК: прогон по фолдам + summary
# ИСПОЛНЕНИЕ БЛОКА:

# Walk-forward driver: build loaders for every (train, val, test) split in
# walk_splits, train one model per fold and collect the per-fold outputs.
results = []
for fi, (idx_tr, idx_va, idx_te) in enumerate(walk_splits, start=1):
    # make_loaders_for_fold returns four values; the fourth is not needed here.
    dl_tr, dl_va, dl_te, _ = make_loaders_for_fold(
        X_node_raw=X_node_raw,
        edge_feat=edge_feat,
        sample_t=sample_t_cv,
        y_tb=y_tb,
        y_trade=y_trade,
        y_dir=y_dir,
        exit_ret=exit_ret,
        idx_train=idx_tr,
        idx_val=idx_va,
        idx_test=idx_te,
        batch_size=CFG["batch_size"],
    )
    # Each result is the dict returned by run_fold_training_3c ({"test": metrics}).
    out = run_fold_training_3c(fi, dl_tr, dl_va, dl_te)
    results.append(out)

# Per-fold recap of the test metrics (one line per fold, no cross-fold aggregate).
print("\n=== SUMMARY (per fold test) ===")
for i, r in enumerate(results, start=1):
    te = r["test"]
    print(f"fold {i}: f1={te['f1m_3c']:.3f} va_auc={te['va_auc']:.3f} auc_ovr3c={te['auc_ovr_3c']:.3f} "
          f"pnl_sum={te['pnl_best_sum']:.4f} pnl_mean={te['pnl_best_mean']:.6f} "
          f"pnl_std={te['pnl_best_std']:.6f} sharpe~={te['pnl_best_sharpe']:.3f} "
          f"trades={te['pnl_best_trades']}")




[FOLD 1] ep 01 lr=2.00e-04 tr_loss=0.9883 va_loss=0.9627 f1=0.254 va_auc=0.379 pnl_sum=nan pnl_mean=nan pnl_std=nan sharpe~=nan trades=0 thr=(None,None) auc_ovr3c=0.400
[FOLD 1] ep 02 lr=2.00e-04 tr_loss=0.9646 va_loss=0.9556 f1=0.254 va_auc=0.359 pnl_sum=nan pnl_mean=nan pnl_std=nan sharpe~=nan trades=0 thr=(None,None) auc_ovr3c=0.403
[FOLD 1] ep 03 lr=2.00e-04 tr_loss=0.9535 va_loss=0.9459 f1=0.254 va_auc=0.375 pnl_sum=nan pnl_mean=nan pnl_std=nan sharpe~=nan trades=0 thr=(None,None) auc_ovr3c=0.420
[FOLD 1] ep 04 lr=2.00e-04 tr_loss=0.9366 va_loss=0.9447 f1=0.254 va_auc=0.422 pnl_sum=nan pnl_mean=nan pnl_std=nan sharpe~=nan trades=0 thr=(None,None) auc_ovr3c=0.462
[FOLD 1] ep 05 lr=2.00e-04 tr_loss=0.9327 va_loss=0.9429 f1=0.254 va_auc=0.438 pnl_sum=nan pnl_mean=nan pnl_std=nan sharpe~=nan trades=0 thr=(None,None) auc_ovr3c=0.501
[FOLD 1] ep 06 lr=2.00e-04 tr_loss=0.9187 va_loss=0.9640 f1=0.254 va_auc=0.387 pnl_sum=-0.0187 pnl_mean=-0.000415 pnl_std=0.005227 sharpe~=-0.533 trades=45



[FOLD 2] ep 01 lr=2.00e-04 tr_loss=1.0513 va_loss=0.8456 f1=0.286 va_auc=0.371 pnl_sum=nan pnl_mean=nan pnl_std=nan sharpe~=nan trades=0 thr=(None,None) auc_ovr3c=0.334
[FOLD 2] ep 02 lr=2.00e-04 tr_loss=0.9576 va_loss=0.7732 f1=0.286 va_auc=0.331 pnl_sum=nan pnl_mean=nan pnl_std=nan sharpe~=nan trades=0 thr=(None,None) auc_ovr3c=0.310
[FOLD 2] ep 03 lr=2.00e-04 tr_loss=0.9466 va_loss=0.7695 f1=0.286 va_auc=0.318 pnl_sum=nan pnl_mean=nan pnl_std=nan sharpe~=nan trades=0 thr=(None,None) auc_ovr3c=0.294
[FOLD 2] ep 04 lr=2.00e-04 tr_loss=0.9342 va_loss=0.8165 f1=0.286 va_auc=0.306 pnl_sum=nan pnl_mean=nan pnl_std=nan sharpe~=nan trades=0 thr=(None,None) auc_ovr3c=0.281
[FOLD 2] ep 05 lr=2.00e-04 tr_loss=0.9249 va_loss=0.8005 f1=0.286 va_auc=0.396 pnl_sum=nan pnl_mean=nan pnl_std=nan sharpe~=nan trades=0 thr=(None,None) auc_ovr3c=0.346
[FOLD 2] ep 06 lr=2.00e-04 tr_loss=0.9087 va_loss=0.8760 f1=0.286 va_auc=0.435 pnl_sum=nan pnl_mean=nan pnl_std=nan sharpe~=nan trades=0 thr=(None,None) au



[FOLD 3] ep 01 lr=2.00e-04 tr_loss=0.9938 va_loss=1.4582 f1=0.156 va_auc=0.262 pnl_sum=nan pnl_mean=nan pnl_std=nan sharpe~=nan trades=0 thr=(None,None) auc_ovr3c=0.398
[FOLD 3] ep 02 lr=2.00e-04 tr_loss=0.9482 va_loss=1.4432 f1=0.156 va_auc=0.279 pnl_sum=nan pnl_mean=nan pnl_std=nan sharpe~=nan trades=0 thr=(None,None) auc_ovr3c=0.351
[FOLD 3] ep 03 lr=2.00e-04 tr_loss=0.9236 va_loss=1.3967 f1=0.156 va_auc=0.279 pnl_sum=nan pnl_mean=nan pnl_std=nan sharpe~=nan trades=0 thr=(None,None) auc_ovr3c=0.356
[FOLD 3] ep 04 lr=2.00e-04 tr_loss=0.9155 va_loss=1.4742 f1=0.156 va_auc=0.322 pnl_sum=nan pnl_mean=nan pnl_std=nan sharpe~=nan trades=0 thr=(None,None) auc_ovr3c=0.343
[FOLD 3] ep 05 lr=2.00e-04 tr_loss=0.9102 va_loss=1.6462 f1=0.156 va_auc=0.315 pnl_sum=nan pnl_mean=nan pnl_std=nan sharpe~=nan trades=0 thr=(None,None) auc_ovr3c=0.331
[FOLD 3] ep 06 lr=2.00e-04 tr_loss=0.9019 va_loss=1.7912 f1=0.156 va_auc=0.316 pnl_sum=nan pnl_mean=nan pnl_std=nan sharpe~=nan trades=0 thr=(None,None) au



[FOLD 4] ep 01 lr=2.00e-04 tr_loss=1.0079 va_loss=nan f1=0.148 va_auc=nan pnl_sum=nan pnl_mean=nan pnl_std=nan sharpe~=nan trades=0 thr=(None,None) auc_ovr3c=nan
[FOLD 4] ep 02 lr=2.00e-04 tr_loss=0.9835 va_loss=nan f1=0.148 va_auc=nan pnl_sum=0.1393 pnl_mean=0.002247 pnl_std=0.008850 sharpe~=1.999 trades=62 thr=(0.5,0.5) auc_ovr3c=nan
[FOLD 4] ep 03 lr=2.00e-04 tr_loss=0.9711 va_loss=nan f1=0.148 va_auc=nan pnl_sum=0.0743 pnl_mean=0.000722 pnl_std=0.009255 sharpe~=0.791 trades=103 thr=(0.55,0.5) auc_ovr3c=nan
[FOLD 4] ep 04 lr=2.00e-04 tr_loss=0.9552 va_loss=nan f1=0.322 va_auc=nan pnl_sum=-0.0558 pnl_mean=-0.000473 pnl_std=0.009210 sharpe~=-0.557 trades=118 thr=(0.65,0.5) auc_ovr3c=nan
[FOLD 4] ep 05 lr=2.00e-04 tr_loss=0.9426 va_loss=nan f1=0.288 va_auc=nan pnl_sum=0.2146 pnl_mean=0.004470 pnl_std=0.007295 sharpe~=4.245 trades=48 thr=(0.6,0.5) auc_ovr3c=nan
[FOLD 4] ep 06 lr=2.00e-04 tr_loss=0.9188 va_loss=nan f1=0.214 va_auc=nan pnl_sum=0.2193 pnl_mean=0.004872 pnl_std=0.007145 sha