<a href="https://colab.research.google.com/github/yshnxd/solaris/blob/main/SOLARISSS%20stablezzz.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup Libraries

In [1]:
# === STEP 0: Setup Libraries ===
# Core
import numpy as np
import pandas as pd
import gc
import os
import warnings
warnings.filterwarnings("ignore")

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Technical indicators & TA-Lib alternative
!pip install ta --quiet
import ta

# Machine Learning
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, classification_report, mean_absolute_error, mean_squared_error
)

# XGBoost
!pip install xgboost --quiet
from xgboost import XGBClassifier, XGBRegressor

# Deep Learning (TensorFlow/Keras)
!pip install tensorflow --quiet
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import (
    Dense, Dropout, Flatten, Conv1D, MaxPooling1D,
    LSTM, Input, BatchNormalization, GlobalAveragePooling1D
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Utilities for reproducibility
import random
import tensorflow as tf

SEED = 42
np.random.seed(SEED)
random.seed(SEED)
tf.random.set_seed(SEED)

print("✅ Libraries loaded successfully.")


  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for ta (setup.py) ... [?25l[?25hdone
✅ Libraries loaded successfully.


# Collect Data

In [2]:
# === STEP 1: Data Collection (Hourly) ===
!pip install yfinance --quiet
import yfinance as yf
from datetime import datetime
import pandas as pd
import os

# Target + market context tickers
tickers = ["AAPL", "SPY", "TSLA", "NVDA", "QQQ"]  # note: ^VIX for Yahoo
interval = "60m"  # 1-hour bars
period = "729d"   # max allowed for hourly

data_dict = {}
print("Downloading hourly data...")
for t in tickers:
    try:
        df = yf.download(t, interval=interval, period=period, progress=False)
        df.dropna(inplace=True)
        df.index = df.index.tz_localize(None)
        data_dict[t] = df
        print(f"{t}: {df.shape[0]} rows from {df.index.min()} to {df.index.max()}")
    except Exception as e:
        print(f"❌ Failed to get {t}: {e}")
# ✅ Replace old close_df creation with this
target_index = data_dict["AAPL"].index
aligned_close = pd.DataFrame(index=target_index)

for t, df in data_dict.items():
    aligned_close[t] = df.reindex(target_index)['Close']

print("\nSample aligned close prices:")
print(aligned_close.tail())

# Save raw hourly data
os.makedirs("data_raw", exist_ok=True)
for t, df in data_dict.items():
    df.to_csv(f"data_raw/{t}_60m.csv")
print("\n✅ Hourly data downloaded and saved to 'data_raw/'")


Downloading hourly data...
AAPL: 5075 rows from 2022-09-15 13:30:00 to 2025-08-12 19:30:00
SPY: 5075 rows from 2022-09-15 13:30:00 to 2025-08-12 19:30:00
TSLA: 5075 rows from 2022-09-15 13:30:00 to 2025-08-12 19:30:00
NVDA: 5075 rows from 2022-09-15 13:30:00 to 2025-08-12 19:30:00
QQQ: 5075 rows from 2022-09-15 13:30:00 to 2025-08-12 19:30:00

Sample aligned close prices:
                           AAPL         SPY        TSLA        NVDA  \
Datetime                                                              
2025-08-12 15:30:00  229.779999  641.379272  338.031494  182.274994   
2025-08-12 16:30:00  229.774994  641.539978  338.790009  182.240005   
2025-08-12 17:30:00  229.570007  642.219971  339.413696  183.065002   
2025-08-12 18:30:00  229.182999  642.320007  340.859985  182.784500   
2025-08-12 19:30:00  229.649994  642.630005  340.755005  183.100006   

                            QQQ  
Datetime                         
2025-08-12 15:30:00  578.659973  
2025-08-12 16:30:00  578.

#Feature Creation

Creating Features

In [3]:
all_feat_data = []

# Forward-fill aligned_close once globally
aligned_ffill = aligned_close.ffill()

for ticker in aligned_ffill.columns:
    if aligned_ffill[ticker].isna().all():
        continue

    price_series = aligned_ffill[ticker]
    feat_tmp = pd.DataFrame(index=price_series.index)

    # Lag returns
    for lag in [1, 3, 6, 12, 24]:
        feat_tmp[f"ret_{lag}h"] = price_series.pct_change(lag)

    # Rolling volatility
    for window in [6, 12, 24]:
        feat_tmp[f"vol_{window}h"] = price_series.pct_change().rolling(window).std()

    # Technical indicators
    feat_tmp["rsi_14"] = ta.momentum.RSIIndicator(price_series, window=14).rsi()
    macd = ta.trend.MACD(price_series)
    feat_tmp["macd"] = macd.macd()
    feat_tmp["macd_signal"] = macd.macd_signal()

    # Moving averages
    for w in [5, 10, 20]:
        feat_tmp[f"sma_{w}"] = price_series.rolling(w).mean()
        feat_tmp[f"ema_{w}"] = price_series.ewm(span=w, adjust=False).mean()

    # Volume features
    if ticker in data_dict and "Volume" in data_dict[ticker].columns:
        vol_series = data_dict[ticker].reindex(price_series.index)["Volume"].ffill()
        feat_tmp["vol_change_1h"] = vol_series.pct_change()
        feat_tmp["vol_ma_24h"] = vol_series.rolling(24).mean()

    # Cross-asset returns — from the globally ffilled dataframe
    for asset in ["SPY", "QQQ", "NVDA"]:
        if asset in aligned_ffill.columns:
            feat_tmp[f"{asset}_ret_1h"] = aligned_ffill[asset].pct_change()

    if "^VIX" in aligned_ffill.columns:
        feat_tmp["vix_ret_1h"] = aligned_ffill["^VIX"].pct_change()

    # Calendar features
    feat_tmp["hour"] = feat_tmp.index.hour
    feat_tmp["day_of_week"] = feat_tmp.index.dayofweek

    # Only drop rows with NaNs in features for THIS ticker
    feat_tmp = feat_tmp.dropna(subset=[col for col in feat_tmp.columns if col not in ["datetime", "ticker"]])

    feat_tmp["datetime"] = feat_tmp.index
    feat_tmp["ticker"] = ticker

    all_feat_data.append(feat_tmp.reset_index(drop=True))

features_df = pd.concat(all_feat_data, ignore_index=True)

print(f"✅ Created features for {features_df['ticker'].nunique()} tickers")
print("Shape:", features_df.shape)
print(features_df.head())


✅ Created features for 5 tickers
Shape: (25210, 26)
     ret_1h    ret_3h    ret_6h   ret_12h   ret_24h    vol_6h   vol_12h  \
0  0.003927 -0.009147 -0.007584  0.005846  0.046665  0.007817  0.007090   
1 -0.012844 -0.022400 -0.023381 -0.018455  0.029676  0.008701  0.007126   
2 -0.008392 -0.017286 -0.023793 -0.030869  0.015118  0.008742  0.007124   
3 -0.007873 -0.028836 -0.037719 -0.030889  0.002985  0.007122  0.007125   
4  0.005376 -0.010910 -0.033066 -0.026802  0.010589  0.008245  0.007425   

    vol_24h     rsi_14      macd  ...      ema_20  vol_change_1h  \
0  0.005565  54.275636  0.990354  ...  155.298311       0.531565   
1  0.006314  44.979346  0.740424  ...  155.147044      -0.481519   
2  0.006528  40.196954  0.433265  ...  154.887325       0.716998   
3  0.006707  36.327580  0.091949  ...  154.538056      -0.441574   
4  0.006770  40.506051 -0.111656  ...  154.299480      -0.340051   

     vol_ma_24h  SPY_ret_1h  QQQ_ret_1h  NVDA_ret_1h  hour  day_of_week  \
0  1.222141e+

Label Creation

In [4]:
# === LABEL CREATION FOR ALL TICKERS (pooled dataset) ===

horizon = 1               # predict 1 hour ahead
vol_lookback = 24         # hours to compute rolling volatility
vol_multiplier = 0.5      # threshold scaling vs volatility

all_data = []

for ticker in aligned_close.columns:
    # Skip if ticker is all NaN (e.g., ^VIX alignment issues)
    if aligned_close[ticker].dropna().empty:
        continue

    price_series = aligned_close[ticker]

    # Forward return
    future_price = price_series.shift(-horizon)
    future_ret = (future_price - price_series) / price_series

    # Volatility-based threshold
    rolling_vol = price_series.pct_change().rolling(vol_lookback).std()
    threshold = rolling_vol * vol_multiplier

    # Label creation
    label = future_ret.copy()
    label[future_ret > threshold] = 1    # Up
    label[future_ret < -threshold] = -1  # Down
    label[(future_ret <= threshold) & (future_ret >= -threshold)] = 0  # Neutral

    # Drop NaNs
    label = label.dropna()

    # Combine into dataframe
    df_tmp = pd.DataFrame({
        "datetime": label.index,
        "ticker": ticker,
        "price": price_series.loc[label.index],
        "label": label.values,
        "future_ret": future_ret.loc[label.index],
        "volatility": rolling_vol.loc[label.index]
    })

    all_data.append(df_tmp)

# Combine all tickers
labels_df = pd.concat(all_data, ignore_index=True)

print("Combined dataset shape:", labels_df.shape)
print(labels_df["label"].value_counts(normalize=True))
labels_df.head(10)


Combined dataset shape: (25370, 6)
label
 0.000000    0.532519
 1.000000    0.244974
-1.000000    0.217777
-0.005652    0.000039
-0.010207    0.000039
               ...   
 0.001342    0.000039
-0.007730    0.000039
 0.003572    0.000039
-0.000854    0.000039
-0.006713    0.000039
Name: proportion, Length: 123, dtype: float64


Unnamed: 0,datetime,ticker,price,label,future_ret,volatility
0,2022-09-15 13:30:00,AAPL,153.809998,-0.006306,-0.006306,
1,2022-09-15 14:30:00,AAPL,152.839996,-0.002486,-0.002486,
2,2022-09-15 15:30:00,AAPL,152.460007,0.009543,0.009543,
3,2022-09-15 16:30:00,AAPL,153.914993,-0.005652,-0.005652,
4,2022-09-15 17:30:00,AAPL,153.044998,-0.010207,-0.010207,
5,2022-09-15 18:30:00,AAPL,151.482895,0.005724,0.005724,
6,2022-09-15 19:30:00,AAPL,152.350006,-0.018969,-0.018969,
7,2022-09-16 13:30:00,AAPL,149.460007,-0.002141,-0.002141,
8,2022-09-16 14:30:00,AAPL,149.139999,-0.002496,-0.002496,
9,2022-09-16 15:30:00,AAPL,148.767807,0.003443,0.003443,


Scaling

# Preprocessing

Normalize Features

In [5]:
# Merge features with labels
df = features_df.merge(labels_df, on=["datetime", "ticker"], how="inner")

# Drop NaNs (just in case)
df = df.dropna()

# Separate features & labels
X = df.drop(columns=["datetime", "ticker", "label", "future_ret"])
y = df["label"]

print("X shape:", X.shape)
print("y distribution:\n", y.value_counts(normalize=True))


X shape: (25205, 26)
y distribution:
 label
 0.0    0.535013
 1.0    0.246102
-1.0    0.218885
Name: proportion, dtype: float64


Scale

In [6]:
from sklearn.preprocessing import StandardScaler
import numpy as np

# Merge features and labels
df = features_df.merge(labels_df, on=["datetime", "ticker"], how="inner")

# Sort by time
df = df.sort_values(["datetime", "ticker"]).reset_index(drop=True)

# Replace inf values with NaN
df = df.replace([np.inf, -np.inf], np.nan)

# Drop rows with NaNs
df = df.dropna()

# Separate features & target
X = df.drop(columns=["datetime", "ticker", "label", "future_ret"])
y = df["label"]

# Time-based split
train_size = int(len(df) * 0.7)
val_size = int(len(df) * 0.15)

X_train = X.iloc[:train_size]
y_train = y.iloc[:train_size]

X_val = X.iloc[train_size:train_size + val_size]
y_val = y.iloc[train_size:train_size + val_size]

X_test = X.iloc[train_size + val_size:]
y_test = y.iloc[train_size + val_size:]

# Ensure all values are finite before scaling
assert np.isfinite(X_train.values).all(), "Found non-finite values in X_train!"

# Scale features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

print(f"✅ Train: {X_train.shape}, Val: {X_val.shape}, Test: {X_test.shape}")
print("Label distribution in Train:", y_train.value_counts(normalize=True))


✅ Train: (17631, 26), Val: (3778, 26), Test: (3779, 26)
Label distribution in Train: label
 0.0    0.525665
 1.0    0.251943
-1.0    0.222392
Name: proportion, dtype: float64


Sequence making - For LSTM AND CNN

In [7]:
import numpy as np

def create_sequences(X, y, seq_len=24):
    """
    Convert tabular (samples, features) into sequential (samples, seq_len, features)
    for CNN/LSTM, keeping labels aligned to the last timestep.
    """
    X_seq, y_seq = [], []
    for i in range(len(X) - seq_len):
        X_seq.append(X[i:i+seq_len])
        y_seq.append(y[i+seq_len])  # label at next hour
    return np.array(X_seq), np.array(y_seq)

# === Choose sequence length ===
SEQ_LEN = 24  # last 24 hours to predict next hour

# Reshape train/val/test sets
X_train_seq, y_train_seq = create_sequences(X_train, y_train.values, SEQ_LEN)
X_val_seq,   y_val_seq   = create_sequences(X_val,   y_val.values,   SEQ_LEN)
X_test_seq,  y_test_seq  = create_sequences(X_test,  y_test.values,  SEQ_LEN)

print(f"Train seq: {X_train_seq.shape}, Val seq: {X_val_seq.shape}, Test seq: {X_test_seq.shape}")


Train seq: (17607, 24, 26), Val seq: (3754, 24, 26), Test seq: (3755, 24, 26)


In [8]:
# CELL 1 — label encoding + class weights
import numpy as np
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.utils import to_categorical

# mapping: -1 -> 0 (down), 0 -> 1 (neutral), 1 -> 2 (up)
label_map = { -1.0: 0, 0.0: 1, 1.0: 2 }

# If your y_* are numpy arrays (seq labels), convert
y_train_seq_mapped = np.vectorize(label_map.get)(y_train_seq)
y_val_seq_mapped   = np.vectorize(label_map.get)(y_val_seq)
y_test_seq_mapped  = np.vectorize(label_map.get)(y_test_seq)

# one-hot for Keras
y_train_cat = to_categorical(y_train_seq_mapped, num_classes=3)
y_val_cat   = to_categorical(y_val_seq_mapped, num_classes=3)
y_test_cat  = to_categorical(y_test_seq_mapped, num_classes=3)

# compute class weights from training sequence labels
classes = np.unique(y_train_seq_mapped)
class_weights = compute_class_weight("balanced", classes=classes, y=y_train_seq_mapped)
class_weights_dict = {int(c): w for c, w in zip(classes, class_weights)}
print("Class weights:", class_weights_dict)
print("Train class distribution:", np.bincount(y_train_seq_mapped) / len(y_train_seq_mapped))


Class weights: {0: np.float64(1.5025601638504864), 1: np.float64(0.6336644353271431), 2: np.float64(1.322144627168281)}
Train class distribution: [0.22184358 0.52604078 0.25211564]


# Build the Model

CNN

In [35]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, Conv1D, BatchNormalization, Activation, Dropout, SpatialDropout1D,
    GlobalAveragePooling1D, GlobalMaxPooling1D, Dense, Add, Multiply, Concatenate
)
from tensorflow.keras.regularizers import l2

# try to use AdamW (TF nightly / TF >=2.11 has experimental AdamW); fallback to classic Adam
try:
    from tensorflow.keras.optimizers import experimental as exp_optimizers
    AdamW = exp_optimizers.AdamW
    use_adamw = True
except Exception:
    from tensorflow.keras.optimizers import Adam
    AdamW = None
    use_adamw = False

def se_block(x, reduction=8):
    """Squeeze-and-Excitation block."""
    channels = int(x.shape[-1])
    se = GlobalAveragePooling1D()(x)
    se = tf.keras.layers.Dense(channels // reduction, activation='relu', kernel_initializer='he_normal')(se)
    se = tf.keras.layers.Dense(channels, activation='sigmoid', kernel_initializer='he_normal')(se)
    se = tf.keras.layers.Reshape((1, channels))(se)
    return Multiply()([x, se])

def residual_block(x, filters, kernel_size, dropout_rate=0.2, weight_decay=1e-4):
    shortcut = x
    # projection if channels mismatch
    if x.shape[-1] != filters:
        shortcut = Conv1D(filters, kernel_size=1, padding='same',
                          kernel_initializer='he_normal',
                          kernel_regularizer=l2(weight_decay))(shortcut)
        shortcut = BatchNormalization()(shortcut)

    x = Conv1D(filters, kernel_size=kernel_size, padding='same',
               kernel_initializer='he_normal',
               kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    # spatial dropout is preferable for time-series channels
    x = SpatialDropout1D(dropout_rate)(x)

    x = Conv1D(filters, kernel_size=kernel_size, padding='same',
               kernel_initializer='he_normal',
               kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization()(x)

    x = Add()([x, shortcut])
    x = Activation('relu')(x)
    return x

def build_cnn_enhanced(input_shape, n_classes=3, dropout_rate=0.25, weight_decay=1e-4):
    inp = Input(shape=input_shape)

    # optionally normalize channels at input
    x = BatchNormalization()(inp)

    # initial conv
    x = Conv1D(64, kernel_size=3, padding='same',
               kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # residual blocks (reduce complexity growth if dataset small)
    x = residual_block(x, 64, 3, dropout_rate=dropout_rate, weight_decay=weight_decay)
    x = residual_block(x, 128, 5, dropout_rate=dropout_rate, weight_decay=weight_decay)
    x = residual_block(x, 256, 3, dropout_rate=dropout_rate, weight_decay=weight_decay)

    # attention
    x = se_block(x, reduction=8)

    # pooling combination
    gap = GlobalAveragePooling1D()(x)
    gmp = GlobalMaxPooling1D()(x)
    x = Concatenate()([gap, gmp])  # combined representation

    x = Dense(256, activation='relu', kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(x)
    x = Dropout(dropout_rate)(x)
    x = Dense(128, activation='relu', kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(x)
    x = Dropout(dropout_rate)(x)
    out = Dense(n_classes, activation='softmax')(x)

    model = Model(inp, out)

    # optimizer: try AdamW if available, otherwise Adam with weight decay via L2 above
    if use_adamw and AdamW is not None:
        opt = AdamW(learning_rate=1e-3, weight_decay=1e-5)
    else:
        opt = tf.keras.optimizers.Adam(learning_rate=1e-3)

    model.compile(
        optimizer=opt,
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

# Build model (example)
cnn_model = build_cnn_enhanced(input_shape=X_train_seq.shape[1:], n_classes=3, dropout_rate=0.25, weight_decay=1e-4)
cnn_model.summary()



LSTM

In [38]:
from tensorflow.keras.layers import (
    Input, Bidirectional, LSTM, GRU, Dense, Dropout,
    LayerNormalization, GlobalAveragePooling1D, GlobalMaxPooling1D,
    Concatenate, Attention
)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

def build_lstm_enhanced(input_shape, n_classes=3, dropout_rate=0.25):
    inp = Input(shape=input_shape)
    x = LayerNormalization()(inp)

    # First BiLSTM layer
    x = Bidirectional(LSTM(128, return_sequences=True))(x)
    x = Dropout(0.2)(x)

    # Second BiGRU layer (faster and adds diversity in sequence modeling)
    x = Bidirectional(GRU(64, return_sequences=True))(x)
    x = Dropout(0.2)(x)

    # Self-Attention layer
    attn_data = Attention()([x, x])
    x = Concatenate()([x, attn_data])  # fuse original and attention output

    # Pooling
    x_avg = GlobalAveragePooling1D()(x)
    x_max = GlobalMaxPooling1D()(x)
    x = Concatenate()([x_avg, x_max])

    # Dense layers
    x = Dense(256, activation='relu')(x)
    x = Dropout(dropout_rate)(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(dropout_rate)(x)

    out = Dense(n_classes, activation='softmax')(x)

    model = Model(inp, out)
    model.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

# Instantiate model
lstm_model = build_lstm_enhanced(input_shape=X_train_seq.shape[1:], n_classes=3, dropout_rate=0.25)
lstm_model.summary()


XGBOOST

In [29]:
import xgboost as xgb
import numpy as np
import pandas as pd
from sklearn.utils.class_weight import compute_sample_weight
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# ---- Map labels safely to integers 0,1,2 ----
label_map = {-1.0: 0, 0.0: 1, 1.0: 2}
# if y_train etc are pandas Series this is robust:
y_train_tab = pd.Series(y_train).map(label_map).astype('int').to_numpy()
y_val_tab   = pd.Series(y_val).map(label_map).astype('int').to_numpy()
y_test_tab  = pd.Series(y_test).map(label_map).astype('int').to_numpy()

# ---- Per-sample balanced weights (recommended for multiclass) ----
sample_weight_train = compute_sample_weight('balanced', y_train_tab)
# sample_weight_train is length n_train; pass it into .fit(..., sample_weight=...)

# ---- Defensive: compute per-class counts (avoid division by zero) ----
class_counts = np.bincount(y_train_tab, minlength=3)
total = len(y_train_tab)
n_classes = len(class_counts)
per_class_scale = np.where(class_counts == 0, 0.0, total / (n_classes * class_counts))
print("class_counts:", class_counts, "per_class_scale:", per_class_scale)

# If you prefer per-sample weights from per_class_scale:
sample_weight_from_scale = np.array([per_class_scale[c] for c in y_train_tab])

# ---- XGBoost classifier (multi-class) ----
xgb_clf = xgb.XGBClassifier(
    objective='multi:softprob',    # multiclass probability output
    num_class=3,                   # explicit number of classes (safe to include)
    n_estimators=1000,             # set high + use early stopping
    learning_rate=0.03,
    max_depth=6,
    min_child_weight=3,
    gamma=1,
    subsample=0.85,
    colsample_bytree=0.85,
    reg_alpha=0.1,
    reg_lambda=1.0,
    random_state=42,
    n_jobs=-1,
    tree_method='hist',            # 'gpu_hist' if you have GPU
    eval_metric='mlogloss',
    use_label_encoder=False
)


class_counts: [3921 9268 4442] per_class_scale: [1.49885233 0.63411739 1.32305268]


# Train the model

CNN

In [37]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard
import tensorflow as tf
import datetime

# Enable mixed precision if supported
if tf.config.list_physical_devices('GPU'):
    tf.keras.mixed_precision.set_global_policy('mixed_float16')

# Compile with lower LR initially for stability
cnn_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=5e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Callbacks
es = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1
)

rlr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=4,
    min_lr=1e-6,
    verbose=1
)

chk = ModelCheckpoint(
    filepath='best_cnn_full_model.keras',  # save full model, not just weights
    monitor='val_loss',
    save_best_only=True,
    verbose=1
)

# TensorBoard logging
log_dir = "logs/cnn/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Training
cnn_history = cnn_model.fit(
    X_train_seq, y_train_cat,
    validation_data=(X_val_seq, y_val_cat),
    epochs=100,
    batch_size=128,
    class_weight=class_weights_dict,
    callbacks=[es, rlr, chk, tb_callback],
    verbose=2
)


Epoch 1/100

Epoch 1: val_loss improved from inf to 1.33763, saving model to best_cnn_full_model.keras
138/138 - 57s - 411ms/step - accuracy: 0.5117 - loss: 1.5105 - val_accuracy: 0.5493 - val_loss: 1.3376 - learning_rate: 5.0000e-04
Epoch 2/100

Epoch 2: val_loss improved from 1.33763 to 1.32411, saving model to best_cnn_full_model.keras
138/138 - 45s - 328ms/step - accuracy: 0.5260 - loss: 1.4473 - val_accuracy: 0.5493 - val_loss: 1.3241 - learning_rate: 5.0000e-04
Epoch 3/100

Epoch 3: val_loss improved from 1.32411 to 1.30621, saving model to best_cnn_full_model.keras
138/138 - 83s - 598ms/step - accuracy: 0.5260 - loss: 1.4246 - val_accuracy: 0.5493 - val_loss: 1.3062 - learning_rate: 5.0000e-04
Epoch 4/100

Epoch 4: val_loss improved from 1.30621 to 1.30310, saving model to best_cnn_full_model.keras
138/138 - 45s - 323ms/step - accuracy: 0.5269 - loss: 1.4097 - val_accuracy: 0.5493 - val_loss: 1.3031 - learning_rate: 5.0000e-04
Epoch 5/100

Epoch 5: val_loss improved from 1.30310

LSTM

In [39]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard
import tensorflow as tf
import datetime

# Enable mixed precision if GPU supports it (LSTMs benefit less than CNNs, but still good for speed)
if tf.config.list_physical_devices('GPU'):
    tf.keras.mixed_precision.set_global_policy('mixed_float16')

# Compile LSTM with gradient clipping (helps with exploding gradients in RNNs)
lstm_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=3e-4, clipnorm=1.0),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Callbacks
es = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1
)

rlr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=4,
    min_lr=1e-6,
    verbose=1
)

chk = ModelCheckpoint(
    filepath='best_lstm_full_model.keras',  # Save entire model
    monitor='val_loss',
    save_best_only=True,
    verbose=1
)

# TensorBoard logging
log_dir = "logs/lstm/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Training
lstm_history = lstm_model.fit(
    X_train_seq, y_train_cat,
    validation_data=(X_val_seq, y_val_cat),
    epochs=120,  # Give it more room to converge
    batch_size=96,  # Smaller batch size often helps LSTM generalization
    class_weight=class_weights_dict,
    callbacks=[es, rlr, chk, tb_callback],
    verbose=2
)


Epoch 1/120

Epoch 1: val_loss improved from inf to 0.99749, saving model to best_lstm_full_model.keras
184/184 - 52s - 280ms/step - accuracy: 0.5216 - loss: 1.1150 - val_accuracy: 0.5495 - val_loss: 0.9975 - learning_rate: 3.0000e-04
Epoch 2/120

Epoch 2: val_loss improved from 0.99749 to 0.99479, saving model to best_lstm_full_model.keras
184/184 - 42s - 228ms/step - accuracy: 0.5262 - loss: 1.0887 - val_accuracy: 0.5479 - val_loss: 0.9948 - learning_rate: 3.0000e-04
Epoch 3/120

Epoch 3: val_loss did not improve from 0.99479
184/184 - 83s - 451ms/step - accuracy: 0.5266 - loss: 1.0800 - val_accuracy: 0.5509 - val_loss: 1.0005 - learning_rate: 3.0000e-04
Epoch 4/120

Epoch 4: val_loss did not improve from 0.99479
184/184 - 84s - 454ms/step - accuracy: 0.5280 - loss: 1.0736 - val_accuracy: 0.5490 - val_loss: 0.9997 - learning_rate: 3.0000e-04
Epoch 5/120

Epoch 5: val_loss did not improve from 0.99479
184/184 - 80s - 436ms/step - accuracy: 0.5294 - loss: 1.0666 - val_accuracy: 0.5503 

XGBOOST

In [36]:
!pip install optuna --quiet

import optuna
import xgboost as xgb
import numpy as np
import pandas as pd
from sklearn.utils.class_weight import compute_sample_weight
from sklearn.metrics import accuracy_score

# --- ensure label mapping (same as before) ---
label_map = {-1.0: 0, 0.0: 1, 1.0: 2}
y_train_tab = pd.Series(y_train).map(label_map).astype(int).to_numpy()
y_val_tab   = pd.Series(y_val).map(label_map).astype(int).to_numpy()
y_test_tab  = pd.Series(y_test).map(label_map).astype(int).to_numpy()

# --- sample weights (multiclass) ---
sw_train = compute_sample_weight(class_weight='balanced', y=y_train_tab)
sw_val   = compute_sample_weight(class_weight='balanced', y=y_val_tab)

# helper: robust predict that works across xgboost versions
def predict_with_best(booster: xgb.Booster, dmatrix: xgb.DMatrix):
    """
    Try several predict call styles in order:
      1) predict(..., iteration_range=(0, best_iteration+1)) if best_iteration exists
      2) predict(..., ntree_limit=best_ntree_limit) if supported
      3) fallback: predict(dmatrix)
    """
    # 1) iteration_range (newer interface)
    try:
        if hasattr(booster, "best_iteration") and booster.best_iteration is not None:
            return booster.predict(dmatrix, iteration_range=(0, int(booster.best_iteration) + 1))
    except TypeError:
        # some versions may not accept iteration_range
        pass
    # 2) ntree_limit (older interface)
    try:
        if hasattr(booster, "best_ntree_limit") and booster.best_ntree_limit is not None:
            return booster.predict(dmatrix, ntree_limit=int(booster.best_ntree_limit))
    except TypeError:
        pass
    # 3) plain predict fallback
    return booster.predict(dmatrix)

# prepare DMatrix for validation & test once outside objective (weights included)
dval = xgb.DMatrix(X_val, label=y_val_tab, weight=sw_val)
dtest = xgb.DMatrix(X_test)

# Optuna objective (robust)
def objective(trial):
    params = {
        'objective': 'multi:softprob',
        'num_class': 3,
        'eval_metric': 'mlogloss',
        'tree_method': 'hist',
        'seed': 42,
        'verbosity': 0,
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'gamma': trial.suggest_float('gamma', 0.0, 5.0),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.5, 3.0),
    }

    # create DMatrix for the training fold (weights applied)
    dtrain = xgb.DMatrix(X_train, label=y_train_tab, weight=sw_train)

    try:
        bst = xgb.train(
            params,
            dtrain,
            num_boost_round=1500,
            evals=[(dtrain, 'train'), (dval, 'eval')],
            early_stopping_rounds=50,
            verbose_eval=False
        )
        # robust predict
        preds = predict_with_best(bst, dval)
        pred_labels = np.argmax(preds, axis=1)
        acc = accuracy_score(y_val_tab, pred_labels)
        # store best_iteration (useful later)
        trial.set_user_attr("best_iteration", getattr(bst, "best_iteration", None))
        return acc

    except Exception as e:
        # Log a warning and return a very low score so Optuna can continue.
        # Avoid letting the exception bubble and stop the entire study.
        print(f"[Optuna objective] caught exception during training/predict: {e!r}")
        return 0.0

# run the study
study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=50, show_progress_bar=True)

print("\nBest parameters:", study.best_params)
print("Best validation accuracy:", study.best_value)
print("Best trial attrs:", study.best_trial.user_attrs)

# Train final model on same train/val with best params
best_params = study.best_params.copy()
best_params.update({
    'objective': 'multi:softprob',
    'num_class': 3,
    'eval_metric': 'mlogloss',
    'tree_method': 'hist',
    'seed': 42,
    'verbosity': 1
})

dtrain = xgb.DMatrix(X_train, label=y_train_tab, weight=sw_train)
dval   = xgb.DMatrix(X_val, label=y_val_tab, weight=sw_val)
final_model = xgb.train(
    best_params,
    dtrain,
    num_boost_round=3000,
    evals=[(dtrain, 'train'), (dval, 'eval')],
    early_stopping_rounds=50,
    verbose_eval=50
)
# -------------------------
# Save & evaluate (fixed, saving to xgb_model.pkl)
# -------------------------
import joblib
import xgboost as xgb
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Defensive check
if 'final_model' not in globals():
    raise RuntimeError("final_model not found. Make sure xgb.train(...) completed and produced final_model.")

# 1) Save the trained Booster
# (a) JSON copy — reliable XGBoost-native format
final_model.save_model("xgb_model.json")

# (b) Pickle via joblib so filename is xgb_model.pkl as requested
# joblib will serialize the Booster object; this is convenient for later joblib.load()
joblib.dump(final_model, "xgb_model.pkl")

# 2) How to reload later (examples)
# loaded_bst = joblib.load("xgb_model.pkl")
# OR
# loaded_bst = xgb.Booster()
# loaded_bst.load_model("xgb_model.json")

# 3) Evaluate on test set using the robust predict helper
y_prob = predict_with_best(final_model, dtest)
y_pred = np.argmax(y_prob, axis=1)

print("Final Test Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification report:\n", classification_report(y_test, y_pred))
print("\nConfusion matrix:\n", confusion_matrix(y_test, y_pred))


[I 2025-08-13 01:06:55,894] A new study created in memory with name: no-name-4eb7b8e5-8ba7-4275-a767-c3a48dee0304


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2025-08-13 01:06:57,192] Trial 0 finished with value: 0.47697194282689254 and parameters: {'max_depth': 5, 'learning_rate': 0.2536999076681772, 'subsample': 0.892797576724562, 'colsample_bytree': 0.8394633936788146, 'gamma': 0.7800932022121826, 'min_child_weight': 2, 'reg_alpha': 0.05808361216819946, 'reg_lambda': 2.665440364437338}. Best is trial 0 with value: 0.47697194282689254.
[I 2025-08-13 01:07:00,138] Trial 1 finished with value: 0.46823716251985176 and parameters: {'max_depth': 7, 'learning_rate': 0.11114989443094977, 'subsample': 0.608233797718321, 'colsample_bytree': 0.9879639408647978, 'gamma': 4.162213204002109, 'min_child_weight': 3, 'reg_alpha': 0.18182496720710062, 'reg_lambda': 0.9585112746335845}. Best is trial 0 with value: 0.47697194282689254.
[I 2025-08-13 01:07:02,045] Trial 2 finished with value: 0.4745897300158814 and parameters: {'max_depth': 5, 'learning_rate': 0.05958389350068958, 'subsample': 0.7727780074568463, 'colsample_bytree': 0.7164916560792167, 'ga

# O-O-F Predictions

In [63]:
import numpy as np
import pandas as pd
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import OneHotEncoder

# ==== Combine your split data back into one dataset ====
X_tab = np.vstack([X_train, X_val, X_test])
y_tab = np.concatenate([y_train, y_val, y_test])
y_tab = pd.Series(y_tab)  # needed for .map()

# ==== Time series split config ====
n_splits = 3
tscv = TimeSeriesSplit(n_splits=n_splits)

# ==== Label remap (-1, 0, 1) -> (0, 1, 2) ====
label_map = {-1: 0, 0: 1, 1: 2}

# ==== One-hot encoder for DL models ====
enc = OneHotEncoder(sparse_output=False)
y_all_mapped = y_tab.map(label_map)
y_all_oh = enc.fit_transform(y_all_mapped.values.reshape(-1, 1))

# ==== Storage for OOF predictions ====
oof_cnn = np.zeros((len(X_tab), 3))
oof_lstm = np.zeros((len(X_tab), 3))
oof_xgb = np.zeros((len(X_tab), 3))

# ==== Ensure tabular and sequence data match in length ====
min_len = min(len(X_tab), len(X_train_seq), len(y_all_mapped))

X_tab_aligned = X_tab[:min_len]
X_train_seq_aligned = X_train_seq[:min_len]
y_all_mapped_aligned = y_all_mapped.iloc[:min_len]
y_all_oh_aligned = y_all_oh[:min_len]

print(f"Aligned lengths: tab={X_tab_aligned.shape}, seq={X_train_seq_aligned.shape}, y={y_all_mapped_aligned.shape}")


Aligned lengths: tab=(17607, 26), seq=(17607, 24, 26), y=(17607,)


In [67]:
# Option A: create aligned copies (recommended, non-destructive)
import numpy as np

# require oof_idx_xgb exists
if 'oof_idx_xgb' not in globals():
    raise RuntimeError("oof_idx_xgb not found; re-run XGB OOF generation which should set oof_idx_xgb.")

idx = np.asarray(oof_idx_xgb)
# create aligned versions of CNN/LSTM that match oof_xgb's rows
oof_cnn_aligned = np.asarray(oof_cnn)[idx]
oof_lstm_aligned = np.asarray(oof_lstm)[idx]
oof_xgb_aligned = np.asarray(oof_xgb)  # already aligned to idx

print("Aligned shapes:")
print(" oof_cnn_aligned:", oof_cnn_aligned.shape)
print(" oof_lstm_aligned:", oof_lstm_aligned.shape)
print(" oof_xgb_aligned:", oof_xgb_aligned.shape)

# Keep originals for debugging; set globals for convenience
globals()['oof_cnn_aligned'] = oof_cnn_aligned
globals()['oof_lstm_aligned'] = oof_lstm_aligned
globals()['oof_xgb_aligned'] = oof_xgb_aligned

# If later code expects oof_cnn/oof_lstm to be same length as oof_xgb, use these aligned ones.


Aligned shapes:
 oof_cnn_aligned: (17607, 3)
 oof_lstm_aligned: (17607, 3)
 oof_xgb_aligned: (17607, 3)


In [41]:
print("=== Generating CNN OOF predictions ===")
for fold, (train_idx, val_idx) in enumerate(tscv.split(X_tab_aligned), 1):
    print(f"\n--- Fold {fold}/{n_splits} ---")

    # Sequences
    X_seq_tr, X_seq_va = X_train_seq[train_idx], X_train_seq[val_idx]
    y_seq_tr_oh, y_seq_va_oh = y_all_oh[train_idx], y_all_oh[val_idx]

    # Build + compile
    cnn_model = build_cnn_enhanced(input_shape=X_seq_tr.shape[1:], n_classes=3, dropout_rate=0.25)
    cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Train
    cnn_model.fit(X_seq_tr, y_seq_tr_oh, validation_data=(X_seq_va, y_seq_va_oh),
                  epochs=5, batch_size=64, verbose=0)

    # Predict
    oof_cnn[val_idx] = cnn_model.predict(X_seq_va, verbose=0)


=== Generating CNN OOF predictions ===

--- Fold 1/3 ---

--- Fold 2/3 ---

--- Fold 3/3 ---


In [42]:
print("=== Generating LSTM OOF predictions ===")
for fold, (train_idx, val_idx) in enumerate(tscv.split(X_tab_aligned), 1):
    print(f"\n--- Fold {fold}/{n_splits} ---")

    # Sequences
    X_seq_tr, X_seq_va = X_train_seq[train_idx], X_train_seq[val_idx]
    y_seq_tr_oh, y_seq_va_oh = y_all_oh[train_idx], y_all_oh[val_idx]

    # Build + compile
    lstm_model = build_lstm_enhanced(input_shape=X_seq_tr.shape[1:], n_classes=3, dropout_rate=0.25)
    lstm_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Train
    lstm_model.fit(X_seq_tr, y_seq_tr_oh, validation_data=(X_seq_va, y_seq_va_oh),
                   epochs=5, batch_size=64, verbose=0)

    # Predict
    oof_lstm[val_idx] = lstm_model.predict(X_seq_va, verbose=0)


=== Generating LSTM OOF predictions ===

--- Fold 1/3 ---

--- Fold 2/3 ---

--- Fold 3/3 ---


In [69]:
from xgboost import XGBClassifier

print("=== Generating XGB OOF predictions ===")
for fold, (train_idx, val_idx) in enumerate(tscv.split(X_tab_aligned), 1):
    print(f"\n--- Fold {fold}/{n_splits} ---")

    # Tabular split
    X_tr_tab, X_va_tab = X_tab_aligned[train_idx], X_tab_aligned[val_idx]
    y_tr_tab, y_va_tab = y_all_mapped_aligned.iloc[train_idx], y_all_mapped_aligned.iloc[val_idx]

    # Build with best parameters
    xgb_clf = XGBClassifier(
        objective='multi:softprob',
        num_class=3,
        max_depth=8,
        learning_rate=0.0182014870849285,
        subsample=0.8218430665716767,
        colsample_bytree=0.7628150155615464,
        gamma=0.7186090872199041,
        min_child_weight=8,
        reg_alpha=0.5856294446650139,
        reg_lambda=2.9006322365170183,
        n_estimators=300,
        eval_metric='mlogloss'
    )

    # Train
    xgb_clf.fit(X_tr_tab, y_tr_tab)

    # Predict
    oof_xgb[val_idx] = xgb_clf.predict_proba(X_va_tab)


=== Generating XGB OOF predictions ===

--- Fold 1/3 ---

--- Fold 2/3 ---

--- Fold 3/3 ---


In [80]:
# Option A: create aligned copies (non-destructive)
import numpy as np

# require index mapping produced when generating XGB OOF
if 'oof_idx_xgb' not in globals():
    raise RuntimeError("oof_idx_xgb not found. Re-run the XGB OOF cell which should set oof_idx_xgb.")

idx = np.asarray(oof_idx_xgb)
n_full = oof_cnn.shape[0]
if idx.min() < 0 or idx.max() >= n_full:
    raise RuntimeError(f"oof_idx_xgb values out of range for full length {n_full}")

# create aligned versions
oof_cnn_aligned = np.asarray(oof_cnn)[idx]
oof_lstm_aligned = np.asarray(oof_lstm)[idx]
oof_xgb_aligned = np.asarray(oof_xgb)   # already the right smaller length

print("Aligned shapes:")
print(" oof_cnn_aligned:", oof_cnn_aligned.shape)
print(" oof_lstm_aligned:", oof_lstm_aligned.shape)
print(" oof_xgb_aligned:", oof_xgb_aligned.shape)

# set globals (convenience)
globals()['oof_cnn_aligned'] = oof_cnn_aligned
globals()['oof_lstm_aligned'] = oof_lstm_aligned
globals()['oof_xgb_aligned'] = oof_xgb_aligned


Aligned shapes:
 oof_cnn_aligned: (17607, 3)
 oof_lstm_aligned: (17607, 3)
 oof_xgb_aligned: (17607, 3)


In [81]:

import joblib

# Save OOF predictions and labels
joblib.dump(
    {
        "oof_cnn": oof_cnn,
        "oof_lstm": oof_lstm,
        "oof_xgb": oof_xgb,
        "y": y_all_mapped.values
    },
    "oof_preds.pkl"
)
print("💾 OOF predictions saved to oof_preds.pkl")


💾 OOF predictions saved to oof_preds.pkl


# META LEARNER

In [82]:
# --- Replace np.hstack([oof_cnn, oof_lstm, oof_xgb]) with this ---
import numpy as np

# 1) Ensure the XGB index mapping exists
if 'oof_idx_xgb' not in globals():
    raise RuntimeError("oof_idx_xgb not found. Re-run XGB OOF generation to set oof_idx_xgb.")

idx = np.asarray(oof_idx_xgb)

# 2) Select aligned rows from CNN/LSTM using that mapping
aligned_cnn = np.asarray(oof_cnn)[idx]
aligned_lstm = np.asarray(oof_lstm)[idx]
aligned_xgb = np.asarray(oof_xgb)  # already matches idx

# 3) Sanity checks
assert aligned_cnn.shape[0] == aligned_lstm.shape[0] == aligned_xgb.shape[0], \
       f"Aligned shapes mismatch: cnn {aligned_cnn.shape} lstm {aligned_lstm.shape} xgb {aligned_xgb.shape}"

# 4) Build meta matrices (stacked probabilities)
meta_X_train = np.hstack([aligned_cnn, aligned_lstm, aligned_xgb])  # shape: (N_aligned, 9)
# Use the y that already matches XGB (you indicated y_all_mapped_aligned is length 17607)
meta_y_train = np.asarray(y_all_mapped_aligned)

print("meta_X_train shape:", meta_X_train.shape)
print("meta_y_train shape:", meta_y_train.shape)


meta_X_train shape: (17607, 9)
meta_y_train shape: (17607,)


In [83]:
# Optional: overwrite original oof_* so downstream code that expects the smaller length works
oof_cnn = aligned_cnn
oof_lstm = aligned_lstm
oof_xgb = aligned_xgb
globals().update({'oof_cnn': oof_cnn, 'oof_lstm': oof_lstm, 'oof_xgb': oof_xgb})


In [84]:
# Cell B — meta-features construction
import numpy as np
import pandas as pd

# Stack base model probabilities as features
meta_probs = np.hstack([oof_cnn, oof_lstm, oof_xgb])  # shape (n, 9)

# Add engineered meta-features derived from base model outputs:
# - per-sample mean/std/max/min across base model probs
# - disagreements (max prob - 2nd max prob) for confidence
def meta_stats_from_probs(probs_block):
    # probs_block shape (n, n_models * n_classes)
    n_classes = 3
    n_models = probs_block.shape[1] // n_classes
    block = probs_block.reshape(len(probs_block), n_models, n_classes)
    # per model best class prob and best class
    best_probs = block.max(axis=2)                 # (n, n_models)
    best_classes = block.argmax(axis=2)             # (n, n_models)
    # statistics across models on best_probs
    mean_best = best_probs.mean(axis=1)
    std_best = best_probs.std(axis=1)
    max_best = best_probs.max(axis=1)
    min_best = best_probs.min(axis=1)
    # disagreement: how many models agree with majority class
    from scipy.stats import mode
    mode_vals, mode_counts = mode(best_classes, axis=1)
    agree_fraction = (mode_counts.ravel() / n_models)
    # confidence gap: top prob - second top prob per model averaged
    def gap_per_row(row_block):
        # row_block shape (n_models, n_classes)
        gaps = []
        for m in range(row_block.shape[0]):
            arr = np.sort(row_block[m])[::-1]
            gaps.append(arr[0] - (arr[1] if arr.shape[0] > 1 else 0.0))
        return np.mean(gaps)
    gap = np.array([gap_per_row(row) for row in block])

    stats = np.vstack([mean_best, std_best, max_best, min_best, agree_fraction, gap]).T
    stats_cols = ["mean_best_prob", "std_best_prob", "max_best_prob", "min_best_prob", "agree_frac", "avg_top_gap"]
    return stats, stats_cols

stats, stats_cols = meta_stats_from_probs(meta_probs)

# Optionally add a few simple original features if available (e.g. last-hour vol, last return).
# If you saved a features/labels DataFrame earlier, load and align here:
# features_df = pd.read_parquet("features_aligned.parquet")  # or load whatever you have
# extra_feats = features_df.loc[:n-1, ["vol_24h", "ret_1h"]].to_numpy()

# For now, we'll build final meta_X from probs + stats
meta_X = np.hstack([meta_probs, stats])
meta_feature_names = (
    [f"cnn_p{c}" for c in range(3)] +
    [f"lstm_p{c}" for c in range(3)] +
    [f"xgb_p{c}" for c in range(3)] +
    stats_cols
)

print("Meta X shape:", meta_X.shape)
print("Feature names:", meta_feature_names)


Meta X shape: (17607, 15)
Feature names: ['cnn_p0', 'cnn_p1', 'cnn_p2', 'lstm_p0', 'lstm_p1', 'lstm_p2', 'xgb_p0', 'xgb_p1', 'xgb_p2', 'mean_best_prob', 'std_best_prob', 'max_best_prob', 'min_best_prob', 'agree_frac', 'avg_top_gap']


In [86]:
# ---------- Train final meta-learner on aligned meta features (fix mismatch) ----------
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.utils.class_weight import compute_sample_weight
import joblib
import numpy as np

# Use aligned meta arrays produced earlier
if 'meta_X_train' in globals() and 'meta_y_train' in globals():
    X_meta = np.asarray(meta_X_train)
    y_meta = np.asarray(meta_y_train)
else:
    raise RuntimeError("Aligned meta arrays not found. Make sure meta_X_train and meta_y_train exist.")

print("Shapes before fit: X_meta:", X_meta.shape, "y_meta:", y_meta.shape)

# sanity
if X_meta.shape[0] != y_meta.shape[0]:
    raise ValueError(f"Shape mismatch: X_meta rows {X_meta.shape[0]} != y_meta {y_meta.shape[0]}")

# compute balanced sample weights for the meta set
sample_weight_meta = compute_sample_weight(class_weight="balanced", y=y_meta)

# build final model with the best hyperparameters found earlier (best_cfg)
# make sure best_cfg exists; if not, you can hardcode the best config you printed
try:
    best_cfg  # noqa: F821
except NameError:
    # fallback: use the best config you printed: {'max_iter':200,'learning_rate':0.1,'max_depth':4}
    best_cfg = {'max_iter': 200, 'learning_rate': 0.1, 'max_depth': 4}

final_model = HistGradientBoostingClassifier(**best_cfg, early_stopping=True, random_state=42)
final_model.fit(X_meta, y_meta, sample_weight=sample_weight_meta)

# Save the trained meta model
joblib.dump(final_model, "meta_learner.pkl")
print("Final meta_learner model trained and saved to meta_laerner.pkl")

# Quick train-set diagnostics
from sklearn.metrics import classification_report, balanced_accuracy_score
pred_train = final_model.predict(X_meta)
print("Train balanced accuracy:", balanced_accuracy_score(y_meta, pred_train))
print("Train classification report:\n", classification_report(y_meta, pred_train, digits=4))


Shapes before fit: X_meta: (17607, 9) y_meta: (17607,)
Final meta model trained and saved to meta_final_model.pkl
Train balanced accuracy: 0.714867776296772
Train classification report:
               precision    recall  f1-score   support

           0     0.4572    0.9190    0.6106      3915
           1     0.8858    0.5571    0.6840      9255
           2     0.7574    0.6685    0.7102      4437

    accuracy                         0.6656     17607
   macro avg     0.7001    0.7149    0.6683     17607
weighted avg     0.7581    0.6656    0.6743     17607



In [88]:
# Robust: Load meta model (or ensemble object) and predict probabilities / classes
import joblib
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, classification_report
from typing import Optional

def load_and_predict(
    model_path: str,
    meta_X: np.ndarray,
    y: Optional[np.ndarray] = None,
    threshold: Optional[np.ndarray] = None,
    label_map_back: Optional[dict] = None,  # e.g. {0:-1.0, 1:0.0, 2:1.0}
    verbose: bool = True
):
    """
    Loads a saved meta model (or a dict with 'meta_model' and optional 'calibrators'),
    predicts probabilities and classes on meta_X, applies optional thresholding,
    and returns (probs, preds, preds_mapped_if_label_map).
    """
    assert isinstance(meta_X, np.ndarray), "meta_X must be a numpy array"
    # load artifact
    obj = joblib.load(model_path)
    # object can be:
    # - a fitted estimator with predict_proba
    # - a dict: {'meta_model': estimator, 'calibrators': [cal1, cal2, ...]} or similar
    if isinstance(obj, dict):
        if verbose:
            print("Loaded dict artifact with keys:", list(obj.keys()))
        meta_model = obj.get("meta_model") or obj.get("model") or obj.get("estimator")
        calibrators = obj.get("calibrators", None)
    else:
        if verbose:
            print("Loaded estimator directly from", model_path)
        meta_model = obj
        calibrators = None

    if meta_model is None:
        raise RuntimeError("No 'meta_model' found in artifact and artifact is not a direct estimator.")

    # Get probabilities
    if calibrators:
        if verbose:
            print("Using calibrators to produce averaged probabilities (len calibrators):", len(calibrators))
        probs_list = []
        for cal in calibrators:
            # prefer calibrator.predict_proba if it's a calibrator, otherwise fall back
            if hasattr(cal, "predict_proba"):
                probs_list.append(cal.predict_proba(meta_X))
            elif hasattr(cal, "predict"):
                # if calibrator returns labels, convert to one-hot-ish fallback (not ideal)
                preds = cal.predict(meta_X)
                onehot = np.zeros((len(preds), np.max(preds)+1))
                onehot[np.arange(len(preds)), preds] = 1.0
                probs_list.append(onehot)
            else:
                raise RuntimeError("Calibrator does not support predict_proba or predict.")
        probs = np.mean(probs_list, axis=0)
    else:
        if not hasattr(meta_model, "predict_proba"):
            raise RuntimeError("Loaded meta_model has no predict_proba method.")
        probs = meta_model.predict_proba(meta_X)

    # Optional thresholding (vector of length n_classes or None)
    if threshold is not None:
        threshold = np.asarray(threshold)
        if threshold.shape[0] != probs.shape[1]:
            raise ValueError("threshold must have length equal to number of classes")
        # Score = prob - threshold, then pick argmax (falls back to argmax of prob if all below)
        scores = probs - threshold.reshape((1, -1))
        preds = np.argmax(scores, axis=1)
    else:
        preds = np.argmax(probs, axis=1)

    # Map back to original labels if requested
    preds_mapped = None
    if label_map_back is not None:
        # label_map_back maps internal ints -> original labels
        preds_mapped = np.array([label_map_back[int(p)] for p in preds])

    # Diagnostics if ground-truth y provided
    if y is not None:
        if len(y) != len(preds):
            raise ValueError(f"Length mismatch: y ({len(y)}) vs preds ({len(preds)})")
        acc = accuracy_score(y, preds)
        f1m = f1_score(y, preds, average="macro", zero_division=0)
        if verbose:
            print(f"Accuracy: {acc:.4f}   F1_macro: {f1m:.4f}")
            print(classification_report(y, preds, digits=4))
    else:
        if verbose:
            print("No ground-truth y provided; returning predictions only.")

    return probs, preds, preds_mapped

# -------------------------
# Example usage (replace meta_X and y with your data)
# -------------------------
# meta_X must be numpy array shaped like training-time meta features (e.g. meta_X_train shape)
# y is optional
# label_map_back = {0:-1.0, 1:0.0, 2:1.0}  # if you want original labels back
probs, preds, preds_mapped = load_and_predict("meta_model.pkl", meta_X=meta_X, y=None, threshold=None, label_map_back=None, verbose=True)

# Inspect outputs
print("probs.shape:", probs.shape)
print("preds.shape:", preds.shape)
if preds_mapped is not None:
    print("preds_mapped[:10]:", preds_mapped[:10])


Loaded dict artifact with keys: ['meta_model', 'calibrators', 'best_cfg', 'meta_feature_names']
Using calibrators to produce averaged probabilities (len calibrators): 5
No ground-truth y provided; returning predictions only.
probs.shape: (17607, 3)
preds.shape: (17607,)


# save

In [89]:
import joblib

joblib.dump(final_model, "meta_learner.pkl")


['meta_learner.pkl']

In [90]:
from google.colab import files
files.download("meta_learner.pkl")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [91]:
import joblib
from google.colab import files

# Save the three base models
joblib.dump(cnn_model, "cnn_model.pkl")
joblib.dump(lstm_model, "lstm_model.pkl")
joblib.dump(xgb_clf, "xgb_model.pkl")  # change to your actual XGB var name

# Download them
files.download("cnn_model.pkl")
files.download("lstm_model.pkl")
files.download("xgb_model.pkl")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [92]:
import joblib
joblib.dump(scaler, "scaler.pkl")
files.download("scaler.pkl")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>