<a href="https://colab.research.google.com/github/yshnxd/solaris/blob/main/solaris_reborn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup Libraries

In [1]:
# === STEP 0: Setup Libraries ===
# Core
import numpy as np
import pandas as pd
import gc
import os
import warnings
warnings.filterwarnings("ignore")

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Technical indicators & TA-Lib alternative
!pip install ta --quiet
import ta

# Machine Learning
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, classification_report, mean_absolute_error, mean_squared_error
)

# XGBoost
!pip install xgboost --quiet
from xgboost import XGBClassifier, XGBRegressor

# Deep Learning (TensorFlow/Keras)
!pip install tensorflow --quiet
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import (
    Dense, Dropout, Flatten, Conv1D, MaxPooling1D,
    LSTM, Input, BatchNormalization, GlobalAveragePooling1D
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Utilities for reproducibility
import random
import tensorflow as tf

SEED = 42
np.random.seed(SEED)
random.seed(SEED)
tf.random.set_seed(SEED)

print("✅ Libraries loaded successfully.")


  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for ta (setup.py) ... [?25l[?25hdone
✅ Libraries loaded successfully.


# Collect Data

In [2]:
# === STEP 1: Data Collection (Hourly) ===
!pip install yfinance --quiet
import yfinance as yf
from datetime import datetime
import pandas as pd
import os

# Target + market context tickers
tickers = ["AAPL", "SPY", "TSLA", "NVDA", "QQQ"]  # note: ^VIX for Yahoo
interval = "60m"  # 1-hour bars
period = "729d"   # max allowed for hourly

data_dict = {}
print("Downloading hourly data...")
for t in tickers:
    try:
        df = yf.download(t, interval=interval, period=period, progress=False)
        df.dropna(inplace=True)
        df.index = df.index.tz_localize(None)
        data_dict[t] = df
        print(f"{t}: {df.shape[0]} rows from {df.index.min()} to {df.index.max()}")
    except Exception as e:
        print(f"❌ Failed to get {t}: {e}")
# ✅ Replace old close_df creation with this
target_index = data_dict["AAPL"].index
aligned_close = pd.DataFrame(index=target_index)

for t, df in data_dict.items():
    aligned_close[t] = df.reindex(target_index)['Close']

print("\nSample aligned close prices:")
print(aligned_close.tail())

# Save raw hourly data
os.makedirs("data_raw", exist_ok=True)
for t, df in data_dict.items():
    df.to_csv(f"data_raw/{t}_60m.csv")
print("\n✅ Hourly data downloaded and saved to 'data_raw/'")


Downloading hourly data...
AAPL: 5075 rows from 2022-09-13 13:30:00 to 2025-08-08 19:30:00
SPY: 5075 rows from 2022-09-13 13:30:00 to 2025-08-08 19:30:00
TSLA: 5075 rows from 2022-09-13 13:30:00 to 2025-08-08 19:30:00
NVDA: 5075 rows from 2022-09-13 13:30:00 to 2025-08-08 19:30:00
QQQ: 5075 rows from 2022-09-13 13:30:00 to 2025-08-08 19:30:00

Sample aligned close prices:
                           AAPL         SPY        TSLA        NVDA  \
Datetime                                                              
2025-08-08 15:30:00  228.199997  636.875000  330.319214  182.908096   
2025-08-08 16:30:00  228.675003  636.395020  328.660004  181.884995   
2025-08-08 17:30:00  229.289993  637.219971  329.510498  182.615005   
2025-08-08 18:30:00  228.830002  637.260010  328.630005  182.695007   
2025-08-08 19:30:00  229.369995  637.119995  329.679993  182.750000   

                            QQQ  
Datetime                         
2025-08-08 15:30:00  574.000000  
2025-08-08 16:30:00  573.

#Feature Creation

Creating Features

In [3]:
all_feat_data = []

# Forward-fill aligned_close once globally
aligned_ffill = aligned_close.ffill()

for ticker in aligned_ffill.columns:
    if aligned_ffill[ticker].isna().all():
        continue

    price_series = aligned_ffill[ticker]
    feat_tmp = pd.DataFrame(index=price_series.index)

    # Lag returns
    for lag in [1, 3, 6, 12, 24]:
        feat_tmp[f"ret_{lag}h"] = price_series.pct_change(lag)

    # Rolling volatility
    for window in [6, 12, 24]:
        feat_tmp[f"vol_{window}h"] = price_series.pct_change().rolling(window).std()

    # Technical indicators
    feat_tmp["rsi_14"] = ta.momentum.RSIIndicator(price_series, window=14).rsi()
    macd = ta.trend.MACD(price_series)
    feat_tmp["macd"] = macd.macd()
    feat_tmp["macd_signal"] = macd.macd_signal()

    # Moving averages
    for w in [5, 10, 20]:
        feat_tmp[f"sma_{w}"] = price_series.rolling(w).mean()
        feat_tmp[f"ema_{w}"] = price_series.ewm(span=w, adjust=False).mean()

    # Volume features
    if ticker in data_dict and "Volume" in data_dict[ticker].columns:
        vol_series = data_dict[ticker].reindex(price_series.index)["Volume"].ffill()
        feat_tmp["vol_change_1h"] = vol_series.pct_change()
        feat_tmp["vol_ma_24h"] = vol_series.rolling(24).mean()

    # Cross-asset returns — from the globally ffilled dataframe
    for asset in ["SPY", "QQQ", "NVDA"]:
        if asset in aligned_ffill.columns:
            feat_tmp[f"{asset}_ret_1h"] = aligned_ffill[asset].pct_change()

    if "^VIX" in aligned_ffill.columns:
        feat_tmp["vix_ret_1h"] = aligned_ffill["^VIX"].pct_change()

    # Calendar features
    feat_tmp["hour"] = feat_tmp.index.hour
    feat_tmp["day_of_week"] = feat_tmp.index.dayofweek

    # Only drop rows with NaNs in features for THIS ticker
    feat_tmp = feat_tmp.dropna(subset=[col for col in feat_tmp.columns if col not in ["datetime", "ticker"]])

    feat_tmp["datetime"] = feat_tmp.index
    feat_tmp["ticker"] = ticker

    all_feat_data.append(feat_tmp.reset_index(drop=True))

features_df = pd.concat(all_feat_data, ignore_index=True)

print(f"✅ Created features for {features_df['ticker'].nunique()} tickers")
print("Shape:", features_df.shape)
print(features_df.head())


✅ Created features for 5 tickers
Shape: (25210, 26)
     ret_1h    ret_3h    ret_6h   ret_12h   ret_24h    vol_6h   vol_12h  \
0  0.007971  0.012013  0.024587  0.031305 -0.007604  0.003690  0.003882   
1  0.002602  0.008187  0.021955  0.036208 -0.003386  0.003683  0.003588   
2  0.001715  0.012327  0.018588  0.040581  0.003208  0.003681  0.003188   
3  0.011595  0.015967  0.028172  0.049035  0.016817  0.004992  0.003984   
4  0.004310  0.017698  0.026030  0.047453  0.012652  0.004917  0.003945   

    vol_24h     rsi_14      macd  ...      ema_20  vol_change_1h  \
0  0.006928  57.174728 -0.489358  ...  152.371399       0.354865   
1  0.006946  58.880818 -0.266801  ...  152.577932      -0.018507   
2  0.006880  60.014242 -0.068253  ...  152.790033       1.223954   
3  0.007246  66.708324  0.231274  ...  153.152888      -0.300933   
4  0.007102  68.822038  0.517156  ...  153.545469       0.135969   

     vol_ma_24h  SPY_ret_1h  QQQ_ret_1h  NVDA_ret_1h  hour  day_of_week  \
0  1.176824e+

Label Creation

In [4]:
# === LABEL CREATION FOR ALL TICKERS (pooled dataset) ===

horizon = 1               # predict 1 hour ahead
vol_lookback = 24         # hours to compute rolling volatility
vol_multiplier = 0.5      # threshold scaling vs volatility

all_data = []

for ticker in aligned_close.columns:
    # Skip if ticker is all NaN (e.g., ^VIX alignment issues)
    if aligned_close[ticker].dropna().empty:
        continue

    price_series = aligned_close[ticker]

    # Forward return
    future_price = price_series.shift(-horizon)
    future_ret = (future_price - price_series) / price_series

    # Volatility-based threshold
    rolling_vol = price_series.pct_change().rolling(vol_lookback).std()
    threshold = rolling_vol * vol_multiplier

    # Label creation
    label = future_ret.copy()
    label[future_ret > threshold] = 1    # Up
    label[future_ret < -threshold] = -1  # Down
    label[(future_ret <= threshold) & (future_ret >= -threshold)] = 0  # Neutral

    # Drop NaNs
    label = label.dropna()

    # Combine into dataframe
    df_tmp = pd.DataFrame({
        "datetime": label.index,
        "ticker": ticker,
        "price": price_series.loc[label.index],
        "label": label.values,
        "future_ret": future_ret.loc[label.index],
        "volatility": rolling_vol.loc[label.index]
    })

    all_data.append(df_tmp)

# Combine all tickers
labels_df = pd.concat(all_data, ignore_index=True)

print("Combined dataset shape:", labels_df.shape)
print(labels_df["label"].value_counts(normalize=True))
labels_df.head(10)


Combined dataset shape: (25370, 6)
label
 0.000000    0.532322
 1.000000    0.245211
-1.000000    0.217737
-0.009297    0.000039
-0.006482    0.000039
               ...   
 0.004660    0.000039
-0.013126    0.000039
-0.002298    0.000039
-0.002268    0.000039
 0.002448    0.000039
Name: proportion, Length: 123, dtype: float64


Unnamed: 0,datetime,ticker,price,label,future_ret,volatility
0,2022-09-13 13:30:00,AAPL,157.820007,-0.008618,-0.008618,
1,2022-09-13 14:30:00,AAPL,156.4599,-0.001324,-0.001324,
2,2022-09-13 15:30:00,AAPL,156.252701,0.001647,0.001647,
3,2022-09-13 16:30:00,AAPL,156.509995,-0.009297,-0.009297,
4,2022-09-13 17:30:00,AAPL,155.054993,-0.006482,-0.006482,
5,2022-09-13 18:30:00,AAPL,154.050003,-0.001298,-0.001298,
6,2022-09-13 19:30:00,AAPL,153.850006,0.00741,0.00741,
7,2022-09-14 13:30:00,AAPL,154.990005,0.006065,0.006065,
8,2022-09-14 14:30:00,AAPL,155.929993,-0.003912,-0.003912,
9,2022-09-14 15:30:00,AAPL,155.320007,-0.001642,-0.001642,


Scaling

# Preprocessing

Normalize Features

In [5]:
# Merge features with labels
df = features_df.merge(labels_df, on=["datetime", "ticker"], how="inner")

# Drop NaNs (just in case)
df = df.dropna()

# Separate features & labels
X = df.drop(columns=["datetime", "ticker", "label", "future_ret"])
y = df["label"]

print("X shape:", X.shape)
print("y distribution:\n", y.value_counts(normalize=True))


X shape: (25205, 26)
y distribution:
 label
 0.0    0.534854
 1.0    0.246023
-1.0    0.219123
Name: proportion, dtype: float64


Scale

In [6]:
from sklearn.preprocessing import StandardScaler
import numpy as np

# Merge features and labels
df = features_df.merge(labels_df, on=["datetime", "ticker"], how="inner")

# Sort by time
df = df.sort_values(["datetime", "ticker"]).reset_index(drop=True)

# Replace inf values with NaN
df = df.replace([np.inf, -np.inf], np.nan)

# Drop rows with NaNs
df = df.dropna()

# Separate features & target
X = df.drop(columns=["datetime", "ticker", "label", "future_ret"])
y = df["label"]

# Time-based split
train_size = int(len(df) * 0.7)
val_size = int(len(df) * 0.15)

X_train = X.iloc[:train_size]
y_train = y.iloc[:train_size]

X_val = X.iloc[train_size:train_size + val_size]
y_val = y.iloc[train_size:train_size + val_size]

X_test = X.iloc[train_size + val_size:]
y_test = y.iloc[train_size + val_size:]

# Ensure all values are finite before scaling
assert np.isfinite(X_train.values).all(), "Found non-finite values in X_train!"

# Scale features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

print(f"✅ Train: {X_train.shape}, Val: {X_val.shape}, Test: {X_test.shape}")
print("Label distribution in Train:", y_train.value_counts(normalize=True))


✅ Train: (17631, 26), Val: (3778, 26), Test: (3779, 26)
Label distribution in Train: label
 0.0    0.525268
 1.0    0.252113
-1.0    0.222619
Name: proportion, dtype: float64


Sequence making - For LSTM AND CNN

In [7]:
import numpy as np

def create_sequences(X, y, seq_len=24):
    """
    Convert tabular (samples, features) into sequential (samples, seq_len, features)
    for CNN/LSTM, keeping labels aligned to the last timestep.
    """
    X_seq, y_seq = [], []
    for i in range(len(X) - seq_len):
        X_seq.append(X[i:i+seq_len])
        y_seq.append(y[i+seq_len])  # label at next hour
    return np.array(X_seq), np.array(y_seq)

# === Choose sequence length ===
SEQ_LEN = 24  # last 24 hours to predict next hour

# Reshape train/val/test sets
X_train_seq, y_train_seq = create_sequences(X_train, y_train.values, SEQ_LEN)
X_val_seq,   y_val_seq   = create_sequences(X_val,   y_val.values,   SEQ_LEN)
X_test_seq,  y_test_seq  = create_sequences(X_test,  y_test.values,  SEQ_LEN)

print(f"Train seq: {X_train_seq.shape}, Val seq: {X_val_seq.shape}, Test seq: {X_test_seq.shape}")


Train seq: (17607, 24, 26), Val seq: (3754, 24, 26), Test seq: (3755, 24, 26)


In [8]:
# CELL 1 — label encoding + class weights
import numpy as np
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.utils import to_categorical

# mapping: -1 -> 0 (down), 0 -> 1 (neutral), 1 -> 2 (up)
label_map = { -1.0: 0, 0.0: 1, 1.0: 2 }

# If your y_* are numpy arrays (seq labels), convert
y_train_seq_mapped = np.vectorize(label_map.get)(y_train_seq)
y_val_seq_mapped   = np.vectorize(label_map.get)(y_val_seq)
y_test_seq_mapped  = np.vectorize(label_map.get)(y_test_seq)

# one-hot for Keras
y_train_cat = to_categorical(y_train_seq_mapped, num_classes=3)
y_val_cat   = to_categorical(y_val_seq_mapped, num_classes=3)
y_test_cat  = to_categorical(y_test_seq_mapped, num_classes=3)

# compute class weights from training sequence labels
classes = np.unique(y_train_seq_mapped)
class_weights = compute_class_weight("balanced", classes=classes, y=y_train_seq_mapped)
class_weights_dict = {int(c): w for c, w in zip(classes, class_weights)}
print("Class weights:", class_weights_dict)
print("Train class distribution:", np.bincount(y_train_seq_mapped) / len(y_train_seq_mapped))


Class weights: {0: np.float64(1.4983405667602758), 1: np.float64(0.6344864864864865), 2: np.float64(1.321846846846847)}
Train class distribution: [0.22246834 0.52535923 0.25217243]


# Build the Model

CNN

In [17]:
# === CNN Architecture ===
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, BatchNormalization, Activation, Dropout, GlobalAveragePooling1D, Dense

def build_cnn_best(input_shape, n_classes=3, dropout_rate=0.25):
    inp = Input(shape=input_shape)
    x = Conv1D(64, kernel_size=3, padding='same')(inp)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv1D(128, kernel_size=5, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(0.2)(x)

    x = Conv1D(256, kernel_size=3, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = GlobalAveragePooling1D()(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(dropout_rate)(x)
    out = Dense(n_classes, activation='softmax')(x)

    model = Model(inp, out)
    return model

cnn_model = build_cnn_best(input_shape=X_train_seq.shape[1:], n_classes=3, dropout_rate=0.25)


LSTM

In [19]:
# === LSTM Architecture (Fixed) ===
from tensorflow.keras.layers import Bidirectional, LSTM, LayerNormalization, GlobalAveragePooling1D, GlobalMaxPooling1D, Concatenate

def build_lstm_best(input_shape, n_classes=3, dropout_rate=0.25):
    inp = Input(shape=input_shape)
    x = LayerNormalization()(inp)

    x = Bidirectional(LSTM(128, return_sequences=True))(x)
    x = Dropout(0.2)(x)
    x = Bidirectional(LSTM(64, return_sequences=True))(x)
    x = Dropout(0.2)(x)

    # Use Keras pooling layers instead of raw TF ops
    x_avg = GlobalAveragePooling1D()(x)
    x_max = GlobalMaxPooling1D()(x)
    x = Concatenate()([x_avg, x_max])

    x = Dense(128, activation='relu')(x)
    x = Dropout(dropout_rate)(x)
    out = Dense(n_classes, activation='softmax')(x)

    model = Model(inp, out)
    return model

# Instantiate
lstm_model = build_lstm_best(input_shape=X_train_seq.shape[1:], n_classes=3, dropout_rate=0.25)


XGBOOST

In [20]:
# === XGBoost Setup ===
import xgboost as xgb
from sklearn.utils.class_weight import compute_sample_weight
import numpy as np

label_map = { -1.0: 0, 0.0: 1, 1.0: 2 }
y_train_tab = np.vectorize(label_map.get)(y_train)
y_val_tab   = np.vectorize(label_map.get)(y_val)
y_test_tab  = np.vectorize(label_map.get)(y_test)

sample_weight = compute_sample_weight('balanced', y_train_tab)

xgb_clf = xgb.XGBClassifier(
    objective='multi:softprob',
    num_class=3,
    n_estimators=400,
    learning_rate=0.05,
    max_depth=5,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
    n_jobs=-1,
    tree_method='auto'
)


# Train the model

CNN

In [21]:
# === CNN Training ===
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

cnn_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

es = EarlyStopping(monitor='val_loss', patience=12, restore_best_weights=True, verbose=1)
rlr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=6, min_lr=1e-6, verbose=1)
chk = ModelCheckpoint('best_cnn.h5', monitor='val_loss', save_best_only=True)

cnn_history = cnn_model.fit(
    X_train_seq, y_train_cat,
    validation_data=(X_val_seq, y_val_cat),
    epochs=80,
    batch_size=128,
    class_weight=class_weights_dict,
    callbacks=[es, rlr, chk],
    verbose=2
)


Epoch 1/80




138/138 - 30s - 215ms/step - accuracy: 0.4032 - loss: 1.0999 - val_accuracy: 0.4451 - val_loss: 1.0644 - learning_rate: 1.0000e-03
Epoch 2/80
138/138 - 20s - 144ms/step - accuracy: 0.4490 - loss: 1.0722 - val_accuracy: 0.4294 - val_loss: 1.0801 - learning_rate: 1.0000e-03
Epoch 3/80
138/138 - 13s - 91ms/step - accuracy: 0.4673 - loss: 1.0623 - val_accuracy: 0.4065 - val_loss: 1.0935 - learning_rate: 1.0000e-03
Epoch 4/80




138/138 - 20s - 148ms/step - accuracy: 0.4786 - loss: 1.0524 - val_accuracy: 0.4704 - val_loss: 1.0453 - learning_rate: 1.0000e-03
Epoch 5/80
138/138 - 21s - 151ms/step - accuracy: 0.4840 - loss: 1.0421 - val_accuracy: 0.4100 - val_loss: 1.0997 - learning_rate: 1.0000e-03
Epoch 6/80
138/138 - 21s - 152ms/step - accuracy: 0.4943 - loss: 1.0324 - val_accuracy: 0.4529 - val_loss: 1.0653 - learning_rate: 1.0000e-03
Epoch 7/80




138/138 - 21s - 149ms/step - accuracy: 0.5011 - loss: 1.0222 - val_accuracy: 0.4939 - val_loss: 1.0344 - learning_rate: 1.0000e-03
Epoch 8/80
138/138 - 20s - 142ms/step - accuracy: 0.5086 - loss: 1.0091 - val_accuracy: 0.4686 - val_loss: 1.0489 - learning_rate: 1.0000e-03
Epoch 9/80
138/138 - 14s - 101ms/step - accuracy: 0.5095 - loss: 0.9977 - val_accuracy: 0.4579 - val_loss: 1.0740 - learning_rate: 1.0000e-03
Epoch 10/80
138/138 - 13s - 94ms/step - accuracy: 0.5197 - loss: 0.9877 - val_accuracy: 0.4763 - val_loss: 1.0525 - learning_rate: 1.0000e-03
Epoch 11/80
138/138 - 13s - 91ms/step - accuracy: 0.5296 - loss: 0.9729 - val_accuracy: 0.4912 - val_loss: 1.0404 - learning_rate: 1.0000e-03
Epoch 12/80
138/138 - 20s - 148ms/step - accuracy: 0.5327 - loss: 0.9640 - val_accuracy: 0.4672 - val_loss: 1.0704 - learning_rate: 1.0000e-03
Epoch 13/80

Epoch 13: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
138/138 - 20s - 148ms/step - accuracy: 0.5380 - loss: 0.9501 - val_a

LSTM

In [22]:
# === LSTM Training ===
lstm_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=5e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

es = EarlyStopping(monitor='val_loss', patience=12, restore_best_weights=True, verbose=1)
rlr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=6, min_lr=1e-6, verbose=1)
chk = ModelCheckpoint('best_lstm.h5', monitor='val_loss', save_best_only=True)

lstm_history = lstm_model.fit(
    X_train_seq, y_train_cat,
    validation_data=(X_val_seq, y_val_cat),
    epochs=100,
    batch_size=128,
    class_weight=class_weights_dict,
    callbacks=[es, rlr, chk],
    verbose=2
)


Epoch 1/100




138/138 - 49s - 353ms/step - accuracy: 0.4139 - loss: 1.0866 - val_accuracy: 0.4036 - val_loss: 1.0861 - learning_rate: 5.0000e-04
Epoch 2/100




138/138 - 78s - 565ms/step - accuracy: 0.4479 - loss: 1.0718 - val_accuracy: 0.4153 - val_loss: 1.0792 - learning_rate: 5.0000e-04
Epoch 3/100




138/138 - 41s - 296ms/step - accuracy: 0.4640 - loss: 1.0571 - val_accuracy: 0.4315 - val_loss: 1.0715 - learning_rate: 5.0000e-04
Epoch 4/100
138/138 - 42s - 303ms/step - accuracy: 0.4782 - loss: 1.0444 - val_accuracy: 0.4113 - val_loss: 1.0827 - learning_rate: 5.0000e-04
Epoch 5/100
138/138 - 39s - 283ms/step - accuracy: 0.4900 - loss: 1.0244 - val_accuracy: 0.4046 - val_loss: 1.0754 - learning_rate: 5.0000e-04
Epoch 6/100
138/138 - 42s - 303ms/step - accuracy: 0.5060 - loss: 1.0013 - val_accuracy: 0.4124 - val_loss: 1.0779 - learning_rate: 5.0000e-04
Epoch 7/100




138/138 - 41s - 300ms/step - accuracy: 0.5206 - loss: 0.9738 - val_accuracy: 0.4257 - val_loss: 1.0674 - learning_rate: 5.0000e-04
Epoch 8/100
138/138 - 43s - 314ms/step - accuracy: 0.5404 - loss: 0.9385 - val_accuracy: 0.4078 - val_loss: 1.0895 - learning_rate: 5.0000e-04
Epoch 9/100
138/138 - 39s - 282ms/step - accuracy: 0.5584 - loss: 0.9136 - val_accuracy: 0.4366 - val_loss: 1.0715 - learning_rate: 5.0000e-04
Epoch 10/100
138/138 - 38s - 272ms/step - accuracy: 0.5739 - loss: 0.8760 - val_accuracy: 0.4321 - val_loss: 1.0784 - learning_rate: 5.0000e-04
Epoch 11/100
138/138 - 37s - 270ms/step - accuracy: 0.5884 - loss: 0.8465 - val_accuracy: 0.4267 - val_loss: 1.1009 - learning_rate: 5.0000e-04
Epoch 12/100
138/138 - 43s - 308ms/step - accuracy: 0.6058 - loss: 0.8149 - val_accuracy: 0.4329 - val_loss: 1.0858 - learning_rate: 5.0000e-04
Epoch 13/100

Epoch 13: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
138/138 - 39s - 284ms/step - accuracy: 0.6135 - loss: 0.7887

XGBOOST

In [27]:
import xgboost as xgb
from sklearn.metrics import accuracy_score

# Create DMatrix objects
dtrain = xgb.DMatrix(X_train, label=y_train_tab)
dval = xgb.DMatrix(X_val, label=y_val_tab)

# Set parameters
params = {
    'objective': 'multi:softmax',
    'num_class': 3,
    'learning_rate': 0.05,
    'max_depth': 6,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'eval_metric': 'mlogloss',
    'seed': 42,
    'tree_method': 'hist'
}

# Train with early stopping
evals = [(dtrain, 'train'), (dval, 'eval')]
model = xgb.train(
    params,
    dtrain,
    num_boost_round=1000,
    evals=evals,
    early_stopping_rounds=50,
    verbose_eval=50
)

# Predict
dtest = xgb.DMatrix(X_test)
y_pred = model.predict(dtest)

print("Test Accuracy:", accuracy_score(y_test_tab, y_pred))


[0]	train-mlogloss:1.08732	eval-mlogloss:1.08898
[50]	train-mlogloss:0.89629	eval-mlogloss:0.96849
[100]	train-mlogloss:0.83466	eval-mlogloss:0.96683
[132]	train-mlogloss:0.80427	eval-mlogloss:0.96905
Test Accuracy: 0.5824292140777984


# O-O-F Predictions

In [51]:
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import OneHotEncoder
import numpy as np

# Time series split config
n_splits = 3
tscv = TimeSeriesSplit(n_splits=n_splits)

# Label remap (-1, 0, 1) -> (0, 1, 2)
label_map = {-1: 0, 0: 1, 1: 2}

# One-hot encoder for DL models
enc = OneHotEncoder(sparse_output=False)
y_all_mapped = y_tab.map(label_map)
y_all_oh = enc.fit_transform(y_all_mapped.values.reshape(-1, 1))

# Storage for OOF predictions
oof_cnn = np.zeros((len(X_tab), 3))
oof_lstm = np.zeros((len(X_tab), 3))
oof_xgb = np.zeros((len(X_tab), 3))

# Ensure tabular and sequence data match in length
min_len = min(len(X_tab), len(X_train_seq), len(y_all_mapped))

X_tab_aligned = X_tab[:min_len]
X_train_seq_aligned = X_train_seq[:min_len]
y_all_mapped_aligned = y_all_mapped.iloc[:min_len]
y_all_oh_aligned = y_all_oh[:min_len]

print(f"Aligned lengths: tab={X_tab_aligned.shape}, seq={X_train_seq_aligned.shape}, y={y_all_mapped_aligned.shape}")


Aligned lengths: tab=(17607, 26), seq=(17607, 24, 26), y=(17607,)


In [52]:
print("=== Generating CNN OOF predictions ===")
for fold, (train_idx, val_idx) in enumerate(tscv.split(X_tab_aligned), 1):
    print(f"\n--- Fold {fold}/{n_splits} ---")

    # Sequences
    X_seq_tr, X_seq_va = X_train_seq[train_idx], X_train_seq[val_idx]
    y_seq_tr_oh, y_seq_va_oh = y_all_oh[train_idx], y_all_oh[val_idx]

    # Build + compile
    cnn_model = build_cnn_best(input_shape=X_seq_tr.shape[1:], n_classes=3, dropout_rate=0.25)
    cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Train
    cnn_model.fit(X_seq_tr, y_seq_tr_oh, validation_data=(X_seq_va, y_seq_va_oh),
                  epochs=5, batch_size=64, verbose=0)

    # Predict
    oof_cnn[val_idx] = cnn_model.predict(X_seq_va, verbose=0)


=== Generating CNN OOF predictions ===

--- Fold 1/3 ---

--- Fold 2/3 ---

--- Fold 3/3 ---


In [53]:
print("=== Generating LSTM OOF predictions ===")
for fold, (train_idx, val_idx) in enumerate(tscv.split(X_tab_aligned), 1):
    print(f"\n--- Fold {fold}/{n_splits} ---")

    # Sequences
    X_seq_tr, X_seq_va = X_train_seq[train_idx], X_train_seq[val_idx]
    y_seq_tr_oh, y_seq_va_oh = y_all_oh[train_idx], y_all_oh[val_idx]

    # Build + compile
    lstm_model = build_lstm_best(input_shape=X_seq_tr.shape[1:], n_classes=3, dropout_rate=0.25)
    lstm_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Train
    lstm_model.fit(X_seq_tr, y_seq_tr_oh, validation_data=(X_seq_va, y_seq_va_oh),
                   epochs=5, batch_size=64, verbose=0)

    # Predict
    oof_lstm[val_idx] = lstm_model.predict(X_seq_va, verbose=0)


=== Generating LSTM OOF predictions ===

--- Fold 1/3 ---

--- Fold 2/3 ---

--- Fold 3/3 ---


In [54]:
from xgboost import XGBClassifier

print("=== Generating XGB OOF predictions ===")
for fold, (train_idx, val_idx) in enumerate(tscv.split(X_tab_aligned), 1):
    print(f"\n--- Fold {fold}/{n_splits} ---")

    # Tabular
    X_tr_tab, X_va_tab = X_tab[train_idx], X_tab[val_idx]
    y_tr_tab, y_va_tab = y_all_mapped.iloc[train_idx], y_all_mapped.iloc[val_idx]

    # Build
    xgb_clf = XGBClassifier(
        objective='multi:softprob',
        num_class=3,
        max_depth=6,
        learning_rate=0.05,
        subsample=0.8,
        colsample_bytree=0.8,
        n_estimators=300,
        eval_metric='mlogloss'
    )

    # Train
    xgb_clf.fit(X_tr_tab, y_tr_tab)

    # Predict
    oof_xgb[val_idx] = xgb_clf.predict_proba(X_va_tab)


=== Generating XGB OOF predictions ===

--- Fold 1/3 ---

--- Fold 2/3 ---

--- Fold 3/3 ---


In [55]:
# Stack all model predictions for meta-learner
meta_X_train = np.hstack([oof_cnn, oof_lstm, oof_xgb])
meta_y_train = y_all_mapped.values  # already mapped to (0,1,2)

print("✅ OOF generation complete!")
print("Meta-train shape:", meta_X_train.shape)


✅ OOF generation complete!
Meta-train shape: (17631, 9)


In [57]:

import joblib

# Save OOF predictions and labels
joblib.dump(
    {
        "oof_cnn": oof_cnn,
        "oof_lstm": oof_lstm,
        "oof_xgb": oof_xgb,
        "y": y_all_mapped.values
    },
    "oof_preds.pkl"
)
print("💾 OOF predictions saved to oof_preds.pkl")


💾 OOF predictions saved to oof_preds.pkl


# META LEARNER

In [58]:
# Cell A — Load OOF predictions and align
import joblib
import numpy as np
import pandas as pd
from sklearn.utils import check_array

# Try to load previously saved OOF predictions. If you already have them in memory,
# replace this with the arrays directly (oof_cnn, oof_lstm, oof_xgb, y).
oof_path = "oof_preds.pkl"
obj = joblib.load(oof_path)  # expects dict with "oof_cnn","oof_lstm","oof_xgb","y"

oof_cnn = obj["oof_cnn"]    # shape (n_samples, 3)
oof_lstm = obj["oof_lstm"]
oof_xgb = obj["oof_xgb"]
y_raw   = obj["y"]          # ground-truth labels used during OOF (mapped 0/1/2 or original)

# Make numpy arrays and check sizes
oof_cnn = np.asarray(oof_cnn)
oof_lstm = np.asarray(oof_lstm)
oof_xgb = np.asarray(oof_xgb)
y_raw = np.asarray(y_raw).ravel()

print("Shapes:", oof_cnn.shape, oof_lstm.shape, oof_xgb.shape, y_raw.shape)

# Basic sanity checks
n = len(y_raw)
assert oof_cnn.shape[0] == n and oof_lstm.shape[0] == n and oof_xgb.shape[0] == n, "OOF arrays must align with labels"

# If your y is in -1/0/1 mapping, map to 0/1/2 (consistent)
# Detect automatically if values are -1/0/1 and map to 0/1/2
if set(np.unique(y_raw)).issuperset({-1,0,1}) and set(np.unique(y_raw)).difference({-1,0,1})==set():
    print("Detected labels in {-1,0,1} — remapping to {0,1,2}")
    map_arr = np.array([0 if v==-1 else (1 if v==0 else 2) for v in y_raw])
    y = map_arr
else:
    # assume already 0/1/2
    y = y_raw.copy()

# Confirm class distribution
unique, counts = np.unique(y, return_counts=True)
print("Label distribution (mapped):", dict(zip(unique, counts)))


Shapes: (17631, 3) (17631, 3) (17631, 3) (17631,)
Label distribution (mapped): {np.int64(0): np.int64(3925), np.int64(1): np.int64(9261), np.int64(2): np.int64(4445)}


In [59]:
# Cell B — meta-features construction
import numpy as np
import pandas as pd

# Stack base model probabilities as features
meta_probs = np.hstack([oof_cnn, oof_lstm, oof_xgb])  # shape (n, 9)

# Add engineered meta-features derived from base model outputs:
# - per-sample mean/std/max/min across base model probs
# - disagreements (max prob - 2nd max prob) for confidence
def meta_stats_from_probs(probs_block):
    # probs_block shape (n, n_models * n_classes)
    n_classes = 3
    n_models = probs_block.shape[1] // n_classes
    block = probs_block.reshape(len(probs_block), n_models, n_classes)
    # per model best class prob and best class
    best_probs = block.max(axis=2)                 # (n, n_models)
    best_classes = block.argmax(axis=2)             # (n, n_models)
    # statistics across models on best_probs
    mean_best = best_probs.mean(axis=1)
    std_best = best_probs.std(axis=1)
    max_best = best_probs.max(axis=1)
    min_best = best_probs.min(axis=1)
    # disagreement: how many models agree with majority class
    from scipy.stats import mode
    mode_vals, mode_counts = mode(best_classes, axis=1)
    agree_fraction = (mode_counts.ravel() / n_models)
    # confidence gap: top prob - second top prob per model averaged
    def gap_per_row(row_block):
        # row_block shape (n_models, n_classes)
        gaps = []
        for m in range(row_block.shape[0]):
            arr = np.sort(row_block[m])[::-1]
            gaps.append(arr[0] - (arr[1] if arr.shape[0] > 1 else 0.0))
        return np.mean(gaps)
    gap = np.array([gap_per_row(row) for row in block])

    stats = np.vstack([mean_best, std_best, max_best, min_best, agree_fraction, gap]).T
    stats_cols = ["mean_best_prob", "std_best_prob", "max_best_prob", "min_best_prob", "agree_frac", "avg_top_gap"]
    return stats, stats_cols

stats, stats_cols = meta_stats_from_probs(meta_probs)

# Optionally add a few simple original features if available (e.g. last-hour vol, last return).
# If you saved a features/labels DataFrame earlier, load and align here:
# features_df = pd.read_parquet("features_aligned.parquet")  # or load whatever you have
# extra_feats = features_df.loc[:n-1, ["vol_24h", "ret_1h"]].to_numpy()

# For now, we'll build final meta_X from probs + stats
meta_X = np.hstack([meta_probs, stats])
meta_feature_names = (
    [f"cnn_p{c}" for c in range(3)] +
    [f"lstm_p{c}" for c in range(3)] +
    [f"xgb_p{c}" for c in range(3)] +
    stats_cols
)

print("Meta X shape:", meta_X.shape)
print("Feature names:", meta_feature_names)


Meta X shape: (17631, 15)
Feature names: ['cnn_p0', 'cnn_p1', 'cnn_p2', 'lstm_p0', 'lstm_p1', 'lstm_p2', 'xgb_p0', 'xgb_p1', 'xgb_p2', 'mean_best_prob', 'std_best_prob', 'max_best_prob', 'min_best_prob', 'agree_frac', 'avg_top_gap']


In [60]:
# Cell C — Train meta-learner with TimeSeriesSplit CV and calibration
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score

n_splits = 5
tscv = TimeSeriesSplit(n_splits=n_splits)

# We will collect fold validation predictions for diagnostics
val_preds = np.zeros((len(meta_X), 3))   # prob preds
val_mask = np.zeros(len(meta_X), dtype=bool)

# Simple hyperparameter grid (small)
param_grid = [
    {"max_iter": 200, "learning_rate": 0.05, "max_depth": 6},
    {"max_iter": 300, "learning_rate": 0.03, "max_depth": 8},
    {"max_iter": 200, "learning_rate": 0.1,  "max_depth": 4},
]

best_cfg = None
best_score = -1

for cfg in param_grid:
    print("Testing cfg:", cfg)
    cfg_val_probs = np.zeros((len(meta_X), 3))
    cfg_mask = np.zeros(len(meta_X), dtype=bool)
    fold_scores = []
    for fold, (tr_idx, va_idx) in enumerate(tscv.split(meta_X), 1):
        X_tr, X_va = meta_X[tr_idx], meta_X[va_idx]
        y_tr, y_va = y[tr_idx], y[va_idx]

        # Light class imbalance: use class_weight via sample_weight
        # compute sample_weight as inverse freq on train
        unique, counts = np.unique(y_tr, return_counts=True)
        inv_freq = {u: (1.0 / c) for u, c in zip(unique, counts)}
        sample_weight = np.array([inv_freq[v] for v in y_tr])

        model = HistGradientBoostingClassifier(**cfg, early_stopping=True, scoring="f1_macro", random_state=42)
        model.fit(X_tr, y_tr, sample_weight=sample_weight)

        probs = model.predict_proba(X_va)
        cfg_val_probs[va_idx] = probs
        cfg_mask[va_idx] = True

        # compute fold metric: macro F1 on argmax preds
        preds = probs.argmax(axis=1)
        f1 = f1_score(y_va, preds, average="macro", zero_division=0)
        fold_scores.append(f1)
        print(f"  fold {fold} f1_macro: {f1:.4f}")

    mean_f1 = np.mean(fold_scores)
    print("  mean f1_macro:", mean_f1)
    if mean_f1 > best_score:
        best_score = mean_f1
        best_cfg = cfg
        best_oof_probs = cfg_val_probs.copy()
    print("----")

print("Best cfg:", best_cfg, "best mean f1_macro:", best_score)

# Train final meta model on full training portion (we will use the same model type + calibration)
final_model = HistGradientBoostingClassifier(**best_cfg, early_stopping=True, random_state=42)
# compute final sample weights on entire meta training set
unique, counts = np.unique(y, return_counts=True)
inv_freq = {u: (1.0 / c) for u, c in zip(unique, counts)}
sample_weight_full = np.array([inv_freq[v] for v in y])
final_model.fit(meta_X, y, sample_weight=sample_weight_full)

# Calibrate probabilities (Platt scaling) using cross-validation
calibrator = CalibratedClassifierCV(final_model, method="sigmoid", cv=3)  # cv on folds (not time-aware)
calibrator.fit(meta_X, y)

# Save
import joblib
joblib.dump({"meta_model": final_model, "calibrator": calibrator, "best_cfg": best_cfg, "meta_feature_names": meta_feature_names}, "meta_model.pkl")
print("Saved meta_model.pkl")


Testing cfg: {'max_iter': 200, 'learning_rate': 0.05, 'max_depth': 6}
  fold 1 f1_macro: 0.1220
  fold 2 f1_macro: 0.6719
  fold 3 f1_macro: 0.7021
  fold 4 f1_macro: 0.7194
  fold 5 f1_macro: 0.7198
  mean f1_macro: 0.5870340309627685
----
Testing cfg: {'max_iter': 300, 'learning_rate': 0.03, 'max_depth': 8}
  fold 1 f1_macro: 0.1220
  fold 2 f1_macro: 0.6779
  fold 3 f1_macro: 0.7032
  fold 4 f1_macro: 0.7185
  fold 5 f1_macro: 0.7198
  mean f1_macro: 0.5882808873047827
----
Testing cfg: {'max_iter': 200, 'learning_rate': 0.1, 'max_depth': 4}
  fold 1 f1_macro: 0.1220
  fold 2 f1_macro: 0.6810
  fold 3 f1_macro: 0.7035
  fold 4 f1_macro: 0.7263
  fold 5 f1_macro: 0.7294
  mean f1_macro: 0.5924434043516823
----
Best cfg: {'max_iter': 200, 'learning_rate': 0.1, 'max_depth': 4} best mean f1_macro: 0.5924434043516823
Saved meta_model.pkl


In [61]:
# Cell D — evaluate & threshold tuning
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report, f1_score, precision_score, recall_score, accuracy_score
from itertools import product

# load calibrator if needed
obj = joblib.load("meta_model.pkl")
calibrator = obj["calibrator"]

probs = calibrator.predict_proba(meta_X)   # (n,3) calibrated
y_true = y

# baseline: argmax
pred_argmax = probs.argmax(axis=1)
print("Baseline (argmax) metrics:")
print("Accuracy:", accuracy_score(y_true, pred_argmax))
print(classification_report(y_true, pred_argmax, zero_division=0))

# tune "confidence cutoff" — if top probability < cutoff => force neutral class (1)
best_cutoff = 0.0
best_f1 = -1
best_preds = None
for cutoff in np.linspace(0.0, 0.75, 16):
    top_probs = probs.max(axis=1)
    preds = probs.argmax(axis=1).copy()
    # map anything with low confidence to neutral class index (1)
    preds[top_probs < cutoff] = 1
    f1 = f1_score(y_true, preds, average="macro", zero_division=0)
    if f1 > best_f1:
        best_f1 = f1
        best_cutoff = cutoff
        best_preds = preds.copy()

print("Best confidence cutoff:", best_cutoff, "=> macro F1:", best_f1)
print("Metrics with best cutoff:")
print("Accuracy:", accuracy_score(y_true, best_preds))
print(classification_report(y_true, best_preds, zero_division=0))
print("Confusion matrix (rows=actual, cols=pred):")
print(confusion_matrix(y_true, best_preds))


Baseline (argmax) metrics:
Accuracy: 0.7045544779082298
              precision    recall  f1-score   support

           0       0.78      0.52      0.62      3925
           1       0.68      0.86      0.76      9261
           2       0.75      0.54      0.62      4445

    accuracy                           0.70     17631
   macro avg       0.73      0.64      0.67     17631
weighted avg       0.72      0.70      0.69     17631

Best confidence cutoff: 0.35000000000000003 => macro F1: 0.6683918915681422
Metrics with best cutoff:
Accuracy: 0.7045544779082298
              precision    recall  f1-score   support

           0       0.78      0.52      0.62      3925
           1       0.68      0.86      0.76      9261
           2       0.75      0.54      0.62      4445

    accuracy                           0.70     17631
   macro avg       0.73      0.64      0.67     17631
weighted avg       0.72      0.70      0.69     17631

Confusion matrix (rows=actual, cols=pred):
[[2033 1

# save

In [62]:
import joblib

# Save the trained meta-learner
meta_model_path = "meta_learner.pkl"
joblib.dump(meta_clf, meta_model_path)

print(f"✅ Meta-learner saved to {meta_model_path}")


✅ Meta-learner saved to meta_learner.pkl
