This is a watered-down version of project 4 from the [fintech bootcamp at home](https://github.com/whitgroves/fintech-bootcamp-at-home), combined with some models from my [Optiver Kaggle submission](https://github.com/whitgroves/optiver-trading-at-the-close), to test the performance of a `clique` ensemble. Feel free to treat this as a setup guide if you need an example and/or use case.

In [1]:
!echo $PATH # if /usr/local/cuda/bin is missing, re-run VS Code from a terminal

/mnt/Data/Repos/clique-ml/.cuda/bin:/usr/local/cuda/bin:/home/whitgroves/.local/bin:/usr/local/cuda/bin:/home/whitgroves/.local/bin:/usr/local/cuda/bin:/home/whitgroves/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/snap/bin


In [2]:
!ptxas --version # expecting 12.2

ptxas: NVIDIA (R) Ptx optimizing assembler
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Jun_13_19:13:58_PDT_2023
Cuda compilation tools, release 12.2, V12.2.91
Build cuda_12.2.r12.2/compiler.32965470_0


In [3]:
import pandas as pd
import random as r

# Read the stock bars, then keep the rows of one ticker drawn at random
# (the draw is unseeded, so a different stock may be picked on every run).
df = pd.read_csv('./stock_bars.csv')
picked_symbol = r.choice(df.symbol.unique())
df = df[df['symbol'] == picked_symbol]
df

Unnamed: 0,symbol,timestamp,open,high,low,close,volume,trade_count,vwap
0,BB,2022-01-03 05:00:00+00:00,9.32,9.3900,9.1000,9.37,5267494.0,26372.0,9.268915
1,BB,2022-01-04 05:00:00+00:00,9.37,9.4900,9.1200,9.33,6360118.0,29899.0,9.271508
2,BB,2022-01-05 05:00:00+00:00,9.27,9.3471,8.7800,8.78,9620376.0,39131.0,9.026742
3,BB,2022-01-06 05:00:00+00:00,8.79,9.0200,8.4700,8.78,11505077.0,47090.0,8.771278
4,BB,2022-01-07 05:00:00+00:00,8.97,9.2300,8.8000,8.86,6509053.0,29903.0,8.953267
...,...,...,...,...,...,...,...,...,...
511,BB,2024-01-17 05:00:00+00:00,3.33,3.3500,3.2748,3.34,3264012.0,9790.0,3.322546
512,BB,2024-01-18 05:00:00+00:00,3.37,3.4200,3.3450,3.39,3564418.0,9148.0,3.377668
513,BB,2024-01-19 05:00:00+00:00,3.38,3.4550,3.3450,3.43,7640902.0,13106.0,3.423459
514,BB,2024-01-22 05:00:00+00:00,3.60,3.6000,3.6000,3.60,2332.0,15.0,3.569584


In [4]:
def standardize(data:pd.DataFrame, skip_cols:list[str]=[]) -> pd.DataFrame:
    skip_cols = [col for col in skip_cols if col in data.columns]
    skip = data[skip_cols]
    temp = data.drop(skip_cols, axis=1)
    temp = (temp - temp.mean()) / temp.std(ddof=0) # standardize
    temp = temp.ffill().fillna(0) # impute
    return pd.concat([skip, temp], axis=1, join='inner')

# Rebind `df` to its standardized form; the identifier columns are passed through as-is.
df = standardize(data=df, skip_cols=['symbol', 'timestamp'])
df

Unnamed: 0,symbol,timestamp,open,high,low,close,volume,trade_count,vwap
0,BB,2022-01-03 05:00:00+00:00,3.230217,3.114700,3.236637,3.282115,-0.326388,0.809768,3.194762
1,BB,2022-01-04 05:00:00+00:00,3.268630,3.189574,3.252447,3.251254,-0.062726,1.157147,3.196758
2,BB,2022-01-05 05:00:00+00:00,3.191805,3.082579,2.983688,2.826914,0.724010,2.066417,3.008389
3,BB,2022-01-06 05:00:00+00:00,2.823042,2.837666,2.738644,2.826914,1.178808,2.850308,2.811788
4,BB,2022-01-07 05:00:00+00:00,2.961328,2.994901,2.999498,2.888637,-0.026787,1.157541,2.951844
...,...,...,...,...,...,...,...,...,...
511,BB,2024-01-17 05:00:00+00:00,-1.371634,-1.407688,-1.367984,-1.370193,-0.809850,-0.823411,-1.381475
512,BB,2024-01-18 05:00:00+00:00,-1.340904,-1.355277,-1.312493,-1.331617,-0.737359,-0.886643,-1.339053
513,BB,2024-01-19 05:00:00+00:00,-1.333221,-1.329071,-1.312493,-1.300756,0.246341,-0.496815,-1.303813
514,BB,2024-01-22 05:00:00+00:00,-1.164205,-1.220503,-1.110925,-1.169596,-1.596929,-1.786162,-1.191358


In [5]:
# Features: all standardized columns, with the identifier columns dropped.
X = df.drop(columns=['symbol', 'timestamp'])
# Target: since the data is standardized, we learn/predict the step-to-step change in
# `low` (next row minus current row); the final row has no successor, so its target is NaN.
y = df['low'].shift(-1).sub(df['low'])

In [6]:
X

Unnamed: 0,open,high,low,close,volume,trade_count,vwap
0,3.230217,3.114700,3.236637,3.282115,-0.326388,0.809768,3.194762
1,3.268630,3.189574,3.252447,3.251254,-0.062726,1.157147,3.196758
2,3.191805,3.082579,2.983688,2.826914,0.724010,2.066417,3.008389
3,2.823042,2.837666,2.738644,2.826914,1.178808,2.850308,2.811788
4,2.961328,2.994901,2.999498,2.888637,-0.026787,1.157541,2.951844
...,...,...,...,...,...,...,...
511,-1.371634,-1.407688,-1.367984,-1.370193,-0.809850,-0.823411,-1.381475
512,-1.340904,-1.355277,-1.312493,-1.331617,-0.737359,-0.886643,-1.339053
513,-1.333221,-1.329071,-1.312493,-1.300756,0.246341,-0.496815,-1.303813
514,-1.164205,-1.220503,-1.110925,-1.169596,-1.596929,-1.786162,-1.191358


In [7]:
y

0      0.015809
1     -0.268758
2     -0.245044
3      0.260854
4     -0.229235
         ...   
511    0.055491
512    0.000000
513    0.201569
514   -0.047428
515         NaN
Name: low, Length: 516, dtype: float64

In [8]:
import xgboost as xgb
import lightgbm as lgb
import catboost as cat
import tensorflow as tf
# Short aliases for the Keras pieces used to build the two neural models below.
layers = tf.keras.layers
Sequential = tf.keras.Sequential

N_FEATURES = len(X.columns) # input width of both Keras models
ACTIVATION_1 = 'tanh' # inputs are standardized (vs normalized) so keep negative range
ACTIVATION_2 = 'relu' # performed better than tanh, sigmoid
DROPOUT = 0.5         # performed better than 0.3, 0.4
RANDOM_SEED = 25      # even funnier than 24

# Seed before any model is constructed so the run is repeatable.
tf.keras.utils.set_random_seed(RANDOM_SEED)
# Keyword groups are named after the libraries that receive them below:
#   shared_kw  -> XGBoost, LightGBM and CatBoost
#   xgb_lgb_kw -> XGBoost and LightGBM
#   xgb_cat_kw -> XGBoost and CatBoost
#   lgb_cat_kw -> LightGBM and CatBoost
shared_kw = dict(random_state=RANDOM_SEED, learning_rate=0.2, max_depth=3, subsample=0.8)
xgb_lgb_kw = dict(n_jobs=16, colsample_bytree=0.85, reg_alpha=500)
xgb_cat_kw = dict(early_stopping_rounds=5)
# NOTE(review): min_child_samples=2000 is larger than the ~516 rows in the sample run above — confirm this value is intended for a dataset this small.
lgb_cat_kw = dict(num_leaves=8, min_child_samples=2000)
regularizer = tf.keras.regularizers.l1(0.001) # L1 kernel penalty shared by the hidden layers of 'net'

models = [ # order matters if limit is set; frontloading stronger models will cause more rejections; the reverse will oversaturate
    xgb.XGBRegressor(**shared_kw, **xgb_lgb_kw, **xgb_cat_kw, eval_metric='mae', tree_method='hist', gamma=0.2),
    lgb.LGBMRegressor(**shared_kw, **xgb_lgb_kw, **lgb_cat_kw, early_stopping_round=5, metric='l1', min_split_gain=0.001, verbosity=-1),
    cat.CatBoostRegressor(**shared_kw, **xgb_cat_kw, **lgb_cat_kw, eval_metric='MAE'),
    # NOTE(review): this model is named 'linear' but applies ACTIVATION_1 (tanh), so its output is confined to (-1, 1) — confirm that is intended.
    Sequential([layers.Dense(1, activation=ACTIVATION_1, input_shape=[N_FEATURES])], name='linear'), # N -> 1
    Sequential([ # N -> N/2 -> 1
        layers.Dense(N_FEATURES, kernel_regularizer=regularizer, activation=ACTIVATION_1, input_shape=[N_FEATURES]),
        layers.Dropout(DROPOUT),
        layers.BatchNormalization(),
        layers.Dense(N_FEATURES//2, kernel_regularizer=regularizer, activation=ACTIVATION_2),
        layers.Dropout(DROPOUT),
        layers.BatchNormalization(),
        layers.Dense(1) # single unbounded output for the regression target
    ], name='net'),
]

2025-07-20 16:01:17.894404: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-20 16:01:17.902712: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753045277.911866  130454 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753045277.915034  130454 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1753045277.922038  130454 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [9]:
import clique
# Chronological 80/20 train/test split. The very last row is excluded from the
# test set because its target is NaN.
cutoff = int(0.8 * len(X))
training_inputs, training_targets = X.iloc[:cutoff], y.iloc[:cutoff]
testing_inputs, testing_targets = X.iloc[cutoff:-1], y.iloc[cutoff:-1]
ensemble = clique.Clique(
    models=models,
    inputs=testing_inputs,
    targets=testing_targets,
)
ensemble

<Clique (5 model(s); limit: none)>

In [10]:
# Optional shortcut: uncomment to restore models from '.models/' (the directory
# `ensemble.save` writes to further down) — presumably instead of retraining
# from scratch; verify against clique's documentation.
# ensemble.load('.models/')

In [11]:
import gc
from sklearn.model_selection import TimeSeriesSplit
# Walk forward through the training data: each fold fits on the earlier rows and
# hands the later rows to the boosters as their evaluation set.
splitter = TimeSeriesSplit()
for fold, (train_idx, val_idx) in enumerate(splitter.split(training_inputs)):
    print(f'Training ensemble on fold {fold+1}')
    val_data = [(training_inputs.iloc[val_idx], training_targets.iloc[val_idx])]
    for model in ensemble:
        kind = model.model_type
        if kind in ('Sequential', 'Model'):
            # Keras models are compiled once, on the first fold only.
            if fold == 0:
                model.compile(optimizer='adam', loss='mae')
            fit_kw = {'verbose': 0, 'batch_size': 256}
            predict_kw = dict(fit_kw)  # same options, separate dict
        elif kind == 'LGBMRegressor':
            fit_kw = {'eval_set': val_data, 'eval_metric': 'l1'}
            predict_kw = {}
        elif kind in ('XGBRegressor', 'CatBoostRegressor'):
            fit_kw = {'verbose': 0, 'eval_set': val_data}
            predict_kw = {}
        else:
            # Any other model type gets no extra arguments.
            fit_kw, predict_kw = {}, {}
        model.fit_kw = fit_kw
        model.predict_kw = predict_kw
    ensemble.fit(training_inputs.iloc[train_idx], training_targets.iloc[train_idx])
    # Release the fold's validation slice, then collect until nothing more is freed.
    del val_data
    while gc.collect():
        pass

Training ensemble on fold 1


I0000 00:00:1753045280.001505  130602 service.cc:152] XLA service 0x78bb0c005180 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1753045280.001520  130602 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 3060, Compute Capability 8.6
I0000 00:00:1753045280.001522  130602 service.cc:160]   StreamExecutor device (1): NVIDIA GeForce GTX 1660 SUPER, Compute Capability 7.5
2025-07-20 16:01:20.008741: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1753045280.032379  130602 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1753045280.161912  130602 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Training ensemble on fold 2
Training ensemble on fold 3
Training ensemble on fold 4
Training ensemble on fold 5


In [12]:
# Predict with the ensemble on the held-out rows and score the result against their targets.
# NOTE(review): the metric behind `ensemble.scoring` is not visible in this notebook —
# presumably MAE, matching the loss the models are trained with; confirm.
predictions = ensemble.predict(testing_inputs)
performance = ensemble.scoring(testing_targets, predictions)
performance

0.2480253010157111

In [13]:
ensemble.evaluate()

<Clique (5 model(s); limit: none)>

In [14]:
ensemble.mean_score

0.29060288085907704

In [15]:
ensemble.best_score

0.07976239276254381

In [16]:
ensemble.best_model

<ModelProfile (CatBoostRegressor)>

In [17]:
ensemble.save('.models/')

<Clique (5 model(s); limit: none)>

In [18]:
# Reload the saved models into a second clique with limit=3, evaluate it on the
# held-out data, then prune it.
candidates = clique.Clique(
    models='.models/',
    limit=3,
    inputs=testing_inputs,
    targets=testing_targets,
)
exclusive = candidates.evaluate().prune()
exclusive

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 


<Clique (3 model(s); limit: 3)>

In [19]:
exclusive.best_score

0.07976239276254381

In [20]:
exclusive.best_model

<ModelProfile (CatBoostRegressor)>

In [21]:
# Demonstrates the guard against scoring unfitted models: a clique that is saved
# without going through `fit` is reloaded and evaluated, which raises EvaluationError.
clique.Clique(models=models).save('.untrained/')
try:
    clique.Clique(models='.untrained/').evaluate() # EvaluationError
except Exception as error:
    # Display the expected failure instead of aborting a "Restart & Run All".
    # The class is matched by name because its import path is not shown here;
    # anything other than EvaluationError is a genuine problem and is re-raised.
    if type(error).__name__ != 'EvaluationError':
        raise
    print(f'{type(error).__name__}: {error}')

EvaluationError: Cannot evaluate models before they are trained. Call `fit` first.