This is a watered-down version of project 4 from the [fintech bootcamp at home](https://github.com/whitgroves/fintech-bootcamp-at-home), combined with some models from my [Optiver Kaggle submission](https://github.com/whitgroves/optiver-trading-at-the-close), to test the ensemble's performance. Feel free to treat this notebook as a setup guide if you need an example and/or use case.

In [1]:
!echo $PATH # if /usr/local/cuda/bin is missing, re-run VS Code from a terminal

/mnt/Data/Repos/clique-ml/.cuda/bin:/usr/local/cuda/bin:/home/whitgroves/.local/bin:/usr/local/cuda/bin:/home/whitgroves/.local/bin:/usr/local/cuda/bin:/home/whitgroves/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/snap/bin


In [2]:
!ptxas --version # expecting CUDA compilation tools release 12.2

ptxas: NVIDIA (R) Ptx optimizing assembler
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Jun_13_19:13:58_PDT_2023
Cuda compilation tools, release 12.2, V12.2.91
Build cuda_12.2.r12.2/compiler.32965470_0


In [3]:
import pandas as pd
import random as r

# Load the bar data from a CSV in the working directory.
df = pd.read_csv('./stock_bars.csv')
# NOTE(review): r.choice is not seeded in the visible cells, so the selected symbol
# (and therefore every downstream result) can differ between runs.
df = df.loc[df.symbol == r.choice(df.symbol.unique())] # pick a single stock from the test data each time
df

Unnamed: 0,symbol,timestamp,open,high,low,close,volume,trade_count,vwap
1032,SPCE,2022-01-03 05:00:00+00:00,13.48,13.7350,12.890,13.5400,12819868.0,78293.0,13.361041
1033,SPCE,2022-01-04 05:00:00+00:00,13.64,13.6499,12.895,13.2100,13109366.0,80434.0,13.104721
1034,SPCE,2022-01-05 05:00:00+00:00,13.02,13.3300,11.980,12.0500,17112567.0,117267.0,12.503853
1035,SPCE,2022-01-06 05:00:00+00:00,12.17,12.3464,11.300,11.7800,16410751.0,101101.0,11.860575
1036,SPCE,2022-01-07 05:00:00+00:00,12.00,12.8050,11.920,12.4500,16331824.0,97612.0,12.506667
...,...,...,...,...,...,...,...,...,...
1543,SPCE,2024-01-17 05:00:00+00:00,1.90,1.9700,1.870,1.9600,8085803.0,28004.0,1.934658
1544,SPCE,2024-01-18 05:00:00+00:00,2.00,2.0199,1.900,1.9400,6972790.0,18103.0,1.952433
1545,SPCE,2024-01-19 05:00:00+00:00,1.96,1.9600,1.820,1.9200,9586535.0,25767.0,1.887089
1546,SPCE,2024-01-22 05:00:00+00:00,2.05,2.0500,2.050,2.0500,11828.0,60.0,2.055300


In [4]:
def standardize(data:pd.DataFrame, skip_cols:list[str]=[]) -> pd.DataFrame:
    skip_cols = [col for col in skip_cols if col in data.columns]
    skip = data[skip_cols]
    temp = data.drop(skip_cols, axis=1)
    temp = (temp - temp.mean()) / temp.std(ddof=0) # standardize
    temp = temp.ffill().fillna(0) # impute
    return pd.concat([skip, temp], axis=1, join='inner')

# Standardize every column except the identifier columns.
# NOTE(review): the mean/std are computed over the whole series, before the 80/20
# train/test split made later in this notebook, so training features are scaled with
# statistics that include the test period — confirm this is acceptable.
df = standardize(df, ['symbol', 'timestamp'])
df

Unnamed: 0,symbol,timestamp,open,high,low,close,volume,trade_count,vwap
1032,SPCE,2022-01-03 05:00:00+00:00,3.439862,3.346928,3.410052,3.474965,0.013214,0.622757,3.399282
1033,SPCE,2022-01-04 05:00:00+00:00,3.505648,3.313171,3.412192,3.339071,0.033379,0.666481,3.293718
1034,SPCE,2022-01-05 05:00:00+00:00,3.250727,3.186277,3.020571,2.861384,0.312218,1.418684,3.046252
1035,SPCE,2022-01-06 05:00:00+00:00,2.901237,2.796112,2.729530,2.750198,0.263334,1.088542,2.781320
1036,SPCE,2022-01-07 05:00:00+00:00,2.831339,2.978025,2.994891,3.026104,0.257836,1.017290,3.047411
...,...,...,...,...,...,...,...,...,...
1543,SPCE,2024-01-17 05:00:00+00:00,-1.321418,-1.319896,-1.306521,-1.293673,-0.316532,-0.404245,-1.306635
1544,SPCE,2024-01-18 05:00:00+00:00,-1.280302,-1.300103,-1.293681,-1.301909,-0.394058,-0.606444,-1.299315
1545,SPCE,2024-01-19 05:00:00+00:00,-1.296749,-1.323863,-1.327921,-1.310145,-0.212000,-0.449929,-1.326226
1546,SPCE,2024-01-22 05:00:00+00:00,-1.259744,-1.288163,-1.229480,-1.256611,-0.878917,-0.974918,-1.256949


In [5]:
# Target: change in the (standardized) low from the current bar to the next one, i.e. low[t+1] - low[t].
# shift(-1) has no successor for the final row, so that row's target is NaN.
y = df['low'].shift(-1) - df['low'] # since data is standardized, we learn/predict the change from t to t+1
# Features: every column that remains once the identifier columns are dropped.
X = df.drop(['symbol', 'timestamp'], axis=1)

In [6]:
X

Unnamed: 0,open,high,low,close,volume,trade_count,vwap
1032,3.439862,3.346928,3.410052,3.474965,0.013214,0.622757,3.399282
1033,3.505648,3.313171,3.412192,3.339071,0.033379,0.666481,3.293718
1034,3.250727,3.186277,3.020571,2.861384,0.312218,1.418684,3.046252
1035,2.901237,2.796112,2.729530,2.750198,0.263334,1.088542,2.781320
1036,2.831339,2.978025,2.994891,3.026104,0.257836,1.017290,3.047411
...,...,...,...,...,...,...,...
1543,-1.321418,-1.319896,-1.306521,-1.293673,-0.316532,-0.404245,-1.306635
1544,-1.280302,-1.300103,-1.293681,-1.301909,-0.394058,-0.606444,-1.299315
1545,-1.296749,-1.323863,-1.327921,-1.310145,-0.212000,-0.449929,-1.326226
1546,-1.259744,-1.288163,-1.229480,-1.256611,-0.878917,-0.974918,-1.256949


In [7]:
y

1032    0.002140
1033   -0.391621
1034   -0.291041
1035    0.265361
1036   -0.096300
          ...   
1543    0.012840
1544   -0.034240
1545    0.098440
1546    0.004280
1547         NaN
Name: low, Length: 516, dtype: float64

In [8]:
import xgboost as xgb
import lightgbm as lgb
import catboost as cat
import tensorflow as tf
layers = tf.keras.layers
Sequential = tf.keras.Sequential

# Shared constants, hyperparameter groups, and the five candidate models for the ensemble.
N_FEATURES = len(X.columns)
ACTIVATION_1 = 'tanh' # inputs are standardized (vs normalized) so keep negative range
ACTIVATION_2 = 'relu' # performed better than tanh, sigmoid
DROPOUT = 0.5         # performed better than 0.3, 0.4
RANDOM_SEED = 25      # even funnier than 24

# Seed Keras; the boosters below receive the same seed through random_state in shared_kw.
tf.keras.utils.set_random_seed(RANDOM_SEED)
# Each *_kw dict is named after the libraries whose constructors it is unpacked into below.
shared_kw = dict(random_state=RANDOM_SEED, learning_rate=0.2, max_depth=3, subsample=0.8)
xgb_lgb_kw = dict(n_jobs=16, colsample_bytree=0.85, reg_alpha=500)
xgb_cat_kw = dict(early_stopping_rounds=5)
# NOTE(review): min_child_samples=2000 is larger than the ~516-row series shown in this
# notebook's recorded outputs; presumably no leaf can satisfy it, which may be related to the
# "Model is guessing a constant value" error recorded further down — confirm against the
# LightGBM/CatBoost parameter docs before reusing these values on a dataset this small.
lgb_cat_kw = dict(num_leaves=8, min_child_samples=2000)
regularizer = tf.keras.regularizers.l1(0.001)

models = [ # order matters if limit is set; frontloading stronger models will cause more rejections; the reverse will oversaturate
    xgb.XGBRegressor(**shared_kw, **xgb_lgb_kw, **xgb_cat_kw, eval_metric='mae', tree_method='hist', gamma=0.2),
    lgb.LGBMRegressor(**shared_kw, **xgb_lgb_kw, **lgb_cat_kw, early_stopping_round=5, metric='l1', min_split_gain=0.001, verbosity=-1),
    cat.CatBoostRegressor(**shared_kw, **xgb_cat_kw, **lgb_cat_kw, eval_metric='MAE'),
    # Single Dense unit over all features (note it still applies ACTIVATION_1 despite the name).
    Sequential([layers.Dense(1, activation=ACTIVATION_1, input_shape=[N_FEATURES])], name='linear'), # N -> 1
    Sequential([ # N -> N/2 -> 1
        layers.Dense(N_FEATURES, kernel_regularizer=regularizer, activation=ACTIVATION_1, input_shape=[N_FEATURES]),
        layers.Dropout(DROPOUT),
        layers.BatchNormalization(),
        layers.Dense(N_FEATURES//2, kernel_regularizer=regularizer, activation=ACTIVATION_2),
        layers.Dropout(DROPOUT),
        layers.BatchNormalization(),
        layers.Dense(1)
    ], name='net'),
]

2025-07-22 23:44:31.862491: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-22 23:44:32.216491: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753245872.336721   14779 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753245872.371397   14779 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1753245872.682282   14779 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [9]:
import clique
cutoff = int(len(X)*0.8) # 80/20 train/test split
# Positional (unshuffled) split: the first 80% of rows train, the remainder test.
# The test slices stop at -1, presumably because the final row's target is NaN
# (y is built with shift(-1), so the last bar has no successor).
training_inputs, testing_inputs = X[:cutoff], X[cutoff:-1]
training_targets, testing_targets = y[:cutoff], y[cutoff:-1]
# The held-out slice is passed to the ensemble at construction via inputs=/targets=.
ensemble = clique.Clique(models=models, inputs=testing_inputs, targets=testing_targets)
ensemble

<Clique (5 model(s); limit: none)>

In [10]:
# ensemble.load('.models/')

In [11]:
import gc
from sklearn.model_selection import TimeSeriesSplit
# Walk the time-series folds, configuring each model's fit/predict kwargs before every fit.
for fold, (training, validation) in enumerate(TimeSeriesSplit().split(training_inputs)):
    print(f'Training ensemble on fold {fold+1}')
    # This fold's validation rows, wrapped as [(X, y)] for use as an eval_set below.
    val_data = [(training_inputs.iloc[validation], training_targets.iloc[validation])]
    for model in ensemble:
        kind = model.model_type
        if kind in ('Sequential', 'Model'):
            # Keras models are compiled only once, on the first fold.
            if fold == 0:
                model.compile(optimizer='adam', loss='mae')
            fit_kw = dict(verbose=0, batch_size=256)
            predict_kw = dict(verbose=0, batch_size=256)
        elif kind == 'LGBMRegressor':
            fit_kw = dict(eval_set=val_data, eval_metric='l1')
            predict_kw = dict()
        elif kind in ('XGBRegressor', 'CatBoostRegressor'):
            fit_kw = dict(verbose=0, eval_set=val_data)
            predict_kw = dict()
        else:
            # Any other model type is fitted and queried with no extra kwargs.
            fit_kw = dict()
            predict_kw = dict()
        model.fit_kw = fit_kw
        model.predict_kw = predict_kw
    ensemble.fit(training_inputs.iloc[training, :], training_targets.iloc[training])
    del val_data
    # Keep collecting until a pass frees nothing.
    while gc.collect():
        pass

Training ensemble on fold 1


I0000 00:00:1753245875.922239   14953 service.cc:152] XLA service 0x74f318018090 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1753245875.922262   14953 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 3060, Compute Capability 8.6
I0000 00:00:1753245875.922264   14953 service.cc:160]   StreamExecutor device (1): NVIDIA GeForce GTX 1660 SUPER, Compute Capability 7.5
2025-07-22 23:44:35.947480: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1753245875.997017   14953 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1753245876.162194   14953 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Training ensemble on fold 2
Training ensemble on fold 3
Training ensemble on fold 4
Training ensemble on fold 5


In [12]:
# Predict on the held-out inputs, then score those predictions against the held-out targets.
predictions = ensemble.predict(testing_inputs)
performance = ensemble.scoring(testing_targets, predictions)
# NOTE(review): the recorded output of this cell is
# "EvaluationError: Model is guessing a constant value." — the cell did not complete in the
# saved run, so `performance` may be undefined for anything that relies on it.
performance

EvaluationError: Model is guessing a constant value.

In [None]:
ensemble.evaluate()

<Clique (5 model(s); limit: none)>

In [None]:
ensemble.mean_score

0.29060288085907704

In [None]:
ensemble.best_score

0.07976239276254381

In [None]:
ensemble.best_model

<ModelProfile (CatBoostRegressor)>

In [None]:
ensemble.save('.models/')

<Clique (5 model(s); limit: none)>

In [None]:
# Build a second ensemble from the '.models/' directory (the path passed to ensemble.save in
# the previous cell), capped at limit=3, then evaluate it on the held-out slice and prune.
# The recorded repr shows 3 models remaining afterwards.
exclusive = clique.Clique(models='.models/', limit=3, inputs=testing_inputs, targets=testing_targets).evaluate().prune()
exclusive

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


<Clique (3 model(s); limit: 3)>

In [None]:
exclusive.best_score

0.07976239276254381

In [None]:
exclusive.best_model

<ModelProfile (CatBoostRegressor)>

In [None]:
# Intentional failure demo: a Clique constructed without inputs=/targets= cannot be evaluated.
clique.Clique(models=models).save('.untrained/')
clique.Clique(models='.untrained/').evaluate() # raises EvaluationError: testing inputs and targets have not been defined

EvaluationError: Testing inputs and targets have not been defined.