*In this notebook we are reading our dataset with technical indicators and fitting the CatBoost model*

In [3]:
import numpy as np
import pandas as pd

from catboost import CatBoostClassifier, Pool, CatBoostRegressor
from sklearn.model_selection import train_test_split

In [19]:
# initialize data
cryptoset = pd.read_csv('data/cryptoset_with_indicators.csv', delimiter=';', on_bad_lines='skip')
categorical_names = ('news_title', 'source')
features_names = cryptoset.columns.values[6:]
features_names = np.append(categorical_names, features_names)
target_names = ('high', 'low')
print('Features:', features_names)
print('Target:', target_names)

Features: ['news_title' 'source' 'pos_votes' 'neg_votes' 'imp_votes' 'com_votes'
 'like_votes' 'dislike_votes' 'lol_votes' 'save_votes' 'first_AAVE'
 'first_ADA' 'first_ALGO' 'first_AMP' 'first_ANC' 'first_APE' 'first_AR'
 'first_ATOM' 'first_AVAX' 'first_AXS' 'first_BAT' 'first_BCH'
 'first_BCHABC' 'first_BIT' 'first_BNB' 'first_BTC' 'first_BTT'
 'first_BUSD' 'first_CAKE' 'first_CEL' 'first_CELO' 'first_CHZ'
 'first_COMP' 'first_CRO' 'first_CVX' 'first_CVXCRV' 'first_DAI'
 'first_DCR' 'first_DFI' 'first_DOGE' 'first_DOT' 'first_EGLD' 'first_ENJ'
 'first_EOS' 'first_ERD' 'first_ETC' 'first_ETH' 'first_EXRD' 'first_FIL'
 'first_FLOW' 'first_FRAX' 'first_FTM' 'first_FTT' 'first_FXS'
 'first_GALA' 'first_GMT' 'first_GNO' 'first_GNT' 'first_GRT' 'first_GST'
 'first_GT' 'first_HBAR' 'first_HEART' 'first_HNT' 'first_HOT' 'first_HT'
 'first_ICP' 'first_ION' 'first_JEWEL' 'first_JUNO' 'first_KCS'
 'first_KDA' 'first_KLAY' 'first_KMD' 'first_KSM' 'first_LEND' 'first_LEO'
 'first_LINK' 'first_LR

In [20]:
cryptoset

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,_id,news_title,source,publication_date,pos_votes,neg_votes,imp_votes,com_votes,...,momentum_ppo,momentum_ppo_signal,momentum_ppo_hist,momentum_pvo,momentum_pvo_signal,momentum_pvo_hist,momentum_kama,others_dr,others_dlr,others_cr
0,0,0,14604850,We welcome Algolaunch Algorand ecosystem AlgoL...,cointelegraph.com,2022.03.17,0.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,40917.9,-5.336149,0.0,0.000000
1,1,1,14602502,Ripple Distributes 1 Billion XRP In Developer ...,bitcoinist.com,2022.03.17,1.0,0.0,1.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,40917.9,0.000000,0.0,0.000000
2,2,2,14602447,Treasure_DAO integrating Chainlink Price Feeds...,smart_contract,2022.03.17,0.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,40917.9,0.000000,0.0,0.000000
3,3,3,14608066,The Hedera22 Hackathon ️ 6 weeks 16 challenges...,hedera,2022.03.17,0.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,40917.9,0.000000,0.0,0.000000
4,4,4,14609384,Let BUIDL MinaCommunity The Mina Ecosystem rai...,minaprotocol,2022.03.17,0.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,40917.9,0.000000,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11588,11588,11588,14958746,Bitcoin Taps 9-Day High BAYC ApeCoin Soars 30 ...,cryptopotato.com,2022.04.20,0.0,0.0,0.0,0.0,...,-1.799893e-10,-2.647137e-10,8.472435e-11,6.866258e-09,1.009748e-08,-3.231227e-09,40917.9,0.000000,0.0,1.076033
11589,11589,11589,14960138,Биткоин-биржа OKCoin анонсировала запуск NFT-м...,forklog.com,2022.04.20,0.0,0.0,0.0,0.0,...,-1.666542e-10,-2.451018e-10,7.844760e-11,6.357638e-09,9.349515e-09,-2.991877e-09,40917.9,0.000000,0.0,1.076033
11590,11590,11590,14960165,Quant Analyst Plan B Says Bitcoin Currently Pr...,dailyhodl.com,2022.04.20,0.0,0.0,0.0,0.0,...,-1.543042e-10,-2.269423e-10,7.263805e-11,5.886699e-09,8.656952e-09,-2.770253e-09,40917.9,0.000000,0.0,1.076033
11591,11591,11591,14951808,Japan Central Bank Will Test Digital Yen Cauti...,cryptopotato.com,2022.04.20,3.0,0.0,2.0,0.0,...,-1.428690e-10,-2.101276e-10,6.725857e-11,5.450647e-09,8.015691e-09,-2.565044e-09,40917.9,0.000000,0.0,1.076033


In [21]:
X = cryptoset[features_names]
y1, y2 = cryptoset['low'], cryptoset['high']

In [22]:
X_train, X_test, y_train, y_test = train_test_split(X, y1, train_size=0.75, random_state=42)

In [23]:
# set up the model
catboost_model = CatBoostRegressor(n_estimators=100,
                                   loss_function = 'RMSE',
                                   eval_metric = 'RMSE',
                                   cat_features = categorical_names)
# fit model
catboost_model.fit(X_train, y_train, 
                   eval_set = (X_test, y_test),
                   use_best_model = True,
                   plot = True)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Learning rate set to 0.291309
0:	learn: 1795.7084238	test: 1807.1201605	best: 1807.1201605 (0)	total: 176ms	remaining: 17.5s
1:	learn: 1290.0401788	test: 1298.1618630	best: 1298.1618630 (1)	total: 207ms	remaining: 10.2s
2:	learn: 932.9534176	test: 935.9943384	best: 935.9943384 (2)	total: 241ms	remaining: 7.79s
3:	learn: 686.1867606	test: 686.3730729	best: 686.3730729 (3)	total: 279ms	remaining: 6.71s
4:	learn: 500.2315614	test: 499.2910766	best: 499.2910766 (4)	total: 315ms	remaining: 5.98s
5:	learn: 366.0081689	test: 365.2061869	best: 365.2061869 (5)	total: 348ms	remaining: 5.45s
6:	learn: 271.9228406	test: 270.4460042	best: 270.4460042 (6)	total: 379ms	remaining: 5.03s
7:	learn: 205.9426555	test: 204.7091018	best: 204.7091018 (7)	total: 412ms	remaining: 4.74s
8:	learn: 160.6414418	test: 159.6756074	best: 159.6756074 (8)	total: 447ms	remaining: 4.52s
9:	learn: 129.5247672	test: 127.6627232	best: 127.6627232 (9)	total: 492ms	remaining: 4.43s
10:	learn: 106.3713255	test: 105.4059888	bes

94:	learn: 10.0390856	test: 24.7330575	best: 24.7283463 (93)	total: 3.25s	remaining: 171ms
95:	learn: 10.0144739	test: 24.7235948	best: 24.7235948 (95)	total: 3.3s	remaining: 138ms
96:	learn: 9.9658210	test: 24.7028184	best: 24.7028184 (96)	total: 3.34s	remaining: 103ms
97:	learn: 9.8903212	test: 24.6709834	best: 24.6709834 (97)	total: 3.37s	remaining: 68.9ms
98:	learn: 9.7361157	test: 24.6407156	best: 24.6407156 (98)	total: 3.41s	remaining: 34.4ms
99:	learn: 9.6826960	test: 24.6152654	best: 24.6152654 (99)	total: 3.44s	remaining: 0us

bestTest = 24.61526539
bestIteration = 99



<catboost.core.CatBoostRegressor at 0x1520ab2a7a0>

In [24]:
preds = catboost_model.predict(X_test)
for i, j in zip(preds, y_test):
    print(f'Prediction: {i} | Value: {j}')

Prediction: 45371.66762780373 | Value: 45353.81
Prediction: 45203.91189467515 | Value: 45200.0
Prediction: 42128.298688116134 | Value: 42125.48
Prediction: 46947.55004693306 | Value: 46950.85
Prediction: 39251.802383105576 | Value: 39254.63
Prediction: 40877.64359839318 | Value: 40875.51
Prediction: 45351.186829151324 | Value: 45353.81
Prediction: 42106.897147125135 | Value: 42107.14
Prediction: 42126.309780510164 | Value: 42125.48
Prediction: 40499.86595620559 | Value: 40500.0
Prediction: 39201.12335959181 | Value: 39200.0
Prediction: 45632.514821563556 | Value: 45620.0
Prediction: 39211.74472318382 | Value: 39200.0
Prediction: 44423.38335311339 | Value: 44421.46
Prediction: 43578.05702347317 | Value: 43579.0
Prediction: 45199.15150284776 | Value: 45200.0
Prediction: 42726.94472390985 | Value: 42727.35
Prediction: 40820.46623259489 | Value: 40820.0
Prediction: 43119.780367329964 | Value: 43121.0
Prediction: 39199.22145936239 | Value: 39200.0
Prediction: 42106.897147125135 | Value: 421