In [30]:
import numpy as np
import pandas as pd

from catboost import CatBoostClassifier, Pool, CatBoostRegressor
from sklearn.model_selection import train_test_split

In [54]:
# Load the semicolon-separated dataset; rows pandas cannot parse are skipped.
cryptoset = pd.read_csv('data/cryptoset.csv', delimiter=';', on_bad_lines='skip')

# Column roles: the categorical column goes first, followed by every CSV
# column from position 5 onward.
categorical_names = ('news_title',)
target_names = ('high', 'low')
features_names = np.append(categorical_names, cryptoset.columns.values[5:])

print('Features:', features_names)
print('Target:', target_names)

Features: ['news_title' 'pos_votes' 'neg_votes' 'imp_votes' 'com_votes' 'like_votes'
 'dislike_votes' 'lol_votes' 'save_votes' 'first_AAVE' 'first_ADA'
 'first_ALGO' 'first_AMP' 'first_ANC' 'first_APE' 'first_AR' 'first_ATOM'
 'first_AVAX' 'first_AXS' 'first_BAT' 'first_BCH' 'first_BCHABC'
 'first_BIT' 'first_BNB' 'first_BTC' 'first_BTT' 'first_BUSD' 'first_CAKE'
 'first_CEL' 'first_CELO' 'first_CHZ' 'first_COMP' 'first_CRO' 'first_CVX'
 'first_CVXCRV' 'first_DAI' 'first_DCR' 'first_DFI' 'first_DOGE'
 'first_DOT' 'first_EGLD' 'first_ENJ' 'first_EOS' 'first_ERD' 'first_ETC'
 'first_ETH' 'first_EXRD' 'first_FIL' 'first_FLOW' 'first_FRAX'
 'first_FTM' 'first_FTT' 'first_FXS' 'first_GALA' 'first_GMT' 'first_GNO'
 'first_GNT' 'first_GRT' 'first_GST' 'first_GT' 'first_HBAR' 'first_HEART'
 'first_HNT' 'first_HOT' 'first_HT' 'first_ICP' 'first_ION' 'first_JEWEL'
 'first_JUNO' 'first_KCS' 'first_KDA' 'first_KLAY' 'first_KMD' 'first_KSM'
 'first_LEND' 'first_LEO' 'first_LINK' 'first_LRC' 'first_

In [55]:
# Feature matrix: only the columns listed in features_names.
X = cryptoset[features_names]

# Two candidate regression targets: y1 is the 'low' column, y2 is 'high'.
y1 = cryptoset['low']
y2 = cryptoset['high']

In [56]:
# Split features and the y1 target into train/test parts (75% of rows train).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y1,
    train_size=0.75,
    random_state=42,  # fixed seed so the split is repeatable
)

In [57]:
# Configure a 100-estimator CatBoost regressor that optimises and reports RMSE,
# treating the columns named in categorical_names as categorical features.
catboost_model = CatBoostRegressor(
    n_estimators=100,
    loss_function='RMSE',
    eval_metric='RMSE',
    cat_features=categorical_names,
)

# Train while evaluating on (X_test, y_test); plot=True renders the metric chart.
# NOTE(review): X_test/y_test double as the eval set used by use_best_model —
# confirm a separate validation split is not wanted.
catboost_model.fit(
    X_train,
    y_train,
    eval_set=(X_test, y_test),
    use_best_model=True,
    plot=True,
)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Learning rate set to 0.291309
0:	learn: 1804.8485939	test: 1815.1188227	best: 1815.1188227 (0)	total: 37ms	remaining: 3.66s
1:	learn: 1303.5737386	test: 1310.9699341	best: 1310.9699341 (1)	total: 67.3ms	remaining: 3.3s
2:	learn: 953.3237639	test: 958.7815195	best: 958.7815195 (2)	total: 105ms	remaining: 3.39s
3:	learn: 688.8921157	test: 692.4276590	best: 692.4276590 (3)	total: 142ms	remaining: 3.4s
4:	learn: 502.6638458	test: 503.6994704	best: 503.6994704 (4)	total: 179ms	remaining: 3.4s
5:	learn: 376.1358044	test: 377.9321557	best: 377.9321557 (5)	total: 215ms	remaining: 3.37s
6:	learn: 277.7110802	test: 278.2704764	best: 278.2704764 (6)	total: 250ms	remaining: 3.32s
7:	learn: 211.6130500	test: 212.1943931	best: 212.1943931 (7)	total: 286ms	remaining: 3.29s
8:	learn: 161.0565824	test: 161.1928526	best: 161.1928526 (8)	total: 321ms	remaining: 3.25s
9:	learn: 124.9387954	test: 124.2983268	best: 124.2983268 (9)	total: 356ms	remaining: 3.2s
10:	learn: 97.8144478	test: 97.9642893	best: 97.

96:	learn: 1.6870866	test: 14.0395637	best: 14.0195824 (94)	total: 3.39s	remaining: 105ms
97:	learn: 1.6372563	test: 14.0251356	best: 14.0195824 (94)	total: 3.43s	remaining: 69.9ms
98:	learn: 1.6176070	test: 14.0198975	best: 14.0195824 (94)	total: 3.46s	remaining: 34.9ms
99:	learn: 1.5886175	test: 14.0169415	best: 14.0169415 (99)	total: 3.49s	remaining: 0us

bestTest = 14.01694155
bestIteration = 99



<catboost.core.CatBoostRegressor at 0x173b438fd00>

In [75]:
# Predict on the hold-out features and print each prediction next to the
# corresponding y_test value, one line per row.
preds = catboost_model.predict(X_test)
for predicted, actual in zip(preds, y_test):
    print(f'Prediction: {predicted} | Value: {actual}')

Prediction: 45353.59789741638 | Value: 45353.81
Prediction: 45200.13958747112 | Value: 45200.0
Prediction: 42124.57729225577 | Value: 42125.48
Prediction: 46951.03562974514 | Value: 46950.85
Prediction: 39254.3836861448 | Value: 39254.63
Prediction: 40876.27659917229 | Value: 40875.51
Prediction: 45353.59789741638 | Value: 45353.81
Prediction: 42106.65954523642 | Value: 42107.14
Prediction: 42124.56455605925 | Value: 42125.48
Prediction: 40498.9798596562 | Value: 40500.0
Prediction: 39200.06210471095 | Value: 39200.0
Prediction: 45599.13570842006 | Value: 45620.0
Prediction: 39200.06210471095 | Value: 39200.0
Prediction: 44423.11437977235 | Value: 44421.46
Prediction: 43579.24128831737 | Value: 43579.0
Prediction: 45200.13958747112 | Value: 45200.0
Prediction: 42727.034446740276 | Value: 42727.35
Prediction: 40818.759073947134 | Value: 40820.0
Prediction: 43120.435971074985 | Value: 43121.0
Prediction: 39200.06210471095 | Value: 39200.0
Prediction: 42106.7098870688 | Value: 42107.14
Pr