In [4]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, balanced_accuracy_score
from catboost import CatBoostClassifier
from sklearn.utils.class_weight import compute_class_weight

from vctr.data.data_loader import get_data_with_features_and_labels

symbol = 'ETH'
timeframe = '1h'
label_args = (0.07, 0.01)

data = get_data_with_features_and_labels(
    symbol=symbol,
    timeframe=timeframe,
    label_args=label_args,
    separate=False,
)

X, y = data.drop(['label'], axis=1), data['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
X_train, X_test = StandardScaler().fit_transform(X_train), StandardScaler().fit_transform(X_test)

# Calculate class weights
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)

# CatBoost
catboost_clf = CatBoostClassifier(iterations=100, learning_rate=0.1, class_weights=class_weights, verbose=False)
catboost_clf.fit(X_train, y_train)
catboost_preds = catboost_clf.predict(X_test)

# Evaluate classifiers using balanced accuracy
print('CatBoost balanced accuracy: ', balanced_accuracy_score(y_test, catboost_preds))
print('\nCatBoost classification report:\n', classification_report(y_test, catboost_preds))

catboost_clf.save_model('catboost.model')

CatBoost balanced accuracy:  0.8786028152400082

CatBoost classification report:
               precision    recall  f1-score   support

           0       1.00      0.79      0.88     13419
           1       0.11      0.93      0.19       169
           2       0.11      0.92      0.20       217

    accuracy                           0.79     13805
   macro avg       0.41      0.88      0.42     13805
weighted avg       0.97      0.79      0.86     13805



In [5]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from vctr.utils.pf_stats import get_plot_and_pf, print_stats
from catboost import CatBoostClassifier
from vctr.data.data_loader import get_data_with_features_and_labels
import faulthandler
faulthandler.enable()

symbol = 'BTC'
timeframe = '1h'
label_args = (0.07, 0.01)

catboost_clf = CatBoostClassifier().load_model('catboost.model')

data = get_data_with_features_and_labels(
    symbol=symbol,
    timeframe=timeframe,
    label_args=label_args,
    start='2023-01-01',
    separate=False,
)

X, y = data.drop(['label'], axis=1), data['label']
X = StandardScaler().fit_transform(X)
catboost_preds = catboost_clf.predict(X)
all_predictions = np.concatenate(catboost_preds, axis=0)

print(pd.Series(all_predictions).value_counts())

data = data[-len(all_predictions) :].copy()
data['pred'] = pd.Series(all_predictions, index=data.index)
plot_key = 'pred'
fig, pf = get_plot_and_pf(
    data, plot_key, pf_params={'sl_stop': None}, **{f'plot_{plot_key}': False}, width=1050, height=600
)
fig.data[0].line.width = 1
fig.data[0].opacity = 0.75
fig.show()
print_stats(pf)

0    2083
1     300
2     266
dtype: int64


-------------------------  --------
Duration                   3 months
Total Return               [31m13%[0m
Benchmark Return           71%
-------------------------  -------
Annualized Return          [31m48%[0m
Sortino                    [31m2.00[0m
Sharpe                     [32m1.45[0m
-------------------------  -------
Number of Trades           34
Average Trade Duration     1 days
Win Rate                   [33m74%[0m
-------------------------  -------
Max Drawdown               [32m-13%[0m
Average Drawdown Duration  2 days
Max Drawdown Duration      22 days
-------------------------  --------


In [None]:
import catboost as cb
import pandas as pd

# Load your data
train_data = pd.read_csv('train_data.csv')
test_data = pd.read_csv('test_data.csv')

# Define your model
model = cb.CatBoostClassifier()

# Fit your model
model.fit(train_data.drop('target', axis=1), train_data['target'])

# Get feature importance scores
feature_importances = model.get_feature_importance()

# Print the feature importance scores
for feature_name, importance_score in zip(train_data.drop('target', axis=1).columns, feature_importances):
    print(f"{feature_name}: {importance_score}")
