In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, balanced_accuracy_score
from catboost import CatBoostClassifier
from sklearn.utils.class_weight import compute_class_weight

from vctr.data.labeling import label_data_trends_binary
from vctr.data.data_loader import get_data_with_features_and_labels

from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import make_scorer
from sklearn.model_selection import cross_val_score

# List of symbols to make predictions for
SYMBOLS = ['XRP', 'ADA', 'DOT', 'SOL', 'TRX', 'LTC', 'LINK', 'BCH']
TIMEFRAME = '30m'

# Loop through each symbol to train a separate model
for symbol in SYMBOLS:
    print(f'Training model for {symbol}...')

    # Fetch data for the current symbol
    data = get_data_with_features_and_labels(
        symbol=symbol,
        timeframes=[TIMEFRAME],
        separate=False,
        no_cache=True,
    )

    # Label the data
    data = label_data_trends_binary(data, 0.01)  # Adjust this threshold as needed

    # Prepare features and labels
    X, y = data.drop(['label'], axis=1), data['label']

    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Standardize the data
    X_train, X_test = StandardScaler().fit_transform(X_train), StandardScaler().fit_transform(X_test)

    # Initialize and train the CatBoost classifier
    catboost_clf = CatBoostClassifier(iterations=1000, learning_rate=0.1, verbose=False)
    catboost_clf.fit(X_train, y_train)

    # Make predictions on the test set
    catboost_preds = catboost_clf.predict(X_test)

    # Evaluate the model
    print('CatBoost balanced accuracy: ', balanced_accuracy_score(y_test, catboost_preds))
    print('CatBoost AUC-ROC: ', roc_auc_score(y_test, catboost_preds))
    print('\nCatBoost classification report:\n', classification_report(y_test, catboost_preds))

    # Cross-validation with AUC-ROC
    roc_auc_scorer = make_scorer(roc_auc_score)
    cv_scores = cross_val_score(catboost_clf, X, y, cv=5, scoring=roc_auc_scorer)
    print('CatBoost cross-validated AUC-ROC: ', np.mean(cv_scores))

    # Save the model with the symbol name included
    model_filename = f'trend_clf_{symbol}.model'
    catboost_clf.save_model(model_filename)
    print(f'Model for {symbol} saved as {model_filename}\n')


CatBoost cross-validated AUC-ROC:  0.5268598554989286
Model for LINK saved as trend_clf_LINK.model

Training model for BCH...
CatBoost balanced accuracy:  0.7187158776122404
CatBoost AUC-ROC:  0.7187158776122405

CatBoost classification report:
               precision    recall  f1-score   support

         0.0       0.73      0.70      0.72     10201
         1.0       0.71      0.73      0.72     10127

    accuracy                           0.72     20328
   macro avg       0.72      0.72      0.72     20328
weighted avg       0.72      0.72      0.72     20328

CatBoost cross-validated AUC-ROC:  0.49004414469719776
Model for BCH saved as trend_clf_BCH.model



In [4]:
from sklearn.preprocessing import StandardScaler
from catboost import CatBoostClassifier
from vctr.data.labeling import label_data_trends_binary
from vctr.data.data_loader import get_data_with_features_and_labels
import numpy as np

# Your existing parameters
SYMBOLS = ['TRX', 'LTC']
TIMEFRAME = '30m'
CONFIDENCE_THRESHOLD = 0.7

# Constants for brokerage fees (in percentage)
MAKER_FEE = 0.1  # 0.1%
TAKER_FEE = 0.2  # 0.2%

# Weight for taker fee in the average
TAKER_WEIGHT = 0.6  # 60% weight to the taker fee

# Calculate the average fee with a bias towards taker fee
AVERAGE_FEE = (MAKER_FEE * (1 - TAKER_WEIGHT)) + (TAKER_FEE * TAKER_WEIGHT)

# Number of last recommendations to consider for moving average
N_RECS = 5

# Initialize list to keep track of last N recommendations
last_n_recs = []

# Function to make real-time predictions
def make_real_time_prediction(model, most_recent_data):
    preprocessed_data = StandardScaler().fit_transform(most_recent_data)
    prediction = model.predict(preprocessed_data)
    pred_prob = model.predict_proba(preprocessed_data)
    return prediction, pred_prob


# Function for decision making
def make_decision(prediction, pred_prob, confidence_threshold=0.7):
    # Calculate confidence
    confidence = max(pred_prob[0])

    # Append to last_n_recs and trim if needed
    last_n_recs.append(prediction[0])
    if len(last_n_recs) > N_RECS:
        last_n_recs.pop(0)

    # Calculate moving average of last N recommendations
    ma_last_n_recs = np.mean(last_n_recs)

    # Decision making
    if confidence > confidence_threshold:
        if ma_last_n_recs >= 0.5:  # More buy recommendations in last N recs
            expected_profit = (1 + confidence) - (AVERAGE_FEE / 100)
            if expected_profit > 1:
                return 'High confidence the market will go up. Consider buying.'
        else:
            expected_profit = (1 - confidence) - (AVERAGE_FEE / 100)
            if expected_profit > 1:
                return 'High confidence the market will go down. Consider selling.'

    return 'Model is not confident enough. No action.'

# Loop through each symbol to make real-time predictions
for symbol in SYMBOLS:
    # Load the trained model
    loaded_model = CatBoostClassifier()
    loaded_model.load_model(f'trend_clf_{symbol}.model')

    # Fetch the most recent data
    most_recent_data = get_data_with_features_and_labels(
        symbol=symbol,
        timeframes=[TIMEFRAME],
        separate=False,
        no_cache=True,
    )

    # Assuming the data is sorted by time, take the last row for prediction
    most_recent_row = most_recent_data.iloc[-1:].drop(['label'], axis=1)

    # Make a prediction
    real_time_prediction, pred_prob = make_real_time_prediction(loaded_model, most_recent_row)

    # Make a decision
    decision = make_decision(real_time_prediction, pred_prob, CONFIDENCE_THRESHOLD)

    # Print the decision
    print(f'Symbol: {symbol}')
    print(f'  - {decision}')


Symbol: TRX
  - High confidence the market will go up. Consider buying.
Symbol: LTC
  - High confidence the market will go up. Consider buying.


In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from catboost import CatBoostClassifier

from vctr.data.data_loader import get_data_with_features_and_labels

model = CatBoostClassifier().load_model('trend_clf.model')

symbol = 'ACH'
TIMEFRAME = '15m'

data = get_data_with_features_and_labels(
    symbol=symbol,
    timeframe=TIMEFRAME,
    start='2023-01-01',
    separate=False,
    no_cache=True,
)

X, y = data.drop(['label'], axis=1), data['label']
X = StandardScaler().fit_transform(X)

preds = model.predict(X)

data = data[-len(preds) :].copy()
data['pred'] = pd.Series(preds, index=data.index)
data['close'].vbt.overlay_with_heatmap(data['pred']).show()