In [None]:
import pandas as pd
import numpy as np
import warnings

In [None]:
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import train_test_split

##### CatBoostClassifier

In [None]:
# Seed shared by the hold-out split below and by the CatBoost parameters.
SEED = 56

# Stratified hold-out split: 35% of the rows become the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.35,
    stratify=y,
    random_state=SEED,
)

In [None]:
# Hyperparameters passed to CatBoostClassifier in this section.
# NOTE(review): the previous comment here read "bootstrap bernoulli" -- presumably
# a reminder to try bootstrap_type='Bernoulli'; no bootstrap option is set below.
params = {
    # Objective and the metrics tracked during training
    'loss_function': 'Logloss',
    'eval_metric': 'AUC',
    'custom_loss': ['AUC', 'Precision', 'F1'],
    # Reproducibility
    'random_seed': SEED,
    # Regularisation, step size and tree shape
    'l2_leaf_reg': 2,
    'learning_rate': 0.03,
    'max_depth': 10,
    'iterations': 1500,
    'boosting_type': 'Ordered',
    # Categorical features with at most this many distinct values are one-hot
    # encoded by CatBoost.
    # NOTE(review): 2000 is unusually high -- confirm it is intended and accepted
    # by the installed CatBoost version.
    'one_hot_max_size': 2000,
}

# Columns handed to CatBoost as categorical features.
cat_features = ['MARKET_CODE']

In [None]:
# Carve a validation fold out of the training data for CatBoost's eval_set.
# Using (X_test, y_test) as eval_set together with use_best_model=True would
# pick the final iteration on the very rows that are scored afterwards, giving
# an optimistically biased test metric. Keeping X_test out of fit() leaves it
# a genuine hold-out set.
X_fit, X_valid, y_fit, y_valid = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    stratify=y_train,
    random_state=SEED,
)

cb_model = CatBoostClassifier(**params)
cb_model.fit(
    X_fit,
    y_fit,
    cat_features=cat_features,
    eval_set=(X_valid, y_valid),
    verbose=250,           # print training metrics every 250 iterations
    plot=True,             # live training chart in the notebook
    use_best_model=True,   # keep the iteration with the best eval_metric on eval_set
)

In [None]:
# Score the held-out rows with ROC AUC.
# predict_proba returns one column per class; column 1 is taken as the score
# (presumably the positive class -- verify against cb_model.classes_).
# roc_auc_score comes from sklearn.metrics and must be imported in the imports cell.
predictions = cb_model.predict_proba(X_test)[:, 1]
roc_auc_score(y_test, predictions)

In [None]:
# Feature importances of the fitted CatBoost model; prettified=True requests
# the tabular form of the result.
feature_importance_df = cb_model.get_feature_importance(
    prettified=True,
)

In [None]:
# Persist the fitted CatBoost model to the working directory in CatBoost's
# binary "cbm" format.
cb_model.save_model(
    'my_cb_model.bin',
    format="cbm",
    export_parameters=None,
    pool=None,
)

##### RandomizedSearchCV

In [None]:
# Fresh stratified 70/30 split for the LightGBM search.
# Note: this rebinds X_train / X_test / y_train / y_test, replacing the split
# made for the CatBoost section (different test_size and random_state).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=123,
)

In [None]:
# LightGBM classifier with library-default settings; it is the base estimator
# whose hyperparameters the randomized search overrides.
model = LGBMClassifier()

In [None]:
# Stratified 8-fold cross-validation repeated twice (16 fits per candidate),
# with a fixed seed so the fold assignment is reproducible.
cv = RepeatedStratifiedKFold(
    n_splits=8,
    n_repeats=2,
    random_state=123,
)

In [None]:
# Candidate values for each LightGBM hyperparameter explored by the search.
boosting_type = ['gbdt', 'dart']
depth = [5, 10, 25, 60]
l_rate = [0.001, 0.1, 1.0]
trees = [50, 150, 300, 500, 800, 1500, 2500, 3500, 5000]

# Search space keyed by LGBMClassifier argument name.
# Note: this rebinds `params`, which held the CatBoost settings earlier in the
# notebook.
# NOTE(review): num_leaves is not searched; the larger max_depth values may have
# little effect while num_leaves stays at its default -- confirm.
params = {
    'max_depth': depth,
    'boosting_type': boosting_type,
    'n_estimators': trees,
    'learning_rate': l_rate,
}

In [None]:
# Randomized search: 65 configurations sampled from `params`, each evaluated
# with the repeated stratified CV defined in `cv`.
# No `scoring` is given, so candidates are ranked by the estimator's default
# score. n_jobs=-1 runs the fits in parallel on all available cores.
rs_cv = RandomizedSearchCV(
    estimator=model,
    param_distributions=params,
    n_iter=65,
    cv=cv,
    n_jobs=-1,
    random_state=123,
)
rs_cv.fit(X_train, y_train)

In [None]:
# Take the best configuration found by the search and report its score on the
# held-out split (for a scikit-learn style classifier, .score is mean accuracy).
best_estimator = rs_cv.best_estimator_
best_estimator.score(X_test, y_test)