In [11]:
import numpy as np
from scipy.sparse import load_npz
from catboost import CatBoostClassifier
from sklearn.metrics import roc_auc_score


In [12]:
# Load the preprocessed, feature-engineered splits: feature matrices come from
# SciPy sparse .npz files, labels from NumPy .npy files.
# Single source of truth for the input directory, so relocating the data
# means editing one line instead of five.
DATA_DIR = "../data_process/processed_fe"

X_train = load_npz(f"{DATA_DIR}/X_train_proc_fe.npz")
X_valid = load_npz(f"{DATA_DIR}/X_valid_proc_fe.npz")
# X_test is not used by the cells visible in this section; it is loaded here
# alongside the other splits (presumably for later cells -- confirm).
X_test  = load_npz(f"{DATA_DIR}/X_test_proc_fe.npz")

y_train = np.load(f"{DATA_DIR}/y_train_fe.npy")
y_valid = np.load(f"{DATA_DIR}/y_valid_fe.npy")

# Fail fast on misaligned artifacts: a stale or mismatched file would
# otherwise only surface later as a confusing training/scoring error.
assert X_train.shape[0] == len(y_train), (
    f"X_train has {X_train.shape[0]} rows but y_train has {len(y_train)} labels"
)
assert X_valid.shape[0] == len(y_valid), (
    f"X_valid has {X_valid.shape[0]} rows but y_valid has {len(y_valid)} labels"
)
assert X_train.shape[1] == X_valid.shape[1] == X_test.shape[1], (
    "feature count differs across splits: "
    f"train={X_train.shape[1]}, valid={X_valid.shape[1]}, test={X_test.shape[1]}"
)

print("Train:", X_train.shape)
print("Valid:", X_valid.shape)
print("Type:", type(X_train))


Train: (475195, 67)
Valid: (118799, 67)
Type: <class 'scipy.sparse._csr.csr_matrix'>


In [13]:
# Hyperparameters for the CatBoost binary classifier, gathered in one mapping
# so they can be inspected or logged separately from model construction.
catboost_params = {
    # Boosting budget and tree shape.
    "iterations": 2000,
    "learning_rate": 0.03,
    "depth": 6,
    # Optimise log-loss; report AUC on the evaluation set.
    "loss_function": "Logloss",
    "eval_metric": "AUC",
    # Reproducibility and logging cadence (one log line every 100 iterations).
    "random_seed": 42,
    "verbose": 100,
    # Overfitting detector: iteration-count based, with a patience of
    # 100 rounds (early stopping rounds).
    "od_type": "Iter",
    "od_wait": 100,
}

model = CatBoostClassifier(**catboost_params)


In [14]:
# Train on the training split; the validation split is supplied as the
# evaluation set, which is what the "test:" / "best:" columns of the
# training log are computed on.
validation_pair = (X_valid, y_valid)

# Kept as the cell's final expression so the fitted model is still displayed.
model.fit(X=X_train, y=y_train, eval_set=validation_pair)


0:	test: 0.8970253	best: 0.8970253 (0)	total: 47.9ms	remaining: 1m 35s
100:	test: 0.9127817	best: 0.9127817 (100)	total: 4.64s	remaining: 1m 27s
200:	test: 0.9145161	best: 0.9145161 (200)	total: 9.13s	remaining: 1m 21s
300:	test: 0.9153601	best: 0.9153601 (300)	total: 13.8s	remaining: 1m 17s
400:	test: 0.9160345	best: 0.9160350 (399)	total: 18.4s	remaining: 1m 13s
500:	test: 0.9168338	best: 0.9168338 (500)	total: 22.9s	remaining: 1m 8s
600:	test: 0.9175847	best: 0.9175853 (599)	total: 27.5s	remaining: 1m 4s
700:	test: 0.9180569	best: 0.9180569 (700)	total: 32.2s	remaining: 59.7s
800:	test: 0.9184673	best: 0.9184690 (799)	total: 37s	remaining: 55.4s
900:	test: 0.9187990	best: 0.9187990 (900)	total: 42.2s	remaining: 51.4s
1000:	test: 0.9191550	best: 0.9191550 (1000)	total: 48.3s	remaining: 48.2s
1100:	test: 0.9194749	best: 0.9194749 (1100)	total: 55.9s	remaining: 45.6s
1200:	test: 0.9197349	best: 0.9197349 (1200)	total: 1m 3s	remaining: 42.2s
1300:	test: 0.9199635	best: 0.9199635 (1300)	

<catboost.core.CatBoostClassifier at 0x1dfb09bbfa0>

In [15]:
# Score the validation split: take column 1 of the predicted probability
# matrix and measure ROC AUC against the validation labels.
valid_proba = model.predict_proba(X_valid)
y_valid_pred = valid_proba[:, 1]

auc_cat = roc_auc_score(y_true=y_valid, y_score=y_valid_pred)
print("CatBoost AUC:", auc_cat)


CatBoost AUC: 0.9211848619856187
