In [1]:
import numpy as np

import pandas as pd

from catboost import CatBoostClassifier,Pool

from sklearn.datasets import make_classification

from sklearn.model_selection import train_test_split

from sklearn.metrics import classification_report

In [2]:
# Synthetic binary-classification data: 5000 rows with 20 features each.
# The fixed random_state makes every run regenerate the same arrays.
X, y = make_classification(
    n_samples=5000,
    n_features=20,
    n_classes=2,
    random_state=42,
)

In [3]:
# Hold out 20% of the rows as a test set; the seed keeps the split stable
# across reruns.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [4]:
# Wrap each split (features plus labels) in a CatBoost Pool so the same
# objects can be handed to fit() as the training data and as eval_set.
test_pool = Pool(data=X_test, label=y_test)
train_pool = Pool(data=X_train, label=y_train)

In [5]:
# 1️⃣ Default CatBoost Model (No Overfitting Control)

# Baseline configuration: 1000 boosting iterations, depth-6 trees, learning
# rate 0.1. No overfitting-detector or regularization settings are passed.
# verbose=200 logs progress every 200 iterations.
default_model = CatBoostClassifier(
    learning_rate=0.1,
    depth=6,
    iterations=1000,
    verbose=200,
)

In [6]:
# Fit the baseline. eval_set is supplied so the held-out loss is logged next
# to the training loss; use_best_model=False keeps every trained tree instead
# of truncating the ensemble at the best eval iteration.
default_model.fit(
    train_pool,
    use_best_model=False,
    eval_set=test_pool,
)

0:	learn: 0.5991344	test: 0.6014615	best: 0.6014615 (0)	total: 58.9ms	remaining: 58.9s
200:	learn: 0.0796664	test: 0.2079411	best: 0.2032139 (57)	total: 428ms	remaining: 1.7s
400:	learn: 0.0309344	test: 0.2185883	best: 0.2032139 (57)	total: 864ms	remaining: 1.29s
600:	learn: 0.0158874	test: 0.2344541	best: 0.2032139 (57)	total: 1.26s	remaining: 839ms
800:	learn: 0.0107245	test: 0.2448657	best: 0.2032139 (57)	total: 1.63s	remaining: 405ms
999:	learn: 0.0083733	test: 0.2526384	best: 0.2032139 (57)	total: 1.97s	remaining: 0us

bestTest = 0.2032139026
bestIteration = 57



<catboost.core.CatBoostClassifier at 0x14ef0b520>

In [7]:
# Hard class labels from the baseline model for the held-out rows
# ('Class' is the classifier's default prediction type, spelled out here).
default_preds = default_model.predict(X_test, prediction_type='Class')

In [8]:
# 2️⃣ CatBoost with Early Stopping & Overfitting Detector

# Same tree settings as the baseline, plus the iteration-based overfitting
# detector.
# NOTE(review): the fit() call for this model also passes
# early_stopping_rounds=50, which CatBoost documents as selecting the 'Iter'
# detector with that wait, so od_type/od_wait here look redundant. Confirm
# and keep only one of the two.
early_stopping_model = CatBoostClassifier(
    learning_rate=0.1,
    depth=6,
    iterations=1000,
    od_wait=50,      # iterations to keep going after the best eval score
    od_type='Iter',  # stop when the eval metric has stopped improving
    verbose=200,
)

In [9]:
# Fit with early stopping: training halts once the eval_set metric has gone
# 50 iterations without improving.
# NOTE(review): test_pool wraps the same X_test/y_test rows that
# classification_report scores later in the notebook, so the stopping point
# is tuned on the data being reported. Consider a separate validation split.
early_stopping_model.fit(
    train_pool,
    early_stopping_rounds=50,
    eval_set=test_pool,
)

0:	learn: 0.5991344	test: 0.6014615	best: 0.6014615 (0)	total: 1.91ms	remaining: 1.91s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.2032139026
bestIteration = 57

Shrink model to first 58 iterations.


<catboost.core.CatBoostClassifier at 0x14f1fc0a0>

In [10]:
# Hard class labels from the early-stopped model for the held-out rows
# ('Class' is the classifier's default prediction type, spelled out here).
early_stopping_preds = early_stopping_model.predict(X_test, prediction_type='Class')

In [11]:
# 3️⃣ Regularized Model (L2 Regularization & Subsampling)

# Same tree settings as the baseline, with a stronger L2 penalty on leaf
# values and row subsampling.
regularized_model = CatBoostClassifier(
    learning_rate=0.1,
    depth=6,
    iterations=1000,
    subsample=0.8,   # sample rate for bagging; exact effect depends on the
                     # bootstrap type, which is not set here (TODO confirm)
    l2_leaf_reg=10,  # L2 regularization coefficient
    verbose=200,
)

In [12]:
# Fit the regularized model, halting once the eval_set metric has gone 50
# iterations without improving.
# NOTE(review): test_pool wraps the same X_test/y_test rows that
# classification_report scores later in the notebook, so the stopping point
# is tuned on the data being reported. Consider a separate validation split.
regularized_model.fit(
    train_pool,
    early_stopping_rounds=50,
    eval_set=test_pool,
)

0:	learn: 0.6003794	test: 0.6023674	best: 0.6023674 (0)	total: 2.97ms	remaining: 2.97s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.2033098382
bestIteration = 76

Shrink model to first 77 iterations.


<catboost.core.CatBoostClassifier at 0x14f1fc460>

In [13]:
# Hard class labels from the regularized model for the held-out rows
# ('Class' is the classifier's default prediction type, spelled out here).
regularized_preds = regularized_model.predict(X_test, prediction_type='Class')

In [14]:
# Print a per-class precision/recall/F1 report for each model's predictions
# on the held-out labels, in the order the models were trained.
for report_title, model_preds in (
    ("Default Model:\n", default_preds),
    ("Early Stopping Model:\n", early_stopping_preds),
    ("Regularized Model:\n", regularized_preds),
):
    print(report_title, classification_report(y_test, model_preds))

Default Model:
               precision    recall  f1-score   support

           0       0.91      0.93      0.92       503
           1       0.93      0.91      0.92       497

    accuracy                           0.92      1000
   macro avg       0.92      0.92      0.92      1000
weighted avg       0.92      0.92      0.92      1000

Early Stopping Model:
               precision    recall  f1-score   support

           0       0.91      0.95      0.93       503
           1       0.94      0.90      0.92       497

    accuracy                           0.92      1000
   macro avg       0.92      0.92      0.92      1000
weighted avg       0.92      0.92      0.92      1000

Regularized Model:
               precision    recall  f1-score   support

           0       0.91      0.94      0.92       503
           1       0.93      0.91      0.92       497

    accuracy                           0.92      1000
   macro avg       0.92      0.92      0.92      1000
weighted avg   