In [1]:
import numpy as np

import pandas as pd

from catboost import CatBoostClassifier

from sklearn.datasets import make_classification

from sklearn.model_selection import train_test_split

from imblearn.over_sampling import SMOTE

from sklearn.metrics import classification_report

In [9]:
# Create imbalanced dataset

X,y = make_classification(
    n_samples=5000,
    n_features=20,
    n_classes=2,
    weights=[0.9,0.1],
    random_state=42
    )

In [10]:
# Split the Data

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=42)

Default CatBoost (No Balancing)


In [11]:
default_model = CatBoostClassifier(
    iterations=500, 
    depth=6,
    learning_rate=0.1,
    random_state=42,
    verbose=0
    )

In [12]:
default_model.fit(X_train,y_train)

<catboost.core.CatBoostClassifier at 0x166bac5b0>

In [13]:
default_preds = default_model.predict(X_test)

Using `auto_class_weights='Balanced'

In [14]:
balanced_model = CatBoostClassifier(
    iterations=500, 
    depth=6,
    learning_rate=0.1,
    auto_class_weights='Balanced',
    random_state=42,
    verbose=0
    )

In [15]:
balanced_model.fit(X_train,y_train)

<catboost.core.CatBoostClassifier at 0x166bac520>

In [16]:
balanced_preds = balanced_model.predict(X_test)

Manually Assign Class Weights


In [17]:
custom_weights = {0:1,1:5} # Giving more weight to minority class

weighted_model = CatBoostClassifier(
    iterations=500, 
    depth=6,
    learning_rate=0.1,
    class_weights=custom_weights,
    random_state=42,
    verbose=0
    )

In [18]:
weighted_model.fit(X_train,y_train)

<catboost.core.CatBoostClassifier at 0x166bacd60>

In [19]:
weighted_preds = weighted_model.predict(X_test)

Oversampling with SMOTE

In [20]:
# Evaluate models

print("Default Model:\n", classification_report(y_test, default_preds))

print("Balanced Weights:\n", classification_report(y_test, balanced_preds))

print("Custom Class Weights:\n", classification_report(y_test, weighted_preds))


Default Model:
               precision    recall  f1-score   support

           0       0.97      0.98      0.97      1363
           1       0.78      0.66      0.72       137

    accuracy                           0.95      1500
   macro avg       0.87      0.82      0.85      1500
weighted avg       0.95      0.95      0.95      1500

Balanced Weights:
               precision    recall  f1-score   support

           0       0.97      0.97      0.97      1363
           1       0.71      0.73      0.72       137

    accuracy                           0.95      1500
   macro avg       0.84      0.85      0.85      1500
weighted avg       0.95      0.95      0.95      1500

Custom Class Weights:
               precision    recall  f1-score   support

           0       0.97      0.97      0.97      1363
           1       0.71      0.70      0.71       137

    accuracy                           0.95      1500
   macro avg       0.84      0.84      0.84      1500
weighted avg    