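# SPBaDF: quick-start example

This notebook fits an SPBaDF classifier on a small, imbalanced synthetic dataset and compares its F1 score on a held-out test split with that of a default CatBoost model.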


## Importing the necessary libraries

In [2]:
from imbalanced_spbdf.ensemble import SPBaDF
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, roc_auc_score

## Creating a synthetic dataset

In [3]:
# Build a small two-class toy problem with a heavily skewed class balance.
X, y = make_classification(
    n_samples=100,
    n_features=20,
    n_informative=2,
    n_redundant=10,
    n_clusters_per_class=1,
    weights=[0.90],  # ~90% of samples go to class 0; the remainder falls to class 1
    flip_y=0,        # no random label flipping
    random_state=1,  # fixed seed so the dataset is reproducible
)

## Take a look at the dataset

In [4]:
# Report the dimensions of the feature matrix (rows = samples, columns = features).
print(
    f"X shape: {X.shape}, "
    f"with {X.shape[0]} samples and {X.shape[1]} features"
)

X shape: (100, 20), with 100 samples and 20 features


In [5]:
# Summarise the label vector: label 0 is the majority class, label 1 the minority.
# Counting True entries of the boolean mask gives the per-class sample count.
print(
    f"y shape: {y.shape}, with {y.shape[0]} samples, "
    f"the majority class has {(y == 0).sum()} samples "
    f"and the minority class has {(y == 1).sum()} samples"
)

y shape: (100,), with 100 samples, the majority class has 91 samples and the minority class has 9 samples


## Splitting the dataset into training and test sets

In [6]:
# Hold out 30% of the samples for evaluation; the seed keeps the split repeatable.
# NOTE(review): the split is not stratified, so with only 9 minority samples the
# class ratio in train/test can drift — consider stratify=y and re-running.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

## Fit an SPBaDF model

In [7]:
# Configure the SPBaDF ensemble with weight=10 and 100 trees.
# NOTE(review): `weight` presumably up-weights the minority class — confirm against the SPBaDF docs.
spbadf = SPBaDF(weight=10, n_trees=100)

In [8]:
# Train on the training split only; the test split is kept aside for evaluation.
spbadf.fit(X_train, y_train)

In [9]:
# Predict class labels for the held-out test samples.
pred = spbadf.predict(X_test)

In [10]:
# F1 on the test split; with sklearn's defaults this scores the positive class
# (label 1), which is the minority class in this dataset.
f1_score(y_test, pred)

1.0

## Fit a CatBoost model

In [11]:
from catboost import CatBoostClassifier

In [12]:
# CatBoost baseline built with default hyperparameters (no arguments are passed).
cat = CatBoostClassifier()

In [13]:
# Fit on the same training split as SPBaDF; verbose=False silences CatBoost's training log.
cat.fit(X_train, y_train, verbose=False)

<catboost.core.CatBoostClassifier at 0x1c0799a0750>

In [14]:
# Predict labels for the same held-out test samples that SPBaDF was evaluated on.
pred_cat = cat.predict(X_test)

In [15]:
# F1 on the same test split, for comparison with the SPBaDF score.
f1_score(y_test, pred_cat)

0.9090909090909091