## SPBaDF

In [1]:
from imbalanced_spdf.ensemble import SPBaDF
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, roc_auc_score

### Creating Synthetic Data

In [2]:
# Generate a small, imbalanced binary classification dataset.
# weights=[0.90] puts roughly 90% of the samples in class 0, flip_y=0 turns
# off random label flipping, and random_state=1 makes the draw reproducible.
X, y = make_classification(
    n_samples=100,
    n_features=20,
    n_informative=2,
    n_redundant=10,
    n_clusters_per_class=1,
    weights=[0.90],
    flip_y=0,
    random_state=1,
)

In [3]:
# Summarise the feature matrix dimensions (rows = samples, columns = features).
shape_summary = f"X shape: {X.shape}, with {X.shape[0]} samples and {X.shape[1]} features"
print(shape_summary)

X shape: (100, 20), with 100 samples and 20 features


### A look at the dataset

In [4]:
# Display the feature matrix; NumPy abbreviates the repr of a large array with "...".
X

array([[ 0.92957561,  0.39442803,  1.17437625, ...,  0.91549927,
         0.67545381, -0.15050433],
       [ 0.86404969, -1.55694156,  1.08060284, ..., -1.21333813,
        -0.72853749, -2.69836174],
       [ 0.5706656 ,  1.20845633,  0.69211449, ...,  0.35016716,
        -0.10288722, -0.4791571 ],
       ...,
       [ 0.52234942, -0.79954749,  0.62567337, ..., -0.01771832,
         0.1892932 , -1.1077125 ],
       [ 1.03150227, -0.22676019,  1.30770407, ...,  0.86089124,
         0.94980882,  1.63169151],
       [ 0.657069  ,  0.14676526,  0.83916143, ...,  0.45794708,
         0.11236849,  0.57296273]])

In [5]:
# Display the label vector; the positive class (1) is the minority.
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0])

In [6]:
# Hold out 30% of the samples for evaluation; random_state=1 fixes the shuffle.
# NOTE(review): the split is not stratified, so with this few minority samples
# the class ratio in the test set may differ from the full data — consider
# passing stratify=y.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

### Fitting and Predicting using the SPBaDF Model

In [7]:
# Build the SPBaDF ensemble used in the cells below.
# NOTE(review): presumably `weight` is the emphasis placed on the minority class
# and `n_trees` the number of trees in the ensemble — confirm against the
# imbalanced_spdf documentation.
spbadf = SPBaDF(
    weight=10,
    n_trees=10,
)

In [8]:
# Fit the ensemble on the training split only; the test split stays unseen.
spbadf.fit(X_train, y_train)

In [9]:
# Predict on the held-out test features; `pred` is scored against y_test below.
pred = spbadf.predict(X_test)

In [10]:
# Score the held-out predictions with F1 (sklearn's default binary setting,
# which reports the score for the positive label 1).
f1_score(y_true=y_test, y_pred=pred)

1.0