## SPBoDF

In [1]:
from imbalanced_spdf.ensemble import SPBoDF
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

Creating Synthetic Data

In [10]:
X, y = make_classification(n_samples=1000, n_features=20, n_informative=2, n_redundant=10, n_clusters_per_class=1, weights=[0.90], flip_y=0, random_state=1)

In [11]:
print(f"X shape: {X.shape}, with {X.shape[0]} samples and {X.shape[1]} features")

X shape: (1000, 20), with 1000 samples and 20 features


### A look at the dataset

In [12]:
X

array([[-0.20245394,  0.75116708, -1.5118818 , ..., -1.56978108,
         0.6370882 , -0.46505859],
       [-0.2933976 , -0.4618495 , -1.03503817, ..., -1.36749482,
         0.77655426, -0.30663438],
       [-0.23353927,  1.59413738, -0.24302373, ..., -1.25174607,
         0.64451726,  0.74205658],
       ...,
       [ 0.40607453,  0.38551581, -0.68063573, ..., -0.4895647 ,
        -0.68961613,  2.60755891],
       [-0.19983642,  0.40406811,  0.1310922 , ..., -1.28099316,
         0.5854406 , -0.76433963],
       [-0.01938808, -2.10390849, -0.73355857, ...,  0.05138838,
         0.0283964 ,  0.40036396]])

In [13]:
y

array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1,

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

### Fitting and Predicting using SPBoDF Model

In [15]:
spbodf = SPBoDF(weight=10, n_trees=10)

In [16]:
spbodf.fit(X_train, y_train)

In [17]:
pred = spbodf.predict(X_test)

In [18]:
f1_score(y_test, pred)

0.8387096774193549