# Addressing class imbalance 

In [11]:
import imblearn

# Note: imblearn is not part of the standard Python distribution and must be
# installed separately. From a command prompt, install it with:
# conda install -c conda-forge imbalanced-learn


### Create synthetic imbalanced data

In [12]:
from sklearn.datasets import make_classification
from collections import Counter

# Build a synthetic, heavily imbalanced binary dataset.
# weights=[0.99] requests ~99% of samples in class 0 (class 1 gets the rest),
# and flip_y=0 disables random label flipping so the class counts match the
# requested proportions.
# random_state is fixed so that Restart & Run All regenerates the same X and y.
X, y = make_classification(n_samples=10000, weights=[0.99], flip_y=0, random_state=42)

# Class distribution before any resampling.
print(Counter(y))

Counter({0: 9900, 1: 100})


### Undersampling

In [13]:
from imblearn.under_sampling import RandomUnderSampler

# sampling_strategy='majority' resamples only the majority class, randomly
# discarding its rows until both classes have the same number of samples.
# random_state is fixed so the same majority rows are kept on every run.
undersample = RandomUnderSampler(sampling_strategy='majority', random_state=42)

X_under, y_under = undersample.fit_resample(X, y)

# Class distribution after undersampling.
print(Counter(y_under))

Counter({0: 100, 1: 100})


### Oversampling

In [15]:
from imblearn.over_sampling import RandomOverSampler

# sampling_strategy='minority' resamples only the minority class, randomly
# duplicating its rows until both classes have the same number of samples.
# random_state is fixed so the same minority rows are duplicated on every run.
oversample = RandomOverSampler(sampling_strategy='minority', random_state=42)

X_over, y_over = oversample.fit_resample(X, y)

# Class distribution after oversampling.
print(Counter(y_over))

Counter({0: 9900, 1: 9900})
