In [1]:
import pandas as pd
from imblearn.over_sampling import SMOTE, ADASYN
from imblearn.under_sampling import ClusterCentroids, RandomUnderSampler, NearMiss

### Oversampling minority class

In [2]:
def oversamplingSMOTE(ft, ft_index, neighbors, random_state=None):
    """Oversample the minority class with SMOTE.

    Parameters
    ----------
    ft : features handed to the sampler as ``X``. The resampled result must
        support column assignment (presumably a pandas DataFrame in, and
        therefore out -- confirm against the caller).
    ft_index : class labels handed to the sampler as ``y``.
    neighbors : forwarded to ``SMOTE`` as ``k_neighbors``.
    random_state : optional seed forwarded to ``SMOTE``. The default ``None``
        keeps the previous, unseeded behaviour.

    Returns
    -------
    The resampled features with the resampled labels stored in an extra
    ``'seizure'`` column.
    """
    smote = SMOTE(
        sampling_strategy='minority',
        k_neighbors=neighbors,
        random_state=random_state,
    )
    # fit_resample is the supported entry point; the old fit_sample alias is
    # no longer available in recent imbalanced-learn releases.
    smote_features, smote_indicator = smote.fit_resample(ft, ft_index)
    smote_features['seizure'] = smote_indicator
    return smote_features

In [3]:
def oversamplingADASYN(ft, ft_index, neighbors, random_state=None):
    """Oversample the minority class with ADASYN.

    Parameters
    ----------
    ft : features handed to the sampler as ``X``. The resampled result must
        support column assignment (presumably a pandas DataFrame in, and
        therefore out -- confirm against the caller).
    ft_index : class labels handed to the sampler as ``y``.
    neighbors : forwarded to ``ADASYN`` as ``n_neighbors``.
    random_state : optional seed forwarded to ``ADASYN``. The default ``None``
        keeps the previous, unseeded behaviour.

    Returns
    -------
    The resampled features with the resampled labels stored in an extra
    ``'seizure'`` column.
    """
    adasyn = ADASYN(
        sampling_strategy='minority',
        n_neighbors=neighbors,
        random_state=random_state,
    )
    # fit_resample is the supported entry point; the old fit_sample alias is
    # no longer available in recent imbalanced-learn releases.
    adasyn_features, adasyn_indicator = adasyn.fit_resample(ft, ft_index)
    adasyn_features['seizure'] = adasyn_indicator
    return adasyn_features

In [4]:
def minorityOversampling(ft, ft_index, neighbors, method):
    """Dispatch to the requested minority-class oversampler.

    ``method`` is compared case-insensitively: ``'ADASYN'`` selects
    ``oversamplingADASYN``; every other value selects ``oversamplingSMOTE``.
    The remaining arguments are passed through unchanged and the chosen
    helper's result is returned.
    """
    wants_adasyn = method.upper() == 'ADASYN'
    sampler = oversamplingADASYN if wants_adasyn else oversamplingSMOTE
    return sampler(ft, ft_index, neighbors)

### Undersampling majority class

In [5]:
def undersamplingClusterCentroids(ft, ft_index, rate, random_state=None):
    """Undersample with ClusterCentroids.

    Parameters
    ----------
    ft : features handed to the sampler as ``X``. The resampled result must
        support column assignment (presumably a pandas DataFrame in, and
        therefore out -- confirm against the caller).
    ft_index : class labels handed to the sampler as ``y``.
    rate : forwarded to ``ClusterCentroids`` as ``sampling_strategy``.
    random_state : optional seed forwarded to ``ClusterCentroids`` so the
        result can be reproduced. The default ``None`` keeps the previous,
        unseeded behaviour.

    Returns
    -------
    The resampled features with the resampled labels stored in an extra
    ``'seizure'`` column.
    """
    cc = ClusterCentroids(sampling_strategy=rate, random_state=random_state)
    cc_features, cc_indicator = cc.fit_resample(ft, ft_index)
    cc_features['seizure'] = cc_indicator
    return cc_features

In [7]:
# Upgrade scikit-learn in the running kernel's environment. The explicit %pip
# magic does not depend on IPython automagic being enabled. Restart the kernel
# afterwards so the upgraded package is the one that gets imported.
%pip install --upgrade scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [6]:
def undersamplingNearMiss(ft, ft_index, rate, neighbors):
    """Undersample with NearMiss.

    Parameters
    ----------
    ft : features handed to the sampler as ``X``. The resampled result must
        support column assignment (presumably a pandas DataFrame in, and
        therefore out -- confirm against the caller).
    ft_index : class labels handed to the sampler as ``y``.
    rate : forwarded to ``NearMiss`` as ``sampling_strategy``.
    neighbors : forwarded to ``NearMiss`` as ``n_neighbors``.

    Returns
    -------
    The resampled features with the resampled labels stored in an extra
    ``'seizure'`` column.
    """
    nm = NearMiss(sampling_strategy=rate, n_neighbors=neighbors)
    # fit_resample is the supported entry point; the old fit_sample alias is
    # no longer available in recent imbalanced-learn releases.
    nm_features, nm_indicator = nm.fit_resample(ft, ft_index)
    nm_features['seizure'] = nm_indicator
    return nm_features

In [9]:
def undersamplingRandom(ft, ft_index, rate, random_state=None):
    """Undersample with RandomUnderSampler.

    Parameters
    ----------
    ft : features handed to the sampler as ``X``. The resampled result must
        support column assignment (presumably a pandas DataFrame in, and
        therefore out -- confirm against the caller).
    ft_index : class labels handed to the sampler as ``y``.
    rate : forwarded to ``RandomUnderSampler`` as ``sampling_strategy``.
    random_state : optional seed forwarded to ``RandomUnderSampler`` so the
        random selection can be reproduced. The default ``None`` keeps the
        previous, unseeded behaviour.

    Returns
    -------
    The resampled features with the resampled labels stored in an extra
    ``'seizure'`` column.
    """
    ru = RandomUnderSampler(sampling_strategy=rate, random_state=random_state)
    # fit_resample is the supported entry point; the old fit_sample alias is
    # no longer available in recent imbalanced-learn releases.
    ru_features, ru_indicator = ru.fit_resample(ft, ft_index)
    ru_features['seizure'] = ru_indicator
    return ru_features

In [10]:
def majorityUndersampling (ft, ft_index, rate, neighbors, method):
    """Dispatch to the requested majority-class undersampler.

    ``method`` is compared case-insensitively:

    * ``'RANDOM'``   -> ``undersamplingRandom(ft, ft_index, rate)``
    * ``'NEARMISS'`` -> ``undersamplingNearMiss(ft, ft_index, rate, neighbors)``
    * anything else  -> ``undersamplingClusterCentroids(ft, ft_index, rate)``

    ``neighbors`` is only used by the NearMiss branch. The chosen helper's
    result is returned unchanged.
    """
    choice = method.upper()
    if choice == 'RANDOM':
        return undersamplingRandom(ft, ft_index, rate)
    if choice == 'NEARMISS':
        return undersamplingNearMiss(ft, ft_index, rate, neighbors)
    # Fallback for every unrecognised method name.
    return undersamplingClusterCentroids(ft, ft_index, rate)