In [1]:
import os

import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from skmultiflow.data import ConceptDriftStream, \
    AGRAWALGenerator, \
    HyperplaneGenerator, \
    SEAGenerator

In [2]:
def scale_and_save(x, y, name, out_dir='data/synthetic'):
    """Min-max scale the features, append the labels and write a header-less CSV.

    Parameters
    ----------
    x : 2-D array-like
        Feature matrix. Every column is rescaled with a freshly fitted
        ``MinMaxScaler`` using its default settings.
    y : numpy.ndarray
        Labels; cast to ``int`` and stored as the last column ('class').
    name : str
        File stem; the result is written to ``<out_dir>/<name>.csv``.
    out_dir : str, optional
        Target directory. It is created if it does not exist yet.
        Defaults to 'data/synthetic', the location used by this notebook.

    Returns
    -------
    None
        The function is called for its side effect (the CSV file).
    """
    # No explicit x.copy(): MinMaxScaler copies its input by default (copy=True),
    # so the caller's array is left untouched either way.
    data = pd.DataFrame(MinMaxScaler().fit_transform(x))
    data['class'] = y.astype(int)

    # to_csv does not create missing parent directories; make sure the target
    # exists so the notebook also runs on a fresh checkout.
    os.makedirs(out_dir, exist_ok=True)
    data.to_csv(os.path.join(out_dir, name + '.csv'), index=False, header=False)

## SEA Generator (Sudden Drift)
250,000 instances, 3 features, drift @ 50,000, 100,000 and 200,000 observations

In [3]:
# Three sudden drifts (width=1) are obtained by nesting ConceptDriftStreams,
# built innermost-first. Every SEA concept shares the same seed and noise level
# and differs only in its classification function (0 -> 1 -> 2 -> 3).
# NOTE(review): each nested `position` appears to be counted in samples drawn
# from that nested stream, i.e. relative to the preceding drift, which gives
# 50,000 / 50,000 + 50,000 / 50,000 + 50,000 + 100,000 -- confirm against the
# scikit-multiflow documentation.

# Drift @ 200,000 (classification function 2 -> 3)
alternate2 = ConceptDriftStream(
    stream=SEAGenerator(balance_classes=False, classification_function=2, random_state=112, noise_percentage=0.1),
    drift_stream=SEAGenerator(balance_classes=False, classification_function=3, random_state=112, noise_percentage=0.1),
    position=100000,
    width=1,
    random_state=0)

# Drift @ 100,000 (classification function 1 -> 2)
alternate1 = ConceptDriftStream(
    stream=SEAGenerator(balance_classes=False, classification_function=1, random_state=112, noise_percentage=0.1),
    drift_stream=alternate2,
    position=50000,
    width=1,
    random_state=0)

# Drift @ 50,000 (classification function 0 -> 1)
stream = ConceptDriftStream(
    stream=SEAGenerator(balance_classes=False, classification_function=0, random_state=112, noise_percentage=0.1),
    drift_stream=alternate1,
    position=50000,
    width=1,
    random_state=0)

# The deprecated stream.prepare_for_use() call is omitted: the installed
# scikit-multiflow reports that streams are ready to use after instantiation,
# so the call only produced a warning.
x, y = stream.next_sample(250000)

# Normalize and save data
scale_and_save(x, y, 'sea')

New instances of the Stream class are now ready to use after instantiation.
  probability_drift = 1.0 / (1.0 + np.exp(x))


## Agrawal Generator (Gradual Drift)
250,000 instances, 9 features, drift @ 50,000–100,000 and 150,000–200,000 observations

In [4]:
# Two gradual drifts (width=50000) are obtained by nesting ConceptDriftStreams,
# built innermost-first. Every AGRAWAL concept shares the same seed and
# perturbation and differs only in its classification function (0 -> 1 -> 2).
# NOTE(review): scikit-multiflow documents `position` as the central position
# of the drift and `width` as its extent. With position=50000 and width=50000
# the transitions may therefore be centred on the stated positions rather than
# spanning exactly the windows quoted below -- confirm before relying on them.

# Drift @ 150,000-200,000 (classification function 1 -> 2)
alternate1 = ConceptDriftStream(
    stream=AGRAWALGenerator(balance_classes=False, classification_function=1, random_state=112, perturbation=0.1),
    drift_stream=AGRAWALGenerator(balance_classes=False, classification_function=2, random_state=112, perturbation=0.1),
    position=50000,
    width=50000,
    random_state=0)

# Drift @ 50,000-100,000 (classification function 0 -> 1)
stream = ConceptDriftStream(
    stream=AGRAWALGenerator(balance_classes=False, classification_function=0, random_state=112, perturbation=0.1),
    drift_stream=alternate1,
    position=50000,
    width=50000,
    random_state=0)

# The deprecated stream.prepare_for_use() call is omitted: the installed
# scikit-multiflow reports that streams are ready to use after instantiation,
# so the call only produced a warning.
x, y = stream.next_sample(250000)

# Normalize and save data
scale_and_save(x, y, 'agrawal')

New instances of the Stream class are now ready to use after instantiation.


## Hyperplane Generator (Incremental Drift)
250,000 instances, 50 features

In [5]:
# Single hyperplane stream with 50 features, 25 of which are configured as
# drifting features; no ConceptDriftStream wrapper is used here.
stream = HyperplaneGenerator(random_state=112,
                             n_features=50,
                             n_drift_features=25,
                             mag_change=0.5,
                             noise_percentage=0.1,
                             sigma_percentage=0.1)

# The deprecated stream.prepare_for_use() call is omitted: the installed
# scikit-multiflow reports that streams are ready to use after instantiation,
# so the call only produced a warning.
x, y = stream.next_sample(250000)

# Normalize and save data
scale_and_save(x, y, 'hyperplane')

New instances of the Stream class are now ready to use after instantiation.
