Skip to content

Commit

Permalink
One generator to rule them all.
Browse files Browse the repository at this point in the history
  • Loading branch information
xehivs committed Oct 4, 2019
1 parent 5f28155 commit daa5008
Show file tree
Hide file tree
Showing 16 changed files with 161 additions and 159 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
*.lprof
*.png
doc/modules
doc/auto_examples
doc/generated
Expand Down
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,17 @@ stream-learn is available on the PyPi and you may install it with pip:
pip install stream-learn
```

## Streams

### Stationary stream
![Stationary stream](plots/0_stationary.png)

### Sudden drift
![Sudden drift](plots/2_sudden_drift.png)

### Incremental drift
![Incremental drift](plots/1_incremental_drift.png)

## Example usage

```python
Expand Down
107 changes: 107 additions & 0 deletions examples/drift_plotter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# -*- coding: utf-8 -*-
"""
==========================
Drift plotter
==========================
This example plots stationary, incremental-drift and sudden-drift streams
produced by StreamGenerator and saves one figure per stream.
"""

# Authors: Paweł Ksieniewicz <pawel.ksieniewicz@pwr.edu.pl>
# License: MIT

from strlearn.streams import StreamGenerator
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import numpy as np


# Demo streams keyed by output file stem. The three configurations share every
# setting except the number of drifts and the sigmoid spacing, so only those
# two values are listed per stream. Insertion order matches the order in which
# the figures are rendered further down.
streams = {
    name: StreamGenerator(
        n_chunks=100,
        chunk_size=100,
        random_state=105,
        n_features=2,
        n_classes=2,
        n_drifts=drift_count,
        n_informative=2,
        n_redundant=0,
        n_repeated=0,
        sigmoid_spacing=spacing,
        n_clusters_per_class=1,
    )
    for name, drift_count, spacing in (
        ("1_incremental_drift", 2, 5),
        ("0_stationary", 0, 5),
        ("2_sudden_drift", 2, 999),
    )
}

# Render one three-row figure per configured stream and save it as
# plots/<stream_name>.png:
#   row 0 - the stream's periodical sigmoid,
#   row 1 - per-chunk counts of samples whose concept selector is 0 or 1,
#   row 2 - scatter plots of the chunks at eight evenly spaced checkpoints.
for stream_name, stream in streams.items():
    print(stream_name)

    # Eight evenly spaced chunk indices (first and last chunk included) at
    # which a scatter plot of the chunk is drawn.
    checkpoints = np.linspace(0, stream.n_chunks - 1, 8).astype(int)

    fig = plt.figure(constrained_layout=True, figsize=(8, 4))
    gs = GridSpec(3, len(checkpoints), figure=fig)

    # Scatter plots
    concept_0_counts, concept_1_counts = [], []
    # Walk every chunk the stream is configured to produce instead of a
    # hard-coded 100, so the script keeps working if n_chunks is changed.
    for i in range(stream.n_chunks):
        X, y = stream.get_chunk()

        # Slice of the concept selector that covers this chunk's samples.
        start = stream.chunk_size * i
        end = start + stream.chunk_size
        cs = stream.concept_selector[start:end]
        concept_0_counts.append(np.sum(cs == 0))
        concept_1_counts.append(np.sum(cs == 1))

        if i in checkpoints:
            # Column of the bottom row that belongs to this checkpoint.
            index = np.where(checkpoints == i)[0][0]
            ax = fig.add_subplot(gs[2, index])
            ax.scatter(X[:, 0], X[:, 1], c=y, s=10, alpha=0.5, cmap="bwr")
            ax.set_xlim(-5, 5)
            ax.set_ylim(-5, 5)
            ax.set_xticks([])
            ax.set_yticks([])

    # Concept presence
    ax = fig.add_subplot(gs[1, :])
    ax.set_title("Concept presence")
    ax.plot(concept_0_counts, c="black", ls=":")
    ax.plot(concept_1_counts, c="black", ls="--")
    ax.set_ylim(-10, stream.chunk_size + 10)
    ax.set_xticks(checkpoints)

    # Periodical sigmoid
    ax = fig.add_subplot(gs[0, :])
    ax.set_title(
        "Periodical sigmoid (ss=%.1f, n_drifts=%i)"
        % (stream.sigmoid_spacing, stream.n_drifts)
    )
    ax.plot(stream.period_sigmoid, lw=1, c="black")
    ax.set_ylim(-0.05, 1.05)

    fig.savefig("plots/%s.png" % stream_name)
    # Release the figure; pyplot otherwise keeps every figure created in this
    # loop alive until the interpreter exits.
    plt.close(fig)
72 changes: 0 additions & 72 deletions examples/gradual_drift.py

This file was deleted.

Binary file added plots/0_stationary.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added plots/1_incremental_drift.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added plots/2_sudden_drift.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 2 additions & 2 deletions strlearn/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
from . import classifiers
from . import ensembles
from . import evaluators
from . import generators
from . import streams
from . import utils

"""
__all__ = ["classifiers", "ensembles", "eval", "generators", "utils", "__version__"]
__all__ = ["classifiers", "ensembles", "eval", "streams", "utils", "__version__"]
"""
52 changes: 0 additions & 52 deletions strlearn/generators/StationaryStream.py

This file was deleted.

4 changes: 0 additions & 4 deletions strlearn/generators/__init__.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from scipy.stats import logistic


class DriftedStream:
class StreamGenerator:
def __init__(
self,
n_chunks=250,
Expand Down Expand Up @@ -43,6 +43,9 @@ def get_chunk(self):
if hasattr(self, "X"):
self.previous_chunk = self.current_chunk
else:
# To pomocniczo
n_samples = self.n_chunks * self.chunk_size

X_a, y_a = make_classification(
n_samples=self.n_chunks * self.chunk_size,
random_state=self.random_state,
Expand All @@ -68,24 +71,33 @@ def get_chunk(self):
big_X = np.array([X_a, X_b])
big_y = np.array([y_a, y_b])

# To pomocniczo
n_samples = self.n_chunks * self.chunk_size

# Okres
period = int((n_samples) / (self.n_drifts))
period = (
int((n_samples) / (self.n_drifts))
if self.n_drifts > 0
else int(n_samples)
)

# Sigmoid
self.period_sigmoid = logistic.cdf(
np.concatenate(
[
np.linspace(
-self.sigmoid_spacing if i % 2 else self.sigmoid_spacing,
self.sigmoid_spacing if i % 2 else -self.sigmoid_spacing,
period,
)
for i in range(self.n_drifts)
]
self.period_sigmoid = (
logistic.cdf(
np.concatenate(
[
np.linspace(
-self.sigmoid_spacing
if i % 2
else self.sigmoid_spacing,
self.sigmoid_spacing
if i % 2
else -self.sigmoid_spacing,
period,
)
for i in range(self.n_drifts)
]
)
)
if self.n_drifts > 0
else np.ones(n_samples)
)
# Szum
self.noise = np.random.rand(n_samples)
Expand Down
3 changes: 3 additions & 0 deletions strlearn/streams/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .StreamGenerator import StreamGenerator

__all__ = ["StreamGenerator"]
7 changes: 4 additions & 3 deletions strlearn/tests/test_classifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@


def test_ACS_TestThanTrain():
stream = sl.generators.DriftedStream(sigmoid_spacing=999)
stream = sl.streams.StreamGenerator()
clf = sl.classifiers.AccumulatedSamplesClassifier()
evaluator = sl.evaluators.TestThenTrainEvaluator()
evaluator.process(clf, stream)


def test_ACS_Prequential():
stream = sl.generators.DriftedStream(sigmoid_spacing=999)
stream = sl.streams.StreamGenerator()
clf = sl.classifiers.AccumulatedSamplesClassifier()
evaluator = sl.evaluators.PrequentialEvaluator()
evaluator.process(clf, stream)
evaluator.process(clf, stream)
4 changes: 2 additions & 2 deletions strlearn/tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@


def test_mlp_drifted():
stream = sl.generators.DriftedStream(sigmoid_spacing=999)
clf = MLPClassifier(hidden_layer_sizes=(100,))
stream = sl.streams.StreamGenerator()
clf = MLPClassifier()
evaluator = sl.evaluators.TestThenTrainEvaluator()
evaluator.process(clf, stream)
4 changes: 2 additions & 2 deletions strlearn/tests/test_ensembles.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


def test_ACS():
stream = sl.generators.DriftedStream(sigmoid_spacing=999)
stream = sl.streams.StreamGenerator()
clf = sl.ensembles.ChunkBasedEnsemble()
evaluator = sl.evaluators.TestThenTrainEvaluator()
evaluator.process(clf, stream)
evaluator.process(clf, stream)
Loading

0 comments on commit daa5008

Please sign in to comment.