Skip to content

Commit

Permalink
Changing CBE to SEA.
Browse files Browse the repository at this point in the history
  • Loading branch information
xehivs committed Dec 24, 2019
1 parent 1dbf6e4 commit 60fdee2
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 29 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
metex.py
example.png
example.py
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
2 changes: 1 addition & 1 deletion doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ API
:toctree: generated/
:template: class.rst

ensembles.ChunkBasedEnsemble
ensembles.SEA
ensembles.OnlineBagging
ensembles.OOB
ensembles.UOB
Expand Down
6 changes: 3 additions & 3 deletions doc/ensembles.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,20 @@ equal to the size of the chunk.
Chunk-Based Ensemble
--------------------

The ``ChunkBasedEnsemble`` class implements a basic multi classifier approach for data stream classification. This model takes the base classifier as the ``base_estimator`` parameter and the pool size as the ``n_estimators``. A single base classifier is trained on each observed data chunk and added to the ensemble. If the fixed pool size is exceeded, the oldest model is removed. The final decision is obtained by accumulating the supports of base classifiers.
The ``SEA`` class implements a basic multi-classifier approach for data stream classification. This model takes the base classifier as the ``base_estimator`` parameter, the pool size as ``n_estimators``, and a quality measure as ``metric`` (``accuracy_score`` by default). A single base classifier is trained on each observed data chunk and added to the ensemble. If the fixed pool size is exceeded, the member that scores worst on the current chunk according to ``metric`` is removed. The final decision is obtained by accumulating the supports of base classifiers.

**Example**

.. code-block:: python
from strlearn.evaluators import TestThenTrain
from strlearn.streams import StreamGenerator
from strlearn.ensembles import ChunkBasedEnsemble
from strlearn.ensembles import SEA
from sklearn.naive_bayes import GaussianNB
stream = StreamGenerator()
clf = ChunkBasedEnsemble(base_estimator=GaussianNB(), n_estimators=5)
clf = SEA(base_estimator=GaussianNB(), n_estimators=5)
evaluator = TestThenTrain()
evaluator.process(stream, clf)
Expand Down
20 changes: 10 additions & 10 deletions doc/evaluators.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,13 @@ three-dimensional array of shape (n_classifiers, n_chunks, n_metrics).
.. code-block:: python
from strlearn.evaluators import TestThenTrain
from strlearn.ensembles import ChunkBasedEnsemble
from strlearn.ensembles import SEA
from strlearn.utils.metrics import bac, f_score
from strlearn.streams import StreamGenerator
from sklearn.naive_bayes import GaussianNB
stream = StreamGenerator(chunk_size=200, n_chunks=250)
clf = ChunkBasedEnsemble(base_estimator=GaussianNB())
clf = SEA(base_estimator=GaussianNB())
evaluator = TestThenTrain(metrics=(bac, f_score))
evaluator.process(stream, clf)
Expand All @@ -55,15 +55,15 @@ three-dimensional array of shape (n_classifiers, n_chunks, n_metrics).
.. code-block:: python
from strlearn.evaluators import TestThenTrain
from strlearn.ensembles import ChunkBasedEnsemble
from strlearn.ensembles import SEA
from strlearn.utils.metrics import bac, f_score
from strlearn.streams import StreamGenerator
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
stream = StreamGenerator(chunk_size=200, n_chunks=250)
clf1 = ChunkBasedEnsemble(base_estimator=GaussianNB())
clf2 = ChunkBasedEnsemble(base_estimator=DecisionTreeClassifier())
clf1 = SEA(base_estimator=GaussianNB())
clf2 = SEA(base_estimator=DecisionTreeClassifier())
clfs = (clf1, clf2)
evaluator = TestThenTrain(metrics=(bac, f_score))
Expand Down Expand Up @@ -96,13 +96,13 @@ the instance of ``StreamGenerator`` class.
.. code-block:: python
from strlearn.evaluators import Prequential
from strlearn.ensembles import ChunkBasedEnsemble
from strlearn.ensembles import SEA
from strlearn.utils.metrics import bac, f_score
from strlearn.streams import StreamGenerator
from sklearn.naive_bayes import GaussianNB
stream = StreamGenerator()
clf = ChunkBasedEnsemble(base_estimator=GaussianNB())
clf = SEA(base_estimator=GaussianNB())
evaluator = Prequential(metrics=(bac, f_score))
evaluator.process(stream, clf, interval=100)
Expand All @@ -114,15 +114,15 @@ the instance of ``StreamGenerator`` class.
.. code-block:: python
from strlearn.evaluators import Prequential
from strlearn.ensembles import ChunkBasedEnsemble
from strlearn.ensembles import SEA
from strlearn.utils.metrics import bac, f_score
from strlearn.streams import StreamGenerator
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
stream = StreamGenerator(chunk_size=200, n_chunks=250)
clf1 = ChunkBasedEnsemble(base_estimator=GaussianNB())
clf2 = ChunkBasedEnsemble(base_estimator=DecisionTreeClassifier())
clf1 = SEA(base_estimator=GaussianNB())
clf2 = SEA(base_estimator=DecisionTreeClassifier())
clfs = (clf1, clf2)
evaluator = Prequential(metrics=(bac, f_score))
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
"""Chunk based ensemble."""

from sklearn.base import ClassifierMixin, clone
from sklearn.metrics import accuracy_score
from sklearn.ensemble import BaseEnsemble
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
import numpy as np


class ChunkBasedEnsemble(ClassifierMixin, BaseEnsemble):
class SEA(ClassifierMixin, BaseEnsemble):
"""
Chunk based ensemble classifier.
Expand All @@ -15,7 +16,7 @@ class ChunkBasedEnsemble(ClassifierMixin, BaseEnsemble):
Parameters
----------
n_estimators : integer, optional (default=5)
The maximum number of estimators trained using consecutive data chunks
and maintained in the ensemble.
Expand All @@ -31,7 +32,7 @@ class ChunkBasedEnsemble(ClassifierMixin, BaseEnsemble):
--------
>>> import strlearn as sl
>>> stream = sl.streams.StreamGenerator()
>>> clf = sl.ensembles.ChunkBasedEnsemble()
>>> clf = sl.ensembles.SEA()
>>> evaluator = sl.evaluators.TestThenTrain()
>>> evaluator.process(stream, clf)
>>> print(evaluator.scores_)
Expand All @@ -45,15 +46,15 @@ class ChunkBasedEnsemble(ClassifierMixin, BaseEnsemble):
[0.935 0.93569212 0.93540766 0.93569212 0.93467337]]
"""

def __init__(self, base_estimator=None, n_estimators=10):
def __init__(self, base_estimator=None, n_estimators=10, metric=accuracy_score):
"""Initialization."""
self.base_estimator = base_estimator
self.n_estimators = n_estimators
self.metric = metric

def fit(self, X, y):
"""Fit the ensemble on ``(X, y)`` by delegating to ``partial_fit``.

Returns ``self``.
"""
self.partial_fit(X, y)

return self

def partial_fit(self, X, y, classes=None):
Expand All @@ -73,11 +74,14 @@ def partial_fit(self, X, y, classes=None):
if self.classes_ is None:
self.classes_, _ = np.unique(y, return_inverse=True)

# Append new estimator
self.ensemble_.append(clone(self.base_estimator).fit(self.X_, self.y_))

# Remove the worst when ensemble becomes too large
if len(self.ensemble_) > self.n_estimators:
del self.ensemble_[0]

del self.ensemble_[
np.argmin([self.metric(y, clf.predict(X)) for clf in self.ensemble_])
]
return self

def ensemble_support_matrix(self, X):
Expand Down
2 changes: 1 addition & 1 deletion strlearn/ensembles/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .ChunkBasedEnsemble import ChunkBasedEnsemble
from .SEA import SEA
from .WAE import WAE
from .OnlineBagging import OnlineBagging
from .OOB import OOB
Expand Down
25 changes: 18 additions & 7 deletions strlearn/tests/test_ensembles.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@

sys.path.insert(0, "../..")


def get_stream():
    """Create the default test stream: 10 chunks with 10 features each."""
    stream = sl.streams.StreamGenerator(n_chunks=10, n_features=10)
    return stream


def get_different_stream():
    """Create a stream with a different feature count: 10 chunks, 4 features."""
    stream = sl.streams.StreamGenerator(n_chunks=10, n_features=4)
    return stream


def test_ensembles_fit():
clf1 = sl.ensembles.ChunkBasedEnsemble(GaussianNB())
clf1 = sl.ensembles.SEA(GaussianNB())
clf2 = sl.ensembles.WAE(GaussianNB())
clf3 = sl.ensembles.OOB(GaussianNB())
clf4 = sl.ensembles.OnlineBagging(GaussianNB())
Expand All @@ -35,7 +38,7 @@ def test_ensembles_fit():
def test_features():
"Bare CBE"
clfs = [
sl.ensembles.ChunkBasedEnsemble(GaussianNB()),
sl.ensembles.SEA(GaussianNB()),
sl.ensembles.OOB(GaussianNB()),
sl.ensembles.UOB(GaussianNB()),
sl.ensembles.WAE(GaussianNB()),
Expand All @@ -52,10 +55,11 @@ def test_features():
with pytest.raises(ValueError):
clf.partial_fit(X_b, y_b)


def test_pred():
"""Pred error"""
clfs = [
sl.ensembles.ChunkBasedEnsemble(GaussianNB()),
sl.ensembles.SEA(GaussianNB()),
sl.ensembles.OOB(GaussianNB()),
sl.ensembles.UOB(GaussianNB()),
sl.ensembles.WAE(GaussianNB()),
Expand All @@ -72,13 +76,15 @@ def test_pred():
with pytest.raises(ValueError):
clf.predict(X_b)

def test_CBE():
"Bare CBE"

def test_SEA():
"""Smoke test: run SEA (GaussianNB base, n_estimators=5) through TestThenTrain."""
stream = get_stream()
clf = sl.ensembles.SEA(GaussianNB(), n_estimators=5)
evaluator = sl.evaluators.TestThenTrain()
evaluator.process(stream, clf)


def test_WAE():
"""Bare WAE."""
stream = get_stream()
Expand All @@ -94,20 +100,23 @@ def test_OOB():
evaluator = sl.evaluators.TestThenTrain()
evaluator.process(stream, clf)


def test_OB():
"""Bare OnlineBagging."""
stream = get_stream()
clf = sl.ensembles.OnlineBagging(GaussianNB())
evaluator = sl.evaluators.TestThenTrain()
evaluator.process(stream, clf)


def test_UOB():
"""Bare UOB."""
stream = get_stream()
clf = sl.ensembles.UOB(GaussianNB())
evaluator = sl.evaluators.TestThenTrain()
evaluator.process(stream, clf)


def test_pp_WAE():
"""Post pruned WAE."""
stream = get_stream()
Expand All @@ -119,7 +128,9 @@ def test_pp_WAE():
def test_WAE_wcm1():
"""Various weight computation methods of WAE."""
stream = get_stream()
clf = sl.ensembles.WAE(GaussianNB(), weight_calculation_method="same_for_each", n_estimators=5)
clf = sl.ensembles.WAE(
GaussianNB(), weight_calculation_method="same_for_each", n_estimators=5
)
evaluator = sl.evaluators.TestThenTrain()
evaluator.process(stream, clf)

Expand Down

0 comments on commit 60fdee2

Please sign in to comment.