Commit fae8773: AWE

xehivs committed Mar 11, 2020
2 parents: 6898b82 + 94bb478
Showing 37 changed files with 310 additions and 163 deletions.
5 changes: 3 additions & 2 deletions _vapor/drift_plotter.py
@@ -11,11 +11,12 @@
 # License: MIT
 
 
-import numpy as np
-from strlearn.streams import StreamGenerator
 import matplotlib.pyplot as plt
+import numpy as np
 from matplotlib.gridspec import GridSpec
+
+from strlearn.streams import StreamGenerator
 
 mcargs = {
     "n_classes": 3,
     "n_chunks": 100,
4 changes: 2 additions & 2 deletions _vapor/same_stream.py
@@ -11,11 +11,11 @@
 # License: MIT
 
 
-import numpy as np
-from strlearn.streams import StreamGenerator
 import matplotlib.pyplot as plt
+import numpy as np
 from matplotlib.gridspec import GridSpec
 
+from strlearn.streams import StreamGenerator
 
 mcargs = {
     "n_classes": 2,
5 changes: 3 additions & 2 deletions _vapor/stream_animator.py
@@ -11,12 +11,13 @@
 # License: MIT
 
 
-import numpy as np
-from strlearn.streams import StreamGenerator
 import matplotlib.pyplot as plt
+import numpy as np
 from matplotlib.gridspec import GridSpec
 from tqdm import tqdm
+
+from strlearn.streams import StreamGenerator
 
 mcargs = {
     "n_classes": 3,
     "n_chunks": 100,
7 changes: 4 additions & 3 deletions _vapor/test.py
@@ -6,10 +6,11 @@
 Just example.
 """
-import strlearn as sl
-from sklearn.naive_bayes import GaussianNB
-from sklearn.base import clone
 import numpy as np
+from sklearn.base import clone
+from sklearn.naive_bayes import GaussianNB
+
+import strlearn as sl
 
 clf = [GaussianNB(), GaussianNB()]
 
17 changes: 9 additions & 8 deletions doc/conf.py
@@ -1,13 +1,20 @@
 import os
 import sys
 
+import sphinx_gallery
+import sphinx_rtd_theme
+from github_link import make_linkcode_resolve
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+from strlearn import __version__
 
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 sys.path.insert(0, os.path.abspath("sphinxext"))
-from github_link import make_linkcode_resolve
-import sphinx_gallery
 
 # -- General configuration ------------------------------------------------
 
@@ -64,12 +71,6 @@
 copyright = u"2019, P. Ksieniewicz, P. Zyblewski"
 html_logo = "_static/logo.png"
 
-# The version info for the project you're documenting, acts as replacement for
-# |version| and |release|, also used in various other places throughout the
-# built documents.
-#
-# The short X.Y version.
-from strlearn import __version__
 
 version = __version__
 # The full version, including alpha/beta/rc tags.
4 changes: 2 additions & 2 deletions doc/quickstart.rst
@@ -31,8 +31,8 @@ The next element is the data stream that we aim to process. In the example we wi
 The third requirement of the experiment is to specify the metrics used in the evaluation of the methods. In the example, we will use the *accuracy* metric available in ``scikit-learn`` and the *balanced accuracy* from the ``stream-learn`` module::
 
     from sklearn.metrics import accuracy_score
-    from strlearn.utils.metrics import bac
-    metrics = [accuracy_score, bac]
+    from strlearn.metrics import balanced_accuracy_score
+    metrics = [accuracy_score, balanced_accuracy_score]
 
 The last necessary element of processing is the evaluator, i.e. the method of conducting the experiment. For example, we will choose the *Test-Then-Train* paradigm, described in more detail in the `User Guide <evaluators.html>`_. It is important to note that we need to provide the metrics we will use at the point of initializing the evaluator. If no metrics are given, it will use the default pair of *accuracy* and *balanced accuracy* scores::
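(The hunk is truncated here by the diff view. For orientation, a minimal sketch of the evaluator setup this paragraph describes, mirroring examples/plot_simplest.py from this same commit; the exact calls are illustrative, not part of the diff:)

    from strlearn.evaluators import TestThenTrain
    from strlearn.streams import StreamGenerator

    stream = StreamGenerator(n_chunks=30, n_drifts=1)
    evaluator = TestThenTrain(metrics)  # the metrics list defined above
    evaluator.process(stream, clf)      # clf: a classifier with partial_fit()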
4 changes: 2 additions & 2 deletions doc/sphinxext/github_link.py
@@ -1,9 +1,9 @@
-from operator import attrgetter
 import inspect
-import subprocess
 import os
+import subprocess
 import sys
 from functools import partial
+from operator import attrgetter
 
 REVISION_CMD = 'git rev-parse --short HEAD'
 
7 changes: 4 additions & 3 deletions examples/plot_basic.py
@@ -8,9 +8,11 @@
 """
 
 
-import strlearn as sl
-from sklearn.neural_network import MLPClassifier
+import matplotlib.pyplot as plt
 from sklearn.naive_bayes import GaussianNB
+from sklearn.neural_network import MLPClassifier
+
+import strlearn as sl
 
 # Initialize list of scikit-learn classifiers with partial_fit() function
 clfs = [MLPClassifier(), GaussianNB()]
@@ -41,7 +43,6 @@
 # Plotting
 ##############################################################################
 
-import matplotlib.pyplot as plt
 
 fig, ax = plt.subplots(1, len(metrics), figsize=(12, 4))
 labels = ["MLP", "GNB"]
11 changes: 6 additions & 5 deletions examples/plot_simplest.py
@@ -7,22 +7,24 @@
 """
 
 import matplotlib.pyplot as plt
+from sklearn.metrics import accuracy_score
+from sklearn.naive_bayes import GaussianNB
+
+from strlearn.evaluators import TestThenTrain
+from strlearn.metrics import recall
+from strlearn.streams import StreamGenerator
 
 clf = GaussianNB()
 
-from strlearn.streams import StreamGenerator
 
 stream = StreamGenerator(n_chunks=30, n_drifts=1)
 
 
-from sklearn.metrics import accuracy_score
-from strlearn.metrics import recall
 
 metrics = [accuracy_score, recall]
 
 
-from strlearn.evaluators import TestThenTrain
 
 evaluator = TestThenTrain(metrics)
 
@@ -39,7 +41,6 @@
 ##############################################################################
 # Oh my, how very important.
 
-import matplotlib.pyplot as plt
 
 plt.figure(figsize=(6, 3), dpi=400)
 
3 changes: 2 additions & 1 deletion examples/plot_stream_plotting.py
@@ -14,11 +14,12 @@
 """
 
 
-import strlearn as sl
 import matplotlib.pyplot as plt
 import numpy as np
 from matplotlib.colors import LinearSegmentedColormap
+
+import strlearn as sl
 
 cm = LinearSegmentedColormap.from_list(
     "lokomotiv", colors=[(0.3, 0.7, 0.3), (0.7, 0.3, 0.3)]
 )
5 changes: 2 additions & 3 deletions setup.py
@@ -4,12 +4,11 @@
 
 import codecs
 import os
+
+from setuptools import find_packages, setup
 
 # read the contents of your README file
 from os import path
-
-from setuptools import find_packages, setup
-
 this_directory = path.abspath(path.dirname(__file__))
 with open(path.join(this_directory, "README.md"), encoding="utf-8") as f:
     long_description = f.read()
14 changes: 9 additions & 5 deletions strlearn/__init__.py
@@ -1,7 +1,11 @@
+from . import classifiers, ensembles, evaluators, metrics, streams
 from ._version import __version__
 
-from . import classifiers
-from . import ensembles
-from . import evaluators
-from . import streams
-from . import metrics
+__all__ = [
+    "classifiers",
+    "ensembles",
+    "evaluators",
+    "metrics",
+    "streams",
+    "__version__",
+]
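For orientation, a minimal sketch of the package surface declared above; the constructor arguments mirror examples elsewhere in this commit and are illustrative:

    import strlearn as sl

    # Every name in __all__ is reachable as an attribute of the package.
    stream = sl.streams.StreamGenerator(n_chunks=30, n_drifts=1)
    evaluator = sl.evaluators.TestThenTrain([sl.metrics.balanced_accuracy_score])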
12 changes: 7 additions & 5 deletions strlearn/classifiers/AccumulatedSamplesClassifier.py
@@ -1,9 +1,9 @@
 """Accumulated samples classifier."""
 
-from sklearn.naive_bayes import GaussianNB
-from sklearn.base import BaseEstimator, ClassifierMixin
-from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
 import numpy as np
+from sklearn.base import BaseEstimator, ClassifierMixin
+from sklearn.naive_bayes import GaussianNB
+from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
 
 
 class AccumulatedSamplesClassifier(BaseEstimator, ClassifierMixin):
@@ -65,10 +65,12 @@ def partial_fit(self, X, y, classes=None):
         self.classes_, _ = np.unique(y, return_inverse=True)
 
         self._X = (
-            np.concatenate((self._X, X), axis=0) if hasattr(self, "_X") else np.copy(X)
+            np.concatenate((self._X, X), axis=0) if hasattr(
+                self, "_X") else np.copy(X)
         )
         self._y = (
-            np.concatenate((self._y, y), axis=0) if hasattr(self, "_y") else np.copy(y)
+            np.concatenate((self._y, y), axis=0) if hasattr(
+                self, "_y") else np.copy(y)
         )
 
         self._clf = self._base_clf().fit(self._X, self._y)
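For orientation, a hypothetical usage sketch of the accumulation logic above: each partial_fit call concatenates the new chunk onto the stored _X/_y buffers and refits the base classifier on everything seen so far. A no-argument constructor is assumed here:

    import numpy as np
    from strlearn.classifiers import AccumulatedSamplesClassifier

    clf = AccumulatedSamplesClassifier()  # assumption: default constructor
    for _ in range(3):  # three toy chunks
        X = np.random.rand(20, 4)
        y = np.random.randint(0, 2, 20)
        clf.partial_fit(X, y, classes=[0, 1])  # refits on all samples so far
    print(clf.predict(np.random.rand(5, 4)))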
10 changes: 5 additions & 5 deletions strlearn/classifiers/SampleWeightedMetaEstimator.py
@@ -1,13 +1,14 @@
-from sklearn.base import BaseEstimator, ClassifierMixin, clone
-from sklearn.neural_network import MLPClassifier
-from sklearn.naive_bayes import GaussianNB
 import numpy as np
+from sklearn.base import BaseEstimator, ClassifierMixin, clone
+from sklearn.naive_bayes import GaussianNB
 
 
 class SampleWeightedMetaEstimator(BaseEstimator, ClassifierMixin):
     """
     Sample Weighted Meta Estimator.
     """
-    def __init__(self, base_classifier= GaussianNB()):
+
+    def __init__(self, base_classifier=GaussianNB()):
         self.base_classifier = base_classifier
 
@@ -16,7 +17,6 @@ def fit(self, X, y):
 
         self.clf_.fit(X, y)
 
-
     def partial_fit(self, X, y, classes, sample_weight):
         if not hasattr(self, 'clf_'):
             self.clf_ = clone(self.base_classifier)
5 changes: 5 additions & 0 deletions strlearn/classifiers/__init__.py
@@ -1,2 +1,7 @@
 from .AccumulatedSamplesClassifier import AccumulatedSamplesClassifier
 from .SampleWeightedMetaEstimator import SampleWeightedMetaEstimator
+
+__all__ = [
+    "AccumulatedSamplesClassifier",
+    "SampleWeightedMetaEstimator"
+]
2 changes: 0 additions & 2 deletions strlearn/ensembles/AWE.py
@@ -61,11 +61,9 @@ def partial_fit(self, X, y, classes=None):
 
         # Remove the worst when ensemble becomes too large
         if len(self.ensemble_) > self.n_estimators:
-            #print(self.weights_)
             worst_idx = np.argmax(self.weights_)
             del self.ensemble_[worst_idx]
             del self.weights_[worst_idx]
-            #print(self.weights_)
 
         return self
 
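A standalone toy sketch of the pruning step above, assuming weights_ is a plain list in which the argmax marks the worst member, as the code implies:

    import numpy as np

    ensemble_ = ["clf_a", "clf_b", "clf_c"]  # hypothetical members
    weights_ = [0.2, 0.9, 0.4]               # argmax marks the worst member

    worst_idx = np.argmax(weights_)  # -> 1
    del ensemble_[worst_idx]
    del weights_[worst_idx]
    print(ensemble_, weights_)       # ['clf_a', 'clf_c'] [0.2, 0.4]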
20 changes: 13 additions & 7 deletions strlearn/ensembles/OOB.py
@@ -1,9 +1,10 @@
 """Oversampling-based Online Bagging."""
 
-from sklearn.base import BaseEstimator, ClassifierMixin, clone
-from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
 import numpy as np
+from sklearn.base import ClassifierMixin, clone
 from sklearn.ensemble import BaseEnsemble
+from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
 
+
 class OOB(BaseEnsemble, ClassifierMixin):
     """
@@ -75,22 +76,27 @@ def partial_fit(self, X, y, classes=None):
                 label == 1
                 and self.chunk_tdcs[instance][1] < self.chunk_tdcs[instance][0]
             ):
-                lmbda = self.chunk_tdcs[instance][0] / self.chunk_tdcs[instance][1]
+                lmbda = self.chunk_tdcs[instance][0] / \
+                    self.chunk_tdcs[instance][1]
                 K = np.asarray(
-                    [np.random.poisson(lmbda, 1)[0] for i in range(self.n_estimators)]
+                    [np.random.poisson(lmbda, 1)[0]
+                     for i in range(self.n_estimators)]
                 )
             elif (
                 label == 0
                 and self.chunk_tdcs[instance][0] < self.chunk_tdcs[instance][1]
             ):
-                lmbda = self.chunk_tdcs[instance][1] / self.chunk_tdcs[instance][0]
+                lmbda = self.chunk_tdcs[instance][1] / \
+                    self.chunk_tdcs[instance][0]
                 K = np.asarray(
-                    [np.random.poisson(lmbda, 1)[0] for i in range(self.n_estimators)]
+                    [np.random.poisson(lmbda, 1)[0]
+                     for i in range(self.n_estimators)]
                 )
             else:
                 lmbda = 1
                 K = np.asarray(
-                    [np.random.poisson(lmbda, 1)[0] for i in range(self.n_estimators)]
+                    [np.random.poisson(lmbda, 1)[0]
+                     for i in range(self.n_estimators)]
                )
 
             self.weights.append(K)
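An aside on the Poisson draws above: the per-member list comprehension is equivalent to one vectorized call; a minimal sketch, not part of the diff:

    import numpy as np

    rng = np.random.default_rng(0)
    n_estimators, lmbda = 5, 2.0

    K_loop = np.asarray([rng.poisson(lmbda, 1)[0] for i in range(n_estimators)])
    K_vec = rng.poisson(lmbda, n_estimators)  # one count per ensemble member
    print(K_loop.shape == K_vec.shape)        # True: both are (n_estimators,)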
4 changes: 2 additions & 2 deletions strlearn/ensembles/OnlineBagging.py
@@ -1,9 +1,9 @@
 """Online Bagging."""
 
+import numpy as np
 from sklearn.base import ClassifierMixin, clone
 from sklearn.ensemble import BaseEnsemble
-from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
-import numpy as np
+from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
 
 
 class OnlineBagging(BaseEnsemble, ClassifierMixin):
9 changes: 5 additions & 4 deletions strlearn/ensembles/SEA.py
@@ -1,10 +1,10 @@
 """Chunk based ensemble."""
 
+import numpy as np
 from sklearn.base import ClassifierMixin, clone
-from sklearn.metrics import accuracy_score
 from sklearn.ensemble import BaseEnsemble
-from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
-import numpy as np
+from sklearn.metrics import accuracy_score
+from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
 
 
 class SEA(ClassifierMixin, BaseEnsemble):
@@ -82,7 +82,8 @@ def partial_fit(self, X, y, classes=None):
         # Remove the worst when ensemble becomes too large
         if len(self.ensemble_) > self.n_estimators:
             del self.ensemble_[
-                np.argmin([self.metric(y, clf.predict(X)) for clf in self.ensemble_])
+                np.argmin([self.metric(y, clf.predict(X))
+                           for clf in self.ensemble_])
             ]
         return self
 