Commit fae8773: AWE

xehivs committed Mar 11, 2020
2 parents: 6898b82 + 94bb478
Showing 37 changed files with 310 additions and 163 deletions.
5 changes: 3 additions & 2 deletions _vapor/drift_plotter.py
@@ -11,11 +11,12 @@
 # License: MIT
 
 
-import numpy as np
-from strlearn.streams import StreamGenerator
 import matplotlib.pyplot as plt
+import numpy as np
 from matplotlib.gridspec import GridSpec
+
+from strlearn.streams import StreamGenerator
 
 mcargs = {
     "n_classes": 3,
     "n_chunks": 100,
4 changes: 2 additions & 2 deletions _vapor/same_stream.py
@@ -11,11 +11,11 @@
 # License: MIT
 
 
-import numpy as np
-from strlearn.streams import StreamGenerator
 import matplotlib.pyplot as plt
+import numpy as np
 from matplotlib.gridspec import GridSpec
 
+from strlearn.streams import StreamGenerator
 
 mcargs = {
     "n_classes": 2,
5 changes: 3 additions & 2 deletions _vapor/stream_animator.py
@@ -11,12 +11,13 @@
 # License: MIT
 
 
-import numpy as np
-from strlearn.streams import StreamGenerator
 import matplotlib.pyplot as plt
+import numpy as np
 from matplotlib.gridspec import GridSpec
 from tqdm import tqdm
+
+from strlearn.streams import StreamGenerator
 
 mcargs = {
     "n_classes": 3,
     "n_chunks": 100,
7 changes: 4 additions & 3 deletions _vapor/test.py
@@ -6,10 +6,11 @@
 Just example.
 """
-import strlearn as sl
-from sklearn.naive_bayes import GaussianNB
-from sklearn.base import clone
 import numpy as np
+from sklearn.base import clone
+from sklearn.naive_bayes import GaussianNB
+
+import strlearn as sl
 
 clf = [GaussianNB(), GaussianNB()]
 
17 changes: 9 additions & 8 deletions doc/conf.py
@@ -1,13 +1,20 @@
 import os
 import sys
 
+import sphinx_gallery
+import sphinx_rtd_theme
+from github_link import make_linkcode_resolve
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+from strlearn import __version__
 
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 sys.path.insert(0, os.path.abspath("sphinxext"))
-from github_link import make_linkcode_resolve
-import sphinx_gallery
 
 # -- General configuration ------------------------------------------------
 
@@ -64,12 +71,6 @@
 copyright = u"2019, P. Ksieniewicz, P. Zyblewski"
 html_logo = "_static/logo.png"
 
-# The version info for the project you're documenting, acts as replacement for
-# |version| and |release|, also used in various other places throughout the
-# built documents.
-#
-# The short X.Y version.
-from strlearn import __version__
 
 version = __version__
 # The full version, including alpha/beta/rc tags.
4 changes: 2 additions & 2 deletions doc/quickstart.rst
@@ -31,8 +31,8 @@ The next element is the data stream that we aim to process. In the example we wi
 The third requirement of the experiment is to specify the metrics used in the evaluation of the methods. In the example, we will use the *accuracy* metric available in ``scikit-learn`` and the *balanced accuracy* from the ``stream-learn`` module::
 
     from sklearn.metrics import accuracy_score
-    from strlearn.utils.metrics import bac
-    metrics = [accuracy_score, bac]
+    from strlearn.metrics import balanced_accuracy_score
+    metrics = [accuracy_score, balanced_accuracy_score]
 
 The last necessary element of processing is the evaluator, i.e. the method of conducting the experiment. For example, we will choose the *Test-Then-Train* paradigm, described in more detail in the `User Guide <evaluators.html>`_. It is important to note that we need to provide the metrics we will use at the point of initializing the evaluator. If no metrics are given, it will use the default pair of *accuracy* and *balanced accuracy* scores::
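(The hunk is truncated here by the diff view. For orientation, a minimal sketch of the evaluator setup this paragraph describes, mirroring examples/plot_simplest.py from this same commit; the exact calls are illustrative, not part of the diff:)

    from strlearn.evaluators import TestThenTrain
    from strlearn.streams import StreamGenerator

    stream = StreamGenerator(n_chunks=30, n_drifts=1)
    evaluator = TestThenTrain(metrics)  # the metrics list defined above
    evaluator.process(stream, clf)      # clf: a classifier with partial_fit()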
4 changes: 2 additions & 2 deletions doc/sphinxext/github_link.py
@@ -1,9 +1,9 @@
-from operator import attrgetter
 import inspect
-import subprocess
 import os
+import subprocess
 import sys
 from functools import partial
+from operator import attrgetter
 
 REVISION_CMD = 'git rev-parse --short HEAD'
 
7 changes: 4 additions & 3 deletions examples/plot_basic.py
@@ -8,9 +8,11 @@
 """
 
 
-import strlearn as sl
-from sklearn.neural_network import MLPClassifier
+import matplotlib.pyplot as plt
 from sklearn.naive_bayes import GaussianNB
+from sklearn.neural_network import MLPClassifier
+
+import strlearn as sl
 
 # Initialize list of scikit-learn classifiers with partial_fit() function
 clfs = [MLPClassifier(), GaussianNB()]
@@ -41,7 +43,6 @@
 # Plotting
 ##############################################################################
 
-import matplotlib.pyplot as plt
 
 fig, ax = plt.subplots(1, len(metrics), figsize=(12, 4))
 labels = ["MLP", "GNB"]
11 changes: 6 additions & 5 deletions examples/plot_simplest.py
@@ -7,22 +7,24 @@
 """
 
 import matplotlib.pyplot as plt
+from sklearn.metrics import accuracy_score
+from sklearn.naive_bayes import GaussianNB
+
+from strlearn.evaluators import TestThenTrain
+from strlearn.metrics import recall
+from strlearn.streams import StreamGenerator
 
 clf = GaussianNB()
 
-from strlearn.streams import StreamGenerator
 
 stream = StreamGenerator(n_chunks=30, n_drifts=1)
 
 
-from sklearn.metrics import accuracy_score
-from strlearn.metrics import recall
 
 metrics = [accuracy_score, recall]
 
 
-from strlearn.evaluators import TestThenTrain
 
 evaluator = TestThenTrain(metrics)
 
@@ -39,7 +41,6 @@
 ##############################################################################
 # Oh my, how very important.
 
-import matplotlib.pyplot as plt
 
 plt.figure(figsize=(6, 3), dpi=400)
 
3 changes: 2 additions & 1 deletion examples/plot_stream_plotting.py
@@ -14,11 +14,12 @@
 """
 
 
-import strlearn as sl
 import matplotlib.pyplot as plt
 import numpy as np
 from matplotlib.colors import LinearSegmentedColormap
+
+import strlearn as sl
 
 cm = LinearSegmentedColormap.from_list(
     "lokomotiv", colors=[(0.3, 0.7, 0.3), (0.7, 0.3, 0.3)]
 )
5 changes: 2 additions & 3 deletions setup.py
@@ -4,12 +4,11 @@
 
 import codecs
 import os
+
+from setuptools import find_packages, setup
 
 # read the contents of your README file
 from os import path
-
-from setuptools import find_packages, setup
-
 this_directory = path.abspath(path.dirname(__file__))
 with open(path.join(this_directory, "README.md"), encoding="utf-8") as f:
     long_description = f.read()
14 changes: 9 additions & 5 deletions strlearn/__init__.py
@@ -1,7 +1,11 @@
+from . import classifiers, ensembles, evaluators, metrics, streams
 from ._version import __version__
 
-from . import classifiers
-from . import ensembles
-from . import evaluators
-from . import streams
-from . import metrics
+__all__ = [
+    "classifiers",
+    "ensembles",
+    "evaluators",
+    "metrics",
+    "streams",
+    "__version__",
+]
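For orientation, a minimal sketch of the package surface declared above; the constructor arguments mirror examples elsewhere in this commit and are illustrative:

    import strlearn as sl

    # Every name in __all__ is reachable as an attribute of the package.
    stream = sl.streams.StreamGenerator(n_chunks=30, n_drifts=1)
    evaluator = sl.evaluators.TestThenTrain([sl.metrics.balanced_accuracy_score])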
12 changes: 7 additions & 5 deletions strlearn/classifiers/AccumulatedSamplesClassifier.py
@@ -1,9 +1,9 @@
 """Accumulated samples classifier."""
 
-from sklearn.naive_bayes import GaussianNB
-from sklearn.base import BaseEstimator, ClassifierMixin
-from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
 import numpy as np
+from sklearn.base import BaseEstimator, ClassifierMixin
+from sklearn.naive_bayes import GaussianNB
+from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
 
 
 class AccumulatedSamplesClassifier(BaseEstimator, ClassifierMixin):
@@ -65,10 +65,12 @@ def partial_fit(self, X, y, classes=None):
         self.classes_, _ = np.unique(y, return_inverse=True)
 
         self._X = (
-            np.concatenate((self._X, X), axis=0) if hasattr(self, "_X") else np.copy(X)
+            np.concatenate((self._X, X), axis=0) if hasattr(
+                self, "_X") else np.copy(X)
         )
         self._y = (
-            np.concatenate((self._y, y), axis=0) if hasattr(self, "_y") else np.copy(y)
+            np.concatenate((self._y, y), axis=0) if hasattr(
+                self, "_y") else np.copy(y)
         )
 
         self._clf = self._base_clf().fit(self._X, self._y)
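For orientation, a hypothetical usage sketch of the accumulation logic above: each partial_fit call concatenates the new chunk onto the stored _X/_y buffers and refits the base classifier on everything seen so far. A no-argument constructor is assumed here:

    import numpy as np
    from strlearn.classifiers import AccumulatedSamplesClassifier

    clf = AccumulatedSamplesClassifier()  # assumption: default constructor
    for _ in range(3):  # three toy chunks
        X = np.random.rand(20, 4)
        y = np.random.randint(0, 2, 20)
        clf.partial_fit(X, y, classes=[0, 1])  # refits on all samples so far
    print(clf.predict(np.random.rand(5, 4)))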
10 changes: 5 additions & 5 deletions strlearn/classifiers/SampleWeightedMetaEstimator.py
@@ -1,13 +1,14 @@
-from sklearn.base import BaseEstimator, ClassifierMixin, clone
-from sklearn.neural_network import MLPClassifier
-from sklearn.naive_bayes import GaussianNB
 import numpy as np
+from sklearn.base import BaseEstimator, ClassifierMixin, clone
+from sklearn.naive_bayes import GaussianNB
 
 
 class SampleWeightedMetaEstimator(BaseEstimator, ClassifierMixin):
     """
     Sample Weighted Meta Estimator.
     """
-    def __init__(self, base_classifier= GaussianNB()):
+
+    def __init__(self, base_classifier=GaussianNB()):
         self.base_classifier = base_classifier
 
@@ -16,7 +17,6 @@ def fit(self, X, y):
 
         self.clf_.fit(X, y)
 
-
     def partial_fit(self, X, y, classes, sample_weight):
         if not hasattr(self, 'clf_'):
             self.clf_ = clone(self.base_classifier)
5 changes: 5 additions & 0 deletions strlearn/classifiers/__init__.py
@@ -1,2 +1,7 @@
 from .AccumulatedSamplesClassifier import AccumulatedSamplesClassifier
 from .SampleWeightedMetaEstimator import SampleWeightedMetaEstimator
+
+__all__ = [
+    "AccumulatedSamplesClassifier",
+    "SampleWeightedMetaEstimator"
+]
2 changes: 0 additions & 2 deletions strlearn/ensembles/AWE.py
@@ -61,11 +61,9 @@ def partial_fit(self, X, y, classes=None):
 
         # Remove the worst when ensemble becomes too large
         if len(self.ensemble_) > self.n_estimators:
-            #print(self.weights_)
             worst_idx = np.argmax(self.weights_)
             del self.ensemble_[worst_idx]
             del self.weights_[worst_idx]
-            #print(self.weights_)
 
         return self
 
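A standalone toy sketch of the pruning step above, assuming weights_ is a plain list in which the argmax marks the worst member, as the code implies:

    import numpy as np

    ensemble_ = ["clf_a", "clf_b", "clf_c"]  # hypothetical members
    weights_ = [0.2, 0.9, 0.4]               # argmax marks the worst member

    worst_idx = np.argmax(weights_)  # -> 1
    del ensemble_[worst_idx]
    del weights_[worst_idx]
    print(ensemble_, weights_)       # ['clf_a', 'clf_c'] [0.2, 0.4]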
20 changes: 13 additions & 7 deletions strlearn/ensembles/OOB.py
@@ -1,9 +1,10 @@
 """Oversampling-based Online Bagging."""
 
-from sklearn.base import BaseEstimator, ClassifierMixin, clone
-from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
 import numpy as np
+from sklearn.base import ClassifierMixin, clone
 from sklearn.ensemble import BaseEnsemble
+from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
 
+
 class OOB(BaseEnsemble, ClassifierMixin):
     """
@@ -75,22 +76,27 @@ def partial_fit(self, X, y, classes=None):
                 label == 1
                 and self.chunk_tdcs[instance][1] < self.chunk_tdcs[instance][0]
             ):
-                lmbda = self.chunk_tdcs[instance][0] / self.chunk_tdcs[instance][1]
+                lmbda = self.chunk_tdcs[instance][0] / \
+                    self.chunk_tdcs[instance][1]
                 K = np.asarray(
-                    [np.random.poisson(lmbda, 1)[0] for i in range(self.n_estimators)]
+                    [np.random.poisson(lmbda, 1)[0]
+                     for i in range(self.n_estimators)]
                 )
             elif (
                 label == 0
                 and self.chunk_tdcs[instance][0] < self.chunk_tdcs[instance][1]
             ):
-                lmbda = self.chunk_tdcs[instance][1] / self.chunk_tdcs[instance][0]
+                lmbda = self.chunk_tdcs[instance][1] / \
+                    self.chunk_tdcs[instance][0]
                 K = np.asarray(
-                    [np.random.poisson(lmbda, 1)[0] for i in range(self.n_estimators)]
+                    [np.random.poisson(lmbda, 1)[0]
+                     for i in range(self.n_estimators)]
                 )
             else:
                 lmbda = 1
                 K = np.asarray(
-                    [np.random.poisson(lmbda, 1)[0] for i in range(self.n_estimators)]
+                    [np.random.poisson(lmbda, 1)[0]
+                     for i in range(self.n_estimators)]
                )
 
             self.weights.append(K)
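An aside on the Poisson draws above: the per-member list comprehension is equivalent to one vectorized call; a minimal sketch, not part of the diff:

    import numpy as np

    rng = np.random.default_rng(0)
    n_estimators, lmbda = 5, 2.0

    K_loop = np.asarray([rng.poisson(lmbda, 1)[0] for i in range(n_estimators)])
    K_vec = rng.poisson(lmbda, n_estimators)  # one count per ensemble member
    print(K_loop.shape == K_vec.shape)        # True: both are (n_estimators,)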
4 changes: 2 additions & 2 deletions strlearn/ensembles/OnlineBagging.py
@@ -1,9 +1,9 @@
 """Online Bagging."""
 
+import numpy as np
 from sklearn.base import ClassifierMixin, clone
 from sklearn.ensemble import BaseEnsemble
-from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
-import numpy as np
+from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
 
 
 class OnlineBagging(BaseEnsemble, ClassifierMixin):
9 changes: 5 additions & 4 deletions strlearn/ensembles/SEA.py
@@ -1,10 +1,10 @@
 """Chunk based ensemble."""
 
+import numpy as np
 from sklearn.base import ClassifierMixin, clone
-from sklearn.metrics import accuracy_score
 from sklearn.ensemble import BaseEnsemble
-from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
-import numpy as np
+from sklearn.metrics import accuracy_score
+from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
 
 
 class SEA(ClassifierMixin, BaseEnsemble):
@@ -82,7 +82,8 @@ def partial_fit(self, X, y, classes=None):
         # Remove the worst when ensemble becomes too large
         if len(self.ensemble_) > self.n_estimators:
             del self.ensemble_[
-                np.argmin([self.metric(y, clf.predict(X)) for clf in self.ensemble_])
+                np.argmin([self.metric(y, clf.predict(X))
+                           for clf in self.ensemble_])
             ]
         return self
 