More useful ensemble API.
xehivs committed Dec 5, 2019
1 parent 964cebb commit bc97336
Showing 4 changed files with 104 additions and 69 deletions.
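In short: the ensembles drop the two-step set_base_clf() initialization in favour of scikit-learn's standard BaseEnsemble constructor signature. A minimal before/after sketch (not part of the commit; variable names are illustrative, and it assumes the package exports OOB as the tests do):

    from sklearn.naive_bayes import GaussianNB
    from strlearn.ensembles import OOB

    # Before this commit: size first, base classifier attached afterwards.
    # clf = OOB(ensemble_size=5)
    # clf.set_base_clf(GaussianNB)

    # After this commit: the base estimator is a constructor argument.
    clf = OOB(base_estimator=GaussianNB(), n_estimators=5, time_decay_factor=0.9)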
64 changes: 41 additions & 23 deletions strlearn/ensembles/OOB.py
@@ -3,26 +3,20 @@
 from sklearn.base import BaseEstimator, ClassifierMixin, clone
 from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
 import numpy as np
-from sklearn.naive_bayes import GaussianNB
+from sklearn.ensemble import BaseEnsemble


-class OOB(BaseEstimator, ClassifierMixin):
+class OOB(BaseEnsemble, ClassifierMixin):
     """
     """

-    def __init__(self, ensemble_size=5, time_decay_factor=0.9):
+    def __init__(self, base_estimator=None, n_estimators=5, time_decay_factor=0.9):
         """Initialization."""
-        self.ensemble_size = ensemble_size
+        self.base_estimator = base_estimator
+        self.n_estimators = n_estimators
         self.time_decay_factor = time_decay_factor

-    def set_base_clf(self, base_clf=GaussianNB):
-        """Establishing base classifier."""
-        self._base_clf = base_clf
-        self.ensemble_ = []
-        for size in range(self.ensemble_size):
-            self.ensemble_.append(self._base_clf())
-
     def fit(self, X, y):
         """Fitting."""
         self.partial_fit(X, y)
@@ -31,8 +25,10 @@ def fit(self, X, y):
     def partial_fit(self, X, y, classes=None):
         """Partial fitting."""
         X, y = check_X_y(X, y)
-        if not hasattr(self, "_base_clf"):
-            self.set_base_clf()
+        if not hasattr(self, "ensemble_"):
+            self.ensemble_ = [
+                clone(self.base_estimator) for i in range(self.n_estimators)
+            ]

         # Check feature consistency
         if hasattr(self, "X_"):
@@ -55,11 +51,19 @@ def partial_fit(self, X, y, classes=None):

         for iteration, label in enumerate(self.y_):
             if label == 0:
-                self.current_tdcs_[0, 0] = (self.current_tdcs_[0, 0]*self.time_decay_factor) + (1-self.time_decay_factor)
-                self.current_tdcs_[0, 1] = self.current_tdcs_[0, 1] * self.time_decay_factor
+                self.current_tdcs_[0, 0] = (
+                    self.current_tdcs_[0, 0] * self.time_decay_factor
+                ) + (1 - self.time_decay_factor)
+                self.current_tdcs_[0, 1] = (
+                    self.current_tdcs_[0, 1] * self.time_decay_factor
+                )
             else:
-                self.current_tdcs_[0, 1] = (self.current_tdcs_[0, 1]*self.time_decay_factor) + (1-self.time_decay_factor)
-                self.current_tdcs_[0, 0] = self.current_tdcs_[0, 0] * self.time_decay_factor
+                self.current_tdcs_[0, 1] = (
+                    self.current_tdcs_[0, 1] * self.time_decay_factor
+                ) + (1 - self.time_decay_factor)
+                self.current_tdcs_[0, 0] = (
+                    self.current_tdcs_[0, 0] * self.time_decay_factor
+                )

             self.chunk_tdcs[iteration] = self.current_tdcs_

@@ -68,22 +72,36 @@ def partial_fit(self, X, y, classes=None):
         # improved OOB
         self.weights = []
         for instance, label in enumerate(self.y_):
-            if label == 1 and self.chunk_tdcs[instance][1] < self.chunk_tdcs[instance][0]:
+            if (
+                label == 1
+                and self.chunk_tdcs[instance][1] < self.chunk_tdcs[instance][0]
+            ):
                 lmbda = self.chunk_tdcs[instance][0] / self.chunk_tdcs[instance][1]
-                K = np.asarray([np.random.poisson(lmbda, 1)[0] for i in range(self.ensemble_size)])
-            elif label == 0 and self.chunk_tdcs[instance][0] < self.chunk_tdcs[instance][1]:
+                K = np.asarray(
+                    [np.random.poisson(lmbda, 1)[0] for i in range(self.n_estimators)]
+                )
+            elif (
+                label == 0
+                and self.chunk_tdcs[instance][0] < self.chunk_tdcs[instance][1]
+            ):
                 lmbda = self.chunk_tdcs[instance][1] / self.chunk_tdcs[instance][0]
-                K = np.asarray([np.random.poisson(lmbda, 1)[0] for i in range(self.ensemble_size)])
+                K = np.asarray(
+                    [np.random.poisson(lmbda, 1)[0] for i in range(self.n_estimators)]
+                )
             else:
                 lmbda = 1
-                K = np.asarray([np.random.poisson(lmbda, 1)[0] for i in range(self.ensemble_size)])
+                K = np.asarray(
+                    [np.random.poisson(lmbda, 1)[0] for i in range(self.n_estimators)]
+                )

             self.weights.append(K)

         self.weights = np.asarray(self.weights).T

         for w, base_model in enumerate(self.ensemble_):
-            base_model.partial_fit(self.X_, self.y_, self.classes_, sample_weight=self.weights[w])
+            base_model.partial_fit(
+                self.X_, self.y_, self.classes_, sample_weight=self.weights[w]
+            )

         return self

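A sketch of driving the reworked OOB incrementally (not part of the commit; the synthetic chunks merely stand in for a real data stream):

    import numpy as np
    from sklearn.naive_bayes import GaussianNB
    from strlearn.ensembles import OOB

    clf = OOB(base_estimator=GaussianNB(), n_estimators=5)
    classes = np.array([0, 1])

    # ensemble_ is now built lazily on the first partial_fit call,
    # by cloning base_estimator n_estimators times.
    for chunk in range(10):
        X = np.random.randn(200, 4)
        y = np.random.binomial(1, 0.1, 200)  # imbalanced: ~10% positives
        clf.partial_fit(X, y, classes)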
35 changes: 17 additions & 18 deletions strlearn/ensembles/OnlineBagging.py
@@ -1,27 +1,20 @@
"""Online Bagging."""

from sklearn.base import BaseEstimator, ClassifierMixin, clone
from sklearn.base import ClassifierMixin, clone
from sklearn.ensemble import BaseEnsemble
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier


class OnlineBagging(BaseEstimator, ClassifierMixin):
class OnlineBagging(BaseEnsemble, ClassifierMixin):
"""
"""

def __init__(self, ensemble_size=5):
def __init__(self, base_estimator=None, n_estimators=10):
"""Initialization."""
self.ensemble_size = ensemble_size

def set_base_clf(self, base_clf=GaussianNB):
"""Establishing base classifier."""
self._base_clf = base_clf
self.ensemble_ = []
for size in range(self.ensemble_size):
self.ensemble_.append(self._base_clf())
self.base_estimator = base_estimator
self.n_estimators = n_estimators

def fit(self, X, y):
"""Fitting."""
@@ -31,8 +24,11 @@ def fit(self, X, y):
     def partial_fit(self, X, y, classes=None):
         """Partial fitting."""
         X, y = check_X_y(X, y)
-        if not hasattr(self, "_base_clf"):
-            self.set_base_clf()
+
+        if not hasattr(self, "ensemble_"):
+            self.ensemble_ = [
+                clone(self.base_estimator) for i in range(self.n_estimators)
+            ]

         # Check feature consistency
         if hasattr(self, "X_"):
Expand All @@ -47,14 +43,17 @@ def partial_fit(self, X, y, classes=None):

self.weights = []
for instance in range(self.X_.shape[0]):
K = np.asarray([np.random.poisson(1, 1)[0] for i in range(self.ensemble_size)])
K = np.asarray(
[np.random.poisson(1, 1)[0] for i in range(self.n_estimators)]
)
self.weights.append(K)

self.weights = np.asarray(self.weights).T


for w, base_model in enumerate(self.ensemble_):
base_model.partial_fit(self.X_, self.y_, self.classes_, sample_weight=self.weights[w])
base_model.partial_fit(
self.X_, self.y_, self.classes_, sample_weight=self.weights[w]
)

return self

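At its core, Online Bagging replaces the bootstrap of batch bagging with Poisson sampling: each instance is shown to each base learner K ~ Poisson(1) times, encoded above as per-learner sample weights. A standalone sketch of that weighting step (illustrative values, not part of the commit):

    import numpy as np

    n_estimators, chunk_size = 10, 5
    rng = np.random.default_rng(0)

    # One Poisson(1) weight per (estimator, instance) pair: 0 means the
    # learner skips the instance, k > 1 counts it k times -- an online
    # stand-in for drawing bootstrap replicates.
    weights = rng.poisson(lam=1, size=(n_estimators, chunk_size))
    print(weights)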
68 changes: 43 additions & 25 deletions strlearn/ensembles/UOB.py
@@ -1,28 +1,22 @@
"""Undersampling-based Online Bagging."""

from sklearn.base import BaseEstimator, ClassifierMixin, clone
from sklearn.base import ClassifierMixin, clone
from sklearn.ensemble import BaseEnsemble
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
import numpy as np
from sklearn.naive_bayes import GaussianNB


class UOB(BaseEstimator, ClassifierMixin):
class UOB(BaseEnsemble, ClassifierMixin):
"""
"""

def __init__(self, ensemble_size=5, time_decay_factor=0.9):
def __init__(self, base_estimator=None, n_estimators=5, time_decay_factor=0.9):
"""Initialization."""
self.ensemble_size = ensemble_size
self.base_estimator = base_estimator
self.n_estimators = n_estimators
self.time_decay_factor = time_decay_factor

def set_base_clf(self, base_clf=GaussianNB):
"""Establishing base classifier."""
self._base_clf = base_clf
self.ensemble_ = []
for size in range(self.ensemble_size):
self.ensemble_.append(self._base_clf())

def fit(self, X, y):
"""Fitting."""
self.partial_fit(X, y)
@@ -31,8 +25,10 @@ def fit(self, X, y):
     def partial_fit(self, X, y, classes=None):
         """Partial fitting."""
         X, y = check_X_y(X, y)
-        if not hasattr(self, "_base_clf"):
-            self.set_base_clf()
+        if not hasattr(self, "ensemble_"):
+            self.ensemble_ = [
+                clone(self.base_estimator) for i in range(self.n_estimators)
+            ]

         # Check feature consistency
         if hasattr(self, "X_"):
@@ -55,11 +51,19 @@ def partial_fit(self, X, y, classes=None):

         for iteration, label in enumerate(self.y_):
             if label == 0:
-                self.current_tdcs_[0, 0] = (self.current_tdcs_[0, 0] * self.time_decay_factor) + (1 - self.time_decay_factor)
-                self.current_tdcs_[0, 1] = self.current_tdcs_[0, 1] * self.time_decay_factor
+                self.current_tdcs_[0, 0] = (
+                    self.current_tdcs_[0, 0] * self.time_decay_factor
+                ) + (1 - self.time_decay_factor)
+                self.current_tdcs_[0, 1] = (
+                    self.current_tdcs_[0, 1] * self.time_decay_factor
+                )
             else:
-                self.current_tdcs_[0, 1] = (self.current_tdcs_[0, 1] * self.time_decay_factor) + (1 - self.time_decay_factor)
-                self.current_tdcs_[0, 0] = self.current_tdcs_[0, 0] * self.time_decay_factor
+                self.current_tdcs_[0, 1] = (
+                    self.current_tdcs_[0, 1] * self.time_decay_factor
+                ) + (1 - self.time_decay_factor)
+                self.current_tdcs_[0, 0] = (
+                    self.current_tdcs_[0, 0] * self.time_decay_factor
+                )

             self.chunk_tdcs[iteration] = self.current_tdcs_

@@ -68,21 +72,35 @@ def partial_fit(self, X, y, classes=None):
         # improved UOB
         self.weights = []
         for instance, label in enumerate(self.y_):
-            if label == 1 and self.chunk_tdcs[instance][1] > self.chunk_tdcs[instance][0]:
+            if (
+                label == 1
+                and self.chunk_tdcs[instance][1] > self.chunk_tdcs[instance][0]
+            ):
                 lmbda = self.chunk_tdcs[instance][0] / self.chunk_tdcs[instance][1]
-                K = np.asarray([np.random.poisson(lmbda, 1)[0] for i in range(self.ensemble_size)])
-            elif label == 0 and self.chunk_tdcs[instance][0] > self.chunk_tdcs[instance][1]:
+                K = np.asarray(
+                    [np.random.poisson(lmbda, 1)[0] for i in range(self.n_estimators)]
+                )
+            elif (
+                label == 0
+                and self.chunk_tdcs[instance][0] > self.chunk_tdcs[instance][1]
+            ):
                 lmbda = self.chunk_tdcs[instance][1] / self.chunk_tdcs[instance][0]
-                K = np.asarray([np.random.poisson(lmbda, 1)[0] for i in range(self.ensemble_size)])
+                K = np.asarray(
+                    [np.random.poisson(lmbda, 1)[0] for i in range(self.n_estimators)]
+                )
             else:
-                lmbda =1
-                K = np.asarray([np.random.poisson(lmbda, 1)[0] for i in range(self.ensemble_size)])
+                lmbda = 1
+                K = np.asarray(
+                    [np.random.poisson(lmbda, 1)[0] for i in range(self.n_estimators)]
+                )
             self.weights.append(K)

         self.weights = np.asarray(self.weights).T

         for w, base_model in enumerate(self.ensemble_):
-            base_model.partial_fit(self.X_, self.y_, self.classes_, sample_weight=self.weights[w])
+            base_model.partial_fit(
+                self.X_, self.y_, self.classes_, sample_weight=self.weights[w]
+            )

         return self

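Both OOB and UOB maintain time-decayed class sizes (current_tdcs_) and turn their ratio into the Poisson rate lmbda. A small sketch of the update rule with time_decay_factor = 0.9, mirroring the loop above (the label sequence is made up):

    import numpy as np

    theta = 0.9                   # time_decay_factor
    tdcs = np.array([0.5, 0.5])   # decayed sizes of class 0 and class 1

    # A stream dominated by class 0 shrinks the decayed size of class 1.
    for label in [0, 0, 0, 1, 0, 0, 0, 0]:
        tdcs = tdcs * theta
        tdcs[label] += 1 - theta

    # UOB undersamples instances of the currently larger class with
    # lmbda = smaller / larger < 1; OOB instead oversamples the smaller
    # class with lmbda = larger / smaller > 1.
    lmbda = tdcs[1] / tdcs[0]     # rate UOB would use for a class-0 instance
    print(tdcs, lmbda)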
6 changes: 3 additions & 3 deletions strlearn/tests/test_ensembles.py
@@ -30,23 +30,23 @@ def test_WAE():
 def test_OOB():
     """Bare WAE."""
     stream = get_stream()
-    clf = sl.ensembles.OOB()
+    clf = sl.ensembles.OOB(GaussianNB())
     evaluator = sl.evaluators.TestThenTrain()
     evaluator.process(stream, clf)


 def test_OB():
     """Bare WAE."""
     stream = get_stream()
-    clf = sl.ensembles.OnlineBagging()
+    clf = sl.ensembles.OnlineBagging(GaussianNB())
     evaluator = sl.evaluators.TestThenTrain()
     evaluator.process(stream, clf)


 def test_UOB():
     """Bare WAE."""
     stream = get_stream()
-    clf = sl.ensembles.UOB()
+    clf = sl.ensembles.UOB(GaussianNB())
     evaluator = sl.evaluators.TestThenTrain()
     evaluator.process(stream, clf)

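The tests now pass GaussianNB() explicitly, presumably because the new default base_estimator=None cannot survive the lazy clone() in partial_fit: sklearn refuses to clone objects that do not implement get_params. A quick sketch of that behaviour (an assumption about the motivation, not part of the commit):

    from sklearn.base import clone
    from sklearn.naive_bayes import GaussianNB

    clone(GaussianNB())   # fine: an unfitted, independent copy

    try:
        clone(None)       # what the default base_estimator=None would hit
    except TypeError as err:
        print(err)        # sklearn cannot clone a non-estimator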
