In [1]:
import warnings
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
# Training data: feature matrix X and class labels y of the iris dataset.
X, y = load_iris(return_X_y=True)

# Hyper-parameters read by the boosting code below.
n_estimators = 5     # number of boosting rounds
learning_rate = 1    # multiplier applied to every round's weight update
random_state = 100   # seed handed to each LogisticRegression

# Fitted base learners, appended by _boost_real in the order they are built.
estimators_ = []

def _boost_real(iboost, X, y, sample_weight, random_state):
    """Run one SAMME.R boosting round with a logistic-regression learner.

    A new ``LogisticRegression`` is appended to the module-level
    ``estimators_`` list and fitted on the weighted samples.  Unless this is
    the final round (according to the module-level ``n_estimators``), the
    ``sample_weight`` array is then re-weighted **in place**, with the update
    scaled by the module-level ``learning_rate``.

    Parameters
    ----------
    iboost : int
        Zero-based index of the current boosting round.
    X : array-like
        Training samples, passed unchanged to ``fit`` and ``predict_proba``.
    y : numpy.ndarray
        True class label of every sample.
    sample_weight : numpy.ndarray
        Current per-sample weights; modified in place.
    random_state : int
        Seed forwarded to the ``LogisticRegression`` constructor.

    Returns
    -------
    tuple
        ``(sample_weight, 1.0, estimator_error)``: the weight array, the
        constant estimator weight ``1.0`` and the weighted misclassification
        rate (``0.0`` when the learner classifies every sample correctly).
    """
    from scipy.special import xlogy

    learner = LogisticRegression(random_state=random_state)
    # Registered before fitting, so the list also holds the learner of a
    # round that returns early below.
    estimators_.append(learner)
    learner.fit(X, y, sample_weight=sample_weight)
    proba = learner.predict_proba(X)

    labels = learner.classes_
    n_labels = len(labels)

    # Weighted share of samples whose most probable class is not the true one.
    predicted = labels.take(np.argmax(proba, axis=1), axis=0)
    is_wrong = predicted != y
    weighted_error = np.average(is_wrong, weights=sample_weight, axis=0)
    estimator_error = np.mean(weighted_error)

    # A perfect learner leaves nothing to re-weight.
    if estimator_error <= 0:
        return sample_weight, 1.0, 0.0

    # y coding as described in Zhu et al.:
    #
    #    y_k = 1 if c == k else -1 / (K - 1)
    #
    # with K the number of classes, k the column index and c the column of
    # the sample's true class.
    is_true_class = labels == y[:, np.newaxis]
    y_coding = np.where(is_true_class, 1.0, -1.0 / (n_labels - 1))

    # Lift zero probabilities to machine epsilon so the logarithm is finite.
    tiny = np.finfo(proba.dtype).eps
    np.clip(proba, tiny, None, out=proba)

    # Per-sample boost exponent of multi-class AdaBoost SAMME.R.
    scale = -1.0 * learning_rate * ((n_labels - 1.0) / n_labels)
    estimator_weight = scale * xlogy(y_coding, proba).sum(axis=1)

    # The weights only matter if another learner will be fitted afterwards.
    if iboost != n_estimators - 1:
        # Samples with non-positive weight are only touched when their
        # exponent is negative; for all others the exponent is zeroed.
        boost_mask = (sample_weight > 0) | (estimator_weight < 0)
        sample_weight *= np.exp(estimator_weight * boost_mask)

    return sample_weight, 1.0, estimator_error


# Every sample starts with the same weight and the weights sum to one.
sample_weight = np.full(X.shape[0], 1.0 / X.shape[0], dtype=np.float64)

# Per-round bookkeeping.  estimators_ is not reset here, so running this
# section again appends to the learners that are already in the list.
estimator_weights_ = np.zeros(n_estimators, dtype=np.float64)
estimator_errors_ = np.ones(n_estimators, dtype=np.float64)

for iboost in range(n_estimators):
    # One boosting round; _boost_real updates sample_weight in place.
    sample_weight, estimator_weight, estimator_error = _boost_real(
        iboost, X, y, sample_weight, random_state
    )

    # Early termination requested by the boosting step.
    if sample_weight is None:
        break

    estimator_weights_[iboost] = estimator_weight
    estimator_errors_[iboost] = estimator_error

    # A perfect learner ends the boosting.
    if estimator_error == 0:
        break

    sample_weight_sum = sample_weight.sum()

    # Overflowed weights cannot be normalised any more.
    if not np.isfinite(sample_weight_sum):
        warnings.warn(
            "Sample weights have reached infinite values,"
            f" at iteration {iboost}, causing overflow. "
            "Iterations stopped. Try lowering the learning rate.",
            stacklevel=2,
        )
        break

    # A non-positive total cannot be normalised either.
    if sample_weight_sum <= 0:
        break

    # Renormalise to sum one, except after the final round.
    if iboost < n_estimators - 1:
        sample_weight /= sample_weight_sum
    # print(sample_weight)


def _samme_proba(estimator, n_classes, X):
    """Return the SAMME.R per-class scores of one fitted estimator.

    The class probabilities predicted for ``X`` are clipped from below at
    machine epsilon (in place, on the array returned by ``predict_proba``),
    their logarithm is taken, each row is centred on its mean log-probability
    and the result is scaled by ``n_classes - 1``.

    Parameters
    ----------
    estimator : object
        Fitted model exposing ``predict_proba(X)``.
    n_classes : int
        Number of classes, i.e. columns of the probability matrix.
    X : array-like
        Samples to score; passed unchanged to ``predict_proba``.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as the probability matrix.
    """
    probabilities = estimator.predict_proba(X)

    # Zero probabilities would give -inf under the logarithm.
    tiny = np.finfo(probabilities.dtype).eps
    np.clip(probabilities, tiny, None, out=probabilities)

    log_p = np.log(probabilities)
    row_totals = log_p.sum(axis=1)[:, np.newaxis]
    return (n_classes - 1) * (log_p - (1.0 / n_classes) * row_totals)

# Add up the SAMME.R scores of all fitted learners, then divide by the sum of
# the recorded estimator weights (each boosting round records a weight of 1.0).
pred = sum(
    _samme_proba(fitted, len(fitted.classes_), X) for fitted in estimators_
)
pred /= estimator_weights_.sum()

# Translate the best-scoring column of every row back to its class label.
res1 = estimators_[0].classes_[pred.argmax(axis=1)]


from sklearn.ensemble import AdaBoostClassifier
# Reference model: scikit-learn's own AdaBoost.  It is configured from the
# same module-level hyper-parameters as the manual boosting loop, so the two
# cannot drift apart when n_estimators or learning_rate is edited.
# NOTE(review): recent scikit-learn releases deprecate algorithm="SAMME.R" --
# confirm against the installed version.
clf = AdaBoostClassifier(
    LogisticRegression(),
    n_estimators=n_estimators,
    learning_rate=learning_rate,
    algorithm="SAMME.R",
)
clf.fit(X, y)
res2 = clf.predict(X)

# Element-wise agreement between the manual predictions and scikit-learn's.
res1 == res2



array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,