In [199]:
import numpy as np

class DecisionStump:
    """One-feature threshold classifier that outputs labels in {-1, +1}.

    The fields are plain attributes filled in by whoever trains the stump
    (``AdaboostClassifier.fit`` in this notebook):

    polarity  -- 1 means "below ``threshold`` is -1, everything else is +1";
                 any other value selects the exact opposite rule.
    threshold -- split point on the selected feature (``None`` until set).
    feature   -- column index of ``X`` the stump inspects (``None`` until set).
    alpha     -- vote weight of this stump inside an ensemble; the stump
                 itself never reads it.
    """

    def __init__(self):
        self.polarity = 1
        self.threshold = None
        self.alpha = None
        self.feature = None

    def predict(self, X):
        """Label every row of ``X`` with -1.0 or +1.0.

        Parameters
        ----------
        X : numpy.ndarray of shape (n_sample, n_feature)
            Only column ``self.feature`` is read.

        Returns
        -------
        numpy.ndarray of shape (n_sample,)
            Float array holding -1.0 / 1.0.
        """
        n_sample = X.shape[0]
        X_column = X[:, self.feature]

        predictions = np.ones(n_sample)

        if self.polarity == 1:
            predictions[X_column < self.threshold] = -1
        else:
            # The inverted rule has to be the exact complement of the
            # polarity-1 rule: the trainer scores a flipped stump as
            # ``1 - error``, which only holds if samples sitting exactly on
            # the threshold flip as well. Hence ">=" rather than ">".
            predictions[X_column >= self.threshold] = -1

        return predictions

class AdaboostClassifier:
    """AdaBoost for binary labels in {-1, +1} using ``DecisionStump`` weak learners.

    Parameters
    ----------
    n_estimators : int, default 5
        Number of boosting rounds; one stump is trained per round.
    learning_rate : float, default 1.0
        Shrinkage applied to every stump's vote weight ``alpha``. It scales
        the stump's influence both on the sample re-weighting during ``fit``
        and on the weighted vote in ``predict``. With the default of 1.0 no
        shrinkage takes place.

    After ``fit`` the instance holds ``_estimators`` (the trained stumps, each
    with its unscaled ``alpha``) and ``_estimator_errors`` (the weighted
    training error of each stump at the time it was selected).
    """

    # Keeps the log-odds in ``fit`` finite when a stump has zero weighted error.
    _EPS = 1e-10

    def __init__(self, n_estimators = 5, learning_rate = 1.0):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate

    def fit(self, X_train, y_train):
        """Train ``n_estimators`` stumps on re-weighted versions of the data.

        Parameters
        ----------
        X_train : array-like of shape (n_sample, n_feature)
        y_train : array-like of shape (n_sample,)
            Labels; every value must be -1 or 1.

        Raises
        ------
        ValueError
            If ``y_train`` contains a label other than -1 or 1. The weight
            update below multiplies label and prediction, so any other
            encoding would silently produce meaningless weights.
        """
        X_train = np.array(X_train)
        y_train = np.array(y_train)
        n_sample, n_feature = X_train.shape

        labels = np.unique(y_train).tolist()
        if not set(labels) <= {-1, 1}:
            raise ValueError(
                "AdaboostClassifier expects labels in {-1, 1}, got %r" % (labels,)
            )

        # Start from a uniform distribution over the training samples.
        weights = np.ones(n_sample) / n_sample

        self._estimators = []
        self._estimator_errors = []

        for _ in range(self.n_estimators):
            stump, min_error = self._best_stump(X_train, y_train, weights)
            self._estimator_errors.append(min_error)

            # Amount of say: log-odds of the stump being right.
            stump.alpha = np.log(
                (1 - min_error - self._EPS) / (min_error + self._EPS)
            )
            self._estimators.append(stump)
            predictions = stump.predict(X_train)

            # y * prediction is +1 for a hit and -1 for a miss, so misses are
            # scaled up and hits scaled down before renormalising.
            weights = weights * np.exp(
                -0.5 * self.learning_rate * stump.alpha * y_train * predictions
            )
            weights /= weights.sum()

    def _best_stump(self, X, y, weights):
        """Search every (feature, threshold) pair for the lowest weighted error.

        Returns the configured ``DecisionStump`` (``alpha`` still unset) and
        its weighted error as a Python float.
        """
        n_sample, n_feature = X.shape
        stump = DecisionStump()
        min_error = float('inf')

        for feature in range(n_feature):
            X_column = X[:, feature]

            for threshold in np.unique(X_column):
                polarity = 1
                predictions = np.ones(n_sample)
                predictions[X_column < threshold] = -1

                # Total weight of the misclassified samples.
                error = float(np.dot(weights, predictions != y))

                # A rule that is wrong on more than half of the weight is
                # more useful inverted; because the weights sum to 1 the
                # inverted rule's error is the complement.
                if error > 0.5:
                    polarity = -1
                    error = 1 - error

                if error < min_error:
                    min_error = error
                    stump.polarity = polarity
                    stump.threshold = threshold
                    stump.feature = feature

        return stump, min_error

    def predict(self, X_test):
        """Return the sign of the alpha-weighted vote of all trained stumps.

        Parameters
        ----------
        X_test : array-like of shape (n_sample, n_feature)

        Returns
        -------
        numpy.ndarray of shape (n_sample,)
            -1.0 or 1.0 per row; ``np.sign`` yields 0.0 where the weighted
            votes cancel exactly.
        """
        X_test = np.array(X_test)

        # combine all stumps with their amount of say
        estimator_preds = [
            self.learning_rate * estimator.alpha * estimator.predict(X_test)
            for estimator in self._estimators
        ]
        return np.sign(np.sum(estimator_preds, axis=0))


In [200]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
# Reduce iris to a two-class problem in the ±1 encoding the from-scratch
# classifier works with: class 0 becomes -1 and class 2 is merged into class 1.
data = load_iris()
data.target[data.target == 0] = -1
data.target[data.target == 2] = 1

# Hold out 33% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.33,
    random_state=101,
)

# Fit two boosted stumps and predict the held-out rows.
ad = AdaboostClassifier(n_estimators=2, learning_rate=1)
ad.fit(X_train, y_train)
y_pred = ad.predict(X_test)

In [203]:
from sklearn.metrics import confusion_matrix, classification_report
# Score whatever is currently bound to y_pred against the held-out labels
# (the preceding cell assigns y_pred from ad.predict(X_test)).
# NOTE(review): scikit-learn documents confusion_matrix rows as true labels and
# columns as predicted labels — confirm against the installed version.
print(confusion_matrix(y_test,y_pred))
# Visual gap between the matrix and the per-class report.
print('\n')
print(classification_report(y_test,y_pred))

[[15  0]
 [ 1 34]]


              precision    recall  f1-score   support

          -1       0.94      1.00      0.97        15
           1       1.00      0.97      0.99        35

    accuracy                           0.98        50
   macro avg       0.97      0.99      0.98        50
weighted avg       0.98      0.98      0.98        50



In [204]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
# Rebuild the binary ±1 iris dataset and the split with the same parameters as
# the from-scratch run, so both models see identical train/test rows.
data = load_iris()
data.target[data.target == 0] = -1
data.target[data.target == 2] = 1
X_train, X_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.33,
    random_state=101,
)

# scikit-learn's AdaBoost as the comparison model; note that it overwrites
# y_pred from the earlier cell.
adb = AdaBoostClassifier(n_estimators=5, learning_rate=0.01)
adb.fit(X_train, y_train)
y_pred = adb.predict(X_test)

# Bare last expression so the notebook displays the fitted weak learners.
adb.estimators_

[DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                        max_depth=1, max_features=None, max_leaf_nodes=None,
                        min_impurity_decrease=0.0, min_impurity_split=None,
                        min_samples_leaf=1, min_samples_split=2,
                        min_weight_fraction_leaf=0.0, presort='deprecated',
                        random_state=924238476, splitter='best')]

In [205]:
from sklearn.metrics import confusion_matrix, classification_report
# Score whatever is currently bound to y_pred against the held-out labels
# (the preceding cell assigns y_pred from adb.predict(X_test)).
# NOTE(review): scikit-learn documents confusion_matrix rows as true labels and
# columns as predicted labels — confirm against the installed version.
print(confusion_matrix(y_test,y_pred))
# Visual gap between the matrix and the per-class report.
print('\n')
print(classification_report(y_test,y_pred))

[[15  0]
 [ 0 35]]


              precision    recall  f1-score   support

          -1       1.00      1.00      1.00        15
           1       1.00      1.00      1.00        35

    accuracy                           1.00        50
   macro avg       1.00      1.00      1.00        50
weighted avg       1.00      1.00      1.00        50

