In [1]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.base import clone
from collections import defaultdict

class AdaBoost:
    def __init__(self, base_classifier=DecisionTreeClassifier(max_depth=1), n_estimators=50):
        self.base_classifier = base_classifier
        self.n_estimators = n_estimators
        self.ensemble = []

    def fit(self, X, y):
        n_samples, n_features = X.shape
        # Initialize weights
        w = np.ones(n_samples) / n_samples

        for k in range(self.n_estimators):
            # Clone the base classifier and fit it with the current weights
            clf = clone(self.base_classifier)
            clf.fit(X, y, sample_weight=w)
            
            # Make predictions
            y_pred = clf.predict(X)
            
            # Compute weighted classification error
            incorrect = (y_pred != y)
            epsilon_k = np.sum(w * incorrect)
            
            # Compute scaling factor
            beta_k = epsilon_k / (1 - epsilon_k)
            
            # Add the classifier and its weight to the ensemble
            self.ensemble.append((clf, beta_k))
            
            # Update weights
            w = w * np.where(incorrect, 1, beta_k)
            
            # Normalize weights
            w /= np.sum(w)
            
    def predict(self, X):
        B = len(self.ensemble)
        # Initialize the score for each class
        class_scores = defaultdict(float)

        # Sum log(1/beta_k) for classifiers that predict class y
        for clf, beta_k in self.ensemble:
            y_pred = clf.predict(X)
            log_inv_beta_k = np.log(1 / beta_k)
            for idx, pred in enumerate(y_pred):
                class_scores[pred] += log_inv_beta_k

        # Select the class with the highest score
        y_pred = [max(class_scores, key=class_scores.get) for _ in range(X.shape[0])]
        
        return np.array(y_pred)

# Example usage:
# Generate a sample dataset
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train AdaBoost classifier
ada = AdaBoost(n_estimators=50)
ada.fit(X_train, y_train)

# Predict
y_pred = ada.predict(X_test)

# Evaluate
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")


Accuracy: 0.47
