In [1]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Function to create bootstrap samples
def bootstrap_sample(X, y):
    n_samples = X.shape[0]
    indices = np.random.choice(n_samples, n_samples, replace=True)
    return X[indices], y[indices]

# Function to create a bagging ensemble using DecisionTreeClassifier
def bagging(X, y, n_trees=10, max_depth=None, min_samples_split=2):
    ensemble = []
    for _ in range(n_trees):
        X_sample, y_sample = bootstrap_sample(X, y)
        tree = DecisionTreeClassifier(max_depth=max_depth, min_samples_split=min_samples_split)
        tree.fit(X_sample, y_sample)
        ensemble.append(tree)
    return ensemble

# Function to make predictions using the bagging ensemble
def predict_bagging(ensemble, X):
    # Get predictions from all trees
    tree_predictions = np.array([tree.predict(X) for tree in ensemble])
    # Take the majority vote across all trees
    final_predictions = [np.bincount(tree_predictions[:, i]).argmax() for i in range(X.shape[0])]
    return np.array(final_predictions)

# Example dataset
X, y = make_classification(n_samples=100, n_features=5, n_informative=3, n_redundant=0, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the bagging ensemble
ensemble = bagging(X_train, y_train, n_trees=10, max_depth=5, min_samples_split=2)

# Make predictions
predictions = predict_bagging(ensemble, X_test)
print("Bagging Predictions:", predictions)

# Calculate accuracy
accuracy = np.mean(predictions == y_test)
print("Bagging Accuracy:", accuracy)


Bagging Predictions: [0 0 0 0 1 1 1 0 0 0 1 0 0 0 1 0 0 1 1 1]
Bagging Accuracy: 0.8


In [2]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Function to implement boosting using DecisionTreeClassifier
def boosting(X, y, n_classifiers=10, max_depth=1):
    n_samples = X.shape[0]
    # Initialize weights equally for all samples
    sample_weights = np.ones(n_samples) / n_samples
    classifiers = []
    classifier_weights = []

    for _ in range(n_classifiers):
        # Train a weak learner (stump) on weighted samples
        tree = DecisionTreeClassifier(max_depth=max_depth)
        tree.fit(X, y, sample_weight=sample_weights)
        predictions = tree.predict(X)

        # Calculate error rate
        incorrect = (predictions != y)
        error = np.dot(sample_weights, incorrect) / np.sum(sample_weights)

        # Calculate classifier weight (alpha)
        alpha = np.log((1 - error) / (error + 1e-10)) / 2

        # Update sample weights
        sample_weights *= np.exp(alpha * incorrect)
        sample_weights /= np.sum(sample_weights)  # Normalize

        # Store classifier and its weight
        classifiers.append(tree)
        classifier_weights.append(alpha)

    return classifiers, classifier_weights

# Function to make predictions using the boosting ensemble
def predict_boosting(classifiers, classifier_weights, X):
    # Get weighted predictions from all classifiers
    classifier_predictions = np.array([alpha * clf.predict(X) for clf, alpha in zip(classifiers, classifier_weights)])
    # Combine the predictions by taking the sign (majority vote)
    final_predictions = np.sign(np.sum(classifier_predictions, axis=0))
    return np.array([1 if pred > 0 else 0 for pred in final_predictions])

# Example dataset
X, y = make_classification(n_samples=100, n_features=5, n_informative=3, n_redundant=0, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the boosting ensemble
classifiers, classifier_weights = boosting(X_train, y_train, n_classifiers=10, max_depth=1)

# Make predictions
predictions = predict_boosting(classifiers, classifier_weights, X_test)
print("Boosting Predictions:", predictions)

# Calculate accuracy
accuracy = np.mean(predictions == y_test)
print("Boosting Accuracy:", accuracy)


Boosting Predictions: [1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
Boosting Accuracy: 0.55
