<a href="https://colab.research.google.com/github/wahyuozorahmanurung/adaboost-dan-gradient-adaboost/blob/main/ML_adaboost_dan_gradient_boosting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from pathlib import Path


def accuracy(y, pred):
    return np.sum(y == pred) / float(len(y))


def parse_spambase_data(filename="spambase.train"):
    """
    Given a "spambase.train" return X and Y numpy arrays

    X is of size number of rows x num_features
    Y is an array of size the number of rows
    Y is the last element of each row. (Convert 0 to -1)
    """
    p = Path("spambase.train")
    data = p.read_text().split("\n")
    data.remove("")

    X = np.zeros((len(data), len(data[0].split(",")) - 1))
    Y = np.zeros((len(data),))

    for i in range(len(data)):
        row = data[i].split(",")
        for j in range(len(row)):
            if j != len(row) - 1:
                X[i][j] = np.float64(row[j])
            else:
                Y[i] = -1 if int(row[j]) == 0 else 1

    return X, Y


def adaboost(X, y, num_iter, max_depth=1):
    """
    Given an numpy matrix X, a array y and num_iter return trees and weights

    Input: X, y, num_iter
    Outputs: array of trees from DecisionTreeClassifier
             trees_weights array of floats

    Assumes y is {-1, 1}
    """
    trees = []
    trees_weights = []
    N, _ = X.shape
    d = np.ones(N) / N

    w = d
    for m in range(num_iter):
        h = DecisionTreeClassifier(max_depth=1, random_state=0)
        h.fit(X, y, sample_weight=w)
        y_pred = h.predict(X)

        trees.append(h)

        err = np.sum(w * (y_pred != y)) / np.sum(w)
        alpha = np.log((1 - err) / err)
        w *= np.exp(alpha * (y_pred != y))
        trees_weights.append(alpha)

    return trees, trees_weights


def adaboost_predict(X, trees, trees_weights):
    """
    Given X, trees and weights predict Y
    """
    # X input, y output
    N, _ = X.shape
    y = np.zeros(N)

    preds = []
    for i in range(len(trees)):
        y_pred = trees[i].predict(X)
        preds.append(trees_weights[i] * y_pred)
    y = np.sign(np.sum(preds, axis=0))

    return y


# **ADABOOST**

In [2]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Load the dataset
X, y = parse_spambase_data("spambase.train")
X_test, y_test = parse_spambase_data("spambase.test")

# Create the base estimator
base_estimator = DecisionTreeClassifier(max_depth=1)

# Train the AdaBoost ensemble
num_iter = 10
model = AdaBoostClassifier(estimator=base_estimator, n_estimators=num_iter)  # Use 'estimator' instead of 'base_estimator'
model.fit(X, y)

# Make predictions using the trained ensemble
y_hat = model.predict(X)
y_pred = model.predict(X_test)

# Calculate accuracy for the training set
acc_train = accuracy_score(y, y_hat)

# Calculate accuracy for the test set
acc_test = accuracy_score(y_test, y_pred)

# Print the accuracies
print("Train Accuracy: %.4f" % acc_train)
print("Test Accuracy: %.4f" % acc_test)




Train Accuracy: 0.9211
Test Accuracy: 0.9211


# **GRADIENTBOOSTING**

In [3]:
import gradient_boosting_mse

# Load the dataset
X, y = gradient_boosting_mse.load_dataset("tiny.rent.train")
X_test, y_test = gradient_boosting_mse.load_dataset("tiny.rent.test")

# Check if datasets were loaded correctly
if X is None or y is None:
    raise ValueError("Training dataset could not be loaded.")
if X_test is None or y_test is None:
    raise ValueError("Test dataset could not be loaded.")

# Train the Gradient Boosting ensemble
num_iter = 10  # Number of boosting iterations
max_depth = 1  # Maximum depth of each tree
nu = 0.1      # Learning rate

# Train the model
y_mean, trees = gradient_boosting_mse.gradient_boosting_mse(X, y, num_iter, max_depth, nu)

# Make predictions using the trained ensemble
y_hat = gradient_boosting_mse.gradient_boosting_predict(X, trees, y_mean, nu)
y_hat_test = gradient_boosting_mse.gradient_boosting_predict(X_test, trees, y_mean, nu)

# Calculate R2 Score for the training set
r2_train = gradient_boosting_mse.r2_score(y, y_hat)

# Calculate R2 Score for the test set
r2_test = gradient_boosting_mse.r2_score(y_test, y_hat_test)

# Print the R2 Scores
print("Train R2 Score: %.4f" % r2_train)
print("Test R2 Score: %.4f" % r2_test)


Train R2 Score: 0.6466
Test R2 Score: 0.5297
