##### Here I am implementing the AdaBoost classifier from scratch. Like Random Forest, it is an ensemble technique, but it uses boosting rather than the bagging technique used in Random Forests.

In [1]:
# imports
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

In [2]:
# defining helper functions like: compute error, alpha and updating weights.
def total_error(y,y_pred,wi):
    """Return the weighted misclassification rate of a set of predictions.

    Parameters
    ----------
    y : array-like
        True labels.
    y_pred : array-like
        Predicted labels, same length as ``y``.
    wi : array-like
        Per-sample weights, same length as ``y``.

    Returns
    -------
    float
        Sum of the weights of the misclassified samples divided by the sum of
        all weights.

    Raises
    ------
    ValueError
        If the weights sum to zero (which includes empty input), because the
        ratio is undefined in that case.
    """
    weights = np.asarray(wi, dtype=float)
    missed = np.asarray(y) != np.asarray(y_pred)

    # np.sum is vectorised; the builtin sum() walks a NumPy array one element at a time.
    total_weight = np.sum(weights)
    if total_weight == 0:
        raise ValueError("total_error: sample weights sum to zero")

    # Multiplying by the boolean mask keeps only the weights of the misclassified samples.
    return np.sum(weights * missed) / total_weight

In [4]:
def alpha(error):
    """Return the vote weight of a weak classifier given its weighted error.

    Computes ``log((1 - error) / error)``. The error is first clipped to
    ``[1e-10, 1 - 1e-10]`` so that a perfect weak classifier (``error == 0``)
    or a completely wrong one (``error == 1``) yields a large finite weight
    instead of a division by zero or an infinite value.

    Parameters
    ----------
    error : float or array-like
        Weighted error rate(s) in the range [0, 1].

    Returns
    -------
    float or numpy.ndarray
        Positive when ``error < 0.5``, zero at ``error == 0.5`` and negative
        when ``error > 0.5``.
    """
    eps = 1e-10
    # Keep the ratio away from 0/0 and x/0: an infinite alpha would turn the
    # next weight update into NaN (inf * 0).
    safe_error = np.clip(np.asarray(error, dtype=float), eps, 1 - eps)
    return np.log((1 - safe_error) / safe_error)

In [5]:
def update_weights(wi,alpha,y,y_pred):
    """Return the sample weights after one boosting round.

    Each misclassified sample has its weight multiplied by ``exp(alpha)``;
    correctly classified samples keep their weight (factor ``exp(0) == 1``).
    The input weights are not modified and the result is not renormalised.
    """
    # 1 where the prediction is wrong, 0 where it is right.
    miss_indicator = np.not_equal(y, y_pred).astype(int)
    scale = np.exp(alpha * miss_indicator)
    return wi * scale

##### Defining the AdaBoost class, which defines the structure of the algorithm.

In [6]:
class AdaBoost:
    """AdaBoost binary classifier built from depth-1 decision trees (stumps).

    Labels must be encoded as -1 and 1: the final prediction is the sign of
    the alpha-weighted sum of the stump predictions.
    """

    # Errors are clipped to [_EPS, 1 - _EPS] before computing alpha so that a
    # perfect stump does not produce an infinite vote weight.
    _EPS = 1e-10

    def __init__(self):
        self.alphas = []             # vote weight of each fitted stump, in boosting order
        self.D_T = []                # fitted stumps, in boosting order
        self.n = None                # boosting rounds requested by the last fit(); None before fit
        self.training_errors = []    # weighted training error of each stump
        self.prediction_errors = []  # per-stump unweighted error, filled by error_rates()

    def _check_fitted(self):
        # Fail early with a clear message instead of an opaque error from range(None).
        if self.n is None:
            raise ValueError("This AdaBoost instance is not fitted yet; call fit() first")

    def fit(self,X,y,n=100):
        """Fit ``n`` boosted decision stumps on the training data.

        Parameters
        ----------
        X : array-like
            Training features, one row per sample.
        y : array-like
            Training labels, encoded as -1 and 1.
        n : int, default=100
            Number of boosting rounds (one stump per round).

        Raises
        ------
        ValueError
            If ``y`` is empty or contains a label other than -1 or 1.
        """
        y = np.asarray(y)
        if len(y) == 0:
            raise ValueError("AdaBoost.fit needs at least one training sample")
        if not np.isin(y, (-1, 1)).all():
            raise ValueError("AdaBoost expects labels encoded as -1 and 1")

        # Reset ALL fitted state so a second fit() does not keep stumps from the first.
        self.alphas = []
        self.D_T = []
        self.training_errors = []
        self.n = n

        # Round 0 starts from uniform weights.
        wi = np.full(len(y), 1 / len(y))

        for i in range(n):
            if i > 0:
                # Up-weight the samples the previous stump got wrong, then
                # renormalise so the weights cannot overflow over many rounds.
                wi = update_weights(wi, alpha_n, y, y_pred)
                wi = wi / wi.sum()

            # Weak learner: a decision stump (tree of depth 1) trained on the weighted samples.
            dt = DecisionTreeClassifier(max_depth=1)
            dt.fit(X, y, sample_weight=wi)
            y_pred = dt.predict(X)
            self.D_T.append(dt)

            # Weighted training error of this stump (stored unclipped).
            error_n = total_error(y, y_pred, wi)
            self.training_errors.append(error_n)

            # Vote weight of this stump; the clip keeps it finite when error_n is 0 or 1.
            alpha_n = alpha(np.clip(error_n, self._EPS, 1 - self._EPS))
            self.alphas.append(alpha_n)

    def predict(self,X):
        """Predict class labels as the sign of the alpha-weighted stump votes.

        Parameters
        ----------
        X : array-like
            Samples to classify, one row per sample.

        Returns
        -------
        pandas.Series
            Integer labels indexed 0..len(X)-1. A weighted vote of exactly
            zero yields 0.

        Raises
        ------
        ValueError
            If called before ``fit``.
        """
        self._check_fitted()
        # Accumulate the weighted votes in a float array instead of filling an
        # object-dtype DataFrame column by column.
        scores = np.zeros(len(X), dtype=float)
        for stump, weight in zip(self.D_T, self.alphas):
            scores += weight * stump.predict(X)
        return pd.Series(np.sign(scores).astype(int))

    def error_rates(self, X, y):
        """Store each stump's unweighted error on ``(X, y)`` in ``self.prediction_errors``.

        Raises
        ------
        ValueError
            If called before ``fit``.
        """
        self._check_fitted()
        # Uniform weights turn total_error into the plain misclassification rate.
        uniform = np.ones(len(y))
        self.prediction_errors = [
            total_error(y=y, y_pred=stump.predict(X), wi=uniform)
            for stump in self.D_T
        ]

##### Testing the class on a dataset to see how it performs. I am also going to compare the performance of my implementation with scikit-learn's built-in AdaBoostClassifier.

In [7]:
# again some imports 
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier

In [11]:
# Build a synthetic classification dataset: 800 samples, 10 features, fixed seed.
X, y = make_classification(
    n_samples=800,
    n_features=10,
    random_state=42,
)

In [13]:
# y holds only 0 and 1, but this AdaBoost implementation expects -1 and 1.
# Map by sign instead of rescaling: re-running this cell on already converted
# labels leaves them unchanged, whereas y*2-1 would turn -1/1 into -3/1.
y = np.where(y > 0, 1, -1)

In [14]:
# Hold out 33% of the samples as a test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [15]:
# Train the from-scratch model with 100 boosting rounds.
adb = AdaBoost()
adb.fit(X_train, y_train, n=100)

# Predict labels for the held-out samples.
y_pred = adb.predict(X_test)

# Score the predictions, rounded to 3 decimals.
# NOTE: roc_auc_score receives hard class labels here, not continuous scores.
auc_custom = round(roc_auc_score(y_test, y_pred), 3)
print('The ROC-AUC score is : ', auc_custom)

The ROC-AUC score is :  0.951


In [16]:
# Baseline: scikit-learn's AdaBoostClassifier with the same number of estimators,
# trained and evaluated on the same split.
adb_sklearn = AdaBoostClassifier(n_estimators=100)
adb_sklearn.fit(X_train, y_train)

y_pred_sklearn = adb_sklearn.predict(X_test)

# NOTE: as above, roc_auc_score receives hard class labels, not continuous scores.
auc_sklearn = round(roc_auc_score(y_test, y_pred_sklearn), 3)
print('The ROC-AUC score of the model is:', auc_sklearn)

The ROC-AUC score of the model is: 0.932


##### The custom implementation and scikit-learn's built-in AdaBoostClassifier achieve comparable ROC-AUC scores. Note that scikit-learn uses a somewhat different method to implement AdaBoost.