In [37]:
import numpy as np
np.random.seed(7)
class AdaBoostClassifier:
    def __init__(self,numIt=500):
        self.numIt=numIt
        self.fitted=None
              
    def fit(self,dataArray,classLabels):
        classLabels=2*classLabels-1
        self.estimators_,self.estimator_weights_,self.estimator_errors_=AdaBoostClassifier.adaBoostTrainDS(dataArray,
                                                                                                           classLabels,self.numIt)
        self.fitted=True
        return self
        
    def predict(self,dataArray):
        if self.fitted is None:
            raise Exception('The model has not yet been trained.')
        return (AdaBoostClassifier.adaClassify(dataArray,self.estimators_)+1)/2
     
    def stumpClassify(dataArray,dimen,threshVal,threshIneq):
        retArray = np.ones((dataArray.shape[0],1))
        if threshIneq == 'lt':
            retArray[dataArray[:,dimen] <= threshVal] = -1.0
        else:
            retArray[dataArray[:,dimen] > threshVal] = -1.0
        return retArray
    
    
    def buildStump(dataArray,classLabels,D):
        #D>errorRate->alpha->D
        m,n=dataArray.shape
        numSteps = 10.0;      
        bestStump = {}  
        bestClasEst = np.zeros((m,1)) ##
        minError = np.inf 

        for i in range(n):
            rangeMin = dataArray[:,i].min()
            rangeMax = dataArray[:,i].max();
            stepSize = (rangeMax-rangeMin)/numSteps

            for j in range(-1,int(numSteps)+1):
                for inequal in ['lt', 'gt']: 
                    threshVal = (rangeMin + float(j) * stepSize)
                    predictedVals =AdaBoostClassifier.stumpClassify(dataArray,i,threshVal,inequal)
                    errArr = np.ones((m,1))
                    errArr[predictedVals == classLabels.reshape(-1,1)] = 0
                    weightedError = D.T@errArr           
                    if weightedError < minError:
                        minError = weightedError
                        bestClasEst = predictedVals.copy()
                        bestStump['dim'] = i
                        bestStump['thresh'] = threshVal
                        bestStump['ineq'] = inequal
        return bestStump,minError,bestClasEst
        
    def adaBoostTrainDS(dataArr,classLabels,numIt=500):
        weakClassArr = []              #week classifier list
        m,_= dataArr.shape
        D = np.ones((m,1))/m         #initalize the predicted  value
        aggClassEst = np.zeros((m,1))   #initalize the pedicted value
        alphas=[]
        estimator_errors_=[]
        for i in range(numIt):
            bestStump,error,classEst = AdaBoostClassifier.buildStump(dataArr,classLabels,D) #get the best split dim and value,predict list
            alpha = float(0.5*np.log((1.0-error)/max(error,1e-16)))  #calculate alpha
            alphas.append(alpha)
            bestStump['alpha'] = alpha  
            weakClassArr.append(bestStump)                
            expon = np.multiply(-alpha*classLabels.reshape(-1,1),classEst)                              
            D = np.multiply(D,np.exp(expon))/D.sum()                #update weight
            aggClassEst += alpha*classEst           #update the prrdicted value of the loop
            aggErrors = np.multiply(np.sign(aggClassEst) != classLabels.reshape(-1,1),np.ones((m,1))) #calculate the error rate
            errorRate = aggErrors.sum()/m           #calculate the  error rate
            estimator_errors_.append(errorRate)
            if errorRate == 0.0: 
                break
                print('The error rate has been reduced to 0, break the loop')
        return weakClassArr,np.array(alphas),np.array(estimator_errors_)
    
    def adaClassify(datToClass,classifierArr):
        m,_= datToClass.shape
        aggClassEst = np.zeros((m,1))
        for i in range(len(classifierArr)):
            classEst = AdaBoostClassifier.stumpClassify(datToClass,classifierArr[i]['dim'],\
                                     classifierArr[i]['thresh'],\
                                     classifierArr[i]['ineq'])
            aggClassEst += classifierArr[i]['alpha']*classEst
        return np.sign(aggClassEst).flatten()  

In [38]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
X,y=load_breast_cancer().data,load_breast_cancer().target
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3)
model=AdaBoostClassifier(numIt=500).fit(X_train,y_train)
model.predict(X_test)

array([1., 0., 1., 1., 1., 0., 1., 1., 0., 1., 0., 0., 0., 1., 1., 1., 1.,
       1., 0., 0., 0., 0., 0., 1., 0., 1., 0., 1., 1., 0., 1., 1., 1., 0.,
       1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 0., 1.,
       0., 0., 1., 0., 0., 1., 1., 0., 1., 1., 0., 1., 1., 1., 0., 1., 0.,
       1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0.,
       0., 0., 0., 1., 1., 0., 1., 1., 0., 0., 1., 1., 0., 1., 1., 1., 1.,
       1., 0., 0., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 0., 1., 1., 1.,
       0., 1., 1., 1., 1., 1., 0., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 0., 1., 0., 1., 1., 1., 0., 1., 1., 1., 1.,
       0., 1., 1., 1., 0., 1., 0., 1., 1., 1., 1., 0., 1., 1., 0., 1., 0.,
       0.])

In [39]:
accuracy_score(model.predict(X_test),y_test)

0.9766081871345029