## Manually computing AdaBoost

In [102]:
import numpy as np

### AdaBoost is best suited to classification problems. Here every prediction is +1 or -1, as output by a decision tree

In [103]:
# Toy 1-D dataset for the walkthrough: ten evenly spaced points with +1/-1 labels.
rng = np.random.RandomState(0)  # fixed seed so the random draws below are repeatable
X = np.arange(0.1, 1.01, 0.1)  # 0.1, 0.2, ..., 1.0
y = np.array([1, 1, 1, 1, -1, -1, -1, 1, 1, 1])  # class label for each point in X
D = np.full(X.shape, 1.0 / X.size)  # initial sample weights: uniform, one per point

print(X, y, D, sep="\n")

[ 0.1  0.2  0.3  0.4  0.5  0.6  0.7  0.8  0.9  1. ]
[ 1  1  1  1 -1 -1 -1  1  1  1]
[ 0.1  0.1  0.1  0.1  0.1  0.1  0.1  0.1  0.1  0.1]


In [104]:
# Weighted resample: draw X.size indices from range(10), index i being picked with probability D[i].
# The indices are sorted so the resampled points print in increasing order of X.
idx = np.sort(rng.choice(10, size=(X.size,), p=D))
X_, y_ = X[idx], y[idx]

print(X_, y_, sep="\n")

[ 0.4  0.5  0.5  0.6  0.6  0.7  0.7  0.8  0.9  1. ]
[ 1 -1 -1 -1 -1 -1 -1  1  1  1]


## $ \alpha = 0.5 \log \frac{1 - \epsilon ^ *} {\epsilon ^ *} $ &nbsp; <em>Here epsilon is the weighted error</em>

In [105]:
def predictor_1(a):
    """First weak learner: a threshold rule returning +1 when a >= 0.75, otherwise -1."""
    return 1 if a >= 0.75 else -1


# Apply the rule to every point; kept as a plain list so it prints like list(y) below.
output_1 = [predictor_1(a) for a in X]
# 1.0 where the prediction matches the label, 0.0 where it does not.
# Builtin float is used because the np.float alias is not available in newer NumPy releases.
is_correct = (np.asarray(output_1) == y).astype(float)

print("y is         ", list(y))
print("prediction is", output_1)
print("Are predicted values and y the same?", is_correct)
print("D values are here                     ", D)

y is          [1, 1, 1, 1, -1, -1, -1, 1, 1, 1]
prediction is [-1, -1, -1, -1, -1, -1, -1, 1, 1, 1]
Are predicted values and y the same? [ 0.  0.  0.  0.  1.  1.  1.  1.  1.  1.]
D values are here                      [ 0.1  0.1  0.1  0.1  0.1  0.1  0.1  0.1  0.1  0.1]


## Error

 $$ \epsilon = \frac{\sum_i \bar y_i \, D_i}{\sum_i D_i} $$

where $\bar y_i$ is 1 if sample $i$ is misclassified and 0 otherwise.

In [106]:
# Weighted error of the first learner: (1 - is_correct) is 1.0 for each misclassified
# sample, so the elementwise product with D keeps only the weights of the mistakes.
eps_1 = np.sum((1.0 - is_correct) * D)
# Learner weight from the weighted error.
alpha_1 = 0.5 * np.log((1 - eps_1) / eps_1)
print(eps_1, alpha_1, sep="\n")

0.4
0.202732554054


## $ D_{t+1} (x) \propto D_t(x)  \exp (\pm \alpha_t) $

## Correctly classified samples will get lower weights, while misclassified samples will get higher weights

In [107]:
# Exponent sign per sample: +1 where is_correct is 0.0 (mistake), -1 where it is 1.0 (hit).
sample_signs_1 = 1.0 - 2.0 * is_correct
print(sample_signs_1)
# Scale each weight by exp(+alpha_1) or exp(-alpha_1) according to its sign.
# D is read and rebound here, so re-running this cell applies the update a second time.
scale_1 = np.exp(sample_signs_1 * alpha_1)
D = D * scale_1
print(D)

[ 1.  1.  1.  1. -1. -1. -1. -1. -1. -1.]
[ 0.12247449  0.12247449  0.12247449  0.12247449  0.08164966  0.08164966
  0.08164966  0.08164966  0.08164966  0.08164966]


In [111]:
# Rescale the weights in place so they sum to 1 again.
np.divide(D, D.sum(), out=D)
print(D, D.sum(), sep="\n")

[ 0.125       0.125       0.125       0.125       0.08333333  0.08333333
  0.08333333  0.08333333  0.08333333  0.08333333]
1.0
