In [24]:
import matplotlib
import numpy as np
import random

In [25]:
# Toy training set: 8 examples (one per row), 4 features each.
X = np.array([
    [-0.5,   0.25, -0.8,  -1],
    [-1,    -0.1,  -0.1,  -1],
    [0.5,    0,     0.25,  0.1],
    [-0.2,  -0.3,   0.2,   0],
    [-0.8,   0,    -0.8,  -1],
    [-0.15, -0.5,   0.05, -0.25],
    [-1,     0,    -1,    -1],
    [0,     -0.25,  0.25,  0.1],
])
# The features are used as-is: `x_in` is the very same array object as `X`.
# Two optional preprocessing steps are intentionally left disabled:
#   * prepend a constant feature 1 so the bias is learned as an ordinary weight:
#       x_in = np.concatenate([np.ones([np.shape(X)[0], 1]), X], axis=1)
#   * normalize each example:
#       x_in = preprocessing.normalize(x_in)
#     (NOTE: `preprocessing` is not imported in the cells shown here)
x_in = X
# Target labels in {+1, -1}, aligned with the rows of X.
y_target = np.array([1, 1, -1, -1, 1, -1, 1, -1])

In [26]:
def gradfn(weights, X, y):
    '''
    Per-example gradient used by the solvers in this notebook.

    Given `weights` - the current guess of the weight vector
          `X` - the feature vector of ONE example (the solvers pass a single
                row such as `X[i]`, not the whole data matrix)
          `y` - the target label of that example
    Return `y * (y * <weights, X> - 1) * X`, which is the gradient with
    respect to `weights` of `0.5 * (y * <weights, X> - 1) ** 2`.
    '''
    # signed distance of the margin y * <w, x> from the target value 1
    margin_gap = y * np.dot(weights, X) - 1
    return y * margin_gap * X

In [27]:
def solve_via_gradient_descent(X, y, niter=50, eta=0.5, grad_fn=None):
    '''
    Full-batch gradient descent for a linear classifier.

    Given `X` - matrix of shape (N, D), one example per row
          `y` - array of N target labels
          `niter` - number of gradient steps to take
          `eta` - learning rate (step size)
          `grad_fn` - optional callable `grad_fn(w, x_i, y_i)` returning the
                      gradient contribution of one example; when omitted the
                      notebook-level `gradfn` is used
    Return the weight vector (shape (D,)) after `niter` steps.
    The weights are also printed after every step.
    '''
    if grad_fn is None:
        # looked up at call time so the cell defining `gradfn` only has to
        # have run before this function is *called*, not before it is defined
        grad_fn = gradfn
    N, D = np.shape(X)
    # initialize all the weights to zeros
    w = np.zeros([D])
    for k in range(niter):
        # accumulate the gradient over the examples that violate the margin;
        # examples with margin >= 1 contribute nothing
        dw = np.zeros([D])
        margin = y * (X @ w)
        for i in range(N):
            if margin[i] < 1:
                dw += grad_fn(w, X[i], y[i])
        # average over ALL N examples, not only the violating ones
        dw /= N
        # gradient descent step
        w = w - eta * dw
        print('after %d iteration w equals: %s' % (k+1,w))
    # previously the result was only printed and then lost
    return w

In [1]:
# Run full-batch gradient descent on the toy data.
# NOTE: the cell defining solve_via_gradient_descent (and the data cell that
# creates x_in / y_target) must be executed first; the NameError recorded
# below is what this cell produces when that definition is missing.
solve_via_gradient_descent( X=x_in, y=y_target)

NameError: name 'solve_via_gradient_descent' is not defined

In [29]:
def solve_via_sgd(X, y, niter=100, eta=0.1, grad_fn=None):
    '''
    Stochastic gradient descent that visits the examples cyclically.

    Step k uses example `k % N`, so the data are swept in order
    0, 1, ..., N-1, 0, 1, ... (no random sampling is performed).

    Given `X` - matrix of shape (N, D), one example per row
          `y` - array of N target labels
          `niter` - number of single-example steps to take
          `eta` - learning rate (step size)
          `grad_fn` - optional callable `grad_fn(w, x_i, y_i)` returning the
                      gradient for one example; when omitted the
                      notebook-level `gradfn` is used
    Return the weight vector (shape (D,)) after `niter` steps.
    The weights are also printed after every step.
    '''
    if grad_fn is None:
        # looked up at call time so the default follows the notebook's `gradfn`
        grad_fn = gradfn
    N, D = np.shape(X)
    # initialize all the weights to zeros
    w = np.zeros([D])
    for k in range(niter):
        # cycle through the rows; using N instead of a hard-coded 8 keeps this
        # correct for datasets of any size
        idx = k % N
        sample_X = X[idx, :]
        sample_y = y[idx]
        margin = sample_y * (sample_X @ w)
        # an example with margin >= 1 produces a zero update
        dw = np.zeros([D])
        if margin < 1:
            dw = grad_fn(w, sample_X, sample_y)
        w = w - eta * dw
        print('after %d iteration w equals: %s' % (k+1,w))
    # previously the result was only printed and then lost
    return w

In [30]:
#solve_via_sgd( X=x_in, y=y_target)

In [31]:
def solve_via_momentum(X, y, niter=30, eta=0.5, alpha=0.5, grad_fn=None):
    '''
    Full-batch gradient descent with momentum.

    Each step updates a velocity `v = alpha * v - eta * dw` and then moves
    the weights by `w = w + v`.

    Given `X` - matrix of shape (N, D), one example per row
          `y` - array of N target labels
          `niter` - number of steps to take
          `eta` - learning rate (step size)
          `alpha` - momentum coefficient applied to the previous velocity
          `grad_fn` - optional callable `grad_fn(w, x_i, y_i)` returning the
                      gradient contribution of one example; when omitted the
                      notebook-level `gradfn` is used
    Return the weight vector (shape (D,)) after `niter` steps.
    The weights are also printed after every step.
    '''
    if grad_fn is None:
        # looked up at call time so the default follows the notebook's `gradfn`
        grad_fn = gradfn
    N, D = np.shape(X)
    # initialize the weights and the velocity to zeros
    w = np.zeros([D])
    v = np.zeros([D])
    for k in range(niter):
        # accumulate the gradient over the examples that violate the margin;
        # examples with margin >= 1 contribute nothing
        dw = np.zeros([D])
        margin = y * (X @ w)
        for i in range(N):
            if margin[i] < 1:
                dw += grad_fn(w, X[i], y[i])
        # average over ALL N examples, not only the violating ones
        dw /= N
        # blend the previous velocity with the new descent direction
        v = alpha * v - eta * dw
        w = w + v
        print('after %d iteration w equals: %s' % (k+1,w))
    # previously the result was only printed and then lost
    return w

In [32]:
#solve_via_momentum( X=x_in, y=y_target)

In [33]:
def solve_via_adagrad(X, y, niter=100, eta=0.1, grad_fn=None):
    '''
    AdaGrad with single-example gradients, visiting the examples cyclically.

    Step k uses example `k % N` (no random sampling). Each coordinate of the
    step is divided by the square root of the running sum of that
    coordinate's squared gradients, plus a small constant.

    Given `X` - matrix of shape (N, D), one example per row
          `y` - array of N target labels
          `niter` - number of single-example steps to take
          `eta` - base learning rate
          `grad_fn` - optional callable `grad_fn(w, x_i, y_i)` returning the
                      gradient for one example; when omitted the
                      notebook-level `gradfn` is used
    Return the weight vector (shape (D,)) after `niter` steps.
    The weights are also printed after every step.
    '''
    if grad_fn is None:
        # looked up at call time so the default follows the notebook's `gradfn`
        grad_fn = gradfn
    N, D = np.shape(X)
    # initialize all the weights to zeros
    w = np.zeros([D])
    # running per-coordinate sum of squared gradients
    gradients_sum = np.zeros([D])
    # keeps the denominator non-zero before any gradient has been seen
    delta = 1e-6
    for k in range(niter):
        # cycle through the rows; using N instead of a hard-coded 8 keeps this
        # correct for datasets of any size
        idx = k % N
        sample_X = X[idx, :]
        sample_y = y[idx]
        margin = sample_y * (sample_X @ w)
        # an example with margin >= 1 produces a zero update
        dw = np.zeros([D])
        if margin < 1:
            dw = grad_fn(w, sample_X, sample_y)
        # accumulate the sum of squared gradients (element-wise)
        gradients_sum += dw ** 2
        w = w - eta * dw / (np.sqrt(gradients_sum) + delta)
        print('after %d iteration w equals: %s' % (k+1,w))
    # previously the result was only printed and then lost
    return w

In [34]:
#solve_via_adagrad( X=x_in, y=y_target)

In [36]:
class Perceptron():
    '''
    Perceptron classifier without a separate bias term.

    The model is a single weight vector `w`; an example `x` with label `y`
    counts as misclassified when `y * <w, x> <= 0`. To learn a bias, add a
    constant feature to the inputs before training.
    '''
    def __init__(self, max_iter = 10):
        # maximum number of passes over the training set
        self.max_iter = max_iter
        # weight vector; replaced by a numpy array of zeros in train()
        self.w = []
        # dataset dimensions, filled in by train()
        self.no_examples = 0
        self.no_features = 0

    def train(self, X, Y):
        '''
        Apply the perceptron algorithm to learn `self.w` from X and Y.

        Given `X` - array-like of shape (no_examples, no_features)
              `Y` - array-like of no_examples labels (the data in this
                    notebook use +1 / -1)
        Sets `self.w`, `self.no_examples` and `self.no_features`; returns
        nothing. Every weight update is printed, followed by a convergence
        message or a warning when `max_iter` passes were not enough.
        '''
        # accept plain Python lists as well: with list rows `-1 * X[jj]` would
        # be list repetition (an empty list) instead of a negated vector
        X = np.asarray(X, dtype=float)
        Y = np.asarray(Y)
        # we set the number of examples and the number of features according to the matrix X
        self.no_examples, self.no_features = np.shape(X)
        # we initialize the weight vector as the zero vector
        self.w = np.zeros(self.no_features)

        # defined before the loop so that the final check below also works
        # when max_iter <= 0 and the loop body never runs
        w_updated = True
        # we only run a limited number of passes over the data
        for ii in range(0, self.max_iter):
            # at the beginning of each pass no misclassified example has been found yet
            w_updated = False
            # we traverse all the training examples
            for jj in range(0, self.no_examples):
                # raw score of example jj under the current weights
                a = np.dot(self.w, X[jj])
                # misclassified (or exactly on the decision boundary)
                if Y[jj] * a <= 0:
                    w_updated = True
                    # perceptron update: move w towards y * x
                    self.w += Y[jj] * X[jj]
                    # the number printed is the 1-based index of the example
                    # that triggered the update
                    print('after %d iteration w equals: %s' % (jj+1,self.w))
            # a full pass without any update means every example is classified correctly
            if not w_updated:
                # ii is the 0-based index of the first pass without updates
                print("Convergence reached in %i iterations." % ii)
                break
        # the last pass (if any) still found a misclassified example
        if w_updated:
            print(
            """
            WARNING: convergence not reached in %i iterations.
            Either dataset is not linearly separable, 
            or max_iter should be increased
            """ % self.max_iter
                )

In [41]:
# Train a perceptron with the default max_iter (10) on the toy data;
# train() prints every weight update and the convergence status.
p = Perceptron()
p.train(X=x_in, Y=y_target)
# Optional sanity check: print the raw scores x_in @ p.w next to the labels
# (a score is correct when its sign matches the label).
#print(x_in @ p.w, y_target)

after 1 iteration w equals: [-0.5   0.25 -0.8  -1.  ]
after 6 iteration w equals: [-0.35  0.75 -0.85 -0.75]
Convergence reached in 1 iterations.
[ 1.7925  1.11   -0.4625 -0.325   1.71   -0.1775  1.95   -0.475 ] [ 1  1 -1 -1  1 -1  1 -1]
