In [13]:
import matplotlib
import numpy as np
import random

In [14]:
# Toy training set: eight samples (rows), four features (columns).
X = np.array([
    [-0.5, 0.25, -0.8, -1],
    [-1, -0.1, -0.1, -1],
    [0.5, 0, 0.25, 0.1],
    [-0.2, -0.3, 0.2, 0],
    [-0.8, 0, -0.8, -1],
    [-0.15, -0.5, 0.05, -0.25],
    [-1, 0, -1, -1],
    [0, -0.25, 0.25, 0.1],
])
# Class labels in {+1, -1}, one per row of X.
y_target = np.array([1, 1, -1, -1, 1, -1, 1, -1])
# Optional preprocessing, currently disabled:
#   * prepend a constant feature 1 so that the bias is learned as one more
#     weight instead of being handled separately:
#       x_in = np.concatenate([np.ones([np.shape(X)[0], 1]), X], axis=1)
#   * normalize the data:
#       x_in = preprocessing.normalize(x_in)
#     NOTE(review): `preprocessing` is not imported in this notebook
#     (presumably sklearn.preprocessing) - confirm before re-enabling.
# With both steps off, the solvers receive the raw features unchanged.
x_in = X

In [15]:
def gradfn(weights, X, y):
    '''
    Per-sample gradient of the squared-hinge term 0.5 * (1 - y * w.x)**2.

    Given `weights` - current guess of the weight vector, shape (D,)
          `X` - feature vector of ONE sample, shape (D,)
          `y` - target label of that sample (the notebook uses +1 / -1)
    Return the gradient with respect to each weight, shape (D,).

    The margin condition is NOT checked here: the solvers in this notebook
    only call this function for samples whose margin y * w.x is below 1.
    '''
    # Signed margin of the sample under the current weights.
    margin = y * np.dot(weights, X)
    # d/dw [0.5 * (1 - margin)**2] = y * (margin - 1) * x
    return y * (margin - 1) * X

In [16]:
def solve_via_gradient_descent(X, y, niter=50, eta=0.5, verbose=True):
    '''
    Fit a weight vector with full-batch gradient descent.

    Every iteration averages the per-sample gradients returned by `gradfn`
    over the samples whose margin y_i * (x_i . w) is below 1 (the sum is
    divided by the total number of samples N, not by the number of
    violators) and takes a step of size `eta` against that average.

    Given `X` - feature matrix of shape (N, D)
          `y` - target labels of shape (N,)
          `niter` - number of iterations to run
          `eta` - learning rate
          `verbose` - print the weights after every iteration (default True)
    Return the weight vector of shape (D,) after the last iteration.
    '''
    N, D = np.shape(X)
    # initialize all the weights to zeros
    w = np.zeros([D])
    for k in range(niter):
        margin = y * (X @ w)
        # accumulate the gradient over margin-violating samples only
        dw = np.zeros([D])
        for i in np.flatnonzero(margin < 1):
            dw += gradfn(w, X[i], y[i])
        dw /= N
        # gradient descent step
        w = w - eta * dw
        if verbose:
            print('after %d iteration w equals: %s' % (k+1,w))
    # Hand the result back so callers do not have to parse the printed log.
    return w

In [17]:
#solve_via_gradient_descent( X=x_in, y=y_target)

In [18]:
def solve_via_sgd(X, y, niter=100, eta=0.1, verbose=True):
    '''
    Fit a weight vector with single-sample gradient updates.

    Samples are visited cyclically in index order (0, 1, ..., N-1, 0, ...);
    no random sampling is performed. A sample whose margin y_i * (x_i . w)
    is at least 1 leaves the weights unchanged for that iteration.

    Given `X` - feature matrix of shape (N, D)
          `y` - target labels of shape (N,)
          `niter` - number of single-sample updates to run
          `eta` - learning rate
          `verbose` - print the weights after every iteration (default True)
    Return the weight vector of shape (D,) after the last iteration.
    '''
    N, D = np.shape(X)
    # initialize all the weights to zeros
    w = np.zeros([D])
    for k in range(niter):
        # Cycle through the dataset; use its actual size rather than a
        # hard-coded 8 so any number of samples works.
        idx = k % N
        sample_X = X[idx]
        sample_y = y[idx]
        margin = sample_y * (sample_X @ w)
        dw = np.zeros([D])
        if margin < 1:
            dw = gradfn(w, sample_X, sample_y)
        w = w - eta * dw
        if verbose:
            print('after %d iteration w equals: %s' % (k+1,w))
    # Hand the result back so callers do not have to parse the printed log.
    return w

In [19]:
#solve_via_sgd( X=x_in, y=y_target)

In [20]:
def solve_via_momentum(X, y, niter=30, eta=0.5, alpha=0.5, verbose=True):
    '''
    Fit a weight vector with full-batch gradient descent plus momentum.

    Every iteration averages the per-sample gradients returned by `gradfn`
    over the samples whose margin y_i * (x_i . w) is below 1 (the sum is
    divided by the total number of samples N), then updates

        v <- alpha * v - eta * dw
        w <- w + v

    Given `X` - feature matrix of shape (N, D)
          `y` - target labels of shape (N,)
          `niter` - number of iterations to run
          `eta` - learning rate
          `alpha` - momentum coefficient applied to the previous velocity
          `verbose` - print the weights after every iteration (default True)
    Return the weight vector of shape (D,) after the last iteration.
    '''
    N, D = np.shape(X)
    # initialize all the weights and the velocity to zeros
    w = np.zeros([D])
    v = np.zeros([D])
    for k in range(niter):
        margin = y * (X @ w)
        # accumulate the gradient over margin-violating samples only
        dw = np.zeros([D])
        for i in np.flatnonzero(margin < 1):
            dw += gradfn(w, X[i], y[i])
        dw /= N
        # velocity keeps a decaying memory of past steps
        v = alpha * v - eta * dw
        w = w + v
        if verbose:
            print('after %d iteration w equals: %s' % (k+1,w))
    # Hand the result back so callers do not have to parse the printed log.
    return w

In [21]:
# Run the momentum solver on the toy dataset with its default
# hyper-parameters (niter=30, eta=0.5, alpha=0.5).
solve_via_momentum(x_in, y_target)

after 1 iteration w equals: [-0.215625  0.075    -0.215625 -0.246875]
after 2 iteration w equals: [-0.41426709  0.18143604 -0.43223877 -0.47141748]
after 3 iteration w equals: [-0.51730832  0.29761412 -0.57866927 -0.58156556]
after 4 iteration w equals: [-0.56462802  0.41650627 -0.68608919 -0.62819071]
after 5 iteration w equals: [-0.58339058  0.53346497 -0.77144475 -0.64304651]
after 6 iteration w equals: [-0.58800028  0.64597898 -0.84367065 -0.64236936]
after 7 iteration w equals: [-0.58604671  0.75280905 -0.90751762 -0.63443796]
after 8 iteration w equals: [-0.58148515  0.85345506 -0.96554994 -0.62344906]
after 9 iteration w equals: [-0.5763369   0.94783501 -1.01918744 -0.61150604]
after 10 iteration w equals: [-0.57159769  1.03609513 -1.06924887 -0.59963986]
after 11 iteration w equals: [-0.56772267  1.11849939 -1.1162353  -0.58833349]
after 12 iteration w equals: [-0.56488495  1.19536622 -1.16047794 -0.57779143]
after 13 iteration w equals: [-0.56311322  1.26703296 -1.20221537 -0.