Stochastic GD

In [None]:
import numpy as np
np.set_printoptions(precision=2)
from sklearn.datasets import make_regression

#x, y = make_regression(n_samples=10, n_features=2, noise=2, random_state=123)
#x = np.array([[0,2,3],[1,6,8]]).T
# Toy training set: three samples, two features each (columns are x1, x2).
x = np.array([
    [0, 1],
    [2, 6],
    [3, 8],
])
y = np.array([1, 1, 4])

# Design matrix: a leading column of ones is stacked in front of the
# features, so the first weight multiplies a constant 1 (bias term).
x_b = np.hstack((np.ones((len(x), 1)), x))

def cost_function(theta, x, y, N):
    """Return the halved mean squared error J(theta) of a linear model.

    Evaluates ``(1 / (2 * N)) * sum((x.dot(theta) - y) ** 2)``.

    Args:
        theta: Weight array; ``x.dot(theta)`` must be defined.
        x: NumPy design matrix, one sample per row.
        y: Targets. May be flat ``(N,)`` or a column ``(N, 1)``; when its
            shape differs from the prediction's but the element counts
            agree, it is reshaped to match the prediction.
        N: Normalising count used in the ``1 / (2 * N)`` factor.

    Returns:
        The scalar cost.
    """
    y_hat = x.dot(theta)
    y = np.asarray(y)
    # A flat (N,) target subtracted from an (N, 1) prediction would broadcast
    # to an (N, N) matrix and silently inflate the cost, so align shapes first.
    if y.shape != y_hat.shape and y.size == y_hat.size:
        y = y.reshape(y_hat.shape)
    return (1 / (2 * N)) * np.sum((y_hat - y) ** 2)

def stochastic_gradient_descent(alpha, x, y, ep=0.001, max_iter=10000):
    """Fit linear-regression weights with per-sample (stochastic) gradient descent.

    Each epoch visits the samples in a freshly shuffled order and applies one
    update ``theta -= alpha * xi.T.dot(xi.dot(theta) - yi)`` per sample. After
    the epoch the full-data cost is re-evaluated with ``cost_function``.
    Training stops when the cost changes by at most ``ep`` between two
    consecutive epochs, or when ``max_iter`` epochs have been run. At least
    one epoch is always executed.

    Progress is reported with ``print``. The iteration number shown in the
    "Converged" message is the zero-based index of the epoch that converged.

    Args:
        alpha: Learning rate (step size).
        x: 2-D NumPy design matrix, one sample per row.
        y: Targets, one per row of ``x``; a flat ``(N,)`` sequence or an
            ``(N, 1)`` column.
        ep: Tolerance on the absolute cost change between epochs.
        max_iter: Maximum number of epochs.

    Returns:
        The fitted weights, a ``(x.shape[1], 1)`` array when there is one
        target per sample.
    """
    n_samples = x.shape[0]
    print("Num of data = ", n_samples)

    # Keep targets as a column: a flat (N,) target would broadcast against the
    # (N, 1) predictions inside the cost and corrupt the convergence check.
    y = np.asarray(y).reshape(n_samples, -1)

    # Random initial weights drawn from [0, 1).
    theta = np.random.random((x.shape[1], 1))
    print("Init theta.shape = ", theta.shape)

    # Total error, J(theta), before any update.
    cost = cost_function(theta, x, y, n_samples)
    print("First J = ", cost)

    epoch = 0
    while True:
        # New random visiting order for every epoch.
        order = np.arange(n_samples)
        np.random.shuffle(order)

        for i in order:
            # Slicing (not indexing) keeps each sample 2-D so the gradient
            # comes out as a column with the same shape as theta.
            xi = x[i:i + 1]
            yi = y[i:i + 1]
            theta = theta - alpha * xi.T.dot(xi.dot(theta) - yi)

        # Error after one full pass over the data.
        new_cost = cost_function(theta, x, y, n_samples)
        converged = abs(cost - new_cost) <= ep
        if converged:
            print("       Converged, iterations: ", epoch, "/", max_iter)

        cost = new_cost
        epoch += 1

        if converged:
            break
        # ``>=`` (rather than ``==``) also stops runs given max_iter <= 0, and
        # this branch is skipped on convergence so only one message is shown.
        if epoch >= max_iter:
            print('       Max iterations exceeded!')
            break

    return theta

if __name__ == '__main__':
    print("start main")
    print(x_b.shape)

    # The trainer works with targets stored as a single column.
    y = np.reshape(y, (-1, 1))
    print(y.shape)

    # Training process
    alpha = 0.01  # learning rate
    theta = stochastic_gradient_descent(
        alpha,
        x_b,
        y,
        ep=1e-12,
        max_iter=10 ** 6,
    )
    print("Theta = ", theta)

    # Predict for one new sample, adding the same leading column of ones
    # that the training matrix carries.
    xtest = np.array([[4, 9]])
    xtest_b = np.hstack((np.ones((len(xtest), 1)), xtest))
    y_p = np.dot(xtest_b, theta)
    print("y predict = ", y_p)

start main
(3, 3)
(3, 1)
Num of data =  3
Init theta.shape =  (3, 1)
First J =  1.6112777440949486
       Converged, iterations:  72444 / 1000000
Theta =  [[ 7.]
 [15.]
 [-6.]]
y predict =  [[13.]]


Mini-batch GD (size = 2)

In [None]:
import numpy as np
def cost_function(theta, x, y, N):
    """Compute the linear-regression cost ``J = 1/(2N) * sum((x.theta - y)^2)``.

    Args:
        theta: Weight array; ``x.dot(theta)`` must be defined.
        x: NumPy design matrix, one sample per row.
        y: Targets, flat ``(N,)`` or column ``(N, 1)``. If the shape differs
            from the prediction's while the element counts match, the targets
            are reshaped to the prediction's shape before subtracting.
        N: Count used in the ``1 / (2 * N)`` normalisation.

    Returns:
        The scalar cost.
    """
    y_hat = x.dot(theta)
    targets = np.asarray(y)
    if targets.shape != y_hat.shape and targets.size == y_hat.size:
        # Without this, (N, 1) - (N,) broadcasts to (N, N) and the summed
        # squared error is silently wrong.
        targets = targets.reshape(y_hat.shape)
    c = (1 / (2 * N)) * np.sum((y_hat - targets) ** 2)
    return c

def mini_batch_gradient_descent(alpha, x, y, batch_size, ep=0.001, max_iter=10000):
    """Fit linear-regression weights with mini-batch gradient descent.

    Each epoch walks the data in stored order (no shuffling) in consecutive
    slices of ``batch_size`` rows and applies one averaged-gradient update per
    slice. After the epoch the full-data cost is re-evaluated with
    ``cost_function``. Training stops when the cost changes by at most ``ep``
    between two consecutive epochs, or when ``max_iter`` epochs have been run.
    At least one epoch is always executed.

    Progress is reported with ``print``. The iteration number shown in the
    "Converged" message is the zero-based index of the epoch that converged.

    Args:
        alpha: Learning rate (step size).
        x: 2-D NumPy design matrix, one sample per row.
        y: Targets, one per row of ``x``; a flat ``(N,)`` sequence or an
            ``(N, 1)`` column.
        batch_size: Number of rows per update; must be at least 1.
        ep: Tolerance on the absolute cost change between epochs.
        max_iter: Maximum number of epochs.

    Returns:
        The fitted weights, a ``(x.shape[1], 1)`` array when there is one
        target per sample.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # A negative step would make the batch loop empty, so no update would ever
    # happen and the run would falsely report convergence.
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    n_samples = x.shape[0]
    print("Num of data = ", n_samples)

    # Keep targets as a column: a flat (N,) target would broadcast against the
    # (N, 1) predictions and corrupt both the gradient and the cost.
    y = np.asarray(y).reshape(n_samples, -1)

    # Random initial weights drawn from [0, 1).
    theta = np.random.random((x.shape[1], 1))
    print("Init theta.shape = ", theta.shape)

    # Total error, J(theta), before any update.
    cost = cost_function(theta, x, y, n_samples)
    print("First J = ", cost)

    epoch = 0
    while True:
        for start in range(0, n_samples, batch_size):
            x_batch = x[start:start + batch_size]
            y_batch = y[start:start + batch_size]
            grad = x_batch.T.dot(x_batch.dot(theta) - y_batch)
            # Average over the rows actually in this slice: the last batch is
            # shorter whenever batch_size does not divide the sample count.
            theta = theta - alpha * (1 / len(x_batch)) * grad

        # Error after one full pass over the data.
        new_cost = cost_function(theta, x, y, n_samples)
        converged = abs(cost - new_cost) <= ep
        if converged:
            print("       Converged, iterations: ", epoch, "/", max_iter)

        cost = new_cost
        epoch += 1

        if converged:
            break
        # ``>=`` (rather than ``==``) also stops runs given max_iter <= 0, and
        # this branch is skipped on convergence so only one message is shown.
        if epoch >= max_iter:
            print('       Max iterations exceeded!')
            break

    return theta
if __name__ == '__main__':
    print("start main")
    print(x_b.shape)

    # The trainer works with targets stored as a single column.
    y = np.reshape(y, (-1, 1))
    print(y.shape)

    # Training process
    alpha = 0.01  # learning rate
    batch_size = 2
    theta = mini_batch_gradient_descent(
        alpha,
        x_b,
        y,
        batch_size,
        ep=1e-12,
        max_iter=10 ** 6,
    )
    print("Theta = ", theta)

    # Predict for one new sample, adding the same leading column of ones
    # that the training matrix carries.
    xtest = np.array([[4, 9]])
    xtest_b = np.hstack((np.ones((len(xtest), 1)), xtest))
    y_p = np.dot(xtest_b, theta)
    print("y predict = ", y_p)

start main
(3, 3)
(3, 1)
Num of data =  3
Init theta.shape =  (3, 1)
First J =  0.5089249512702181
       Converged, iterations:  169658 / 1000000
Theta =  [[ 7.]
 [15.]
 [-6.]]
y predict =  [[13.]]
