<a href="https://colab.research.google.com/github/unique91/Optimization-Algorithms/blob/main/GradientDescent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
plt.style.use('ggplot')

In [13]:
# Global matplotlib defaults: wide figures with larger title / axis-label fonts
plt.rcParams.update({
  "figure.figsize": (15, 6),
  "axes.titlesize": 16,
  "axes.labelsize": 14,
})
# Forwarded to plt.show(block=...) by the plotting code; False keeps show() non-blocking
block_plot = False

In [5]:
def create_data(num_data=30, seed=42, noise_std=0.3):
  """Generate a noisy, roughly linear toy dataset where y ~ x.

  Args:
    num_data: Number of (x, y) samples to draw.
    seed: Global TensorFlow random seed, set before sampling so that
      repeated calls produce the same data.
    noise_std: Standard deviation of the Gaussian noise added to y.

  Returns:
    Tuple (x, y) of 1-D tensors with `num_data` elements each, where x is
    10 times a uniform sample and y = x + Gaussian noise.
  """
  # Fixed global seed for consistency between runs
  tf.random.set_seed(seed)
  # Create some data that is roughly linear
  x = 10 * tf.random.uniform(shape=[num_data])
  y = x + tf.random.normal(stddev=noise_std, shape=[num_data])

  return x, y

x, y = create_data()

In [2]:
# Shared hyper-parameters (each optimizer run copies these into local names)
m0 = 2            # starting guess for the slope m
lr0 = 5e-3        # learning rate (step size)
num_iter0 = 50    # number of update steps per run
max_los = 30.0    # upper y-limit used when plotting the loss curves

In [15]:
def plot_linear_model(x, y, m_best, xlim=(0, 10), ylim=(0, 10)):
  """Scatter the data and overlay the fitted line y = m_best * x.

  Args:
    x: 1-D inputs (anything np.asarray can convert, e.g. an eager tensor).
    y: 1-D targets, same length as x.
    m_best: Fitted slope; a Python number, 0-d array, or single-element array.
    xlim: (min, max) limits of the x axis.
    ylim: (min, max) limits of the y axis.

  Raises:
    ValueError: If m_best holds more than one element.

  Uses the notebook-level `block_plot` flag when showing the figure.
  """
  # Collapse m_best to a plain float: the SGD cells can hand over a length-1
  # array, and int() on such an array is deprecated in recent NumPy releases.
  slope = float(np.asarray(m_best).reshape(()))

  x_vals = np.asarray(x)
  y_vals = np.asarray(y)

  # Generate the line based on the optimal slope (two end points suffice).
  xplot = np.linspace(x_vals.min(), x_vals.max(), 2)
  yplot = slope * xplot

  # Plot the data and the model.
  # plt.figure must be *called*; the bare attribute is a no-op and would let
  # the plot land on whatever figure is still current.
  plt.figure()
  plt.xlim(xlim); plt.ylim(ylim)
  plt.plot(xplot, yplot, 'c-')
  plt.scatter(x_vals, y_vals, color='blue', s=20)
  plt.xlabel('x'); plt.ylabel('y')
  # Anchor the label to the visible axes (5% from the left, 95% up) so it stays
  # inside the plot even when the data does not start at zero.
  xc = xlim[0] + .05 * (xlim[1] - xlim[0])
  yc = ylim[0] + .95 * (ylim[1] - ylim[0])
  # Slope is truncated (not rounded) to three decimals for display.
  plt.text(xc, yc, 'Slope: ' + str(int(slope*1000)/1000), fontsize=14)
  plt.show(block=block_plot)

In [None]:
num_iter = num_iter0
lr = lr0
m = m0

# Loss for gradient descent (one entry per iteration)
loss_gd = tf.Variable(tf.zeros(shape=[num_iter]))

# Run full-batch gradient descent on the model y = m * x
for i in range(0, num_iter):
  # Gradient of the mean squared error w.r.t. m, using every data point
  g = -2 * tf.reduce_sum(x * (y - m * x)) / len(x)
  m = m - lr * g
  # Compute the loss (mean squared error) for the updated value of m
  e = y - m * x
  loss_gd[i].assign(tf.reduce_sum(tf.multiply(e,e))/len(x))

m_best = m.numpy()
# Note: this reports the loss after the final iteration
print('Minimum loss: ', loss_gd[-1].numpy())
print('Best parameter: ', m_best)

# Plot loss vs iterations
# plt.figure() must be called; the bare attribute `plt.figure` does nothing.
plt.figure()
plt.plot(loss_gd.numpy(), 'c-')
plt.xlim(0, num_iter); plt.ylim(0, max_los)
plt.ylabel('Loss'); plt.xlabel('Iterations')
plt.title('Gradient Descent')
plt.show(block=block_plot)

plot_linear_model(x, y, m_best)

In [None]:
num_iter = num_iter0
lr = lr0
m = m0

# Loss for stochastic gradient descent (one entry per iteration)
loss_sgd = tf.Variable(tf.zeros(num_iter))

for i in range(0, num_iter):
  # Randomly select a training data point.
  # A scalar index (shape []) keeps g and m scalar; a shape-[1] index would
  # turn m into a length-1 tensor and m_best into a length-1 array.
  k = tf.random.uniform([], minval=0, maxval=len(y), dtype=tf.dtypes.int32)
  x_k = tf.gather(x, k)
  y_k = tf.gather(y, k)

  # Calculate the gradient using a single data point
  g = -2 * x_k * (y_k - m * x_k)

  # Update the parameter m
  m = m - lr * g

  # Compute the loss for the updated value of m.
  # Averaged over the full dataset (mean squared error) so the curve is on the
  # same scale as the gradient-descent loss and fits the shared max_los axis.
  e = y - m * x
  loss_sgd[i].assign(tf.reduce_sum(tf.multiply(e, e)) / len(x))

m_best = m.numpy()
# Note: this reports the loss after the final iteration
print('Minimum loss: ', loss_sgd[-1].numpy())
print('Best parameter: ', m_best)

# Plot loss vs iterations
# plt.figure() must be called; the bare attribute `plt.figure` does nothing.
plt.figure()
plt.plot(loss_sgd.numpy(), 'c-')
plt.xlim(0, num_iter); plt.ylim(0, max_los)
plt.ylabel('Loss'); plt.xlabel('Iterations')
plt.title('Stochastic Gradient Descent')
plt.show(block=block_plot)

plot_linear_model(x, y, m_best)

In [None]:
num_iter = num_iter0
lr = lr0
m = m0
batch_size = 10

# Loss for Gradient Descent with Mini-Batch (one entry per iteration)
loss_sgd_mb = tf.Variable(tf.zeros(num_iter))

for i in range(0, num_iter):
  # Randomly select a batch of data points (sampled with replacement).
  # maxval is exclusive for integer sampling, so it must be len(y) for the
  # last sample to be selectable.
  k = tf.random.uniform([batch_size], minval=0, maxval=len(y), dtype=tf.dtypes.int32)
  x_batch = tf.gather(x, k)
  y_batch = tf.gather(y, k)

  # Calculate the gradient using a mini-batch (mean over the batch)
  g = -2 * tf.reduce_sum(x_batch * (y_batch - m * x_batch)) / batch_size

  # Update the parameter m
  m = m - lr * g

  # Compute the loss for the updated value of m.
  # e covers the whole dataset, so normalize by len(x) (not batch_size) to get
  # a mean squared error comparable with the other methods.
  e = y - m * x
  loss_sgd_mb[i].assign(tf.reduce_sum(tf.multiply(e, e)) / len(x))

b_best = m.numpy()

# Note: this reports the loss after the final iteration
print('Minimum loss: ', loss_sgd_mb[-1].numpy())
print('Best parameter: ', m.numpy())

# Plot loss vs iterations
# plt.figure() must be called; the bare attribute `plt.figure` does nothing.
plt.figure()
plt.plot(loss_sgd_mb.numpy(), 'c-')
plt.xlim(0, num_iter); plt.ylim(0, max_los)
plt.ylabel('Loss'); plt.xlabel('Iterations')
plt.title('Stochastic Gradient Descent with Mini-Batch')
plt.show(block=block_plot)

plot_linear_model(x, y, b_best)

In [None]:
# Compare all three methods together, one panel per optimizer
plt.figure(figsize=(20, 8))

panels = [
  (131, loss_gd, 'Gradient Descent'),
  (132, loss_sgd, 'Stochastic Gradient Descent'),
  (133, loss_sgd_mb, 'Stochastic Gradient Descent with Mini-Batch'),
]

for position, loss_var, title in panels:
  losses = loss_var.numpy()
  plt.subplot(position)
  plt.plot(losses, 'c-')
  # Each panel takes its x-range from its own loss history, so the cell does
  # not depend on a `num_iter` left over from whichever cell ran last.
  plt.xlim(0, len(losses)); plt.ylim(0, max_los)
  plt.ylabel('loss'); plt.xlabel('iterations')
  plt.title(title)

# Show explicitly, like the other plotting cells; this also avoids the stray
# Text(...) repr that a trailing plt.title(...) expression would display.
plt.show(block=block_plot)