# Week 4
# day1: 29 Aug 2022

4. Learn about Gradient Descent and its below variants:

    Momentum

    Nesterov

    Adagrad

    RMSProp

    Adam
5. Implement all the above in Numpy
6. How does the "Exponential weighted average" lecture given in the readings   relate to some of the variants of Gradient Descent?

In [1]:
import numpy as np

In [2]:
x = np.array([1.0, 3.0, 7.0, 2.0, 5.0, 4.0])
y = np.array([4.0, 9.0, 20.0, 7.0, 15.0, 11.0])


def linear_regression(x, m, b):
    yhat = x * m + b
    return yhat

In [3]:
# Gradient descent
class GradientDescent:
    def __init__(self):
        self.learning_rate = 0.001
        self.w = 1
        self.b = 0

    def update_weight(self, x, y):
        yhat = self.w * x + self.b
        self.w = self.w + self.learning_rate * (-2 * sum(x * (yhat - y)).mean())
        self.b = self.b + self.learning_rate * (-2 * sum(yhat - y).mean())

    def MSE(self, y, yhat):
        return np.square(np.subtract(y, yhat)).mean()

    def fit(self, x, y, epochs=200):
        history = []
        for e in range(epochs):
            self.update_weight(x, y)
            loss = self.MSE(y, (self.w * x + self.b))
            if e % 10 == 0:
                print(f"Epoch: {e}, Loss: {loss}")
                print(f"weight:{self.w},bias:{self.b}")
            history.append(loss)
            if loss <= 1:
                print(f"Epoch: {e}, Loss: {loss}")
                print(f"weight:{self.w},bias:{self.b}")
                return history
        return history

In [4]:
gd = GradientDescent()
gd.fit(x, y)
print(f"yhat:{linear_regression(x,gd.w,gd.b)}")

Epoch: 0, Loss: 39.748544)
weight:1.4,bias:0.088
Epoch: 9, Loss: 0.8453337832672307)
weight:2.67852137518646,bias:0.38249332229004845
yhat:[ 3.0610147   8.41805745 19.13214295  5.73953607 13.7751002  11.09657882]


In [5]:
# Momentum
class MomentumGradientDescent:
    def __init__(self):
        self.learning_rate = 0.01
        self.momentum = 0.9
        self.w = 0.1
        self.b = 0
        self.update_w = 0
        self.update_b = 0

    def update_weight(self, x, y):
        yhat = self.w * x + self.b
        self.update_w = self.momentum * self.update_w + self.learning_rate * (
            -2 * sum(x * (y - yhat)).mean()
        )
        self.update_b = self.momentum * self.update_b + self.learning_rate * (
            -2 * sum(y - yhat).mean()
        )

    def MSE(self, y, yhat):
        return np.square(np.subtract(y, yhat)).mean()

    def fit(self, x, y, epochs=200):
        history = []
        for e in range(epochs):
            self.update_weight(x, y)
            self.w -= self.update_w
            self.b -= self.update_b
            loss = self.MSE(y, (self.w * x + self.b))
            if e % 10 == 0:
                print(f"Epoch: {e}, Loss: {loss}")
                print(f"weight:{self.w},bias:{self.b}")
            history.append(loss)
            if loss <= 1:
                print(f"Epoch: {e}, Loss: {loss}")
                print(f"weight:{self.w},bias:{self.b}")
                return history
        return history

In [6]:
mgd = MomentumGradientDescent()
mgd.fit(x, y)
print(f"yhat:{linear_regression(x,mgd.w,mgd.b)}")

Epoch: 0, Loss: 191.13175999999984)
weight:5.971999999999999,bias:1.276
Epoch: 10, Loss: 41.27352476256328)
weight:1.193298498062794,bias:0.90349420802679
Epoch: 20, Loss: 15.495612372224036)
weight:1.7100754931247293,bias:1.294901243640644
Epoch: 25, Loss: 0.4361180426951019)
weight:2.5191200581112025,bias:1.372701473355113
yhat:[ 3.89182153  8.93006165 19.00654188  6.41094159 13.96830176 11.44918171]


In [7]:
# Nestrov
class NestrovAcceleratedGradientDescent:
    def __init__(self):
        self.learning_rate = 0.01
        self.momentum = 0.9
        self.w = 0.1
        self.b = 0
        self.update_w = 0
        self.update_b = 0

    def update_weight(self, x, y):
        yhat = self.w * x + self.b
        self.w_look_ahead = (
            -2 * sum(x * (y - yhat)).mean()
        ) - self.momentum * self.update_w
        self.b_look_ahead = (-2 * sum(y - yhat).mean()) - self.momentum * self.update_b
        yhat_look_ahead = self.w_look_ahead * x + self.b_look_ahead
        self.update_w = self.momentum * self.update_w + self.learning_rate * (
            -2 * sum(x * (y - yhat_look_ahead)).mean()
        )
        self.update_b = self.momentum * self.update_b + self.learning_rate * (
            -2 * sum(y - yhat_look_ahead).mean()
        )

    def MSE(self, y, yhat):
        return np.square(np.subtract(y, yhat)).mean()

    def fit(self, x, y, epochs=200):
        history = []
        for e in range(epochs):
            self.update_weight(x, y)
            self.w -= self.update_w
            self.b -= self.update_b
            loss = self.MSE(y, (self.w * x + self.b))
            if e % 10 == 0:
                print(f"Epoch: {e}, Loss: {loss})")
            history.append(loss)
            if loss <= 1:
                print(f"Epoch: {e}, Loss: {loss})")
                return history
        return history

In [8]:
def linear_regression(x, m, b):
    yhat = x * m + b
    return yhat


x = np.array([1.0, 3.0, 7.0, 2.0, 5.0, 4.0])
y = np.array([4.0, 9.0, 20.0, 7.0, 15.0, 11.0])
mgd = MomentumGradientDescent()
mgd.fit(x, y)
print(gd.w, mgd.b)
print(linear_regression(x, gd.w, mgd.b))

Epoch: 0, Loss: 191.13175999999984)
weight:5.971999999999999,bias:1.276
Epoch: 10, Loss: 41.27352476256328)
weight:1.193298498062794,bias:0.90349420802679
Epoch: 20, Loss: 15.495612372224036)
weight:1.7100754931247293,bias:1.294901243640644
Epoch: 25, Loss: 0.4361180426951019)
weight:2.5191200581112025,bias:1.372701473355113
2.67852137518646 1.372701473355113
[ 4.05122285  9.4082656  20.1223511   6.72974422 14.76530835 12.08678697]
