# Week 4
# day2: 30 Aug 2022

4. Learn about Gradient Descent and its below variants:

    Momentum

    Nesterov

    Adagrad

    RMSProp

    Adam
5. Implement all the above in Numpy

[nestrov and adagrad added]

In [1]:
import numpy as np

In [2]:
x = np.array([1.0, 3.0, 7.0, 2.0, 5.0, 4.0])
y = np.array([4.0, 9.0, 20.0, 7.0, 15.0, 11.0])


def linear_regression(x, m, b):
    yhat = x * m + b
    return yhat

## Gradient Descent

In [3]:
# Gradient descent
class GradientDescent:
    def __init__(self):
        self.learning_rate = 0.001
        self.w = 9
        self.b = 0

    def update_weight(self, x, y):
        yhat = self.w * x + self.b
        self.w = self.w - self.learning_rate * (-2 * sum(x * (y - yhat)).mean())
        self.b = self.b - self.learning_rate * (-2 * sum(y - yhat).mean())

    def MSE(self, y, yhat):
        return np.square(np.subtract(y, yhat)).mean()

    def fit(self, x, y, epochs=200):
        history = []
        for e in range(epochs):
            self.update_weight(x, y)
            loss = self.MSE(y, (self.w * x + self.b))
            if e % 10 == 0:
                print(f"Epoch: {e}, Loss: {loss}")
                print(f"weight:{self.w},bias:{self.b}")
            history.append(loss)
            if loss <= 1:
                print(f"Epoch: {e}, Loss: {loss}")
                print(f"weight:{self.w},bias:{self.b}")
                return history
        return history

In [4]:
gd = GradientDescent()
gd.fit(x, y)
print("Prediction:")
print(f"yhat:{linear_regression(x,gd.w,gd.b)}")

Epoch: 0, Loss: 392.9781973333333
weight:7.736,bias:-0.264
Epoch: 10, Loss: 4.440984551944799
weight:3.569367530187586,bias:-1.0900302710296121
Epoch: 20, Loss: 1.4910552860496364
weight:3.1983207104846607,bias:-1.1047724125864504
Epoch: 30, Loss: 1.4063155896133022
weight:3.15457713984864,bias:-1.0510573004841588
Epoch: 40, Loss: 1.3460008693529024
weight:3.1393310956949874,bias:-0.9928443907244823
Epoch: 50, Loss: 1.2888678977593233
weight:3.1268323692561566,bias:-0.9356082483828032
Epoch: 60, Loss: 1.2346081693321485
weight:3.1148551006538137,bias:-0.8797857821942265
Epoch: 70, Loss: 1.1830761406341836
weight:3.103200278433236,bias:-0.8253807833155423
Epoch: 80, Loss: 1.1341346792547704
weight:3.09184369185081,bias:-0.7723605909420802
Epoch: 90, Loss: 1.0876535546911934
weight:3.0807763694016863,bias:-0.7206902437229002
Epoch: 100, Loss: 1.0435090833202458
weight:3.069990827772471,bias:-0.6703354005758341
Epoch: 110, Loss: 1.0015837992201375
weight:3.059479882703988,bias:-0.62126257

## Momentum Accelerated Gradient Descent

In [5]:
# Momentum
class MomentumGradientDescent:
    def __init__(self):
        self.learning_rate = 0.01
        self.momentum = 0.9
        self.w = 9
        self.b = 0
        self.update_w = 0
        self.update_b = 0

    def update_weight(self, x, y):
        yhat = self.w * x + self.b
        self.update_w = self.momentum * self.update_w + self.learning_rate * (
            -2 * sum(x * (y - yhat)).mean()
        )
        self.update_b = self.momentum * self.update_b + self.learning_rate * (
            -2 * sum(y - yhat).mean()
        )

    def MSE(self, y, yhat):
        return np.square(np.subtract(y, yhat)).mean()

    def fit(self, x, y, epochs=200):
        history = []
        for e in range(epochs):
            self.update_weight(x, y)
            self.w -= self.update_w
            self.b -= self.update_b
            loss = self.MSE(y, (self.w * x + self.b))
            if e % 10 == 0:
                print(f"Epoch: {e}, Loss: {loss}")
                print(f"weight:{self.w},bias:{self.b}")
            history.append(loss)
            if loss <= 1:
                print(f"Epoch: {e}, Loss: {loss}")
                print(f"weight:{self.w},bias:{self.b}")
                return history
        return history

In [6]:
mgd = MomentumGradientDescent()
mgd.fit(x, y)
print("Prediction:")
print("yhat:", linear_regression(x, mgd.w, mgd.b))

Epoch: 0, Loss: 882.6997333333334
weight:-3.6400000000000006,bias:-2.64
Epoch: 10, Loss: 189.83126432585706
weight:5.850688050169891,bias:1.7932931384780255
Epoch: 20, Loss: 70.90183470769233
weight:4.4046935820368285,bias:2.5186095375955104
Epoch: 30, Loss: 36.11273870396934
weight:1.268422195917521,bias:1.019642338552188
Epoch: 36, Loss: 0.41546583455003255
weight:2.8148830446765354,bias:1.0091843554380326
Prediction:
yhat: [ 3.8240674   9.45383349 20.71336567  6.63895044 15.08359958 12.26871653]


## Nestrov Accelerated Gradient Descent


In [7]:
# Nestrov
class NestrovAcceleratedGradientDescent:
    def __init__(self):
        self.learning_rate = 0.001
        self.momentum = 0.9
        self.w = 9
        self.b = 0
        self.update_w = 0
        self.update_b = 0
        self.prev_w = 1
        self.prev_b = 0

    def update_weight(self, x, y):
        self.update_w = self.momentum * self.prev_w
        self.update_b = self.momentum * self.prev_b
        yhat_look_ahead = (self.w - self.update_w) * x + (self.b - self.update_b)
        self.update_w = self.momentum * self.prev_w + self.learning_rate * (
            -2 * sum(x * (y - yhat_look_ahead)).mean()
        )
        self.update_b = self.momentum * self.prev_b + self.learning_rate * (
            -2 * sum(y - yhat_look_ahead).mean()
        )
        self.w -= self.update_w
        self.b -= self.update_b
        self.prev_w = self.update_w
        self.prev_b = self.update_b

    def MSE(self, y, yhat):
        return np.square(np.subtract(y, yhat)).mean()

    def fit(self, x, y, epochs=200):
        history = []
        for e in range(epochs):
            self.update_weight(x, y)

            loss = self.MSE(y, (self.w * x + self.b))
            history.append(loss)
            if e % 10 == 0:
                print(f"Epoch: {e}, Loss: {loss})")
                print(f"weight:{self.w},bias:{self.b}")
            if loss <= 1:
                print(f"Epoch: {e}, Loss: {loss})")
                print(f"weight:{self.w},bias:{self.b}")
                return history
        return history

In [8]:
ngd = NestrovAcceleratedGradientDescent()
ngd.fit(x, y)
print(ngd.w, ngd.b)
print("Prediction:")
print("yhat:", linear_regression(x, ngd.w, ngd.b))

Epoch: 0, Loss: 285.3847069333333)
weight:7.0232,bias:-0.2244
Epoch: 10, Loss: 7.521395494661022)
weight:3.4862203458884564,bias:0.37097203124805433
Epoch: 14, Loss: 0.9061846024676353)
weight:2.986027250291028,bias:0.576608846920296
2.986027250291028 0.576608846920296
Prediction:
yhat: [ 3.5626361   9.5346906  21.4787996   6.54866335 15.5067451  12.52071785]


## AdaGrad

In [9]:
x = np.array([1.0,0.0,0.0, 3.0,0.0,0.0,0.0, 7.0,0.0, 2.0, 5.0,0.0,0.0,0.0,0.0, 4.0,0.0,0.0,0.0])
y = np.array([4.0,0.0,-1.0, 9.0,0.0,0.0,0.0,20.0,0.0, 7.0, 15.0,0.0,0.1,0.0,0.0,11.0,0.0,1.0,0.0])


In [12]:
# Adagrad
class AdaGrad:
    def __init__(self):
        self.learning_rate = 0.5
        self.w = 9
        self.b = 0
        self.update_w = 0
        self.update_b = 0
        self.epsilon = 1e-9

    def update_weight(self, x, y):
        yhat = self.w * x + self.b
        self.update_w = self.update_w + (-2 * sum(x * (y - yhat)).mean()) ** 2
        self.update_b = self.update_b + (-2 * sum(y - yhat).mean()) ** 2
        self.w -= (self.learning_rate / np.sqrt(self.update_w + self.epsilon)) * (
            -2 * sum(x * (y - yhat)).mean()
        )
        self.b -= (self.learning_rate / np.sqrt(self.update_b + self.epsilon)) * (
            -2 * sum(y - yhat).mean()
        )

    def MSE(self, y, yhat):
        return np.square(np.subtract(y, yhat)).mean()

    def fit(self, x, y, epochs=2000):
        history = []
        for e in range(epochs):
            self.update_weight(x, y)
            loss = self.MSE(y, (self.w * x + self.b))
            
            history.append(loss)
            if e % 100 == 0:
                print(f"Epoch: {e}, Loss: {loss}")
                print(f"weight:{self.w},bias:{self.b}")
            if loss <= 1:
                print(f"Epoch: {e}, Loss: {loss}")
                print(f"weight:{self.w},bias:{self.b}")
                
                return history
        return history

In [13]:
Agd = AdaGrad()
Agd.fit(x, y)
print("Prediction:")
print(Agd.w,Agd.b)
print("yhat:", linear_regression(x, Agd.w, Agd.b))

Epoch: 0, Loss: 164.41368421052633
weight:8.5,bias:-0.49999999999999645
Epoch: 100, Loss: 2.8292549995701672
weight:3.6307071174486465,bias:-1.299572613840077
Epoch: 146, Loss: 0.9819553165685037
weight:3.27670928643533,bias:-0.651028926261302
Prediction:
3.27670928643533 -0.651028926261302
yhat: [ 2.62568036 -0.65102893 -0.65102893  9.17909893 -0.65102893 -0.65102893
 -0.65102893 22.28593608 -0.65102893  5.90238965 15.73251751 -0.65102893
 -0.65102893 -0.65102893 -0.65102893 12.45580822 -0.65102893 -0.65102893
 -0.65102893]
