The objective of this notebook is to create a gradient descent algorithm with changing alfa, that is alfa is bigger if the gradient's module is bigger.

In [4]:
import random
import numpy as np
from numpy import linalg

In [5]:
def func(x1,x2):
    ff =(x1-3)**2 + (x2-2)**2
    return ff

In [6]:
def gradFunc(x1,x2):
    g1 = 2*(x1-3)
    g2 = 2*(x2-2)
    return g1, g2

In [11]:
def grad_optim(func,gradFunc):  
    itmax = 1000 #maximum nº of steps
    normagrad = 1e-3 #minimum norm of gradient
    alfa = 0.1 #rate of learning
    x1,x2 = random.uniform(-2,2),random.uniform(-2,2)
    i = 0
    g = gradFunc(x1,x2)
    norm_g = linalg.norm(g)
    while (norm_g > normagrad) & (i < itmax):
        f = func(x1,x2)
        g = gradFunc(x1,x2)
        norm_g = linalg.norm(g)
        print("Iteration = "+ str(i)+" | gradient norm = "+ str(round(norm_g,3))+
              " | Position = "+"("+str(round(x1,3))+","+ str(round(x2,3))+")")
        i += 1
        x1,x2 = x1-alfa*g[0], x2-alfa*g[1]

    return x1,x2

In [12]:
grad_optim(func,gradFunc)

Iteration = 0 | gradient norm = 5.462 | Position = (0.441,1.045)
Iteration = 1 | gradient norm = 4.37 | Position = (0.953,1.236)
Iteration = 2 | gradient norm = 3.496 | Position = (1.363,1.388)
Iteration = 3 | gradient norm = 2.797 | Position = (1.69,1.511)
Iteration = 4 | gradient norm = 2.237 | Position = (1.952,1.609)
Iteration = 5 | gradient norm = 1.79 | Position = (2.162,1.687)
Iteration = 6 | gradient norm = 1.432 | Position = (2.329,1.75)
Iteration = 7 | gradient norm = 1.146 | Position = (2.463,1.8)
Iteration = 8 | gradient norm = 0.916 | Position = (2.571,1.84)
Iteration = 9 | gradient norm = 0.733 | Position = (2.657,1.872)
Iteration = 10 | gradient norm = 0.586 | Position = (2.725,1.897)
Iteration = 11 | gradient norm = 0.469 | Position = (2.78,1.918)
Iteration = 12 | gradient norm = 0.375 | Position = (2.824,1.934)
Iteration = 13 | gradient norm = 0.3 | Position = (2.859,1.947)
Iteration = 14 | gradient norm = 0.24 | Position = (2.887,1.958)
Iteration = 15 | gradient norm 

(2.999659916701209, 1.9998729952370973)

The following is an adaptative alfa test:

In [13]:
def grad_optim_ada(func,gradFunc):  
    itmax = 1000 #maximum nº of steps
    normagrad = 1e-3 #minimum norm of gradient
    alfa = 0.1 #rate of learning
    x1,x2 = random.uniform(-2,2),random.uniform(-2,2)
    i = 0
    g = gradFunc(x1,x2)
    norm_g = linalg.norm(g)
    while (norm_g > normagrad) & (i < itmax):
        f = func(x1,x2)
        g = gradFunc(x1,x2)
        norm_g = linalg.norm(g)
        alfa = norm_g/10 #test1 for adaptative alpha
        #alfa = alfa*log(norm_g+100) #test2 for adaptative alpha
        print("Iteration = "+ str(i)+" | gradient norm = "+ str(round(norm_g,3))+
              " | Position = "+"("+str(round(x1,3))+","+ str(round(x2,3))+")"+
              " | Alpha = "+ str(round(alfa,7)))
        i += 1
        x1,x2 = x1-alfa*g[0], x2-alfa*g[1]

    return x1,x2

In [14]:
grad_optim_ada(func,gradFunc)

Iteration = 0 | gradient norm = 7.134 | Position = (0.763,-0.778) | Alpha = 0.0713427
Iteration = 1 | gradient norm = 6.116 | Position = (1.082,-0.382) | Alpha = 0.0436355
Iteration = 2 | gradient norm = 5.583 | Position = (1.249,-0.174) | Alpha = 0.0243597
Iteration = 3 | gradient norm = 5.311 | Position = (1.335,-0.068) | Alpha = 0.0129364
Iteration = 4 | gradient norm = 5.173 | Position = (1.378,-0.015) | Alpha = 0.0066922
Iteration = 5 | gradient norm = 5.104 | Position = (1.399,0.012) | Alpha = 0.0034156
Iteration = 6 | gradient norm = 5.069 | Position = (1.41,0.026) | Alpha = 0.0017314
Iteration = 7 | gradient norm = 5.052 | Position = (1.416,0.033) | Alpha = 0.0008746
Iteration = 8 | gradient norm = 5.043 | Position = (1.419,0.036) | Alpha = 0.000441
Iteration = 9 | gradient norm = 5.038 | Position = (1.42,0.038) | Alpha = 0.0002222
Iteration = 10 | gradient norm = 5.036 | Position = (1.421,0.039) | Alpha = 0.0001119
Iteration = 11 | gradient norm = 5.035 | Position = (1.421,0.0

(1.421380252479755, 0.03976497236495051)