In [1]:
import numpy as np
from scipy.linalg import sqrtm
import matplotlib.pyplot as plt

In [2]:
def evalf(x):
  """Objective value f(x) = sqrt(x[0]^2 + 4) + sqrt(x[1]^2 + 4).

  x must be a numpy array of length 2.
  """
  assert type(x) is np.ndarray and len(x) == 2
  first, second = x[0], x[1]
  term_first = np.sqrt(first**2 + 4)
  term_second = np.sqrt(second**2 + 4)
  return term_first + term_second

In [3]:
def evalg(x):
  """Gradient of f at x; component i is x[i] / sqrt(x[i]^2 + 4).

  x must be a numpy array of length 2; a new length-2 array is returned.
  """
  assert type(x) is np.ndarray and len(x) == 2
  partials = [x[i]/np.sqrt(x[i]**2+4) for i in range(2)]
  return np.array(partials)

In [4]:
def evalh(x):
  """Hessian of f at x: a 2x2 diagonal matrix with entries 4 / (x[i]^2 + 4)^(3/2).

  x must be a numpy array of length 2.
  """
  assert type(x) is np.ndarray and len(x) == 2
  h00 = 4/np.power(x[0]**2+4,3/2)
  h11 = 4/np.power(x[1]**2+4,3/2)
  return np.array([[h00,0],
                   [0,h11]])

In [5]:
def find_minimizer(start_x, tol, max_iter=100):
  """Minimize f with pure Newton iterations (constant step length 1).

  Each iteration replaces x by x - H(x)^{-1} g(x), with g = evalg(x) and
  H = evalh(x), and prints the new iterate. The loop ends when the gradient
  norm is no longer greater than `tol`, or after `max_iter` updates,
  whichever happens first. Without the cap the loop never ends when the
  iterates cycle instead of converging.

  Args:
    start_x: numpy array of length 2, the starting point (not modified).
    tol: non-negative float; threshold on the gradient norm.
    max_iter: maximum number of Newton updates to perform (default 100).

  Returns:
    Tuple (x, k, f(x)): the last iterate, the number of updates performed
    and the objective value at that iterate. If the cap was hit, x is simply
    the last iterate and a message is printed.

  Raises:
    numpy.linalg.LinAlgError: if the Hessian at an iterate is singular.
  """
  assert type(start_x) is np.ndarray and len(start_x) == 2
  assert type(tol) is float and tol>=0
  assert max_iter >= 0

  x = start_x
  gradf = evalg(x)
  step_length = 1
  k = 0

  while np.linalg.norm(gradf) > tol and k < max_iter:
    # Solve H p = g for the Newton step instead of forming H^{-1} explicitly.
    newton_step = np.linalg.solve(evalh(x), gradf)
    x = np.subtract(x, np.multiply(step_length, newton_step))
    k += 1
    gradf = evalg(x)
    print('x:',x)

  final_norm = np.linalg.norm(gradf)
  # `not (<=)` rather than `>` so that a NaN gradient norm is also reported.
  if not final_norm <= tol:
    print('Stopped after', k, 'iterations without reaching tol; gradient norm:', final_norm)

  return x, k, evalf(x)

In [6]:
# Starting point and gradient-norm tolerance used by the runs that follow.
start_x = np.array((2, 2))
tol = 1.0e-9

In [None]:
# Pure Newton (step length 1) from start_x = [2, 2]. As noted in Ans 2 below,
# the iterates oscillate between two values, so the gradient norm never
# drops to tol on this run.
find_minimizer(start_x,tol)

In [8]:
def compute_steplength_backtracking_scaled_direction(x, gradf, alpha_start, rho, gamma, d_k):
  """Backtracking line search along the scaled direction p = d_k @ (-gradf).

  Starting at alpha_start, the step length is multiplied by rho for as long
  as f(x + alpha*p) > f(x) + gamma*alpha*(gradf^T p). The first alpha for
  which that test fails is returned.

  x and gradf must be numpy arrays of length 2; d_k is the matrix applied to
  the negative gradient.
  """
  assert type(x) is np.ndarray and len(x) == 2
  assert type(gradf) is np.ndarray and len(gradf) == 2

  # Quantities that do not depend on alpha are computed once.
  direction = np.matmul(d_k,-gradf)
  slope = np.matmul(gradf.transpose(),direction)
  f_at_x = evalf(x)

  alpha = alpha_start
  while evalf(x+alpha*direction) > f_at_x + gamma*alpha*slope:
    alpha = alpha*rho

  return alpha

In [9]:
def find_minimizer_newton_backtracking(start_x, tol, *args):
  """Minimize f with Newton's method using a backtracking line search.

  Args:
    start_x: numpy array of length 2, the starting point.
    tol: non-negative float; iteration continues while ||evalg(x)|| > tol.
    *args: alpha_start, rho, gamma (in that order), passed to
      compute_steplength_backtracking_scaled_direction.

  Returns:
    Tuple (x, k, f(x)): final iterate, number of iterations, objective value.

  Each iteration prints the new iterate and the step length that was used.
  """
  assert type(start_x) is np.ndarray and len(start_x) == 2
  assert type(tol) is float and tol>=0

  x = start_x
  gradf = evalg(x)
  hessian = evalh(x)

  alpha_start, rho, gamma = args[0], args[1], args[2]

  # The inverse Hessian is the scaling matrix handed to the line search.
  inv_hessian = np.linalg.inv(hessian)
  k = 0

  while np.linalg.norm(gradf) > tol:
    step_length = compute_steplength_backtracking_scaled_direction(
        x, gradf, alpha_start, rho, gamma, inv_hessian)
    x = x - step_length*np.matmul(inv_hessian,gradf)

    # Refresh gradient and inverse Hessian at the new iterate.
    gradf = evalg(x)
    k += 1
    inv_hessian = np.linalg.inv(evalh(x))
    print('x:',x,'alpha:',step_length)

  return x,k,evalf(x)

In [10]:
# Newton with backtracking from start_x; the trailing arguments are
# alpha_start=1.0, rho=0.5, gamma=0.5.
find_minimizer_newton_backtracking(start_x, tol, 1.0, 0.5, 0.5)

x: [0. 0.] alpha: 0.5


(array([0., 0.]), 1, 4.0)

# Ans 2:

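We can observe that Newton's Method with constant step length = 1 does not converge from the starting point $[2, 2]$: the iterates oscillate between two values, $[2, 2]$ and $[-2, -2]$. (For this function the Newton update reduces to $x_i \leftarrow -x_i^3/4$, so $|x_i| = 2$ is a 2-cycle.)

As for Newton's Method with Backtracking Line Search, it takes 1 iteration to find the minimizer.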

Minimizer : [0,0]

Minimum Function Value : 4.0

In [11]:
def compute_steplength_backtracking_prev(x, gradf, alpha_start, rho, gamma):
  """Backtracking line search along the steepest-descent direction -gradf.

  Starting at alpha_start, the step length is multiplied by rho for as long
  as f(x - alpha*gradf) > f(x) + gamma*alpha*(gradf^T (-gradf)). The first
  alpha for which that test fails is returned.

  x and gradf must be numpy arrays of length 2.
  """
  assert type(x) is np.ndarray and len(x) == 2
  assert type(gradf) is np.ndarray and len(gradf) == 2

  # Quantities that do not depend on alpha are computed once.
  descent_dir = -gradf
  slope = np.matmul(gradf.transpose(),descent_dir)
  f_at_x = evalf(x)

  alpha = alpha_start
  while evalf(x+alpha*descent_dir) > f_at_x + gamma*alpha*slope:
    alpha = alpha*rho

  return alpha

In [15]:
def find_minimizer_prev(start_x, tol,*args):
  """Minimize f by gradient descent with a backtracking line search.

  Args:
    start_x: numpy array of length 2, the starting point (not modified).
    tol: non-negative float; iteration continues while ||evalg(x)|| > tol.
    *args: alpha_start, rho, gamma (in that order), passed to
      compute_steplength_backtracking_prev.

  Returns:
    Tuple (x, k, f(x)): final iterate, number of iterations, objective value.

  Each iteration prints the new iterate and the step length that was used.
  """
  assert type(start_x) is np.ndarray and len(start_x) == 2
  assert type(tol) is float and tol>=0

  x = start_x
  g_x = evalg(x)

  alpha_start = args[0]
  rho = args[1]
  gamma = args[2]

  k = 0

  # First-order method: only the gradient is needed, so no Hessian is evaluated.
  while np.linalg.norm(g_x) > tol:
    step_length = compute_steplength_backtracking_prev(x, g_x, alpha_start, rho, gamma)
    x = np.subtract(x, np.multiply(step_length, g_x))
    k += 1
    print('x:',x,'alpha:',step_length)
    g_x = evalg(x)

  return x,k,evalf(x)

In [16]:
# Gradient descent with backtracking from start_x; the trailing arguments are
# alpha_start=1.0, rho=0.5, gamma=0.5.
find_minimizer_prev(start_x, tol, 1.0, 0.5, 0.5)

x: [1.29289322 1.29289322] alpha: 1.0
x: [0.7500044 0.7500044] alpha: 1.0
x: [0.39887915 0.39887915] alpha: 1.0
x: [0.20329151 0.20329151] alpha: 1.0
x: [0.10216681 0.10216681] alpha: 1.0
x: [0.05114993 0.05114993] alpha: 1.0
x: [0.02558332 0.02558332] alpha: 1.0
x: [0.01279271 0.01279271] alpha: 1.0
x: [0.00639649 0.00639649] alpha: 1.0
x: [0.00319826 0.00319826] alpha: 1.0
x: [0.00159913 0.00159913] alpha: 1.0
x: [0.00079957 0.00079957] alpha: 1.0
x: [0.00039978 0.00039978] alpha: 1.0
x: [0.00019989 0.00019989] alpha: 1.0
x: [9.99457603e-05 9.99457603e-05] alpha: 1.0
x: [4.99728802e-05 4.99728802e-05] alpha: 1.0
x: [2.49864401e-05 2.49864401e-05] alpha: 1.0
x: [1.24932201e-05 1.24932201e-05] alpha: 1.0
x: [6.24661003e-06 6.24661003e-06] alpha: 1.0
x: [3.12330501e-06 3.12330501e-06] alpha: 1.0
x: [1.56165251e-06 1.56165251e-06] alpha: 1.0
x: [7.80826253e-07 7.80826253e-07] alpha: 1.0
x: [3.90413127e-07 3.90413127e-07] alpha: 1.0
x: [1.95206563e-07 1.95206563e-07] alpha: 1.0
x: [9.7603

(array([7.62525638e-10, 7.62525638e-10]), 32, 4.0)

# Ans 3:

We can observe that Newton's Method with constant step length = 1 does not converge from the starting point $[2, 2]$: the iterates oscillate between two values, $[2, 2]$ and $[-2, -2]$.

As for Newton's Method with Backtracking Line Search, it takes 1 iteration to find the minimizer.

In Gradient Descent with backtracking line search, it takes 32 iterations.

Newton's Method with Backtracking Line Search takes far fewer iterations (1 vs. 32) than Gradient Descent with Backtracking Line Search.

Minimizer for Newton's Method with Backtracking Line Search : [0,0]

Minimum Function Value for Newton's Method with Backtracking Line Search : 4.0

Minimizer for Gradient Descent with backtracking line search : [7.62525638e-10, 7.62525638e-10]

Minimum Function Value for Gradient Descent with backtracking line search : 4.0

The minimizer is equal to the actual minimizer for Newton's method with Backtracking Line Search, but it is approximately equal in the case of Gradient Descent with Backtracking Line Search.

The minimum function value is same in both cases.


In [17]:
# Second starting point, farther from the minimizer than start_x.
start_x_4 = np.array((8.0, 8.0))

In [None]:
# Pure Newton (step length 1) from start_x_4 = [8, 8]. As noted in Ans 4
# below, the iterates diverge on this run.
# NOTE(review): the run may end in a numerical error (overflow or a singular
# Hessian) rather than a returned value -- confirm before relying on Run All.
find_minimizer(start_x_4, tol)

In [18]:
# Newton with backtracking from start_x_4; the trailing arguments are
# alpha_start=1.0, rho=0.5, gamma=0.5.
find_minimizer_newton_backtracking(start_x_4, tol, 1.0, 0.5, 0.5)

x: [-0.5 -0.5] alpha: 0.0625
x: [-0.234375 -0.234375] alpha: 0.5
x: [-0.11557817 -0.11557817] alpha: 0.5
x: [-0.0575961 -0.0575961] alpha: 0.5
x: [-0.02877417 -0.02877417] alpha: 0.5
x: [-0.0143841 -0.0143841] alpha: 0.5
x: [-0.00719168 -0.00719168] alpha: 0.5
x: [-0.00359579 -0.00359579] alpha: 0.5
x: [-0.00179789 -0.00179789] alpha: 0.5
x: [-0.00089894 -0.00089894] alpha: 0.5
x: [-0.00044947 -0.00044947] alpha: 0.5
x: [-0.00022474 -0.00022474] alpha: 0.5
x: [2.83764947e-12 2.83764947e-12] alpha: 1.0


(array([2.83764947e-12, 2.83764947e-12]), 13, 4.0)

# Ans 4:

We can observe that Newton's Method with constant step length = 1 diverges from the starting point $[8, 8]$: the magnitude of the iterates grows without bound.

As for Newton's Method with Backtracking Line Search, it takes 13 iterations to find the minimizer.

Minimizer : [2.83764947e-12, 2.83764947e-12]

Minimum Function Value : 4.0

In [19]:
# Gradient descent with backtracking from start_x_4; the trailing arguments are
# alpha_start=1.0, rho=0.5, gamma=0.5.
find_minimizer_prev(start_x_4, tol, 1.0, 0.5, 0.5)

x: [7.0298575 7.0298575] alpha: 1.0
x: [6.06802585 6.06802585] alpha: 1.0
x: [5.11828321 5.11828321] alpha: 1.0
x: [4.18686723 4.18686723] alpha: 1.0
x: [3.28453054 3.28453054] alpha: 1.0
x: [2.43041522 2.43041522] alpha: 1.0
x: [1.6582483 1.6582483] alpha: 1.0
x: [1.01997818 1.01997818] alpha: 1.0
x: [0.56565981 0.56565981] alpha: 1.0
x: [0.2935057 0.2935057] alpha: 1.0
x: [0.14830804 0.14830804] alpha: 1.0
x: [0.07435706 0.07435706] alpha: 1.0
x: [0.0372042 0.0372042] alpha: 1.0
x: [0.01860532 0.01860532] alpha: 1.0
x: [0.00930306 0.00930306] alpha: 1.0
x: [0.00465158 0.00465158] alpha: 1.0
x: [0.0023258 0.0023258] alpha: 1.0
x: [0.0011629 0.0011629] alpha: 1.0
x: [0.00058145 0.00058145] alpha: 1.0
x: [0.00029072 0.00029072] alpha: 1.0
x: [0.00014536 0.00014536] alpha: 1.0
x: [7.26812126e-05 7.26812126e-05] alpha: 1.0
x: [3.63406063e-05 3.63406063e-05] alpha: 1.0
x: [1.81703032e-05 1.81703032e-05] alpha: 1.0
x: [9.08515158e-06 9.08515158e-06] alpha: 1.0
x: [4.54257579e-06 4.54257579e

(array([8.3177047e-10, 8.3177047e-10]), 39, 4.0)

# Ans 5:

We can observe that Newton's Method with constant step length = 1 diverges from the starting point $[8, 8]$: the magnitude of the iterates grows without bound.

As for Newton's Method with Backtracking Line Search, it takes 13 iterations to find the minimizer.

In Gradient Descent with backtracking line search, it takes 39 iterations.

Gradient Descent with Backtracking Line Search takes 3 times as many iterations (39 vs. 13) as Newton's Method with Backtracking Line Search.

Minimizer for Newton's Method with Backtracking Line Search : [2.83764947e-12, 2.83764947e-12]

Minimum Function Value for Newton's Method with Backtracking Line Search : 4.0

Minimizer for Gradient Descent with backtracking line search : [8.3177047e-10, 8.3177047e-10]

Minimum Function Value for Gradient Descent with backtracking line search : 4.0



For both cases, the computed minimizers are approximately equal to the actual minimizer $[0, 0]$. The one found by Newton's method with Backtracking Line Search is closer to it than the one found by Gradient Descent with Backtracking Line Search by roughly two orders of magnitude (components of order $10^{-12}$ versus $10^{-10}$).

The minimum function value is same in both cases.
