# HW13.3 DFP & BFGS
By ZincCat

$\min_{\mathbf{x}}f(\mathbf{x})=(3−x_1)^2+7(x_2−x_1^2)^2+ (x_3−x_1-x_2^2)^2$


In [1]:
%matplotlib inline
import numpy as np
from matplotlib import pyplot as plt

# 设置随机种子
np.random.seed(19890817)

In [2]:
# 定义一些辅助函数

def f(x):
    # 计算函数值
    return (3-x[0])**2 + 7*(x[1]-x[0]*x[0])**2 + 9*(x[2]-x[0]-x[1]*x[1])**2

def gradient_f(x):
    # 计算函数梯度
    g = np.zeros(3)
    g[0] = 2*x[0] - 6 - 28*x[0]*(x[1]-x[0]*x[0]) - 18*(x[2]-x[0]-x[1]*x[1])
    g[1] = 14*(x[1]-x[0]*x[0]) - 36*x[1]*(x[2]-x[0]-x[1]*x[1])
    g[2] = 18*(x[2]-x[0]-x[1]*x[1])
    return g

def linesearch_Armijo(f, x, g, d, alpha=0.4, beta=0.8):
    # backtrack linesearch using Armijo rules
    t = 0.5
    value = f(x)
    while f(x + t*d) > value + alpha*t*np.dot(g, d):
        t *= beta
    return t

def linesearch_Wolfe(f, x, g, d, start=0,end=1e10,rho=0.1,sigma=0.8):
    # linesearch using strong Wolfe rules
    value = f(x)
    reg1 = np.dot(g, d)
    reg2 = sigma*g
    t = 0
    while t < 50:
        alpha = (start + end)/2
        x_new = x + alpha*d
        cond1 =  (f(x_new) < value + rho*alpha*reg1)
        cond2 = (np.abs(np.dot(gradient_f(x_new), d)) < np.abs(np.dot(reg2, d)))
        if (cond1 and cond2):
            break
        if not cond1:
            end = alpha
        else:
            start = alpha
        t += 1
    return alpha

def DFP(H, dx, dg):
    t = H@dg
    return H + np.outer(dx, dx)/np.dot(dx, dg) - np.outer(t, t)/np.dot(dg, t)

def BFGS(H, dx, dg, eps=1e-30):
    t1 = H@dg
    t2 = np.dot(dg, dx)
    return H + (1+np.dot(dg, t1)/t2)*np.outer(dx, dx)/t2 - (np.outer(t1, dx)+np.outer(dx, t1))/t2

In [3]:
alpha = 0.4
beta = 0.8
maxIter = 3000

# 设置变量初值
x = np.array([0, 0, 0], dtype=np.float64) #x0
H = np.eye(3) #H_0
g = gradient_f(x)
d = -g
eta = 1e-5
eps = 1e-20
timestep = 0
lam = []
while True:
    print(timestep, "th iteration, x=", x, ", f(x)=", f(x))
    # print("H=", np.linalg.svd(H)[1][2])
    # print("g=", g)
    lam.append(np.mean(np.linalg.svd(H)[1]))
    if np.linalg.norm(g) < eta:
        break
    # alpha = linesearch_Armijo(f, x, g, d, alpha, beta)
    alpha = linesearch_Wolfe(f, x, g, d)
    dx = alpha*d
    x += dx
    dg = gradient_f(x) - g
    g += dg
    # H = DFP(H, dx, dg)
    H = BFGS(H, dx, dg)
    d = -H@g
    timestep += 1
    if timestep%3 == 0:
        d = -g
        # print(g)
print("result: x:", x, "\nf(x):",f(x))
print(np.mean(lam))

0 th iteration, x= [0. 0. 0.] , f(x)= 9.0
1 th iteration, x= [0.46815885 0.         0.        ] , f(x)= 8.719030691382894
2 th iteration, x= [0.95935802 0.24763874 0.68009537] , f(x)= 8.376169675917236
3 th iteration, x= [0.97807705 0.53665697 0.72202827] , f(x)= 7.986750127517768
4 th iteration, x= [0.66429007 0.4524167  0.90015991] , f(x)= 5.465163604781935
5 th iteration, x= [0.97798628 0.66692536 1.27784077] , f(x)= 4.864395396482811
6 th iteration, x= [0.90311931 0.69087695 1.33533122] , f(x)= 4.524147585688882
7 th iteration, x= [0.9693146  0.87271233 1.57159107] , f(x)= 4.383506357917406
8 th iteration, x= [1.30010547 1.3570212  3.04848064] , f(x)= 3.7451055720928808
9 th iteration, x= [1.17063637 1.2905051  2.98814978] , f(x)= 3.5994789912954928
10 th iteration, x= [1.20499954 1.36494848 2.963248  ] , f(x)= 3.3740166182024143
11 th iteration, x= [1.42098792 1.97943184 5.19208444] , f(x)= 2.6989770716336503
12 th iteration, x= [1.43740359 1.95376093 5.3146299 ] , f(x)= 2.5625418