# HW13.5 Majorization-Minimization (MM)
By ZincCat

$\min_{\mathbf{x}} \frac{1}{2} \|\mathbf{A}\mathbf{x}-\mathbf{b}\|^{2} + \lambda\|\mathbf{x}\|_{1}$

In [1]:
import numpy as np
from lbfgs import LBFGS
from bfgs import BFGS
from matplotlib import pyplot as plt

# Fix NumPy's global random seed so the randomly drawn A and b below are
# the same on every run.
np.random.seed(19890817)

In [2]:
# Problem data: a random n-by-n matrix A, a random right-hand side b,
# and the all-ones starting point shared by every solver below.
n = 20
A = np.random.normal(loc=3, scale=5, size=(n, n))
b = np.random.normal(loc=3, scale=3, size=n)
x0 = np.ones(n)

In [3]:
# L is the largest singular value of A^T A; it is used below as the curvature
# constant of the quadratic (Lipschitz-gradient) majorant.
# svd() returns the singular values sorted in descending order, so the
# maximum is the leading entry.
L = np.linalg.svd(A.T @ A)[1].max()
print(L)

4353.79001549459


In [7]:
lam = 1e-3 # lambda: weight of the L1 penalty term in the objective
def f(x):
    """Original objective: 1/2 * ||A x - b||^2 + lam * ||x||_1."""
    residual = A@x - b
    smooth_part = 1/2*(np.linalg.norm(residual))**2
    l1_part = lam*np.linalg.norm(x, ord=1)
    return smooth_part + l1_part
def grad1(xk):
    """Return the (sub)gradient function of the Lipschitz-gradient majorant at xk.

    The returned callable maps x to
        A^T (A xk - b) + L * (x - xk) + lam * sign(x),
    i.e. the gradient of the quadratic model of the smooth term around xk
    plus the sign-based subgradient of the L1 penalty.

    Parameters
    ----------
    xk : array_like
        Anchor point at which the majorant is built.

    Returns
    -------
    callable
        Function of x returning the vector described above.
    """
    # Snapshot the anchor. The notebook hands the same array to the solver as
    # its iterate; without a copy, an in-place update of that array would
    # silently move the majorant's anchor.
    anchor = np.array(xk)
    # Gradient of the smooth term at the anchor. It does not depend on x, so
    # it is computed once here rather than on every call.
    smooth_grad = A.T@(A@anchor - b)
    return lambda x: smooth_grad + L*(x - anchor) + lam*np.sign(x)
def grad2(xk, eps=1e-12):
    """Return the gradient function of the second (variational) majorant at xk.

    The returned callable maps x to
        A^T (A x - b) + lam * x / max(|xk|, eps)   (elementwise division).

    Parameters
    ----------
    xk : array_like
        Anchor point at which the majorant is built.
    eps : float, optional
        Lower bound applied to |xk| before dividing. Components of xk with
        magnitude at least eps are unaffected; a zero component would
        otherwise produce inf/nan through division by zero.

    Returns
    -------
    callable
        Function of x returning the vector described above.
    """
    # np.abs/np.maximum build a new array, so the weights are also decoupled
    # from any later in-place change to the caller's xk.
    scale = np.maximum(np.abs(xk), eps)
    return lambda x: A.T@(A@x-b) + lam*x/scale
def grad(x):
    # 原函数的梯度
    return A.T@(A@x-b) + lam*np.sign(x)
def G1(xk):
    """Return the Lipschitz-gradient majorant of the objective built at xk.

    The returned callable maps x to
        1/2 ||A xk - b||^2 + <A^T (A xk - b), x - xk>
        + L/2 ||x - xk||^2 + lam ||x||_1,
    i.e. the smooth term is replaced by its quadratic model around xk with
    curvature L, and the L1 penalty is kept as is.

    Parameters
    ----------
    xk : array_like
        Anchor point at which the majorant is built.

    Returns
    -------
    callable
        Function of x returning the scalar majorant value.
    """
    # Snapshot the anchor so an in-place update of the caller's array (the
    # notebook passes the same array to the solver) cannot move it.
    anchor = np.array(xk)
    residual = A@anchor - b
    # These two terms depend only on the anchor, so evaluate them once.
    value_at_anchor = 1/2*(np.linalg.norm(residual))**2
    grad_at_anchor = A.T@residual

    def majorant(x):
        step = x - anchor
        return (value_at_anchor + np.dot(grad_at_anchor, step)
                + L/2*(np.linalg.norm(step))**2 + lam*np.linalg.norm(x, ord=1))

    return majorant
def G2(xk, eps=1e-12):
    """Return the variational (quadratic) majorant of the objective built at xk.

    Each |x_i| is bounded by x_i^2 / (2 d_i) + d_i / 2 with
    d_i = max(|xk_i|, eps), so the returned callable maps x to
        1/2 ||A x - b||^2 + lam * sum(x_i^2 / d_i) / 2 + lam * sum(d_i) / 2.

    The last term is constant in x. The original code omitted it, which made
    the returned function smaller than the objective at x = xk, so it was not
    a majorant. Adding it does not move the minimizer or change the gradient.

    Parameters
    ----------
    xk : array_like
        Anchor point at which the majorant is built.
    eps : float, optional
        Lower bound applied to |xk| before dividing; avoids inf/nan when a
        component of xk is zero. Components with magnitude at least eps are
        unaffected.

    Returns
    -------
    callable
        Function of x returning the scalar majorant value.
    """
    # np.abs/np.maximum build a new array, which also decouples the weights
    # from later in-place changes to the caller's xk.
    scale = np.maximum(np.abs(xk), eps)
    # x-independent part of the bound; makes the majorant touch the objective
    # at x = xk whenever no component was floored by eps.
    offset = lam*np.sum(scale)/2
    return lambda x: 1/2*(np.linalg.norm(A@x-b))**2 + lam*np.dot(x, x/scale)/2 + offset

In [10]:
# 1. Lipschitz gradient majorant
# Outer loop: repeatedly minimize the majorant G1 built at the current iterate
# and move to its minimizer, until the (sub)gradient of the original
# objective is small or maxIter outer steps have been taken.
x = x0.copy()  # work on a copy; x0 is reused as the start point by other cells
eta = 1e-5  # outer stopping threshold on ||grad(x)||
maxIter = 30000  # cap on the number of outer iterations
g = G1(x)  # majorant anchored at the starting point
for i in range(maxIter):
    # Log the original objective at the current iterate (before the update).
    print(i, "th iteration, f(x)=", f(x))
    # Inner solve: minimize the current majorant g from x, with grad1(x) as its
    # gradient. BFGS comes from the local `bfgs` module; the meaning of its
    # eps/eta/maxIter/display keywords is not visible here -- TODO confirm.
    # Note the eta=1e-5 passed here is a separate literal from the outer `eta`.
    x = BFGS(x, g, grad1(x), n=n, eps = 1e-30, eta=1e-5, maxIter=10000, display=False)
    g = G1(x)  # re-anchor the majorant at the new iterate
    # Stop once the (sub)gradient of the original objective is below eta.
    if np.linalg.norm(grad(x)) < eta:
        break

 f(x)= 0.007156736217091667
15436 th iteration, f(x)= 0.007156736217028679
15437 th iteration, f(x)= 0.007156736216965811
15438 th iteration, f(x)= 0.007156736216903058
15439 th iteration, f(x)= 0.007156736216840425
15440 th iteration, f(x)= 0.007156736216777909
15441 th iteration, f(x)= 0.007156736216715508
15442 th iteration, f(x)= 0.007156736216653224
15443 th iteration, f(x)= 0.007156736216591059
15444 th iteration, f(x)= 0.007156736216529008
15445 th iteration, f(x)= 0.00715673621646707
15446 th iteration, f(x)= 0.0071567362164052534
15447 th iteration, f(x)= 0.007156736216343547
15448 th iteration, f(x)= 0.007156736216281962
15449 th iteration, f(x)= 0.007156736216220488
15450 th iteration, f(x)= 0.007156736216159131
15451 th iteration, f(x)= 0.007156736216097886
15452 th iteration, f(x)= 0.007156736216036756
15453 th iteration, f(x)= 0.007156736215975744
15454 th iteration, f(x)= 0.00715673621591484
15455 th iteration, f(x)= 0.007156736215854051
15456 th iteration, f(x)= 0.00715

In [13]:
# 2. variational majorant function
# Same outer loop as above, but each step minimizes the majorant G2 built at
# the current iterate.
x = x0.copy()  # work on a copy; x0 is reused as the start point by other cells
eta = 1e-5  # outer stopping threshold on ||grad(x)||
maxIter = 500  # cap on the number of outer iterations
g = G2(x)  # majorant anchored at the starting point
for i in range(maxIter):
    # Log the original objective at the current iterate (before the update).
    print(i, "th iteration, f(x)=", f(x))
    # Inner solve: minimize the current majorant g from x, with grad2(x) as its
    # gradient. BFGS comes from the local `bfgs` module; the meaning of its
    # eps/eta/maxIter/display keywords is not visible here -- TODO confirm.
    # Note the eta=1e-5 passed here is a separate literal from the outer `eta`.
    x = BFGS(x, g, grad2(x), n=n, eps = 1e-30, eta=1e-5, maxIter=50, display=False)
    g = G2(x)  # re-anchor the majorant at the new iterate
    # Stop once the (sub)gradient of the original objective is below eta.
    if np.linalg.norm(grad(x)) < eta:
        break

0 th iteration, f(x)= 34014.40633230235
1 th iteration, f(x)= 0.007157022779348722
2 th iteration, f(x)= 0.00715673620460766


In [9]:
# 3. Optimize original function
# Baseline: run BFGS directly on the objective f with its sign-based
# (sub)gradient, starting from a fresh copy of x0.
x = BFGS(x0.copy(), f, grad, n=n, eta=1e-5, maxIter=200)

0 th iteration, f(x)= 34014.40633230235
1 th iteration, f(x)= 2291.679326044753
2 th iteration, f(x)= 1957.1101064777113
3 th iteration, f(x)= 1882.4287405418183
4 th iteration, f(x)= 1857.0881833929786
5 th iteration, f(x)= 1838.6493876902111
6 th iteration, f(x)= 1800.7849290664635
7 th iteration, f(x)= 1760.3617652784192
8 th iteration, f(x)= 1681.4262954544977
9 th iteration, f(x)= 1576.937191454927
10 th iteration, f(x)= 1402.1502767324323
11 th iteration, f(x)= 1271.0111857486038
12 th iteration, f(x)= 910.8846708762891
13 th iteration, f(x)= 448.1025462048118
14 th iteration, f(x)= 330.877280085147
15 th iteration, f(x)= 280.123771258122
16 th iteration, f(x)= 232.5162053287782
17 th iteration, f(x)= 88.92704246172858
18 th iteration, f(x)= 67.38263969220485
19 th iteration, f(x)= 51.09222688445992
20 th iteration, f(x)= 31.48812502815125
21 th iteration, f(x)= 2.9201599464235812
22 th iteration, f(x)= 0.8353168374585138
23 th iteration, f(x)= 0.0851741147946558
24 th iteration,

In [11]:
# Reference result from SciPy
import scipy.optimize as opt
# With no method, bounds or constraints given, minimize() selects BFGS, and
# BFGS has no 'xtol' option: the original options={'xtol': 1e-9, ...} entry
# was ignored with an "Unknown solver options" OptimizeWarning. Naming the
# method and dropping the dead option gives the same result without the
# warning. To tighten convergence, set the BFGS option 'gtol' instead.
res = opt.minimize(fun=f, x0=x0, method='BFGS', options={'disp': True})
print(res)

Optimization terminated successfully.
         Current function value: 0.007157
         Iterations: 28
         Function evaluations: 693
         Gradient evaluations: 33
      fun: 0.007156736184194109
 hess_inv: array([[ 4.23427080e-02, -1.06309180e-02, -1.48198659e-03,
         1.24075698e-03, -3.34344891e-04, -1.31286902e-02,
        -2.24944442e-02, -1.36267548e-02, -4.84116502e-03,
         1.65344109e-02,  9.23167961e-03, -6.58852090e-03,
        -8.21886807e-03,  1.08767300e-02,  1.28608211e-02,
        -8.42232407e-03, -2.61280369e-02,  8.08160159e-03,
         4.43320852e-03, -4.52721976e-03],
       [-1.06309180e-02,  8.27871281e-02,  3.67155213e-02,
        -7.30128839e-02, -2.69442771e-03, -1.49508423e-03,
         4.38572900e-03,  2.79822412e-02,  1.74246664e-03,
        -3.89679723e-02, -1.00243816e-02,  7.33531889e-03,
        -3.09195421e-02,  8.62890484e-03,  1.63281611e-02,
        -2.57996607e-02,  6.04191897e-02, -2.14600806e-02,
        -4.17641906e-02, -1.64650