BFGS.py
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.gaussian_process import GaussianProcessRegressor
from scipy.optimize import line_search


def gradient(model: Pipeline, X: np.ndarray, Y: float, delta_h: float = 1e-3):
    """
    Estimate the gradient at X via forward finite differences, comparing the
    model prediction at each perturbed point with the known function value Y at X.
    """
    # Pre-allocate the gradient vector
    grad = np.zeros(len(X))
    for i in range(len(X)):
        Xh = X.copy()
        # Perturb the i-th coordinate by delta_h
        Xh[i] += delta_h
        # Forward finite difference: (f(x + h*e_i) - f(x)) / h
        grad[i] = (model.predict(Xh.T)[0] - Y) / delta_h
    return grad.reshape(-1, 1)
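
# A small worked example of the forward-difference formula above (illustrative
# only; the numbers assume a model whose predict() evaluates f(x) = x1**2 + x2**2,
# which is not part of this file):
#
#   grad_i ≈ (f(x + h * e_i) - f(x)) / h
#
# At x = (1, 2) with h = 1e-3 this yields roughly (2.001, 4.001) ≈ ∇f(x) = (2, 4).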


def subdomain_func(x, boundaries):
    """
    Check whether the point x lies inside the box-shaped subdomain.

    - boundaries is a 2-by-N array, e.g.
        boundaries = [
            [lower boundaries],
            [upper boundaries]
        ]
    - x is a point in algebraic notation (column vector)
    """
    for i, coord in enumerate(x):
        if not (boundaries[0, i] <= coord <= boundaries[1, i]):
            return False
    return True


def backtracking_line_search(target_func, g, x, p, alpha=1, rho=0.5, c=1e-4):
    """
    Backtracking line search algorithm.

    Shrinks alpha until the sufficient-decrease (Armijo) condition
        f(x + alpha * p) <= f(x) + c * alpha * g^T p
    holds, with a lower floor of 0.01 on the step size.

    Parameters
    ----------
    target_func : callable
        The target function to minimize.
    g : array-like
        The gradient at the current point.
    x : array-like
        The current point.
    p : array-like
        The search direction.
    alpha : float, optional
        The initial step size.
    rho : float, optional
        The reduction factor for the step size.
    c : float, optional
        The sufficient-decrease parameter.

    Returns
    -------
    alpha : float
        A step size that satisfies the Armijo condition (or the 0.01 floor).
    """
    f = target_func(x)
    dg = float(g.T @ p)
    while target_func(x + alpha * p) > f + c * alpha * dg:
        alpha *= rho
        # Guard against vanishing steps: fall back to a small fixed step size
        if alpha < 0.01:
            return 0.01
    return alpha
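
# A minimal usage sketch (illustrative, not part of the original file), assuming
# a 1-D quadratic f(x) = ||x||^2 whose gradient at x is 2x:
#
#   f = lambda z: float(z.T @ z)
#   x = np.array([[2.0]])
#   g = np.array([[4.0]])          # gradient of f at x
#   p = -g                         # steepest-descent direction
#   alpha = backtracking_line_search(f, g, x, p)   # returns 0.5 here
#
# f(x + 1.0 * p) = 4 fails the sufficient-decrease test, while f(x + 0.5 * p) = 0
# passes it, so the step size is halved exactly once.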


def bfgs(fun, dimension: int = 2, boundaries=[[-5, -5], [5, 5]], sampling_budget: int = 100, tol=10e-9, verbose=False):
    # Initialization of the surrogate model
    model = GaussianProcessRegressor()  # Pipeline([('poly', PolynomialFeatures(degree=2)), ('linear', LinearRegression(fit_intercept=False))])
    # Define the two initial points used to fit the model (algebraic notation: one column per point)
    boundaries = np.array(boundaries)
    x0 = np.random.uniform(low=boundaries[0], high=boundaries[1], size=[2, dimension]).T
    B_inv = np.eye(dimension)
    x_prev = x0[:, 0].reshape(-1, 1)
    x_k = x0[:, 1].reshape(-1, 1)
    X_log = np.hstack((x_prev, x_k))
    Y_log = np.array(fun(X_log))
    # Two evaluations have already been spent on the initial points
    max_iter = sampling_budget - 2
    for iter in range(max_iter):
        # Fit the surrogate model on the samples collected so far
        model.fit(X_log.T, Y_log)
        # Compute the two secant factors: difference in position and difference in gradient
        x_delta = x_k - x_prev
        y = gradient(model, x_k, Y_log[-1]) - gradient(model, x_prev, Y_log[-2])
        # Update the inverse of the Hessian approximation (with a small damping term in the denominators)
        B_inv = (np.eye(dimension) - (x_delta @ y.T) / (y.T @ x_delta + 1e-9)) @ B_inv @ (np.eye(dimension) - (y @ x_delta.T) / (y.T @ x_delta + 1e-9)) + (x_delta @ x_delta.T) / (y.T @ x_delta + 1e-9)
        # Compute the search direction
        p = -B_inv @ gradient(model, x_k, Y_log[-1])
        # Project the search direction onto the tangent space of the subdomain:
        # zero the components whose coordinates already lie outside the box
        outside = np.logical_or(x_k < boundaries[0].reshape(-1, 1), x_k > boundaries[1].reshape(-1, 1))
        p[outside] = 0
        alpha = backtracking_line_search(fun, gradient(model, x_k, Y_log[-1]), x_k, p)
        # Update the point
        x_new = x_k + alpha * p
        # Log the new value
        X_log = np.hstack((X_log, x_new))
        Y_log = np.hstack((Y_log, fun(x_new)))
        if np.linalg.norm(gradient(model, x_new, Y_log[-1])) < tol:
            if verbose:
                print(f"Stopped at iteration {iter}")
            break
        # Update for next iteration
        x_prev = x_k.copy().reshape(-1, 1)
        x_k = x_new.copy().reshape(-1, 1)
    # Return the best sample observed in the log (not necessarily the last iterate)
    best_idx = int(np.argmin(Y_log))
    X_best, Y_best = X_log[:, best_idx], Y_log[best_idx]
    return X_best, Y_best, X_log.T, Y_log.T
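

# A minimal, hedged usage sketch (not part of the original file): minimising the
# 2-D sphere function over the default box [-5, 5]^2. The objective is assumed to
# accept a (dimension, n_points) array, matching how `fun` is called on X_log and
# on single column vectors above; the name `sphere` is illustrative.
if __name__ == "__main__":
    def sphere(X):
        # f(x) = sum_i x_i^2, evaluated column-wise
        X = np.atleast_2d(X)
        return np.sum(X**2, axis=0)

    x_best, y_best, X_hist, Y_hist = bfgs(sphere, dimension=2, sampling_budget=50, verbose=True)
    print("Best point:", x_best.ravel())
    print("Best value:", float(y_best))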