In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math as math
import torch
import test_fn

Branin Function:
\begin{align*}
f&:([-5,10]\times [0,15]) \rightarrow \mathbb{R}\\
f(x)&=a(x_2 - bx_1^2 + cx_1 - r)^2 + s(1-t)cos(x_1) + s
\end{align*}

Recommended values:
\begin{align*}
a &= 1\\
b &= \frac{5.1}{4\pi^2}\\
c &= \frac{5}{\pi}\\
r &= 6\\
s &= 10\\
t &= \frac{1}{8\pi}
\end{align*}

In [2]:
class Ackley():
    def __init__(self, noise_var = 0):
        self.params = {
            'a': 20,
            'b': 0.2,
            'c': 2 * torch.pi
        }

    def evaluate_torch(self, x):
        a = self.params['a']
        b = self.params['b']
        c = self.params['c']
        n = len(x)
        
        first_operand = -a * np.exp(np.sqrt(np.sum(x**2) / n) * -b)
        second_operand = np.exp(np.sum(np.cos(c * x)) / n)

        return first_operand - second_operand + a + np.exp(1)

In [3]:
class Branin():
    '''
    Takes in an n x 2 input matrix where each row is an observation of dimension 2.
    Outputs n x 1 output matrix where each output has dimension 1.
    '''
    
    def __init__(self, noise_var=0):
        self.range = np.array([[-5,10],
                             [0,15]])
        self.param = {
            'a':1,
            'b':5.1/(4*math.pi**2),
            'c':5/math.pi,
            'r':6,
            's':10,
            't':1/(8*math.pi)
        }
        
        self.noise_var = noise_var
        self.input_dim = 2
        self.output_dim = 1

    def scale_domain(self,x):
        # Scaling the domain
        x_copy = np.copy(x)
        if len(x_copy.shape) == 1:
            x_copy = x_copy.reshape((1, x_copy.shape[0]))
        for i in range(len(self.range)):
            x_copy[:, i] = x_copy[:, i] * (self.range[i, 1] - self.range[i, 0]) / 2 + (
                        self.range[i, 1] + self.range[i, 0]) / 2
        return x_copy

    def __evaluate_single(self, x):
        a = self.param['a']
        b = self.param['b']
        c = self.param['c']
        r = self.param['r']
        s = self.param['s']
        t = self.param['t']
        
        f = a*(x[1] - b*x[1]**2 + c*x[0] - r)**2 + s*(1-t)*math.cos(x[1]) + s
        
        return f
    
    def evaluate_torch(self, x):
        a = self.param['a']
        b = self.param['b']
        c = self.param['c']
        r = self.param['r']
        s = self.param['s']
        t = self.param['t']
        
        f = a*(x[:,1] - b*x[:,1]**2 + c*x[:,0] - r)**2 + s*(1-t)*torch.cos(x[:,1]) + s
                
        return f
    
    def evaluate_true(self, x):
        x = x.reshape(-1,self.input_dim)
        
        return np.apply_along_axis(self.__evaluate_single, axis = 1, arr = x)

    def evaluate(self, x):
        true_values = self.evaluate_true(x).reshape(x.shape[0],self.output_dim)
        noise = np.random.normal(0, self.noise_var, size = (x.shape[0],self.output_dim))
        
        return true_values + noise

In [4]:
torch.concat([torch.tensor([]),torch.tensor([5]),torch.tensor([10])])

tensor([ 5., 10.])

# Draw sample inputs

# Train simple MLP to twist inputs into desired space
In this case, we're twisting into (a) a same-dimensional space, (b) a lower dimensional space, and (c) a higher dimensional space, just for context in results.

In [5]:
n = 100
x1_sample = np.random.uniform(low = -5, high = 10, size = n)
x2_sample = np.random.uniform(low = 0, high = 15, size = n)

In [9]:
b = Branin()
X_sample = np.array([x1_sample,x2_sample]).reshape(-1,2)
y_sample = b.evaluate(X_sample)

In [10]:
max_y_sample = torch.tensor(max(y_sample))
min_y_sample = torch.tensor(min(y_sample))

print(f"Max:{max(y_sample)}, Min:{min(y_sample)}")
print("Use for calibrating the actual y-values")

Max:[303.06241092], Min:[6.0910223]
Use for calibrating the actual y-values


In [11]:
torch.tensor(max(y_sample))

tensor([303.0624], dtype=torch.float64)

In [22]:
x1 = torch.rand(n) * 2 - 1
x2 = torch.rand(n) * 2 - 1
X = torch.concat([x1.reshape(-1,1), x2.reshape(-1,1)], dim = 1)
y = torch.tensor(b.evaluate(X))

In [23]:
X = X.float()
y = y.float()

In [24]:
print(f"X shape:{X.shape}. y shape: {y.shape}")

X shape:torch.Size([100, 2]). y shape: torch.Size([100, 1])


In [25]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [12]:
class SimpleNetwork(nn.Module):
    def __init__(self, input_dim, output_dim, fn):
        super(SimpleNetwork, self).__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_dim, input_dim*4),
            nn.Tanh(),
            nn.Linear(input_dim*4, output_dim)
        )
        self.fn = fn
        (x1 + x2)*(max_y_sample - min_y_sample) + min_y_sample

    def forward(self, x):
        x = self.linear_relu_stack(x)
        x = self.fn.evaluate_torch(x)
        return x

In [13]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using {device} device')

Using cpu device


In [14]:
model = SimpleNetwork(input_dim = 2, output_dim = 200, fn = b)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-5, momentum=0.9)
criterion = torch.nn.MSELoss()
a = Ackley()

In [15]:
print(f"Min, max y:{max(y):.2f}/{min(y):.2f}")
print("Min, max y-hat")

Min, max y:700.96/-631.03
Min, max y-hat


In [16]:
print(f"Layer 2 weights: {model.linear_relu_stack[2].weight}")

Layer 2 weights: Parameter containing:
tensor([[-0.3480, -0.1966,  0.2682, -0.1952, -0.3004, -0.0714,  0.1989, -0.2211],
        [ 0.0761,  0.3028, -0.2304,  0.2758, -0.2236, -0.1295,  0.2346,  0.3303]],
       requires_grad=True)


In [18]:
for i in range(100):
    y_hat = model(X)
    loss = criterion(y_hat,y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    train_accuracy = torch.mean((y - y_hat)**2)
#     print(f"Layer 1 weights: {model.linear_relu_stack[0].weight}")
#     print(f"Layer 2 weights: {model.linear_relu_stack[2].weight}")
    if i%10 == 0:
        print(f"Min/max y-hat is:{min(y_hat):.2f}, {max(y_hat):.2f}")
        print(f"MSE: {train_accuracy}")

Min/max y-hat is:53.18, 66.42
MSE: 115017.53125
Min/max y-hat is:0.41, 531.43
MSE: 68026.296875
Min/max y-hat is:0.56, 385.01
MSE: 67166.84375
Min/max y-hat is:0.50, 529.34
MSE: 63834.14453125
Min/max y-hat is:0.40, 590.37
MSE: 62119.125
Min/max y-hat is:0.40, 617.15
MSE: 61757.30078125
Min/max y-hat is:0.40, 624.52
MSE: 61600.07421875
Min/max y-hat is:0.40, 627.71
MSE: 61509.25390625
Min/max y-hat is:0.40, 635.25
MSE: 61465.16015625
Min/max y-hat is:0.40, 642.72
MSE: 61415.6796875


In [19]:
top_right = torch.tensor([[1,1]]).float()
bottom_left = torch.tensor([[-1,-1]]).float()
top_left = torch.tensor([[-1,1]]).float()
bottom_right = torch.tensor([[-1,1]]).float()
origin = torch.tensor([[0,0]]).float()

In [20]:
print(f"The prediction in the top right is {model(top_right).item():.2f}.")
print(f"The prediction in the bottom left is {model(bottom_left).item():.2f}.")
print(f"The prediction in the top left is {model(top_left).item():.2f}.")
print(f"The prediction in the bottom right is {model(bottom_right).item():.2f}.")
print(f"The prediction in the origin is {model(origin).item():.2f}.")

The prediction in the top right is 681.46.
The prediction in the bottom left is 0.55.
The prediction in the top left is 25.00.
The prediction in the bottom right is 25.00.
The prediction in the origin is 21.01.


# Problems
1. Requires black box model to be differentiable
2. Requires 70 * 1000 = 70,000 evaluations of expensive target function.

# Next Steps
1. Try using the black box output as the y values and training a small neural net to approximate the black box function. This gets around the differentiability problem.
2. Then, you can perform Bayes Opt in the small "active" subspace.