In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math as math
import torch
import test_fn

Branin Function:
\begin{align*}
f&:([-5,10]\times [0,15]) \rightarrow \mathbb{R}\\
f(x)&=a(x_2 - bx_1^2 + cx_1 - r)^2 + s(1-t)cos(x_1) + s
\end{align*}

Recommended values:
\begin{align*}
a &= 1\\
b &= \frac{5.1}{4\pi^2}\\
c &= \frac{5}{\pi}\\
r &= 6\\
s &= 10\\
t &= \frac{1}{8\pi}
\end{align*}

In [72]:
class Ackley():
    def __init__(self, noise_var = 0):
        self.params = {
            'a': 20,
            'b': 0.2,
            'c': 2 * torch.pi
        }

    def evaluate(self, x):
        a = self.params['a']
        b = self.params['b']
        c = self.params['c']
        n = len(x)
        
        first_operand = -a * np.exp(np.sqrt(np.sum(x**2) / n) * -b)
        second_operand = np.exp(np.sum(np.cos(c * x)) / n)

        return first_operand - second_operand + a + np.exp(1)

In [3]:
class Branin():
    '''
    Takes in an n x 2 input matrix where each row is an observation of dimension 2.
    Outputs n x 1 output matrix where each output has dimension 1.
    '''
    
    def __init__(self, noise_var=0):
        self.range = np.array([[-5,10],
                             [0,15]])
        self.param = {
            'a':1,
            'b':5.1/(4*math.pi**2),
            'c':5/math.pi,
            'r':6,
            's':10,
            't':1/(8*math.pi)
        }
        
        self.noise_var = noise_var
        self.input_dim = 2
        self.output_dim = 1

    def scale_domain(self,x):
        # Scaling the domain
        x_copy = np.copy(x)
        if len(x_copy.shape) == 1:
            x_copy = x_copy.reshape((1, x_copy.shape[0]))
        for i in range(len(self.range)):
            x_copy[:, i] = x_copy[:, i] * (self.range[i, 1] - self.range[i, 0]) / 2 + (
                        self.range[i, 1] + self.range[i, 0]) / 2
        return x_copy

    def __evaluate_single(self, x):
        a = self.param['a']
        b = self.param['b']
        c = self.param['c']
        r = self.param['r']
        s = self.param['s']
        t = self.param['t']
        
        f = a*(x[1] - b*x[1]**2 + c*x[0] - r)**2 + s*(1-t)*math.cos(x[1]) + s
        
        return f
    
    def evaluate_torch(self, x):
        a = self.param['a']
        b = self.param['b']
        c = self.param['c']
        r = self.param['r']
        s = self.param['s']
        t = self.param['t']
        
        f = a*(x[:,1] - b*x[:,1]**2 + c*x[:,0] - r)**2 + s*(1-t)*torch.cos(x[:,1]) + s
                
        return f
    
    def evaluate_true(self, x):
        x = x.reshape(-1,self.input_dim)
        
        return np.apply_along_axis(self.__evaluate_single, axis = 1, arr = x)

    def evaluate(self, x):
        true_values = self.evaluate_true(x).reshape(x.shape[0],self.output_dim)
        noise = np.random.normal(0, self.noise_var, size = (x.shape[0],self.output_dim))
        
        return true_values + noise

In [4]:
torch.concat([torch.tensor([]),torch.tensor([5]),torch.tensor([10])])

tensor([ 5., 10.])

# Draw sample inputs

# Train simple MLP to twist inputs into desired space
In this case, we're twisting into (a) a same-dimensional space, (b) a lower dimensional space, and (c) a higher dimensional space, just for context in results.

In [5]:
n = 100
x1_sample = np.random.uniform(low = -5, high = 10, size = n)
x2_sample = np.random.uniform(low = 0, high = 15, size = n)

In [9]:
b = Branin()
X_sample = np.array([x1_sample,x2_sample]).reshape(-1,2)
y_sample = b.evaluate(X_sample)

In [10]:
max_y_sample = torch.tensor(max(y_sample))
min_y_sample = torch.tensor(min(y_sample))

print(f"Max:{max(y_sample)}, Min:{min(y_sample)}")
print("Use for calibrating the actual y-values")

Max:[303.06241092], Min:[6.0910223]
Use for calibrating the actual y-values


In [11]:
torch.tensor(max(y_sample))

tensor([303.0624], dtype=torch.float64)

In [22]:
x1 = torch.rand(n) * 2 - 1
x2 = torch.rand(n) * 2 - 1
X = torch.concat([x1.reshape(-1,1), x2.reshape(-1,1)], dim = 1)
y = torch.tensor(b.evaluate(X))

In [23]:
X = X.float()
y = y.float()

In [24]:
print(f"X shape:{X.shape}. y shape: {y.shape}")

X shape:torch.Size([100, 2]). y shape: torch.Size([100, 1])


In [25]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [26]:
class SimpleNetwork(nn.Module):
    def __init__(self, input_dim, output_dim, fn):
        super(SimpleNetwork, self).__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_dim, input_dim*4),
            nn.Tanh(),
            nn.Linear(input_dim*4, output_dim)
        )
        self.fn = (x1 + x2)*(max_y_sample - min_y_sample) + min_y_sample

    def forward(self, x):
        x = self.linear_relu_stack(x)
        x = torch.sum(x, dim = 1)
        return x

In [27]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using {device} device')

Using cpu device


In [28]:
model = SimpleNetwork(input_dim = 2, output_dim = 200, fn = b)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-5, momentum=0.9)
criterion = torch.nn.MSELoss()
a = Branin()

In [30]:
print(f"Layer 2 weights: {model.linear_relu_stack[2].weight}")

Layer 2 weights: Parameter containing:
tensor([[ 0.2581,  0.1143,  0.0056,  ...,  0.2669, -0.1954, -0.2674],
        [-0.3270,  0.1050,  0.0486,  ..., -0.1244, -0.0149, -0.1125],
        [ 0.0231,  0.1114, -0.3117,  ..., -0.3082, -0.2682,  0.1664],
        ...,
        [ 0.2860,  0.1823, -0.3339,  ..., -0.1168,  0.0413, -0.1010],
        [-0.2245,  0.0146,  0.1288,  ...,  0.1895,  0.0029,  0.2033],
        [-0.2011,  0.1737,  0.3446,  ...,  0.0470, -0.1438, -0.1075]],
       requires_grad=True)


In [49]:
y

tensor([52.4806, 39.9268, 79.5650, 40.1725, 76.7146, 44.1516, 42.6937, 50.1656,
        53.1920, 45.9787, 60.9406, 64.8860, 55.2403, 48.0090, 56.3649, 47.0781,
        44.5381, 79.8559, 42.5877, 44.5614, 46.0385, 44.4320, 62.0268, 55.6006,
        48.5692, 52.1786, 51.3215, 65.1167, 40.7720, 49.6470, 54.2327, 59.1713,
        72.4725, 66.8485, 42.8287, 80.2531, 52.7735, 78.2152, 46.6956, 50.0613,
        47.2606, 70.6202, 59.4537, 52.3826, 65.9901, 50.3221, 38.0817, 51.1881,
        42.6852, 69.6638, 49.2945, 48.1482, 58.2510, 53.2523, 36.5243, 73.8186,
        47.7621, 35.9020, 68.3639, 56.6267, 38.7456, 60.3730, 50.8206, 48.0086,
        76.2703, 53.5668, 51.4371, 59.0544, 55.6461, 56.4417, 41.2829, 48.8292,
        61.0471, 57.8089, 61.9463, 76.7924, 32.8383, 55.7798, 80.6952, 61.6781,
        48.3043, 52.5980, 69.6243, 55.1001, 46.1341, 65.5288, 64.3684, 46.4507,
        49.2094, 45.0830, 56.1902, 60.5274, 34.1262, 46.9553, 38.2774, 39.1461,
        61.1303, 70.4343, 87.3101, 50.83

In [50]:
y_hat

tensor([53.4534, 39.6380, 77.0883, 38.4472, 75.0314, 43.7656, 41.4632, 50.4156,
        53.8903, 45.7611, 62.3025, 66.0893, 56.0212, 48.6027, 57.2163, 47.6397,
        43.8731, 77.6936, 43.2527, 44.3753, 45.7102, 44.3684, 61.4044, 56.3802,
        48.4465, 52.9681, 51.6857, 65.5949, 40.8825, 50.7751, 54.9245, 59.9985,
        72.5483, 67.6029, 43.2685, 77.9744, 53.9752, 76.0348, 47.0282, 50.2037,
        47.8849, 70.2574, 60.4121, 52.8520, 66.8731, 50.8384, 35.9971, 51.8220,
        41.4559, 70.2192, 50.2778, 48.0601, 59.6094, 53.8188, 34.2691, 72.6623,
        47.9803, 34.3597, 68.7236, 57.7402, 37.8101, 59.4896, 51.9812, 48.0214,
        75.1413, 54.8048, 51.8802, 60.0238, 56.4462, 57.3334, 40.6758, 49.9409,
        61.8253, 58.0268, 62.6555, 75.0386, 31.2337, 56.5711, 77.8274, 62.5699,
        49.3454, 53.0842, 70.2157, 56.2807, 45.5975, 65.9606, 64.2595, 46.9015,
        50.2951, 44.3102, 57.4705, 61.5542, 32.3621, 46.5821, 36.9916, 37.2361,
        62.3815, 70.4545, 81.9108, 51.38

In [47]:
for i in range(1000):
    y_hat = model(X)
    y = y.reshape(-1)
    loss = criterion(y_hat,y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    train_accuracy = torch.mean((y - y_hat)**2)
#     print(f"Layer 1 weights: {model.linear_relu_stack[0].weight}")
#     print(f"Layer 2 weights: {model.linear_relu_stack[2].weight}")
    if i%10 == 0:
        print(f"Min/max y-hat is:{min(y_hat):.2f}, {max(y_hat):.2f}")
        print(f"MSE: {train_accuracy}")

Min/max y-hat is:53.59, 55.48
MSE: 141.86679077148438
Min/max y-hat is:52.23, 56.59
MSE: 118.98584747314453
Min/max y-hat is:48.72, 60.68
MSE: 83.42225646972656
Min/max y-hat is:44.81, 64.76
MSE: 53.59394454956055
Min/max y-hat is:41.13, 68.28
MSE: 32.70072937011719
Min/max y-hat is:37.96, 71.21
MSE: 19.50954818725586
Min/max y-hat is:35.38, 73.54
MSE: 11.925878524780273
Min/max y-hat is:33.39, 75.31
MSE: 7.9656901359558105
Min/max y-hat is:31.94, 76.58
MSE: 6.084530830383301
Min/max y-hat is:30.96, 77.46
MSE: 5.2604522705078125
Min/max y-hat is:30.33, 78.04
MSE: 4.912637233734131
Min/max y-hat is:29.96, 78.41
MSE: 4.755030155181885
Min/max y-hat is:29.75, 78.64
MSE: 4.664678573608398
Min/max y-hat is:29.66, 78.78
MSE: 4.59536600112915
Min/max y-hat is:29.62, 78.87
MSE: 4.532023906707764
Min/max y-hat is:29.62, 78.94
MSE: 4.470486640930176
Min/max y-hat is:29.64, 78.99
MSE: 4.409852981567383
Min/max y-hat is:29.66, 79.04
MSE: 4.350011348724365
Min/max y-hat is:29.69, 79.08
MSE: 4.29095

In [70]:
x_samp = torch.tensor([[-1,-1],[0,0],[0,1],[1,0],[1,1]]).float()
y_hat = torch.tensor(a.evaluate(x_samp)).reshape(-1,1)
y = model(x_samp).reshape(-1,1)

In [71]:
torch.concat([x_samp, y_hat, y], dim = 1)

tensor([[-1.0000, -1.0000, 91.2392, 83.5865],
        [ 0.0000,  0.0000, 55.6021, 56.6748],
        [ 0.0000,  1.0000, 41.4966, 42.1808],
        [ 1.0000,  0.0000, 39.0365, 37.1191],
        [ 1.0000,  1.0000, 27.7029, 25.2506]], dtype=torch.float64,
       grad_fn=<CatBackward0>)

# Ackley

In [161]:
class SimpleNetwork2(nn.Module):
    def __init__(self, input_dim, output_dim, fn):
        super(SimpleNetwork2, self).__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_dim, input_dim*2),
            nn.ReLU(),
            nn.Linear(input_dim*2, input_dim*4),
            nn.ReLU(),
            nn.Linear(input_dim*4, output_dim)
        )

    def forward(self, x):
        x = self.linear_relu_stack(x)
        x = torch.sum(x, dim = 1)
        return x

In [162]:
fn = test_fn.Ackley()
model = SimpleNetwork2(input_dim = 2, output_dim = 200, fn = fn)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-5, momentum=0.9)
criterion = torch.nn.MSELoss()

In [163]:
n = 1000
x = torch.randn((n, 2))

In [169]:
for i in range(400):
    y_hat = model(x)
    with torch.no_grad():
        y = fn.evaluate(model.linear_relu_stack(x))
    loss = criterion(y_hat,y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    train_accuracy = torch.mean((y - y_hat)**2)
#     print(f"Layer 1 weights: {model.linear_relu_stack[0].weight}")
#     print(f"Layer 2 weights: {model.linear_relu_stack[2].weight}")
    if i%100 == 0:
        print(f"Min/max y-hat is:{min(y_hat):.2f}, {max(y_hat):.2f}")
        print(f"MSE: {train_accuracy}")

Min/max y-hat is:1.44, 4.25
MSE: 0.1248452439904213
Min/max y-hat is:1.32, 4.09
MSE: 0.09115035086870193
Min/max y-hat is:1.25, 3.97
MSE: 0.07242860645055771
Min/max y-hat is:1.20, 3.86
MSE: 0.06087846681475639


In [170]:
x_samp = torch.tensor([[-1,-1],[-0.8,-0.8],[0,0],[0,1],[1,0],[1,1]]).float()
with torch.no_grad():
    y = fn.evaluate(model.linear_relu_stack(x_samp)).reshape(-1,1)
y_hat = model(x_samp).reshape(-1,1)

In [171]:
with torch.no_grad():
    results = torch.concat([x_samp, y_hat, y], dim = 1)
(results*100).round()/100

tensor([[-1.0000, -1.0000,  2.6300,  2.4200],
        [-0.8000, -0.8000,  2.6600,  2.4000],
        [ 0.0000,  0.0000,  2.3000,  2.3300],
        [ 0.0000,  1.0000,  2.2100,  2.3300],
        [ 1.0000,  0.0000,  2.2700,  2.3300],
        [ 1.0000,  1.0000,  2.1500,  2.3500]])

# Problems
1. Requires black box model to be differentiable
2. Requires 70 * 1000 = 70,000 evaluations of expensive target function.

# Next Steps
1. Try using the black box output as the y values and training a small neural net to approximate the black box function. This gets around the differentiability problem.
2. Then, you can perform Bayes Opt in the small "active" subspace.