## Benchmarking TuRBO Bayesian Optimization
In this tutorial we demonstrate the use of Xopt to perform Bayesian Optimization on
the 20D sphere test function.

## Define the test problem
Here we define a simple optimization problem, where we attempt to minimize the 20D
sphere function over the domain specified in `vocs`. Note that the function used to
evaluate the objective function takes a dictionary as input and returns a dictionary
as the output.

In [1]:
from xopt.resources.test_functions.sphere_20 import vocs, evaluate_sphere

## Create Xopt objects
Create the evaluator to evaluate our test function and create a generator that uses
the Upper Confidence Bound acquisition function to perform Bayesian Optimization.

In [2]:
from xopt.evaluator import Evaluator
from xopt.generators.bayesian import UpperConfidenceBoundGenerator
from xopt import Xopt

# Wrap the test function so Xopt can call it.
evaluator = Evaluator(function=evaluate_sphere)
# Start from the generator's default options and override only what this
# benchmark needs.
options = UpperConfidenceBoundGenerator.default_options()
options.n_initial = 5  # number of initial points (5 rows appear after the first step)
options.optim.use_turbo = True  # enable TuRBO trust-region optimization
generator = UpperConfidenceBoundGenerator(vocs, options)

# Tie the evaluator, generator and problem definition (vocs) together.
X = Xopt(evaluator=evaluator, generator=generator, vocs=vocs)

## Generate and evaluate initial points
To begin optimization, we must generate some random initial data points. The first call
to `X.step()` will generate and evaluate a number of random points specified by the
generator. Note that if we add data to xopt before calling `X.step()` by assigning
the data to `X.data`, calls to `X.step()` will ignore the random generation and
proceed to generating points via Bayesian optimization.

In [3]:
# Evaluate random initial points: no data exists yet, so this first call to
# X.step() generates and evaluates the initial random samples
# (options.n_initial was set to 5 above).
X.step()

# Inspect the gathered data (inputs x0..x19, objective f, and Xopt bookkeeping
# columns); as the last expression in the cell it is displayed as a table.
X.data

Unnamed: 0,x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,...,x13,x14,x15,x16,x17,x18,x19,f,xopt_runtime,xopt_error
1,0.422018,-0.026224,0.718189,0.310911,0.707492,0.434676,-0.849791,0.970433,-0.413621,0.716674,...,0.824777,-0.94475,-0.944922,-0.199756,-0.873053,0.873657,0.46668,9.041967,0.000166,False
2,-0.882145,-0.562799,0.733046,0.64942,0.283789,0.704892,0.618188,-0.841374,0.417714,-0.108155,...,0.325305,-0.264091,0.458591,0.101729,0.163075,-0.542988,-0.334861,5.862751,3.5e-05,False
3,0.721131,0.314384,-0.289785,-0.852108,-0.844845,-0.384828,0.813833,0.312714,0.71393,-0.797754,...,-0.379077,-0.007176,0.454549,0.444502,-0.237513,0.553099,-0.925864,7.220641,2.5e-05,False
4,0.980294,-0.875082,0.657125,-0.737197,0.140548,-0.459261,0.165984,-0.273056,0.499388,-0.598485,...,-0.289586,-0.020493,0.948895,-0.658809,0.005012,0.604305,-0.185447,5.603789,2.5e-05,False
5,-0.279773,-0.499483,0.918083,0.83857,-0.271987,0.706489,-0.546202,0.32451,0.836784,0.98514,...,-0.323156,-0.645561,-0.708913,-0.155306,-0.549053,-0.482431,0.728909,7.1246204,2.4e-05,False


In [4]:
# Determine trust region from gathered data: train the model on the points
# collected so far, then display the generator's current trust region.
# NOTE(review): presumably row 0 holds the lower bounds and row 1 the upper
# bounds of the region for each of the 20 inputs -- confirm against Xopt docs.
generator.train_model()
generator.trust_region

tensor([[-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
         -1., -1., -1., -1., -1., -1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
          1.,  1.,  1.,  1.,  1.,  1.]], dtype=torch.float64)

## Do Bayesian optimization steps
To perform optimization we simply call `X.step()` in a loop. This allows us to do
intermediate tasks in between optimization steps, such as examining the model and
acquisition function at each step (as we demonstrate here).

In [5]:
import torch

# Report the failure tolerance of the TuRBO state once, before the loop starts.
print(X.generator.turbo_state.failure_tolerance)
for i in range(200):
    # Read the TuRBO state *before* stepping, so every log line describes the
    # state going into iteration i.
    turbo = X.generator.turbo_state
    status_line = (
        f"{i}: length:{turbo.length}, "
        f"sc:{turbo.success_counter}, "
        f"fc:{turbo.failure_counter},"
        f"best_val: {turbo.best_value}"
    )
    print(status_line)

    # Run one optimization step.
    X.step()


20
0: length:0.5, sc:1, fc:0,best_val: 7.12462043762207
1: length:0.5, sc:2, fc:0,best_val: 7.12462043762207
2: length:0.5, sc:3, fc:0,best_val: 6.596367835998535
3: length:0.5, sc:4, fc:0,best_val: 3.1890828609466553
4: length:0.5, sc:0, fc:1,best_val: 3.1890828609466553
5: length:0.5, sc:0, fc:2,best_val: 3.1890828609466553
6: length:0.5, sc:0, fc:3,best_val: 3.1890828609466553
7: length:0.5, sc:0, fc:4,best_val: 3.1890828609466553
8: length:0.5, sc:0, fc:5,best_val: 3.1890828609466553
9: length:0.5, sc:0, fc:6,best_val: 3.1890828609466553
10: length:0.5, sc:0, fc:7,best_val: 3.1890828609466553
11: length:0.5, sc:1, fc:0,best_val: 2.817741870880127
12: length:0.5, sc:0, fc:1,best_val: 2.817741870880127
13: length:0.5, sc:0, fc:2,best_val: 2.817741870880127
14: length:0.5, sc:0, fc:3,best_val: 2.817741870880127
15: length:0.5, sc:0, fc:4,best_val: 2.817741870880127
16: length:0.5, sc:0, fc:5,best_val: 2.817741870880127
17: length:0.5, sc:0, fc:6,best_val: 2.817741870880127
18: length:

In [6]:
# Access the collected data: every point evaluated so far (the initial random
# samples plus the points added by the optimization loop) is stored in X.data.
X.data

Unnamed: 0,x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,...,x13,x14,x15,x16,x17,x18,x19,f,xopt_runtime,xopt_error
1,0.422018,-0.026224,0.718189,0.310911,0.707492,0.434676,-0.849791,0.970433,-0.413621,0.716674,...,0.824777,-0.944750,-0.944922,-0.199756,-0.873053,0.873657,0.466680,9.041967,0.000166,False
2,-0.882145,-0.562799,0.733046,0.649420,0.283789,0.704892,0.618188,-0.841374,0.417714,-0.108155,...,0.325305,-0.264091,0.458591,0.101729,0.163075,-0.542988,-0.334861,5.862751,0.000035,False
3,0.721131,0.314384,-0.289785,-0.852108,-0.844845,-0.384828,0.813833,0.312714,0.713930,-0.797754,...,-0.379077,-0.007176,0.454549,0.444502,-0.237513,0.553099,-0.925864,7.220641,0.000025,False
4,0.980294,-0.875082,0.657125,-0.737197,0.140548,-0.459261,0.165984,-0.273056,0.499388,-0.598485,...,-0.289586,-0.020493,0.948895,-0.658809,0.005012,0.604305,-0.185447,5.603789,0.000025,False
5,-0.279773,-0.499483,0.918083,0.838570,-0.271987,0.706489,-0.546202,0.324510,0.836784,0.985140,...,-0.323156,-0.645561,-0.708913,-0.155306,-0.549053,-0.482431,0.728909,7.1246204,0.000024,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201,-0.022770,0.010683,-0.029797,-0.007162,-0.024295,-0.008045,0.049794,0.012742,-0.005660,-0.028956,...,0.014557,0.009588,-0.024531,0.023701,0.010039,-0.031757,-0.002574,0.010350179,0.000152,False
202,-0.022540,-0.022878,0.003643,-0.007380,-0.009710,-0.008285,0.049588,-0.018762,0.028848,-0.007280,...,0.014784,-0.028681,-0.024309,0.023450,0.010280,-0.031526,-0.024425,0.0111426,0.000072,False
203,0.008578,0.012142,-0.029341,-0.007557,-0.023872,-0.030718,0.039926,-0.018570,0.028628,-0.028548,...,0.014977,0.009129,0.005933,0.023250,0.010498,0.000389,-0.024229,0.00869617,0.000050,False
204,-0.022153,-0.022446,0.003245,-0.007724,-0.023675,-0.008701,0.019620,0.012140,0.028431,-0.028373,...,0.015177,0.008901,0.005748,0.023044,0.010694,-0.031128,-0.024057,0.0067548505,0.000047,False


## Getting the trust region

In [7]:
# Display the generator's trust region after the optimization loop has run.
X.generator.trust_region

tensor([[-2.2022e-02, -2.2299e-02, -2.7696e-02,  5.9213e-03, -8.2131e-03,
          1.5310e-02, -2.8104e-02, -2.3810e-02, -9.9007e-03,  1.0835e-02,
         -3.0994e-02, -2.3929e-02, -2.9012e-02, -3.6670e-02, -2.3539e-02,
         -4.1223e-02,  1.9745e-02, -1.8258e-02, -4.8507e-03, -2.8248e-02],
        [ 8.2594e-03,  1.1771e-02,  3.1203e-03,  3.6883e-02,  2.3715e-02,
          4.6081e-02,  8.7390e-03,  5.6220e-03,  2.2900e-02,  4.3179e-02,
          5.7626e-05,  5.9942e-03,  3.1050e-03, -7.8491e-03,  7.5748e-03,
         -8.8412e-03,  4.9104e-02,  1.2008e-02,  2.8288e-02,  5.2454e-04]],
       dtype=torch.float64)

## Customizing optimization
Each generator has a set of options that can be modified to affect optimization behavior.

In [8]:
# Show the generator's current options as a nested dictionary
# (optimizer, acquisition-function and model settings).
X.generator.options.dict()

{'optim': {'num_restarts': 20,
  'raw_samples': 20,
  'sequential': True,
  'max_travel_distances': None,
  'use_turbo': True},
 'acq': {'proximal_lengthscales': None,
  'use_transformed_proximal_weights': True,
  'monte_carlo_samples': 128,
  'beta': 2.0},
 'model': {'name': 'standard',
  'custom_constructor': None,
  'use_low_noise_prior': True,
  'covar_modules': {},
  'mean_modules': {}},
 'n_initial': 5,
 'use_cuda': False}

In [9]:
# example: add a Gamma(1.0,10.0) prior to the noise hyperparameter to reduce model noise
# (good for optimizing noise-free simulations)
# NOTE: the options dump above already shows 'use_low_noise_prior': True, so in
# this notebook the assignment only illustrates how to set the flag.
X.generator.options.model.use_low_noise_prior = True

In [10]:
# List the (name, parameter) pairs of the trained model for the TuRBO run:
# raw noise, mean constant, output scale and per-input lengthscales.
list(X.generator.model.named_parameters())

[('models.0.likelihood.noise_covar.raw_noise',
  Parameter containing:
  tensor([-20.7809], dtype=torch.float64, requires_grad=True)),
 ('models.0.mean_module.raw_constant',
  Parameter containing:
  tensor(2.3985, dtype=torch.float64, requires_grad=True)),
 ('models.0.covar_module.raw_outputscale',
  Parameter containing:
  tensor(-1.5826, dtype=torch.float64, requires_grad=True)),
 ('models.0.covar_module.base_kernel.raw_lengthscale',
  Parameter containing:
  tensor([[0.4325, 0.6182, 0.4595, 0.4668, 0.5147, 0.4572, 0.7468, 0.3890, 0.5573,
           0.5351, 0.4713, 0.4142, 0.5240, 0.3572, 0.4744, 0.5370, 0.3852, 0.4317,
           0.5737, 0.3547]], dtype=torch.float64, requires_grad=True))]

## Compare to normal UCB

In [11]:
# Rebuild the same setup as the TuRBO run, changing only `use_turbo`, so the
# two runs can be compared.
evaluator = Evaluator(function=evaluate_sphere)
options = UpperConfidenceBoundGenerator.default_options()
options.n_initial = 5
options.optim.use_turbo = False  # plain UCB: no trust region
generator = UpperConfidenceBoundGenerator(vocs, options)

# Rebinding X starts this run from a fresh Xopt object.
X = Xopt(evaluator=evaluator, generator=generator, vocs=vocs)


In [12]:
# First call: generate and evaluate the random initial points.
X.step()

for i in range(200):
    # Smallest objective value among all points evaluated so far.
    best_so_far = X.vocs.objective_data(X.data).min().to_numpy()
    print(f"{best_so_far}")

    # Run one optimization step.
    X.step()

[4.70230961]
[4.70230961]
[4.70230961]
[4.70230961]
[4.70230961]
[4.70230961]
[4.70230961]
[4.70230961]
[4.70230961]
[4.57788754]
[4.57788754]
[4.57788754]
[4.57788754]
[4.57788754]
[4.57788754]
[4.57788754]
[4.57788754]
[4.57788754]
[4.57788754]
[4.57788754]
[4.57788754]
[4.24313593]
[4.24313593]
[4.24313593]
[4.24313593]
[4.24313593]
[4.24313593]
[4.24313593]
[4.24313593]
[4.11975861]
[4.11975861]
[4.11975861]
[4.11975861]
[4.11975861]
[4.11975861]
[4.11975861]
[4.11975861]
[4.11975861]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.09187317]
[3.08557582]

In [13]:
# List the (name, parameter) pairs of the trained model for the plain UCB run,
# for comparison with the values printed after the TuRBO run.
list(X.generator.model.named_parameters())

[('models.0.likelihood.noise_covar.raw_noise',
  Parameter containing:
  tensor([-18.6872], dtype=torch.float64, requires_grad=True)),
 ('models.0.mean_module.raw_constant',
  Parameter containing:
  tensor(4.2172, dtype=torch.float64, requires_grad=True)),
 ('models.0.covar_module.raw_outputscale',
  Parameter containing:
  tensor(0.1444, dtype=torch.float64, requires_grad=True)),
 ('models.0.covar_module.base_kernel.raw_lengthscale',
  Parameter containing:
  tensor([[0.5190, 0.5267, 0.5178, 0.6047, 0.4322, 0.5229, 0.4061, 0.4835, 0.4625,
           0.3935, 0.4572, 0.2893, 0.4270, 0.4062, 0.4094, 0.4952, 0.3312, 0.4645,
           0.5173, 0.4439]], dtype=torch.float64, requires_grad=True))]