## Batched model tutorial
In this tutorial we demonstrate that for problems with more than one output (objectives and constraints), and where
you are fine using only complete samples (no NaNs in any output), a significant speedup can be achieved by using a
batched model. On a GPU, this can yield a 2-3x speedup.

In [None]:
import numpy as np
import time
from xopt.generators.bayesian.models.standard import BatchedModelConstructor, StandardModelConstructor
from xopt.evaluator import Evaluator
from xopt.generators.bayesian import ExpectedImprovementGenerator
from xopt.numerical_optimizer import LBFGSOptimizer
from xopt.resources.test_functions.rosenbrock import evaluate_rosenbrock
from xopt import Xopt
import matplotlib.pyplot as plt
import pandas as pd
import torch
import threadpoolctl

# Restrict PyTorch and the BLAS / OpenMP thread pools to one thread each
# (presumably so the timing comparison below is not skewed by threading).
torch.set_num_threads(1)
for _api in ("blas", "openmp"):
    threadpoolctl.threadpool_limits(limits=1, user_api=_api)

# Problem definition: 16 variables x0..x15 bounded to [-3, 3], one objective
# to minimize and two inequality constraints.
_bounds = (-3, 3)
vocs = dict(
    variables={f"x{idx}": list(_bounds) for idx in range(16)},
    objectives={"y": "MINIMIZE"},
    constraints={
        "c1": ["GREATER_THAN", 0.1],
        "c2": ["LESS_THAN", 3],
        # Optional third constraint, currently disabled:
        # "c3": ["GREATER_THAN", 0],
    },
)

def eval_f(input_dict):
    """Evaluate the test problem at a single point.

    Parameters
    ----------
    input_dict : mapping
        Must provide the keys ``"x0"`` ... ``"x15"``.

    Returns
    -------
    dict
        ``"y"``: sum of squares of all 16 inputs plus a standard-normal draw
        scaled by 0.01; ``"y2"``: ``x0 + x1``; ``"c1"``: ``x2 + x3``;
        ``"c2"``: ``x4 + x5``.
    """
    squared_inputs = np.array([input_dict[f"x{k}"] ** 2 for k in range(16)])
    # A single random draw per call makes the objective slightly noisy.
    noisy_sum = np.sum(squared_inputs) + np.random.randn() * 0.01

    outputs = {"y": noisy_sum}
    outputs["y2"] = input_dict["x0"] + input_dict["x1"]
    outputs["c1"] = input_dict["x2"] + input_dict["x3"]
    outputs["c2"] = input_dict["x4"] + input_dict["x5"]
    return outputs

In [None]:
# Request the GPU only when one is actually available, so the notebook also
# runs on CPU-only machines and the "GPU:" entry in the plot title reflects
# the device that was really requested.
USE_CUDA = torch.cuda.is_available()
evaluator = Evaluator(function=eval_f)

# Baseline: expected-improvement generator using the standard model constructor.
generator = ExpectedImprovementGenerator(
    vocs=vocs,
    gp_constructor=StandardModelConstructor(train_method="adam"),
    numerical_optimizer=LBFGSOptimizer(n_restarts=5),
    use_cuda=USE_CUDA,
)
X = Xopt(evaluator=evaluator, generator=generator, vocs=vocs)

# Same configuration, but using the batched model constructor.
generator_batched = ExpectedImprovementGenerator(
    vocs=vocs,
    gp_constructor=BatchedModelConstructor(train_method="adam"),
    numerical_optimizer=LBFGSOptimizer(n_restarts=5),
    use_cuda=USE_CUDA,
)
X2 = Xopt(evaluator=evaluator, generator=generator_batched, vocs=vocs)

In [None]:
# Seed both optimizers with 20 random evaluations each
# (standard first, then batched).
for _xopt in (X, X2):
    _xopt.random_evaluate(20)

## Run the optimization
We run the two optimizers side by side to compare speed. To save time, we add 10 randomly sampled points
 before each optimization step, so the dataset grows by more than one point per step.

In [None]:
def _latest_timing(gen, phase):
    """Return the most recent entry of ``gen.computation_time[phase]``."""
    return gen.computation_time[phase].to_numpy()[-1]


history = []
for i in range(50):
    torch.cuda.empty_cache()

    # Grow the dataset with 10 random samples before the next optimization step.
    X.random_evaluate(10)

    # Hand the batched optimizer exactly the same data as the standard one.
    X2.data = X.data.copy()
    X2.generator.data = X.generator.data.copy()

    # Wall-clock marks around each step: t2 - t1 covers the standard step,
    # t3 - t2 the batched step. The recorded history uses the generators'
    # own timing tables instead.
    t1 = time.perf_counter()
    X.step()
    t2 = time.perf_counter()
    X2.step()
    t3 = time.perf_counter()

    if i % 10 == 0:
        print(f"Step {i}")

    record = {}
    record['n'] = len(X.data)
    record['Standard training'] = _latest_timing(X.generator, 'training')
    record['Standard acquisition'] = _latest_timing(X.generator, 'acquisition_optimization')
    record['Batched training'] = _latest_timing(X2.generator, 'training')
    record['Batched acquisition'] = _latest_timing(X2.generator, 'acquisition_optimization')
    history.append(record)

## Plot performance
Let's plot the timings.

In [None]:
# Training time of the standard and batched models versus dataset size.
history_df = pd.DataFrame(history)
fig, ax = plt.subplots(1, 1)
ax.plot(history_df['n'], history_df['Standard training'], label='Standard training')
ax.plot(history_df['n'], history_df['Batched training'], label='Batched training')
ax.set_ylabel('Time (s)')
# 'n' holds len(X.data) recorded at each step, i.e. the number of samples,
# not the loop index, so label the axis accordingly.
ax.set_xlabel('Number of data points')
ax.legend()
ax.set_title(f'Vars: {len(vocs["variables"])}, Objs: {len(vocs["objectives"])}, Cons: {len(vocs["constraints"])}, GPU: {generator.use_cuda}')

In [None]:
# Acquisition-optimization time of the standard and batched models versus
# dataset size.
fig, ax = plt.subplots(1, 1)
ax.plot(history_df['n'], history_df['Standard acquisition'], label='Standard acquisition')
ax.plot(history_df['n'], history_df['Batched acquisition'], label='Batched acquisition')
ax.set_ylabel('Time (s)')
# 'n' holds len(X.data) recorded at each step, i.e. the number of samples,
# not the loop index, so label the axis accordingly.
ax.set_xlabel('Number of data points')
# Same configuration summary as on the training-time plot.
ax.set_title(f'Vars: {len(vocs["variables"])}, Objs: {len(vocs["objectives"])}, Cons: {len(vocs["constraints"])}, GPU: {generator.use_cuda}')
ax.legend()