# TLDR

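The goal of this notebook is to understand how standardizing (or not) affects how well the outcome model fits.

**Findings from the runs recorded below** (error returned by `check_outcome_model_fit`; smaller is better, one run per setting):

- Synthetic low-rank problem: not standardizing gives a much better fit (0.0035 vs. 0.1569 with standardization).
- Augmented car problems: the difference is small and not consistent in direction (45D: 14.65 with standardization vs. 16.03 without; 72D: 12.09 with vs. 11.48 without).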

In [3]:
%load_ext autoreload
%autoreload 2

import os, sys
# file_dir = os.path.dirname(__file__)
# sys.path.append(file_dir)
sys.path.append('/home/yz685/low_rank_BOPE')
sys.path.append('/home/yz685/low_rank_BOPE/low_rank_BOPE')
import warnings
import math
import matplotlib.pyplot as plt
import numpy as np
import scipy.linalg
import torch
from torch import Tensor
from tqdm.auto import trange

from botorch.models import SingleTaskGP
from botorch.models.transforms import Standardize
from botorch.utils.sampling import draw_sobol_samples

from gpytorch import ExactMarginalLogLikelihood
from gpytorch.constraints import GreaterThan
from gpytorch.likelihoods import GaussianLikelihood
from gpytorch.priors import GammaPrior
from low_rank_BOPE.bope_class import BopeExperiment
from low_rank_BOPE.bope_class_retraining import RetrainingBopeExperiment
from low_rank_BOPE.test_problems.shapes import AreaUtil, Image
from low_rank_BOPE.test_problems.synthetic_problem import make_controlled_coeffs, make_problem, generate_principal_axes, PCATestProblem, LinearUtil
from low_rank_BOPE.src.diagnostics import check_outcome_model_fit, check_util_model_fit
from low_rank_BOPE.src.transforms import fit_pca
from low_rank_BOPE.test_problems.car_problems import problem_setup_augmented


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## First try on synthetic low-rank problems

In [7]:
# Build 20 axes for the 20-dimensional outcome space with a fixed seed.
# NOTE(review): presumably these are orthonormal principal axes -- confirm in
# generate_principal_axes.
full_axes = generate_principal_axes(
    output_dim=20, num_axes=20, seed=0, dtype=torch.double
)

# Keep only the first three axes as the "true" low-rank directions, and pair
# them with three scaling factors (passed on later as PC_scaling_factors).
true_axes = full_axes[:3]
scaling = [1, 2, 3]

In [8]:

# Default configuration for the synthetic low-rank test problem.
config = {
    "input_dim": 2,
    "outcome_dim": 20,
    "PC_noise_level": 0,
    "noise_std": 0.01,
    "num_initial_samples": 32,
    "true_axes": true_axes,
    "PC_lengthscales": [0.5] * 3,
    "PC_scaling_factors": scaling,
    "dtype": torch.double,
    "problem_seed": 1234,
}

# Seed both NumPy and PyTorch so the random draws below are reproducible.
np.random.seed(config["problem_seed"])
torch.manual_seed(config["problem_seed"])

# Autograd anomaly detection is a debugging aid: it is a global switch and
# slows down every backward pass while enabled. Keep it off by default and
# flip this flag to True only when hunting NaNs / bad gradients.
DETECT_ANOMALY = False
torch.autograd.set_detect_anomaly(DETECT_ANOMALY)

# Initial inputs used to construct the test problem.
# NOTE(review): these are standard-normal draws, so they are not confined to
# the [0, 1] bounds later passed to PCATestProblem -- confirm this is intended.
initial_X = torch.randn(
    (config["num_initial_samples"], config["input_dim"]), dtype=config["dtype"]
)

# Every outcome is treated as an objective; there are no constraints.
obj_indices = list(range(config["outcome_dim"]))
cons_indices = []

# A single axis supplied as a 1-D tensor is promoted to a 2-D tensor with one
# row so that downstream code always sees a stack of axes.
if config["true_axes"].dim() == 1:
    config["true_axes"] = config["true_axes"].unsqueeze(0)



In [9]:
# NOTE (@Jerry): constructing the problem is very slow.
# NOTE(review): the Tensor(...) calls below are not given config["dtype"] --
# confirm PCATestProblem casts them as needed.
problem_bounds = Tensor([[0, 1]] * config["input_dim"])
pc_lengthscales = Tensor(config["PC_lengthscales"])
pc_scaling_factors = Tensor(config["PC_scaling_factors"])

# Synthetic test problem built from the configured axes, noise level,
# lengthscales and scaling factors.
problem = PCATestProblem(
    opt_config=(obj_indices, cons_indices),
    initial_X=initial_X,
    bounds=problem_bounds,
    true_axes=config["true_axes"],
    noise_std=config["noise_std"],
    PC_lengthscales=pc_lengthscales,
    PC_scaling_factors=pc_scaling_factors,
    dtype=torch.double,
)

  "bounds", torch.tensor(self._bounds, dtype=torch.float).transpose(-1, -2)


In [7]:
# Show the repr of the constructed problem to inspect its components.
problem

PCATestProblem(
  (gen_model_PC): SingleTaskGP(
    (likelihood): GaussianLikelihood(
      (noise_covar): HomoskedasticNoise(
        (noise_prior): GammaPrior()
        (raw_noise_constraint): GreaterThan(1.000E-04)
      )
    )
    (mean_module): ConstantMean()
    (covar_module): ScaleKernel(
      (base_kernel): MaternKernel(
        (lengthscale_prior): GammaPrior()
        (raw_lengthscale_constraint): Positive()
      )
      (outputscale_prior): GammaPrior()
      (raw_outputscale_constraint): Positive()
    )
  )
)

In [10]:
# Evaluate the problem on 100 random 2-D inputs.
# NOTE(review): the inputs are standard-normal draws, so they are not limited
# to the problem's [0, 1] bounds -- confirm this is intended.
test_X = torch.randn(100, 2, dtype=torch.double)
test_Y = problem(test_X)


In [11]:
# Coefficients for the linear utility, generated from the full set of axes.
coeffs = make_controlled_coeffs(
    full_axes=full_axes, latent_dim=3, alpha=1, n_reps=1, dtype=torch.double
)
# Swap the last two dimensions before handing the coefficients to LinearUtil.
beta = coeffs.transpose(-2, -1)

util_func = LinearUtil(beta=beta)

In [12]:
# Retraining BOPE experiment on the synthetic problem, limited to the "uwpca"
# method and the "EUBO-zeta" PE strategy, for trial index 0.
exp = RetrainingBopeExperiment(
    problem=problem,
    util_func=util_func,
    methods=["uwpca"],
    pe_strategies=["EUBO-zeta"],
    trial_idx=0,
    output_path="/home/yz685/low_rank_BOPE/experiments/synthetic/",
)

BopeExperiment settings:  {'pca_var_threshold': 0.95, 'initial_experimentation_batch': 16, 'n_check_post_mean': 20, 'every_n_comps': 3, 'verbose': True, 'dtype': torch.float64, 'num_restarts': 20, 'raw_samples': 128, 'batch_limit': 4, 'sampler_num_outcome_samples': 64, 'maxiter': 1000, 'initial_latent_dim': None, 'min_stdv': 100000, 'true_axes': None, 'standardize': True}
self.methods,  ['uwpca']
True utility landscape:  (-0.8557261109931735, 1.902402839473068, -5.314487096447218, 1.5261655025016472, tensor([-2.4954, -0.2233,  0.8608], dtype=torch.float64))


In [13]:
# Inspect the experiment's `standardize` flag before fitting anything.
exp.standardize

True

In [14]:
# Generate random experiment data, then compute the projections for the
# ("uwpca", "EUBO-zeta") method / PE-strategy pair.
# NOTE(review): 32 is presumably the number of data points to generate --
# confirm against generate_random_experiment_data.
exp.generate_random_experiment_data(32)
exp.compute_projections("uwpca", "EUBO-zeta")

  pca_axes = torch.tensor(torch.transpose(V[:, : num_axes], -2, -1), dtype = torch.double)


In [15]:
# Fit the outcome model for the ("uwpca", "EUBO-zeta") pair; the fitted model
# is read back from exp.outcome_models_dict under that key afterwards.
exp.fit_outcome_model("uwpca","EUBO-zeta")

Fitting outcome model using uwpca and EUBO-zeta


In [None]:
# Score the fitted outcome model against the problem on 1024 test points.
# Smaller values mean a better fit.
model_key = ("uwpca", "EUBO-zeta")
check_outcome_model_fit(
    problem=problem,
    outcome_model=exp.outcome_models_dict[model_key],
    n_test=1024,
)

tensor(0.1569, dtype=torch.float64, grad_fn=<DivBackward0>)

In [None]:
# Switch standardization off, redo the projection and the fit, and score the
# model again.
# NOTE(review): the refit is read from the same dict key as before, so the
# previously fitted (standardized) model is presumably overwritten.
exp.standardize = False

method, pe_strategy = "uwpca", "EUBO-zeta"
exp.compute_projections(method, pe_strategy)
exp.fit_outcome_model(method, pe_strategy)
check_outcome_model_fit(
    outcome_model=exp.outcome_models_dict[(method, pe_strategy)],
    problem=problem,
    n_test=1024,
)

# Observation: without standardizing, the model fit is much better.

  pca_axes = torch.tensor(torch.transpose(V[:, : num_axes], -2, -1), dtype = torch.double)


Fitting outcome model using uwpca and EUBO-zeta


tensor(0.0035, dtype=torch.float64, grad_fn=<DivBackward0>)

## Further try on car problems

In [4]:
from low_rank_BOPE.test_problems.car_problems import problem_setup_augmented

# Car-cab design problem whose original 9-D outcome is raised to 45-D.
# Only the dimensions, the problem and the utility function are kept from the
# returned tuple; the remaining entries are discarded.
input_dim, outcome_dim, problem, _, util_func, _, _ = problem_setup_augmented(
    "carcabdesign_7d9d_piecewiselinear_45",
    augmented_dims_noise=0.01,
)

output_path = "/home/yz685/low_rank_BOPE/experiments/cars/test/"

# Same experiment setup as before: "uwpca" method, "EUBO-zeta" PE strategy.
exp = RetrainingBopeExperiment(
    problem,
    util_func,
    methods=["uwpca"],
    pe_strategies=["EUBO-zeta"],
    trial_idx=0,
    output_path=output_path,
)

carcabdesign_7d9d_piecewiselinear_45, noisy: False, noise_std: 0
Generated outcome projection matrix:  tensor([[-0.0883,  0.3420,  0.4112,  1.0051, -0.1117, -0.5988, -0.0982, -0.3511,
          0.7209, -0.2169, -1.0427,  0.2448, -0.9887, -0.5196,  0.6585,  0.6406,
          0.7839,  0.6573, -1.6348, -0.6108,  0.6003, -0.8769,  0.9649, -0.1926,
          0.3745,  0.0933,  0.3025,  1.3122, -0.5610,  0.1900, -0.1660,  0.9107,
         -1.7856, -0.9354,  0.8843,  1.5818,  0.1096,  0.1010, -0.1656,  0.3211,
          0.1205,  0.5147, -0.0914,  2.3645, -0.4103],
        [ 0.5155, -1.1730, -0.7426,  0.5920,  0.0815, -0.4143,  1.1540,  0.1156,
          0.9368,  0.7956, -0.0108, -0.0245, -0.3893, -0.4196, -0.4357,  0.3146,
         -1.7189,  0.9152, -0.2367,  2.4031, -0.7140, -2.5363,  0.3875, -1.8925,
         -1.0344, -0.2548, -0.3988, -1.6678,  0.5487,  1.0110,  0.4060, -0.1338,
          0.5337, -1.1412, -1.7431,  1.1547, -0.8211, -1.1050,  0.9516,  0.6811,
         -1.9544,  0.3775,  0.10

  "bounds", torch.tensor(self._bounds, dtype=torch.float).transpose(-1, -2)


True utility landscape:  (-41.2332054560695, 2.2286759493579233, -48.29236791005685, -34.45658870292537, tensor([-42.7027, -41.1230, -39.6802], dtype=torch.float64))


In [5]:
# With the experiment's current settings: generate data, compute projections,
# fit the outcome model, and score the fit on 1024 test points.
model_key = ("uwpca", "EUBO-zeta")

exp.generate_random_experiment_data(32)
exp.compute_projections(*model_key)
exp.fit_outcome_model(*model_key)
check_outcome_model_fit(
    problem=problem,
    outcome_model=exp.outcome_models_dict[model_key],
    n_test=1024,
)



  pca_axes = torch.tensor(torch.transpose(V[:, : num_axes], -2, -1), dtype = torch.double)


Fitting outcome model using uwpca and EUBO-zeta


tensor(14.6516, dtype=torch.float64, grad_fn=<DivBackward0>)

In [6]:
# Turn standardization off, then recompute projections, refit, and score the
# fit again (same data as generated previously).
exp.standardize = False

method, pe_strategy = "uwpca", "EUBO-zeta"
exp.compute_projections(method, pe_strategy)
exp.fit_outcome_model(method, pe_strategy)
check_outcome_model_fit(
    problem=problem,
    outcome_model=exp.outcome_models_dict[(method, pe_strategy)],
    n_test=1024,
)

  pca_axes = torch.tensor(torch.transpose(V[:, : num_axes], -2, -1), dtype = torch.double)


Fitting outcome model using uwpca and EUBO-zeta


tensor(16.0258, dtype=torch.float64, grad_fn=<DivBackward0>)

In [4]:
# Car-cab design problem whose original 9-D outcome is raised to 72-D.
# Only the dimensions, the problem and the utility function are kept from the
# returned tuple; the remaining entries are discarded.
input_dim, outcome_dim, problem, _, util_func, _, _ = problem_setup_augmented(
    "carcabdesign_7d9d_piecewiselinear_72",
    augmented_dims_noise=0.01,
)

output_path = "/home/yz685/low_rank_BOPE/experiments/cars/test/"

# Same experiment setup as before: "uwpca" method, "EUBO-zeta" PE strategy.
exp = RetrainingBopeExperiment(
    problem,
    util_func,
    methods=["uwpca"],
    pe_strategies=["EUBO-zeta"],
    trial_idx=0,
    output_path=output_path,
)

carcabdesign_7d9d_piecewiselinear_72, noisy: False, noise_std: 0
Generated outcome projection matrix:  tensor([[-0.0883,  0.3420,  0.4112,  1.0051, -0.1117, -0.5988, -0.0982, -0.3511,
          0.7209, -0.2169, -1.0427,  0.2448, -0.9887, -0.5196,  0.6585,  0.6406,
          0.7839,  0.6573, -1.6348, -0.6108,  0.6003, -0.8769,  0.9649, -0.1926,
          0.3745,  0.0933,  0.3025,  1.3122, -0.5610,  0.1900, -0.1660,  0.9107,
         -1.7856, -0.9354,  0.8843,  1.5818,  0.1096,  0.1010, -0.1656,  0.3211,
          0.1205,  0.5147, -0.0914,  2.3645, -0.4103,  0.5155, -1.1730, -0.7426,
          0.5920,  0.0815, -0.4143,  1.1540,  0.1156,  0.9368,  0.7956, -0.0108,
         -0.0245, -0.3893, -0.4196, -0.4357,  0.3146, -1.7189,  0.9152, -0.2367,
          2.4031, -0.7140, -2.5363,  0.3875, -1.8925, -1.0344, -0.2548, -0.3988],
        [-1.6678,  0.5487,  1.0110,  0.4060, -0.1338,  0.5337, -1.1412, -1.7431,
          1.1547, -0.8211, -1.1050,  0.9516,  0.6811, -1.9544,  0.3775,  0.1086,
     

  "bounds", torch.tensor(self._bounds, dtype=torch.float).transpose(-1, -2)


True utility landscape:  (-61.96517649620232, 7.729437115445321, -81.407098742132, -45.24203179810365, tensor([-68.2561, -62.5050, -55.6492], dtype=torch.float64))


In [5]:
# With the experiment's current settings: generate data, compute projections,
# fit the outcome model, and score the fit on 1024 test points.
model_key = ("uwpca", "EUBO-zeta")

exp.generate_random_experiment_data(32)
exp.compute_projections(*model_key)
exp.fit_outcome_model(*model_key)
check_outcome_model_fit(
    problem=problem,
    outcome_model=exp.outcome_models_dict[model_key],
    n_test=1024,
)

  pca_axes = torch.tensor(torch.transpose(V[:, : num_axes], -2, -1), dtype = torch.double)


Fitting outcome model using uwpca and EUBO-zeta


tensor(12.0892, dtype=torch.float64, grad_fn=<DivBackward0>)

In [6]:
# Turn standardization off, then recompute projections, refit, and score the
# fit again (same data as generated previously).
exp.standardize = False

method, pe_strategy = "uwpca", "EUBO-zeta"
exp.compute_projections(method, pe_strategy)
exp.fit_outcome_model(method, pe_strategy)
check_outcome_model_fit(
    problem=problem,
    outcome_model=exp.outcome_models_dict[(method, pe_strategy)],
    n_test=1024,
)

  pca_axes = torch.tensor(torch.transpose(V[:, : num_axes], -2, -1), dtype = torch.double)


Fitting outcome model using uwpca and EUBO-zeta


tensor(11.4817, dtype=torch.float64, grad_fn=<DivBackward0>)