<a href="https://colab.research.google.com/github/utkarsh5k/HighDimBOInLearntSubspace/blob/main/High_Dimensional_BO_GP_vs_Real_Eval_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# High Dimensional Bayesian Optimization using learnt active subspaces

In [None]:
!pip install torch
!pip install gpytorch
!pip install botorch
!pip install matplotlib

Collecting gpytorch
  Downloading gpytorch-1.5.1-py2.py3-none-any.whl (503 kB)
[K     |████████████████████████████████| 503 kB 5.2 MB/s 
Installing collected packages: gpytorch
Successfully installed gpytorch-1.5.1
Collecting botorch
  Downloading botorch-0.5.1-py3-none-any.whl (486 kB)
[K     |████████████████████████████████| 486 kB 5.0 MB/s 
Installing collected packages: botorch
Successfully installed botorch-0.5.1


In [None]:
import numpy as np 
from botorch.models import SingleTaskGP
from botorch.fit import fit_gpytorch_model
from botorch.utils import standardize
from gpytorch.mlls import ExactMarginalLogLikelihood
from botorch.acquisition import UpperConfidenceBound, qExpectedImprovement
from botorch.optim import optimize_acqf
from torch import nn as nn
import torch 
from torch.autograd import Variable
from torch.quasirandom import SobolEngine


In [None]:
def ackley(x): 
  """
  Calculates ackley function value for arbitrary number of dimensions

  Args:
    x: 1-D array-like of coordinates (NumPy array, list or tuple).

  Returns:
    The Ackley value at `x`; it is 0 at the origin up to floating point
    rounding.
  """
  # Coerce once so plain lists/tuples work too: `x**2` and `c * x` below are
  # only element-wise on NumPy arrays.
  x = np.asarray(x)

  a = 20 
  b = 0.2 
  c = 2 * np.pi 

  n = len(x)
  # -a * exp(-b * sqrt(mean(x_i^2)))
  first_operand = -a * np.exp(np.sqrt(np.sum(x**2) / n) * -b)
  # exp(mean(cos(c * x_i)))
  second_operand = np.exp(np.sum(np.cos(c * x)) / n)

  return first_operand - second_operand + a + np.exp(1)

# One random shift per coordinate, drawn uniformly from [-10, 10) when this
# cell runs; `ackley_prime` adds it to every input before calling `ackley`.
offset = [np.random.uniform(-10, 10) for _ in range(200)]

def ackley_prime(x): 
  """
  Evaluates the 200-dimensional Ackley function shifted by the module-level
  `offset`, i.e. `ackley(x + offset)`.

  Args:
    x: array-like with exactly 200 coordinates.

  Returns:
    The value returned by `ackley` at the shifted point.

  Raises:
    AssertionError: if `x` does not have 200 entries.
  """
  assert len(x) == 200
  # `offset` is a Python list, so `x + offset` would concatenate when `x` is
  # a list as well. Converting both operands keeps `+` an element-wise shift.
  return ackley(np.asarray(x) + np.asarray(offset))



In [None]:
class TransformerNetwork(nn.Module):
  """
  Bias-free network that maps a `target_dim` point to an `original_dim` point,
  bundled with helpers that run Bayesian optimisation (BoTorch) on the low
  dimensional inputs and score candidates with `ackley_prime` after mapping
  them to the high dimensional space.
  """

  def __init__(self, original_dim, target_dim):
    """
    Args:
      original_dim: dimensionality of the space `ackley_prime` is evaluated in.
      target_dim: dimensionality of the space the GP is fitted in.
    """
    super().__init__()
    self.target_dims = target_dim
    self.orig_dims = original_dim
    self.flatten = nn.Flatten()
    self.linear_nn = nn.Sequential(
        nn.Linear(target_dim, target_dim, bias = False),
        # The activation class is spelled `nn.ReLU`; `nn.Relu` does not exist
        # and raised AttributeError on construction.
        nn.ReLU(),
        nn.Linear(target_dim, original_dim, bias = False),
    )

    self.optimizer = None 

    self.alpha = 1  # weight of the (evaluation - GP prediction) loss term
    self.beta = 1   # weight of the reconstruction loss term

    # [([x_1, x_2...], y)....]
    self.ackley_evaluations = []
    self.loss_per_step = []
    self.init_optimizer()
    self.num_evaluations = 0 

  def init_optimizer(self):
    """Creates the SGD optimizer over the parameters of `linear_nn`."""
    learning_rate = 1e-3
    self.optimizer = torch.optim.SGD(self.linear_nn.parameters(), lr=learning_rate)

  def forward(self, x):
    """Maps low dimensional input `x` through `linear_nn`."""
    logits = self.linear_nn(x)
    return logits

  def get_transformation_loss(self, points):
    '''
    Reconstruction error of `points` after one forward pass through the
    network followed by `reverse_pass`: the Euclidean norm of the difference,
    divided by `points.shape[0]`.
    '''
    forward_reverse_transformation = self.reverse_pass(self.forward(points).detach().numpy())
    diff_mat = points.detach().numpy() - forward_reverse_transformation
    return np.sqrt(np.sum(diff_mat**2)) / points.shape[0]  

  def reverse_pass(self, x):     
    """
    Approximately inverts `forward` by multiplying `x` with the pseudo-inverse
    of each weighted layer, last layer first.

    Layers without a weight matrix (the ReLU) are skipped, i.e. treated as the
    identity, because they have no `.weight` to invert.
    """
    low_dim = x 
    for layer in reversed(list(self.linear_nn)):
      weight = getattr(layer, "weight", None)
      if weight is None:
        continue
      reverse_transformation = np.linalg.pinv(weight.detach().numpy().T)
      low_dim = np.matmul(low_dim, reverse_transformation)
    return low_dim
  
  def train_loop(self, n_iterations = 100):
    """
    Repeats: propose a candidate with `run_bayes_op`, compute the loss with
    `get_full_loss`, and take one optimizer step.

    Args:
      n_iterations: number of BO/update rounds (defaults to the original 100).
    """
    for _ in range(n_iterations):
      candidate, prediction = self.run_bayes_op()
      # NOTE(review): `get_full_loss` builds its value from detached NumPy
      # numbers, so this tensor is a fresh leaf with no graph back to the
      # network weights; `backward()` therefore appears to leave the
      # parameters' gradients unset and `step()` to leave the weights
      # unchanged. A differentiable loss is needed for training to have effect.
      loss = Variable(self.get_full_loss(candidate, prediction), requires_grad = True)
      self.optimizer.zero_grad()
      loss.backward()
      self.optimizer.step()

  def project_high_dim_history_to_low(self): 
    """
    Applies `reverse_pass` to the points stored in `ackley_evaluations` and
    returns them with the negated evaluations as tensors.

    NOTE(review): `get_full_loss` stores the low dimensional candidate, not
    the high dimensional point, so `reverse_pass` here receives inputs of the
    wrong width whenever `target_dims != orig_dims` -- confirm intent.
    """
    eval_x = np.array([x[0] for x in self.ackley_evaluations])
    low_dim_eval_x = self.reverse_pass(eval_x)
    return torch.tensor(low_dim_eval_x), torch.tensor([[x[1] * -1] for x in self.ackley_evaluations])

  def get_evals_for_low_dim(self, low_dim): 
    """
    Evaluates `ackley_prime` at the high dimensional image of every point in
    `low_dim`.

    Returns:
      (train_X, train_Y): the low dimensional points and the negated
      evaluations (negated so that maximising Y minimises the objective),
      both as float32 tensors.
    """
    eval_val = []
    eval_x = []
    for pt in low_dim: 
      low_pt = pt.detach().to(dtype = torch.float)
      high_pt = self.forward(low_pt).detach().numpy()
      eval_x.append(low_pt.numpy())   
      eval_val.append(ackley_prime(high_pt))
    # Stack first: building a tensor from a list of arrays is slow.
    train_X = torch.tensor(np.array(eval_x), dtype = torch.float)
    # Same dtype as train_X; BoTorch models expect X and Y to share a dtype.
    train_Y = torch.tensor([[val * -1] for val in eval_val], dtype = torch.float)
    return train_X, train_Y

  def get_full_loss(self, point, prediction): 
    """
    Evaluates `ackley_prime` at the high dimensional image of `point`, records
    `(point, evaluation)` in `ackley_evaluations`, and returns
    `alpha * (evaluation - prediction) + beta * reconstruction_loss` wrapped in
    a one-element tensor.
    """
    high_dim_point = self.forward(point).detach().numpy()
    high_dim_eval = ackley_prime(high_dim_point)
    self.ackley_evaluations.append((point.detach().numpy(), high_dim_eval))
    loss = self.alpha * (high_dim_eval - prediction)
    loss = loss + self.beta * (self.get_transformation_loss(point)) 
    print(f'High dimensional eval: {high_dim_eval} Prediction: {prediction} loss = {loss}')
    return torch.tensor([loss])

  def bootstrap_low_dim_space(self, num_points = 1): 
    '''
    Draws `num_points` Sobol points in the original space and evaluates
    `ackley_prime` on each, returning a list of `(point, evaluation)` pairs.

    We need to have at least one sample in the low dim space to run BayesOP
    '''
    points = self.get_initial_points(self.orig_dims, num_points).detach().numpy()
    return [(point, ackley_prime(point)) for point in points]

  def get_initial_points(self, dim, n_pts, seed=0):
    """Returns `n_pts` scrambled Sobol points of dimension `dim` as float64."""
    sobol = SobolEngine(dimension=dim, scramble=True, seed=seed)
    X_init = sobol.draw(n=n_pts).to(dtype=float)
    return X_init
    
  def latin_hypercube(self, n_pts, dim):
    """Basic Latin hypercube implementation with center perturbation."""
    X = np.zeros((n_pts, dim))
    centers = (1.0 + 2.0 * np.arange(0.0, n_pts)) / float(2 * n_pts)
    for i in range(dim):  # Shuffle the center locations for each dimension.
        X[:, i] = centers[np.random.permutation(n_pts)]

    # Add some perturbations within each box
    pert = np.random.uniform(-1.0, 1.0, (n_pts, dim)) / float(2 * n_pts)
    X += pert
    return X

  def run_bayes_op(self):   
    """
    Fits a `SingleTaskGP` on 50 Sobol points of the low dimensional space and
    optimises q-Expected-Improvement over the box [-32, 32]^target_dims.

    Returns:
      (candidate, prediction): the proposed low dimensional point and the GP
      posterior mean at it, sign-flipped back to the minimisation scale.
    """
    init_points = self.get_initial_points(self.target_dims, 50)
    train_X, train_Y = self.get_evals_for_low_dim(init_points)

    gp = SingleTaskGP(train_X, train_Y)
    mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
    fit_gpytorch_model(mll)

    # Optimize acquisition function 
    ei = qExpectedImprovement(gp, train_Y.max(), maximize=True)
    bounds = torch.stack([torch.tensor([-32] * self.target_dims, dtype = torch.float), torch.tensor([32] * self.target_dims, dtype = torch.float)])
    candidate, acq_value = optimize_acqf(ei, bounds=bounds, q=1, num_restarts=10, raw_samples=512)
    prediction = gp.posterior(candidate).mean.detach().numpy()[0, 0]
    print(f"Expected Improvement: {ei(train_X)}")
    return candidate[0], prediction * -1



In [None]:
# Build the 200-D -> 10-D model and run its training loop.
# Alternative driver: network = SVDBO(original_dim = 200, target_dim = 20)
network = TransformerNetwork(200, 10)
network.train_loop()
  

Expected Improvement: tensor([0.0025], grad_fn=<MeanBackward1>)
High dimensional eval: 19.832565109352952 Prediction: 15.450535774230957 loss = 4.38203021177648
Expected Improvement: tensor([0.0025], grad_fn=<MeanBackward1>)
High dimensional eval: 19.845438830105053 Prediction: 15.450535774230957 loss = 4.394903953037868
Expected Improvement: tensor([0.0025], grad_fn=<MeanBackward1>)
High dimensional eval: 20.425362422916443 Prediction: 15.450535774230957 loss = 4.974827297254161
Expected Improvement: tensor([0.0025], grad_fn=<MeanBackward1>)
High dimensional eval: 20.222247072325395 Prediction: 15.450535774230957 loss = 4.771712473923567
Expected Improvement: tensor([0.0025], grad_fn=<MeanBackward1>)
High dimensional eval: 20.576781762284163 Prediction: 15.450535774230957 loss = 5.126246381669553
Expected Improvement: tensor([0.0026], grad_fn=<MeanBackward1>)
High dimensional eval: 20.123439157152404 Prediction: 15.450535774230957 loss = 4.672904427687297


KeyboardInterrupt: ignored

In [None]:
# Objective values only; each history entry is a (point, value) pair.
evaluations = [value for _point, value in network.ackley_evaluations]

In [None]:
from itertools import accumulate

import matplotlib.pyplot as plt 
from google.colab import files

# Best (smallest) value seen after each evaluation. Folding with the builtin
# `min` avoids rebinding the name `min` at notebook scope, and an empty
# history yields an empty curve instead of an IndexError.
monotonic_evaluations = list(accumulate(evaluations, min))

x_axis = list(range(1, len(evaluations) + 1))
plt.plot(x_axis, monotonic_evaluations)
plt.xlabel("Number of evaluations")
plt.ylabel("200D Ackley Function Value")
plt.title("Number of evaluations vs smallest 200D Ackley Fn Value")

# Save the figure, then hand it to the browser via the Colab download helper.
plt.savefig("200DAckley.png")
files.download("200DAckley.png") 

In [None]:
class SVDBO(): 
  """
  Bayesian optimisation in a linear subspace obtained from an SVD of Latin
  hypercube samples: the GP is fitted on points projected onto the leading
  `target_dim` right singular vectors, and candidates are mapped back to the
  original space to be scored with `ackley_prime`.
  """

  def __init__(self, original_dim, target_dim): 
    """
    Args:
      original_dim: dimensionality of the space `ackley_prime` is evaluated in.
      target_dim: number of leading right singular vectors kept as subspace axes.
    """
    self.target_dims = target_dim
    self.orig_dims = original_dim

    self.alpha = 1
    self.beta = 1

    # [([x_1, x_2...], y)....]
    self.ackley_evaluations = []
    self.num_evaluations = 0 
    # `original_dim` samples, each of dimension `original_dim`.
    self.initial_points  = self.latin_hypercube(self.orig_dims, self.orig_dims)
    self.evaluations = []
    self.get_evaluations()
    self.points_mean = None  # set by perform_svd()
    _, _, self.right = self.perform_svd()
    self.subspace_axes = self.right[0: self.target_dims]
  
  def latin_hypercube(self, n_pts, dim):
    """Basic Latin hypercube implementation with center perturbation."""
    X = np.zeros((n_pts, dim))
    centers = (1.0 + 2.0 * np.arange(0.0, n_pts)) / float(2 * n_pts)
    for i in range(dim):  # Shuffle the center locations for each dimension.
        X[:, i] = centers[np.random.permutation(n_pts)]

    # Add some perturbations within each box
    pert = np.random.uniform(-1.0, 1.0, (n_pts, dim)) / float(2 * n_pts)
    X += pert
    return X

  def project_points_into_subspace(self, points):
    """
    Projects high dimensional `points` onto the subspace axes.

    The SVD is taken of the mean-centred samples and `get_reverse_projection`
    adds the mean back, so the points are centred here first. Without the
    centring, projecting a reverse-projected candidate would not return the
    candidate.
    """
    return (points - self.points_mean) @ self.subspace_axes.T
  
  def get_reverse_projection(self, points):
    """Maps low dimensional `points` back to the original space."""
    return (points @ self.subspace_axes) + self.points_mean

  def perform_svd(self):
    """
    Stores the column mean of `initial_points` in `points_mean` and returns
    `np.linalg.svd` of the centred points.
    """
    self.points_mean = self.initial_points.mean(axis = 0)
    return np.linalg.svd(self.initial_points - self.points_mean)

  def get_evaluations(self):
    """Appends `ackley_prime` of every point in `initial_points` to `evaluations`."""
    for pt in self.initial_points:
      self.evaluations.append(ackley_prime(pt)) 

  def get_evals_for_low_dim(self):
    """
    Returns the GP training data: all points projected into the subspace and
    the negated evaluations (negated so that maximising Y minimises the
    objective), both as float32 tensors.
    """
    low_dim_points = self.project_points_into_subspace(self.initial_points)
    train_X = torch.tensor(low_dim_points, dtype = torch.float)
    # Same dtype as train_X; BoTorch models expect X and Y to share a dtype.
    train_Y = torch.tensor([[x * -1] for x in self.evaluations], dtype = torch.float)
    return train_X, train_Y

  def train_loop(self, n_iterations = 500):
    """
    Repeats: propose a candidate with `run_bayes_op`, map it back to the
    original space, evaluate it and add it to the training data.

    Args:
      n_iterations: number of BO rounds (defaults to the original 500).
    """
    for _ in range(n_iterations):
      candidate, prediction = self.run_bayes_op()
      high_dim = self.get_reverse_projection(np.array([candidate.detach().numpy()]))[0]
      self.initial_points = np.vstack([self.initial_points, high_dim])
      actual = ackley_prime(high_dim)
      self.evaluations.append(actual)
      print(f"High Dim Actual: {actual} GP Predicted: {prediction}")    

  def run_bayes_op(self):   
    """
    Fits a `SingleTaskGP` on the projected history and optimises
    q-Expected-Improvement over the box [-32, 32]^target_dims.

    Returns:
      (candidate, prediction): the proposed low dimensional point and the GP
      posterior mean at it, sign-flipped back to the minimisation scale.
    """
    train_X, train_Y = self.get_evals_for_low_dim()

    gp = SingleTaskGP(train_X, train_Y)
    mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
    fit_gpytorch_model(mll)

    # Optimize acquisition function 
    ei = qExpectedImprovement(gp, train_Y.max(), maximize=True)
    bounds = torch.stack([torch.tensor([-32] * self.target_dims, dtype = torch.float), torch.tensor([32] * self.target_dims, dtype = torch.float)])
    candidate, acq_value = optimize_acqf(ei, bounds=bounds, q=1, num_restarts=10, raw_samples=512)
    prediction = gp.posterior(candidate).mean.detach().numpy()[0, 0]
    return candidate[0], prediction * -1 

