# TLDR

This notebook explores running preferential Bayesian optimization (PBO) on the shapes dataset.

## Summary of the PBO procedure:
- create outcome function (Image) and utility function (gradient aware area)
- generate initial data of (image 1, image 2, preference)
- train a utility model
- iteratively: use EUBO to generate new comparisons, expand the data, and refit the embedding and utility model; generate the image that maximizes utility; plot the utility of that candidate over the number of comparisons


The `PboExperiment` class is implemented at https://github.com/zyyjjj/low_rank_BOPE/blob/main/low_rank_BOPE/pbo_class.py

In [4]:
%load_ext autoreload
%autoreload 2

import itertools
import pickle
import re
import warnings
from collections import defaultdict
from dataclasses import asdict, dataclass
from typing import Dict, List, Tuple, Union

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# import seaborn as sns
import torch 
from torch import Tensor

warnings.filterwarnings("ignore")

import sys
sys.path.append('..')
sys.path.append('/home/yz685/low_rank_BOPE')
sys.path.append('/home/yz685/low_rank_BOPE/low_rank_BOPE')

from botorch.models import PairwiseGP, PairwiseLaplaceMarginalLogLikelihood
from botorch.models.transforms.input import ChainedInputTransform
from botorch.fit import fit_gpytorch_mll
from botorch.acquisition.monte_carlo import qSimpleRegret
from botorch.acquisition.preference import AnalyticExpectedUtilityOfBestOption
from botorch.optim.optimize import optimize_acqf
from botorch.sampling.normal import SobolQMCNormalSampler



from low_rank_BOPE.bope_class import BopeExperiment
from low_rank_BOPE.pbo_class import PboExperiment
from low_rank_BOPE.src.models import make_modified_kernel
from low_rank_BOPE.test_problems.shapes import AreaUtil, LargestRectangleUtil, GradientAwareAreaUtil, Image, Bars
from low_rank_BOPE.src.transforms import InputCenter, PCAInputTransform, get_latent_ineq_constraints, compute_weights
from low_rank_BOPE.src.pref_learning_helpers import gen_comps, gen_exp_cand



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Running experiments

In [5]:
   
N_PIXELS = 8

# Outcome function. `Bars(num_pixels=N_PIXELS)` is the alternative test
# problem imported above; swap it in here to try it instead.
problem = Image(num_pixels=N_PIXELS)

# Utility function evaluated on (N_PIXELS, N_PIXELS)-shaped images.
util_func = GradientAwareAreaUtil(
    penalty_param=0.5,
    image_shape=(N_PIXELS, N_PIXELS),
)

# Methods to compare:
#   "uw_pca"     - unweighted PCA
#   "w_pca_true" - PCA with true-util-value based rank weights
#   "st"         - indep GP
methods = ["uw_pca", "w_pca_true", "st"]

# NOTE(review): absolute, user-specific path; adjust before running elsewhere.
output_dir = "/home/yz685/low_rank_BOPE/experiments/shapes_pbo/"

# Build the experiment object only; the PBO loop itself is launched in the
# following cell via `exp.run_PBO_loop()`.
exp = PboExperiment(
    problem=problem,
    util_func=util_func,
    methods=methods,
    trial_idx=0,
    output_path=f"{output_dir}8by8_rectangle_gradient_aware_area",
    n_check_post_mean=13,
    pca_var_threshold=0.9,
    initial_pref_batch_size=32,
)

PboExperiment settings:  {'pca_var_threshold': 0.9, 'initial_pref_batch_size': 32, 'n_check_post_mean': 13, 'every_n_comps': 3, 'verbose': True, 'dtype': torch.float64, 'num_restarts': 20, 'raw_samples': 128, 'batch_limit': 4, 'sampler_num_outcome_samples': 64, 'maxiter': 1000, 'latent_dim': None, 'min_stdv': 100000, 'true_axes': None}
self.methods,  ['uw_pca', 'w_pca_true', 'st']


In [6]:
# Run the full PBO loop on `exp`; the captured log below shows it working
# through the configured methods one after another.
exp.run_PBO_loop()

===== Running PE using uw_pca =====
util model accuracy 0.6880000233650208
Running 1/3 preference learning
Pref model fitting successful
===== Running PE using w_pca_true =====


In [3]:
# if you want to examine a single method, can also do the following
exp.generate_initial_data(n=16)
exp.run_PE_stage('uw_pca')

===== Running PE using uw_pca =====
util model accuracy 0.7879999876022339
uw_pca True
Running 1/3 preference learning
Pref model fitting successful
Running 2/3 preference learning
Pref model fitting successful
Running 3/3 preference learning
Pref model fitting successful
util model accuracy 0.7699999809265137
uw_pca True
Running 1/3 preference learning
Pref model fitting successful
Running 2/3 preference learning
Pref model fitting successful
Running 3/3 preference learning
Pref model fitting successful
util model accuracy 0.7580000162124634
uw_pca True
Running 1/3 preference learning
Pref model fitting successful


KeyboardInterrupt: 

## Loading outputs and plotting

In [None]:
# Trials whose saved results should be loaded. The experiment above is run
# with trial_idx=0 only; widen this range once more trials have been saved.
trial_range = range(1)

# outputs[problem_name]['within_session_results'][trial] -> flat list of the
# per-session result records loaded for that trial.
outputs = defaultdict(lambda: defaultdict(dict))
problems = ["8by8_rectangle_gradient_aware_area"]

# The loop variable is `problem_name` (not `problem`) so that it does not
# overwrite the `problem` test-problem object created in the setup cell.
for problem_name in problems:

    results_folder = output_dir + problem_name + '/'

    for trial in trial_range:

        results_path = results_folder + f'PE_session_results_trial={trial}.th'

        try:
            # The saved object is a dict whose values are lists of result
            # records; flatten them into a single list for this trial.
            trial_results = list(
                itertools.chain.from_iterable(torch.load(results_path).values())
            )
        except FileNotFoundError:
            # No results file yet: the trial has not been saved.
            print(f"{problem_name} Trial {trial} not finished yet, skipping for now")
            continue
        except Exception as err:
            # Stay best-effort for unreadable files, but surface the cause
            # (and let KeyboardInterrupt propagate, unlike a bare `except:`).
            print(f"{problem_name} Trial {trial} could not be loaded ({err!r}), skipping for now")
            continue

        outputs[problem_name]['within_session_results'][trial] = trial_results

In [None]:
# Line colour used for each method in the plots below.
colors_dict = dict(
    uw_pca="tab:red",
    st="tab:blue",
    w_pca_true="tab:purple",
)

# Legend label shown for each method.
labels_dict = dict(
    st="Indep",
    uw_pca="PCA",
    w_pca_true="weighted PCA",
)

In [None]:
from matplotlib.ticker import MaxNLocator

def plot_candidate_over_comps(problem, methods = ["st", "uw_pca"], metric = "util"):
    """Plot the mean of `metric` (with standard-error bars) against the number of comparisons.

    Reads the records stored in the notebook-level
    `outputs[problem]["within_session_results"]`, averages `metric` over all
    loaded trials for each (n_comps, method) pair, and draws one error-bar
    curve per requested method on a single axis.

    Args:
        problem: key into `outputs` naming the experiment; also used as the
            plot title.
        methods: names of the methods to draw; methods present in the results
            but not listed here are skipped. The default list is never mutated.
        metric: column of the result records to aggregate. "util" and
            "util_model_acc" get descriptive y-axis labels; any other value is
            used verbatim as the label.

    Raises:
        ValueError: if no results have been loaded for `problem`.
    """
    # Horizontal offsets so that overlapping error bars stay distinguishable.
    x_jitter_dict = {
        "uw_pca": 0.1,
        "st": 0,
        "w_pca_true": 0.4, "w_pca_est": 0.5,
    }

    # Flatten the per-trial lists of records into one list.
    trial_results = outputs[problem]["within_session_results"]
    records = [res for trial in trial_results for res in trial_results[trial]]
    if not records:
        raise ValueError(
            f"No within-session results loaded for problem {problem!r}; "
            "run the loading cell first."
        )

    # Mean and standard error of `metric` per (n_comps, method).
    summary_df = (
        pd.DataFrame(records)
        .groupby(["n_comps", "method"])
        .agg({metric: ["mean", "sem"]})
        .droplevel(level=0, axis=1)
        .reset_index()
    )

    _, ax = plt.subplots(1, 1, figsize=(8, 6))

    # Group by the plain column name, not a one-element list: with a list key
    # pandas >= 2.0 yields 1-tuples such as ("st",), which never match the
    # strings in `methods`, so nothing would be drawn.
    for name, group in summary_df.groupby("method"):
        if name not in methods:
            continue

        ax.errorbar(
            x=group["n_comps"].values + x_jitter_dict.get(name, 0),
            y=group["mean"].values,
            yerr=group["sem"].values,
            label=labels_dict.get(name, name),
            linewidth=1.5,
            capsize=3,
            alpha=0.6,
            color=colors_dict.get(name),  # None falls back to the colour cycle
        )

    ax.set_xlabel("Number of comparisons")
    ax.set_title(problem, fontsize=16)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    if metric == "util":
        ax.set_ylabel("True utility of estimated \n utility-maximizing outcome")
    elif metric == "util_model_acc":
        ax.set_ylabel("Utility model accuracy")
    else:
        ax.set_ylabel(metric)
    ax.legend(loc="lower left", bbox_to_anchor=(-0.2, -0.2), ncol=5, fontsize=15)

In [None]:
# to plot max posterior mean utility over comparisons
plot_candidate_over_comps_multiple(
    "8by8_rectangle_gradient_aware_area",
    methods = ["st", "uw_pca", "w_pca_true"],
    metric = "util"
)

In [None]:
# to plot utility model accuracy over comparisons
plot_candidate_over_comps(
    "8by8_rectangle_gradient_aware_area",
    methods = ["st", "uw_pca", "w_pca_true"],
    metric = "util_model_acc"
)

In [None]:
# plotter for multiple experiments

def plot_candidate_over_comps_multiple(
    problem_l, 
    methods = ["st", "uw_pca"],
    metric = "util"):
    """Plot `metric` against the number of comparisons, one panel per problem.

    For every problem name, reads the records stored in the notebook-level
    `outputs[problem]["within_session_results"]`, averages `metric` over all
    loaded trials for each (n_comps, method) pair, and draws one error-bar
    curve (mean +/- standard error) per requested method.

    Args:
        problem_l: list of problem names (keys into `outputs`). A single
            string is accepted and treated as a one-element list. Each name
            must start with "<num_pixels>by", e.g. "8by8_...", because the
            outcome dimension shown in the title is parsed from it.
        methods: names of the methods to draw; others are skipped. The
            default list is never mutated.
        metric: column of the result records to aggregate.

    Raises:
        ValueError: if `problem_l` is empty, if a problem has no loaded
            results, or if a name does not start with an integer before "by".
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(problem_l, str):
        problem_l = [problem_l]
    if len(problem_l) == 0:
        raise ValueError("problem_l must contain at least one problem name")

    # squeeze=False always returns a 2-D array of axes, so indexing by panel
    # also works when there is only one problem.
    _, axs_grid = plt.subplots(1, len(problem_l), figsize=(10, 3), squeeze=False)
    axs = axs_grid[0]

    # Horizontal offsets so that overlapping error bars stay distinguishable.
    x_jitter_dict = {
        "uw_pca": 0.1, 
        "st": 0, 
        "w_pca_true": 0.4, "w_pca_est": 0.5, 
    }

    for j, problem in enumerate(problem_l):
        ax = axs[j]

        input_dim = 4  # hard-coded in the original; only shown in the title
        # "8by8_..." -> 8 pixels per side -> 64 outcome dimensions.
        num_pixels = problem.split("by", 1)[0]
        outcome_dim = int(num_pixels) ** 2

        # Flatten the per-trial lists of records into one list.
        trial_results = outputs[problem]["within_session_results"]
        records = [res for trial in trial_results for res in trial_results[trial]]
        if not records:
            raise ValueError(
                f"No within-session results loaded for problem {problem!r}; "
                "run the loading cell first."
            )

        # Mean and standard error of `metric` per (n_comps, method).
        summary_df = (
            pd.DataFrame(records)
            .groupby(["n_comps", "method"])
            .agg({metric: ["mean", "sem"]})
            .droplevel(level=0, axis=1)
            .reset_index()
        )

        # Group by the plain column name, not a one-element list: with a list
        # key pandas >= 2.0 yields 1-tuples such as ("st",), which never match
        # the strings in `methods`, so nothing would be drawn.
        for name, group in summary_df.groupby("method"):
            if name not in methods:
                continue

            ax.errorbar(
                x=group["n_comps"].values + x_jitter_dict.get(name, 0),
                y=group["mean"].values,
                yerr=group["sem"].values,
                label=labels_dict.get(name, name),
                linewidth=1.5,
                capsize=3,
                alpha=0.6,
                color=colors_dict.get(name),  # None falls back to the colour cycle
            )

        ax.set_xlabel("Number of comparisons", fontsize=12)
        ax.set_title(
            f"{problem}\n d={input_dim}, k={outcome_dim}", fontsize=12.5
        )

    # The y-label and legend are attached to the first panel only.
    if metric == "util":
        axs[0].set_ylabel("True utility of estimated \n utility-maximizing outcome")
    elif metric == "util_model_acc":
        axs[0].set_ylabel("Utility model accuracy")
    axs[0].legend(bbox_to_anchor=(-0.05, -0.4), loc="lower left", ncol=5, fontsize=12)
