In [1]:
import os

import wandb

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

plt.style.use("ggplot")
plt.rcParams["figure.figsize"] = (8, 6)
plt.rcParams["image.cmap"] = "Blues"

import seaborn as sns

from tqdm.notebook import tqdm

In [2]:
data_names = [
    # "square1000",
    "fashion",
    "fatemah",
    "bmg",
]

strategies = [
    "random",
    "one-step",
    "pessimistic_0.25_ECI",
    "pessimistic_0.5_ECI",
    "pessimistic_0.75_ECI",
    "pessimistic_3.00_0.75_SELECT",
    "pessimistic_3.00_0.5_SELECT",
    "pessimistic_3.00_0.25_SELECT",
    "pessimistic_1.00_0.75_SELECT",
    "pessimistic_1.00_0.5_SELECT",
    "pessimistic_1.00_0.25_SELECT",
    "pessimistic_0.30_0.75_SELECT",
    "pessimistic_0.30_0.5_SELECT",
    "pessimistic_0.30_0.25_SELECT",
    "pessimistic_0.10_0.75_SELECT",
    "pessimistic_0.10_0.5_SELECT",
    "pessimistic_0.10_0.25_SELECT",
    "VAS",
]

budget = 200
batch_size = 5
num_repeats = 10

num_iterations = budget // batch_size

In [3]:
def remove_nan(array):
    mask = ~np.isnan(array)
    return array[mask]


linear_utilities = np.zeros(
    (len(data_names), len(strategies), num_repeats, num_iterations)
)
vs_utilities = np.zeros(
    (len(data_names), len(strategies), num_repeats, num_iterations)
)

entity = "quanng"
project = "diverse-search"

api = wandb.Api()
runs = api.runs(
    path=f"{entity}/{project}",
    order="created_at",
    filters={"state": "finished"},
)

In [4]:
pbar = tqdm(total=linear_utilities.size)

for data_i, data_name in enumerate(data_names):
    for strategy_i, strategy in enumerate(strategies):
        run_name = f"{data_name}_{budget}_{batch_size}_{strategy}"
        this_runs = [run for run in runs if run.name == run_name]
        
        for repeat_i in range(num_repeats):
            for run in this_runs:
                if run.config["seed"] == repeat_i:
                    break
            
            history = run.history()
            
            linear_utilities[data_i, strategy_i, repeat_i, :] = remove_nan(
                history["linear_utility"]
            )
            vs_utilities[data_i, strategy_i, repeat_i, :] = remove_nan(
                history["vs_utility"]
            )
            
            pbar.update(num_iterations)

  0%|          | 0/21600 [00:00<?, ?it/s]

In [5]:
columns = ["data", "policy", "repeat", "utility"]

linear_utility_df = pd.DataFrame(columns=columns)
vs_utility_df = pd.DataFrame(columns=columns)

for data_i, data_name in enumerate(data_names):
    for strategy_i, strategy in enumerate(strategies):
        for repeat_i in range(num_repeats):
            linear_utility_df = pd.concat(
                [
                    linear_utility_df,
                    pd.DataFrame(
                        data={
                            "data": [data_name] * num_iterations,
                            "policy": [strategy] * num_iterations,
                            "repeat": [repeat_i] * num_iterations,
                            "utility": linear_utilities[data_i, strategy_i, repeat_i, -1],
                        }
                    ),
                ]
            )
            
            vs_utility_df = pd.concat(
                [
                    vs_utility_df,
                    pd.DataFrame(
                        data={
                            "data": [data_name] * num_iterations,
                            "policy": [strategy] * num_iterations,
                            "repeat": [repeat_i] * num_iterations,
                            "utility": vs_utilities[data_i, strategy_i, repeat_i, -1],
                        }
                    ),
                ]
            )

  linear_utility_df = pd.concat(
  vs_utility_df = pd.concat(


In [6]:
with pd.option_context('display.max_rows', None):
    print(vs_utility_df.groupby(["data", "policy"]).agg({"utility": ["mean", "sem"]}))

                                         utility          
                                            mean       sem
data    policy                                            
bmg     VAS                           134.944280  0.185584
        one-step                       26.491559  0.609766
        pessimistic_0.10_0.25_SELECT   26.677261  0.535461
        pessimistic_0.10_0.5_SELECT    26.677261  0.535461
        pessimistic_0.10_0.75_SELECT   46.807733  0.817292
        pessimistic_0.25_ECI           22.474559  0.062681
        pessimistic_0.30_0.25_SELECT   27.331192  0.511988
        pessimistic_0.30_0.5_SELECT    30.521359  0.497998
        pessimistic_0.30_0.75_SELECT   53.245371  0.887356
        pessimistic_0.5_ECI            24.918417  0.000000
        pessimistic_0.75_ECI            5.522247  0.249679
        pessimistic_1.00_0.25_SELECT   45.326617  0.795966
        pessimistic_1.00_0.5_SELECT    50.497917  0.813246
        pessimistic_1.00_0.75_SELECT   62.024769  0.8501

In [7]:
linear_utility_df.groupby(["data", "policy"]).agg({"utility": ["mean", "sem"]})

Unnamed: 0_level_0,Unnamed: 1_level_0,utility,utility
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,sem
data,policy,Unnamed: 2_level_2,Unnamed: 3_level_2
bmg,VAS,154.7,0.196524
bmg,one-step,187.0,0.481747
bmg,pessimistic_0.10_0.25_SELECT,185.6,0.426091
bmg,pessimistic_0.10_0.5_SELECT,185.6,0.426091
bmg,pessimistic_0.10_0.75_SELECT,175.3,0.371982
...,...,...,...
square1000,pessimistic_1.00_0.5_SELECT,169.5,0.253546
square1000,pessimistic_1.00_0.75_SELECT,164.3,0.222821
square1000,pessimistic_3.00_0.25_SELECT,129.9,0.193179
square1000,pessimistic_3.00_0.5_SELECT,129.9,0.157439
