In [2]:
import os
import math
import random
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams.update({'font.size': 11})
import matplotlib.ticker as ticker
# PyTorch
import torch
# GPyTorch
from gpytorch.mlls import ExactMarginalLogLikelihood
# BOTorch
import botorch
from botorch.models import SingleTaskGP
from botorch.models.transforms import Normalize, Standardize
from botorch.fit import fit_gpytorch_mll
from botorch.acquisition import LogExpectedImprovement
from botorch.optim import optimize_acqf

In [8]:
################################################################################
def check_epochs(df, n, batch_size=128, steps=6000, drop_last=True):
    num_batches = math.floor(n/batch_size) if drop_last else math.ceil(n/batch_size)
    epochs = int(steps/num_batches)
    return df.shape[0] == epochs

def print_job(alpha, beta, dataset, dataset_dir, experiments_dir, lr_0, 
              method, model, model_arch, n, prior_dir, prior_type, random_state, 
              save, tune):
    model_name = f"{model}_alpha={alpha}_beta={beta}_lr_0={lr_0}_n={n}_random_state={random_state}"
    if os.path.exists(f"{experiments_dir}/{model_name}.csv"):
        temp_df = pd.read_csv(f"{experiments_dir}/{model_name}.csv")
        n_train = n - int((1/5) * n) if tune else n
        if check_epochs(temp_df, n_train, batch_size=min(128, n_train), steps=6000, drop_last=True):
            return

    command = (
        f"python ../src/main_image_classifiers.py "
        f"--alpha={alpha} "
        "--batch_size=128 "
        f"--beta={beta} "
        f"--dataset=\"{dataset}\" "
        f"--dataset_dir=\"{dataset_dir}\" "
        f"--experiments_dir=\"{experiments_dir}\" "
        f"--lr_0={lr_0} "
        f"--method=\"{method}\" "
        f"--model=\"{model}\" "
        f"--model_arch=\"{model_arch}\" "
        f"--model_name=\"{model_name}\" "
        f"--n={n} "
        "--num_workers=0 "
        f"--prior_dir=\"{prior_dir}\" "
        f"--prior_type=\"{prior_type}\" "
        f"--random_state={random_state} "
        f"{'--save' if save else ''}"
        f"{'--tune' if tune else ''}"
    )
    
    print(f"    '{command}'")
    
def get_runtime(alpha, beta, experiments_dir, lr_0, model, n, random_state, tune):
    model_name = f"{model}_alpha={alpha}_beta={beta}_lr_0={lr_0}_n={n}_random_state={random_state}"
    if not os.path.exists(f"{experiments_dir}/{model_name}.csv"):
        return 0.0
        #raise FileNotFoundError(f"Expected file not found: {experiments_dir}/{model_name}.csv")
    df = pd.read_csv(f"{experiments_dir}/{model_name}.csv")
    n_train = n - int((1/5) * n) if tune else n
    if not check_epochs(df, n_train, batch_size=min(128, n_train), steps=6000, drop_last=True):
        return 0.0
        #raise RuntimeError(f"Run incomplete: {model_name} did not run for the specified number of epochs")
    return df["train_sec/epoch"].sum()

def get_val_or_test_acc(alpha, beta, experiments_dir, lr_0, model, n, random_state, tune):
    model_name = f"{model}_alpha={alpha}_beta={beta}_lr_0={lr_0}_n={n}_random_state={random_state}"
    if not os.path.exists(f"{experiments_dir}/{model_name}.csv"):
        return 0.0
        #raise FileNotFoundError(f"Expected file not found: {experiments_dir}/{model_name}.csv")
    df = pd.read_csv(f"{experiments_dir}/{model_name}.csv")
    n_train = n - int((1/5) * n) if tune else n
    if not check_epochs(df, n_train, batch_size=min(128, n_train), steps=6000, drop_last=True):
        return 0.0
        #raise RuntimeError(f"Run incomplete: {model_name} did not run for the specified number of epochs")
    return df["val_or_test_acc"].values[-1]
    
def get_val_or_test_nll(alpha, beta, experiments_dir, lr_0, model, n, random_state, tune):
    model_name = f"{model}_alpha={alpha}_beta={beta}_lr_0={lr_0}_n={n}_random_state={random_state}"
    if not os.path.exists(f"{experiments_dir}/{model_name}.csv"):
        return float("inf")
        #raise FileNotFoundError(f"Expected file not found: {experiments_dir}/{model_name}.csv")
    df = pd.read_csv(f"{experiments_dir}/{model_name}.csv")
    n_train = n - int((1/5) * n) if tune else n
    if not check_epochs(df, n_train, batch_size=min(128, n_train), steps=6000, drop_last=True):
        return float("inf")
        #raise RuntimeError(f"Run incomplete: {model_name} did not run for the specified number of epochs")
    return df["val_or_test_nll"].values[-1]

def get_candidate(train_X, train_Y, seed):
    
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    
    gp = SingleTaskGP(
        train_X=train_X,
        train_Y=train_Y,
        input_transform=Normalize(d=3),
        outcome_transform=Standardize(m=1),
    )
    mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
    fit_gpytorch_mll(mll)

    logEI = LogExpectedImprovement(model=gp, best_f=train_Y.max())

    bounds = torch.stack([torch.zeros(3), torch.ones(3)]).to(torch.double)
    candidate, acq_value = optimize_acqf(
      logEI, bounds=bounds, q=1, num_restarts=5, raw_samples=20,
    )
    
    return candidate.detach()

In [9]:
# DONE:

# CIFAR-10 n_iters = 41 tuned
# Flower-102 n_iters = 41 tuned
# Pet-37 n_iters = 41 tuned

# CIFAR-10 n_iters = 35 retrained
# Flower-102 n_iters = 35 retrained
# Pet-37 n_iters = 35 retrained

# TODO:

# CIFAR-10 n_iters = 42 tuned
# Flower-102 n_iters = 42 tuned
# Pet-37 n_iters = 42 tuned

# CIFAR-10 n_iters = 36 retrained
# Flower-102 n_iters = 36 retrained
# Pet-37 n_iters = 36 retrained

In [15]:
dataset = "CIFAR-10"
dataset_dir = "/cluster/tufts/hugheslab/eharve06/CIFAR-10"
model = "l2-sp"
method = "MAP"
model_arch = "ConvNeXt-Tiny"
ns = [100, 1000, 10000, 50000]
prior_dir = "/cluster/tufts/hugheslab/eharve06/convnext_tiny_torchvision"
prior_type = "convnext_tiny_torchvision"
random_states = [1001, 2001, 3001]
retrained_experiments_dir = "/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/retrained_CIFAR-10_ConvNeXt-Tiny_BO"
tuned_experiments_dir = "/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/tuned_CIFAR-10_ConvNeXt-Tiny_BO"

n_iters = 0
seeds = [0, 1, 2, 3, 4]

bounds = torch.tensor([[-6, -6, -4], [-2, -2, -1]], dtype=torch.double)

columns = ["alpha", "beta", "lr_0", "n", "n_iter", "random_state", "runtime", "seed", "val_acc", "val_nll", "test_acc", "test_nll"]
cifar10_bo_df = pd.DataFrame(columns=columns)

for n, random_state, seed in itertools.product(ns, random_states, seeds):

    gen = torch.Generator()
    gen.manual_seed(seed)

    train_X = torch.rand(size=(1, 3,), generator=gen, dtype=torch.double)
    train_X_bounded = (bounds[1] - bounds[0]) * train_X + bounds[0]
    
    alpha, beta, lr_0 = 10**train_X_bounded[0]
    print_job(alpha, beta, dataset, dataset_dir, tuned_experiments_dir, lr_0, method, model, model_arch, n, prior_dir, prior_type, random_state, False, True)

    train_Y = torch.tensor([[-get_val_or_test_nll(10**x[0], 10**x[1], tuned_experiments_dir, 10**x[2], model, n, random_state, True)] for x in train_X_bounded], dtype=torch.float64)
    
    alpha_star, beta_star, lr_0_star = 10**train_X_bounded[torch.argmax(train_Y)]
    print_job(alpha_star, beta_star, dataset, dataset_dir, retrained_experiments_dir, lr_0_star, method, model, model_arch, n, prior_dir, prior_type, random_state, True, False)
    
    runtime = 0.0
    runtime = get_runtime(alpha, beta, tuned_experiments_dir, model, lr_0, n, random_state, True)
    runtime += get_runtime(alpha_star, beta_star, retrained_experiments_dir, lr_0_star, model, n, random_state, False)
    
    val_acc = get_val_or_test_acc(alpha, beta, tuned_experiments_dir, lr_0, model, n, random_state, True)
    val_nll = get_val_or_test_nll(alpha, beta, tuned_experiments_dir, lr_0, model, n, random_state, True)
    test_acc = get_val_or_test_acc(alpha_star, beta_star, retrained_experiments_dir, lr_0_star, model, n, random_state, False)
    test_nll = get_val_or_test_nll(alpha_star, beta_star, retrained_experiments_dir, lr_0_star, model, n, random_state, False)

    row = [alpha.item(), beta.item(), lr_0.item(), n, 0, random_state, runtime, seed, val_acc, val_nll, test_acc, test_nll]
    cifar10_bo_df.loc[len(cifar10_bo_df)] = row
    
    for i in range(1, n_iters+1):

        candidate = get_candidate(train_X, train_Y, seed)
        candidate_bounded = (bounds[1] - bounds[0]) * candidate + bounds[0]
        train_X = torch.cat([train_X, candidate])
        train_X_bounded = (bounds[1] - bounds[0]) * train_X + bounds[0]
                
        alpha, beta, lr_0 = 10**candidate_bounded[0]        
        print_job(alpha, beta, dataset, dataset_dir, tuned_experiments_dir, lr_0, method, model, model_arch, n, prior_dir, prior_type, random_state, False, True)

        train_Y = torch.tensor([[-get_val_or_test_nll(10**x[0], 10**x[1], tuned_experiments_dir, 10**x[2], model, n, random_state, True)] for x in train_X_bounded], dtype=torch.float64)

        alpha_star, beta_star, lr_0_star = 10**train_X_bounded[torch.argmax(train_Y)]
        #print_job(alpha_star, beta_star, dataset, dataset_dir, retrained_experiments_dir, lr_0_star, method, model, model_arch, n, prior_dir, prior_type, random_state, True, False)
        
        runtime = 0.0
        runtime = sum([get_runtime(10**x[0], 10**x[1], tuned_experiments_dir, 10**x[2], model, n, random_state, True) for x in train_X_bounded])
        runtime += get_runtime(alpha_star, beta_star, retrained_experiments_dir, lr_0_star, model, n, random_state, False)
        
        val_acc = get_val_or_test_acc(alpha, beta, tuned_experiments_dir, lr_0, model, n, random_state, True)
        val_nll = get_val_or_test_nll(alpha, beta, tuned_experiments_dir, lr_0, model, n, random_state, True)
        test_acc = get_val_or_test_acc(alpha_star, beta_star, retrained_experiments_dir, lr_0_star, model, n, random_state, False)
        test_nll = get_val_or_test_nll(alpha_star, beta_star, retrained_experiments_dir, lr_0_star, model, n, random_state, False)

        row = [alpha.item(), beta.item(), lr_0.item(), n, i, random_state, runtime, seed, val_acc, val_nll, test_acc, test_nll]
        cifar10_bo_df.loc[len(cifar10_bo_df)] = row

cifar10_bo_df.head(100)

    'python ../src/main_image_classifiers.py --alpha=0.00758947976208385 --batch_size=128 --beta=0.0006780776932355295 --dataset='CIFAR-10' --dataset_dir='/cluster/tufts/hugheslab/eharve06/CIFAR-10' --experiments_dir='/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/AAA' --lr_0='0.0023886297077207034' --method='MAP' --model='l2-sp' --model_arch='ConvNeXt-Tiny' --model_name='l2-sp_alpha=0.00758947976208385_beta=0.0006780776932355295_lr_0=0.0023886297077207034_n=100_random_state=1001' --n=100 --num_workers=0 --prior_dir='/cluster/tufts/hugheslab/eharve06/convnext_tiny_torchvision' --prior_type='convnext_tiny_torchvision' --random_state=1001 --tune'
    'python ../src/main_image_classifiers.py --alpha=1.7547413041177135e-06 --batch_size=128 --beta=7.91075667332751e-06 --dataset='CIFAR-10' --dataset_dir='/cluster/tufts/hugheslab/eharve06/CIFAR-10' --experiments_dir='/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/AAA' --lr_0='0.0005043796336040412' --me

    'python ../src/main_image_classifiers.py --alpha=1.3690924723771312e-06 --batch_size=128 --beta=1.4026874356562238e-05 --dataset='CIFAR-10' --dataset_dir='/cluster/tufts/hugheslab/eharve06/CIFAR-10' --experiments_dir='/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/AAA' --lr_0='0.020832665132000876' --method='MAP' --model='l2-sp' --model_arch='ConvNeXt-Tiny' --model_name='l2-sp_alpha=1.3690924723771312e-06_beta=1.4026874356562238e-05_lr_0=0.020832665132000876_n=100_random_state=3001' --n=100 --num_workers=0 --prior_dir='/cluster/tufts/hugheslab/eharve06/convnext_tiny_torchvision' --prior_type='convnext_tiny_torchvision' --random_state=3001 --tune'
    'python ../src/main_image_classifiers.py --alpha=8.099114510269596e-05 --batch_size=128 --beta=0.0008445374360730293 --dataset='CIFAR-10' --dataset_dir='/cluster/tufts/hugheslab/eharve06/CIFAR-10' --experiments_dir='/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/AAA' --lr_0='0.0001488233620888951

    'python ../src/main_image_classifiers.py --alpha=0.004679965619116805 --batch_size=128 --beta=2.4415231675522605e-06 --dataset='CIFAR-10' --dataset_dir='/cluster/tufts/hugheslab/eharve06/CIFAR-10' --experiments_dir='/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/AAA' --lr_0='0.01337376014945696' --method='MAP' --model='l2-sp' --model_arch='ConvNeXt-Tiny' --model_name='l2-sp_alpha=0.004679965619116805_beta=2.4415231675522605e-06_lr_0=0.01337376014945696_n=10000_random_state=3001' --n=10000 --num_workers=0 --prior_dir='/cluster/tufts/hugheslab/eharve06/convnext_tiny_torchvision' --prior_type='convnext_tiny_torchvision' --random_state=3001 --tune'
    'python ../src/main_image_classifiers.py --alpha=1.3690924723771312e-06 --batch_size=128 --beta=1.4026874356562238e-05 --dataset='CIFAR-10' --dataset_dir='/cluster/tufts/hugheslab/eharve06/CIFAR-10' --experiments_dir='/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/AAA' --lr_0='0.020832665132000876'

Unnamed: 0,alpha,beta,lr_0,n,n_iter,random_state,runtime,seed,val_acc,val_nll,test_acc,test_nll
0,0.007589,0.000678,0.002389,100.0,0.0,1001.0,3291.86097,0.0,0.0,inf,0.6895,1.007312
1,2e-06,8e-06,0.000504,100.0,0.0,1001.0,3343.366074,1.0,0.0,inf,0.6395,1.109125
2,0.00468,2e-06,0.013374,100.0,0.0,1001.0,3278.860939,2.0,0.0,inf,0.7951,0.700389
3,1e-06,1.4e-05,0.020833,100.0,0.0,1001.0,3272.87138,3.0,0.0,inf,0.8095,0.719501
4,8.1e-05,0.000845,0.000149,100.0,0.0,1001.0,3273.628606,4.0,0.0,inf,0.608,1.173087
5,0.007589,0.000678,0.002389,100.0,0.0,2001.0,3230.555144,0.0,0.0,inf,0.709,1.062701
6,2e-06,8e-06,0.000504,100.0,0.0,2001.0,3268.283947,1.0,0.0,inf,0.6696,1.086542
7,0.00468,2e-06,0.013374,100.0,0.0,2001.0,3291.831187,2.0,0.0,inf,0.8064,0.686987
8,1e-06,1.4e-05,0.020833,100.0,0.0,2001.0,3281.11144,3.0,0.0,inf,0.8151,0.768214
9,8.1e-05,0.000845,0.000149,100.0,0.0,2001.0,3243.486655,4.0,0.0,inf,0.6548,1.003984


In [41]:
#cifar10_bo_df.to_csv("CIFAR-10_ConvNeXt-Tiny_BO.csv", index=False)
cifar10_bo_df = pd.read_csv("CIFAR-10_ConvNeXt-Tiny_BO.csv")
cifar10_bo_df.head(100)

Unnamed: 0,alpha,beta,lr_0,n,n_iter,random_state,runtime,seed,val_acc,val_nll,test_acc,test_nll
0,0.007589,0.000678,0.002389,100.0,0.0,1001.0,3291.860970,0.0,0.60,1.291354,0.6895,1.007312
1,0.000001,0.000001,0.100000,100.0,1.0,1001.0,9627.154016,0.0,0.40,3.193286,0.6895,1.007312
2,0.004565,0.001518,0.000627,100.0,2.0,1001.0,12253.339415,0.0,0.70,1.114461,0.6500,1.101206
3,0.000287,0.010000,0.002248,100.0,3.0,1001.0,14865.472126,0.0,0.60,1.209142,0.6500,1.101206
4,0.010000,0.010000,0.000100,100.0,4.0,1001.0,17441.893066,0.0,0.65,1.216636,0.6500,1.101206
...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.000187,0.000001,0.000223,100.0,23.0,1001.0,67689.889091,2.0,0.75,1.029704,0.8047,0.775284
96,0.000006,0.000001,0.000445,100.0,24.0,1001.0,70440.697596,2.0,0.75,1.008025,0.8047,0.775284
97,0.000281,0.000030,0.045352,100.0,25.0,1001.0,73163.069124,2.0,0.80,0.691157,0.8047,0.775284
98,0.000088,0.000769,0.000430,100.0,26.0,1001.0,75813.926749,2.0,0.70,1.009623,0.8047,0.775284


In [None]:
# DONE:

# CIFAR-10 n_iters = 6 tuned

# CIFAR-10 n_iters = 5 retrained

# TODO:

# CIFAR-10 n_iters = 7 tuned

# CIFAR-10 n_iters = 6 retrained

In [33]:
dataset = "CIFAR-10"
dataset_dir = "/cluster/tufts/hugheslab/eharve06/CIFAR-10"
model = "l2-sp"
method = "MAP"
model_arch = "ViT-B/16"
ns = [100, 1000, 10000, 50000]
prior_dir = "/cluster/tufts/hugheslab/eharve06/vit_b_16_torchvision"
prior_type = "vit_b_16_torchvision"
random_states = [1001, 2001, 3001]
retrained_experiments_dir = "/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/retrained_CIFAR-10_ViT_B_16_BO"
tuned_experiments_dir = "/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/tuned_CIFAR-10_ViT_B_16_BO"

n_iters = 5
seeds = [0, 1, 2, 3, 4]

bounds = torch.tensor([[-6, -6, -4], [-2, -2, -1]], dtype=torch.double)

columns = ["alpha", "beta", "lr_0", "n", "n_iter", "random_state", "runtime", "seed", "val_acc", "val_nll", "test_acc", "test_nll"]
cifar10_bo_df = pd.DataFrame(columns=columns)

for n, random_state, seed in itertools.product(ns, random_states, seeds):

    gen = torch.Generator()
    gen.manual_seed(seed)

    train_X = torch.rand(size=(1, 3,), generator=gen, dtype=torch.double)
    train_X_bounded = (bounds[1] - bounds[0]) * train_X + bounds[0]
    
    alpha, beta, lr_0 = 10**train_X_bounded[0]
    print_job(alpha, beta, dataset, dataset_dir, tuned_experiments_dir, lr_0, method, model, model_arch, n, prior_dir, prior_type, random_state, False, True)

    train_Y = torch.tensor([[-get_val_or_test_nll(10**x[0], 10**x[1], tuned_experiments_dir, 10**x[2], model, n, random_state, True)] for x in train_X_bounded], dtype=torch.float64)
    
    alpha_star, beta_star, lr_0_star = 10**train_X_bounded[torch.argmax(train_Y)]
    print_job(alpha_star, beta_star, dataset, dataset_dir, retrained_experiments_dir, lr_0_star, method, model, model_arch, n, prior_dir, prior_type, random_state, True, False)
    
    runtime = 0.0
    runtime = get_runtime(alpha, beta, tuned_experiments_dir, model, lr_0, n, random_state, True)
    runtime += get_runtime(alpha_star, beta_star, retrained_experiments_dir, lr_0_star, model, n, random_state, False)
    
    val_acc = get_val_or_test_acc(alpha, beta, tuned_experiments_dir, lr_0, model, n, random_state, True)
    val_nll = get_val_or_test_nll(alpha, beta, tuned_experiments_dir, lr_0, model, n, random_state, True)
    test_acc = get_val_or_test_acc(alpha_star, beta_star, retrained_experiments_dir, lr_0_star, model, n, random_state, False)
    test_nll = get_val_or_test_nll(alpha_star, beta_star, retrained_experiments_dir, lr_0_star, model, n, random_state, False)

    row = [alpha.item(), beta.item(), lr_0.item(), n, 0, random_state, runtime, seed, val_acc, val_nll, test_acc, test_nll]
    cifar10_bo_df.loc[len(cifar10_bo_df)] = row
    
    for i in range(1, n_iters+1):

        candidate = get_candidate(train_X, train_Y, seed)
        candidate_bounded = (bounds[1] - bounds[0]) * candidate + bounds[0]
        train_X = torch.cat([train_X, candidate])
        train_X_bounded = (bounds[1] - bounds[0]) * train_X + bounds[0]
                
        alpha, beta, lr_0 = 10**candidate_bounded[0]        
        print_job(alpha, beta, dataset, dataset_dir, tuned_experiments_dir, lr_0, method, model, model_arch, n, prior_dir, prior_type, random_state, False, True)

        train_Y = torch.tensor([[-get_val_or_test_nll(10**x[0], 10**x[1], tuned_experiments_dir, 10**x[2], model, n, random_state, True)] for x in train_X_bounded], dtype=torch.float64)

        alpha_star, beta_star, lr_0_star = 10**train_X_bounded[torch.argmax(train_Y)]
        #print_job(alpha_star, beta_star, dataset, dataset_dir, retrained_experiments_dir, lr_0_star, method, model, model_arch, n, prior_dir, prior_type, random_state, True, False)
        
        runtime = 0.0
        runtime = sum([get_runtime(10**x[0], 10**x[1], tuned_experiments_dir, 10**x[2], model, n, random_state, True) for x in train_X_bounded])
        runtime += get_runtime(alpha_star, beta_star, retrained_experiments_dir, lr_0_star, model, n, random_state, False)
        
        val_acc = get_val_or_test_acc(alpha, beta, tuned_experiments_dir, lr_0, model, n, random_state, True)
        val_nll = get_val_or_test_nll(alpha, beta, tuned_experiments_dir, lr_0, model, n, random_state, True)
        test_acc = get_val_or_test_acc(alpha_star, beta_star, retrained_experiments_dir, lr_0_star, model, n, random_state, False)
        test_nll = get_val_or_test_nll(alpha_star, beta_star, retrained_experiments_dir, lr_0_star, model, n, random_state, False)

        row = [alpha.item(), beta.item(), lr_0.item(), n, i, random_state, runtime, seed, val_acc, val_nll, test_acc, test_nll]
        cifar10_bo_df.loc[len(cifar10_bo_df)] = row

cifar10_bo_df.head(100)

Unnamed: 0,alpha,beta,lr_0,n,n_iter,random_state,runtime,seed,val_acc,val_nll,test_acc,test_nll
0,0.007589,0.000678,0.002389,100.0,0.0,1001.0,6750.569728,0.0,0.750,0.664725,0.8786,0.382506
1,0.000001,0.000001,0.100000,100.0,1.0,1001.0,17262.992587,0.0,0.500,2.772601,0.8786,0.382506
2,0.001950,0.001412,0.000690,100.0,2.0,1001.0,22437.080971,0.0,0.700,0.795896,0.8786,0.382506
3,0.010000,0.007690,0.040400,100.0,3.0,1001.0,27527.874768,0.0,0.500,1.683694,0.8786,0.382506
4,0.010000,0.000087,0.000550,100.0,4.0,1001.0,32718.751672,0.0,0.700,0.790889,0.8786,0.382506
...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.010000,0.000001,0.000100,1000.0,5.0,1001.0,57334.673697,0.0,0.960,0.190884,0.9373,0.231481
96,0.000002,0.000008,0.000504,1000.0,0.0,1001.0,7827.780382,1.0,0.950,0.138502,0.9375,0.219226
97,0.003442,0.010000,0.064100,1000.0,1.0,1001.0,22980.140682,1.0,0.960,0.109862,0.9300,0.265885
98,0.008463,0.005513,0.010742,1000.0,2.0,1001.0,30612.602440,1.0,0.940,0.216755,0.9300,0.265885


In [40]:
#cifar10_bo_df.to_csv("CIFAR-10_ViT_B_16_BO.csv", index=False)
cifar10_bo_df = pd.read_csv("CIFAR-10_ViT_B_16_BO.csv")
cifar10_bo_df.head(100)

Unnamed: 0,alpha,beta,lr_0,n,n_iter,random_state,runtime,seed,val_acc,val_nll,test_acc,test_nll
0,0.007589,0.000678,0.002389,100.0,0.0,1001.0,6750.569728,0.0,0.750,0.664725,0.8786,0.382506
1,0.000001,0.000001,0.100000,100.0,1.0,1001.0,17262.992587,0.0,0.500,2.772601,0.8786,0.382506
2,0.001950,0.001412,0.000690,100.0,2.0,1001.0,22437.080971,0.0,0.700,0.795896,0.8786,0.382506
3,0.010000,0.007690,0.040400,100.0,3.0,1001.0,27527.874768,0.0,0.500,1.683694,0.8786,0.382506
4,0.010000,0.000087,0.000550,100.0,4.0,1001.0,32718.751672,0.0,0.700,0.790889,0.8786,0.382506
...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.010000,0.000001,0.000100,1000.0,5.0,1001.0,57334.673697,0.0,0.960,0.190884,0.9373,0.231481
96,0.000002,0.000008,0.000504,1000.0,0.0,1001.0,7827.780382,1.0,0.950,0.138502,0.9375,0.219226
97,0.003442,0.010000,0.064100,1000.0,1.0,1001.0,22980.140682,1.0,0.960,0.109862,0.9300,0.265885
98,0.008463,0.005513,0.010742,1000.0,2.0,1001.0,30612.602440,1.0,0.940,0.216755,0.9300,0.265885


In [None]:
# DONE:

# CIFAR-10 n_iters = 11 tuned

# CIFAR-10 n_iters = 11 retrained

# TODO:

# CIFAR-10 n_iters = 12 tuned

# CIFAR-10 n_iters = 12 retrained

In [37]:
dataset = "CIFAR-10"
dataset_dir = "/cluster/tufts/hugheslab/eharve06/CIFAR-10"
model = "l2-sp"
method = "MAP"
model_arch = "ResNet-50"
ns = [10000, 50000]
prior_dir = "/cluster/tufts/hugheslab/eharve06/resnet50_torchvision"
prior_type = "resnet50_torchvision"
random_states = [1001, 2001, 3001]
retrained_experiments_dir = "/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/retrained_CIFAR-10_BO"
tuned_experiments_dir = "/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/tuned_CIFAR-10_BO"

n_iters = 11
seeds = [0, 1, 2, 3, 4]

bounds = torch.tensor([[-6, -6, -4], [-2, -2, -1]], dtype=torch.double)

columns = ["alpha", "beta", "lr_0", "n", "n_iter", "random_state", "runtime", "seed", "val_acc", "val_nll", "test_acc", "test_nll"]
cifar10_bo_df = pd.DataFrame(columns=columns)

for n, random_state, seed in itertools.product(ns, random_states, seeds):

    gen = torch.Generator()
    gen.manual_seed(seed)

    train_X = torch.rand(size=(1, 3,), generator=gen, dtype=torch.double)
    train_X_bounded = (bounds[1] - bounds[0]) * train_X + bounds[0]
    
    alpha, beta, lr_0 = 10**train_X_bounded[0]
    print_job(alpha, beta, dataset, dataset_dir, tuned_experiments_dir, lr_0, method, model, model_arch, n, prior_dir, prior_type, random_state, False, True)

    train_Y = torch.tensor([[-get_val_or_test_nll(10**x[0], 10**x[1], tuned_experiments_dir, 10**x[2], model, n, random_state, True)] for x in train_X_bounded], dtype=torch.float64)
    
    alpha_star, beta_star, lr_0_star = 10**train_X_bounded[torch.argmax(train_Y)]
    print_job(alpha_star, beta_star, dataset, dataset_dir, retrained_experiments_dir, lr_0_star, method, model, model_arch, n, prior_dir, prior_type, random_state, True, False)
    
    runtime = 0.0
    runtime = get_runtime(alpha, beta, tuned_experiments_dir, model, lr_0, n, random_state, True)
    runtime += get_runtime(alpha_star, beta_star, retrained_experiments_dir, lr_0_star, model, n, random_state, False)
    
    val_acc = get_val_or_test_acc(alpha, beta, tuned_experiments_dir, lr_0, model, n, random_state, True)
    val_nll = get_val_or_test_nll(alpha, beta, tuned_experiments_dir, lr_0, model, n, random_state, True)
    test_acc = get_val_or_test_acc(alpha_star, beta_star, retrained_experiments_dir, lr_0_star, model, n, random_state, False)
    test_nll = get_val_or_test_nll(alpha_star, beta_star, retrained_experiments_dir, lr_0_star, model, n, random_state, False)

    row = [alpha.item(), beta.item(), lr_0.item(), n, 0, random_state, runtime, seed, val_acc, val_nll, test_acc, test_nll]
    cifar10_bo_df.loc[len(cifar10_bo_df)] = row
    
    for i in range(1, n_iters+1):

        candidate = get_candidate(train_X, train_Y, seed)
        candidate_bounded = (bounds[1] - bounds[0]) * candidate + bounds[0]
        train_X = torch.cat([train_X, candidate])
        train_X_bounded = (bounds[1] - bounds[0]) * train_X + bounds[0]
                
        alpha, beta, lr_0 = 10**candidate_bounded[0]        
        print_job(alpha, beta, dataset, dataset_dir, tuned_experiments_dir, lr_0, method, model, model_arch, n, prior_dir, prior_type, random_state, False, True)

        train_Y = torch.tensor([[-get_val_or_test_nll(10**x[0], 10**x[1], tuned_experiments_dir, 10**x[2], model, n, random_state, True)] for x in train_X_bounded], dtype=torch.float64)

        alpha_star, beta_star, lr_0_star = 10**train_X_bounded[torch.argmax(train_Y)]
        #print_job(alpha_star, beta_star, dataset, dataset_dir, retrained_experiments_dir, lr_0_star, method, model, model_arch, n, prior_dir, prior_type, random_state, True, False)
        
        runtime = 0.0
        runtime = sum([get_runtime(10**x[0], 10**x[1], tuned_experiments_dir, 10**x[2], model, n, random_state, True) for x in train_X_bounded])
        runtime += get_runtime(alpha_star, beta_star, retrained_experiments_dir, lr_0_star, model, n, random_state, False)
        
        val_acc = get_val_or_test_acc(alpha, beta, tuned_experiments_dir, lr_0, model, n, random_state, True)
        val_nll = get_val_or_test_nll(alpha, beta, tuned_experiments_dir, lr_0, model, n, random_state, True)
        test_acc = get_val_or_test_acc(alpha_star, beta_star, retrained_experiments_dir, lr_0_star, model, n, random_state, False)
        test_nll = get_val_or_test_nll(alpha_star, beta_star, retrained_experiments_dir, lr_0_star, model, n, random_state, False)

        row = [alpha.item(), beta.item(), lr_0.item(), n, i, random_state, runtime, seed, val_acc, val_nll, test_acc, test_nll]
        cifar10_bo_df.loc[len(cifar10_bo_df)] = row

cifar10_bo_df.head(100)

Unnamed: 0,alpha,beta,lr_0,n,n_iter,random_state,runtime,seed,val_acc,val_nll,test_acc,test_nll
0,0.007589,0.000678,0.002389,10000.0,0.0,1001.0,2079.274698,0.0,0.9320,0.246327,0.9392,0.214758
1,0.000001,0.000001,0.100000,10000.0,1.0,1001.0,6055.884249,0.0,0.9300,0.359137,0.9392,0.214758
2,0.002775,0.001468,0.000655,10000.0,2.0,1001.0,7919.535531,0.0,0.9195,0.224003,0.9283,0.217172
3,0.000138,0.010000,0.000741,10000.0,3.0,1001.0,9832.100755,0.0,0.9210,0.220109,0.9309,0.210503
4,0.010000,0.010000,0.000100,10000.0,4.0,1001.0,11791.489255,0.0,0.8090,0.836178,0.9309,0.210503
...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.000001,0.010000,0.009679,10000.0,11.0,2001.0,26379.588150,2.0,0.9365,0.210918,0.9506,0.178149
96,0.000001,0.000014,0.020833,10000.0,0.0,2001.0,2018.297087,3.0,0.9465,0.249594,0.9508,0.239866
97,0.004370,0.010000,0.000100,10000.0,1.0,2001.0,6174.177397,3.0,0.8005,0.845951,0.9508,0.239866
98,0.000001,0.000001,0.012291,10000.0,2.0,2001.0,8248.324189,3.0,0.9430,0.237979,0.9479,0.244793


In [39]:
#cifar10_bo_df.to_csv("CIFAR-10_BO.csv", index=False)
cifar10_bo_df = pd.read_csv("CIFAR-10_BO.csv")
cifar10_bo_df.head(100)

Unnamed: 0,alpha,beta,lr_0,n,n_iter,random_state,runtime,seed,val_acc,val_nll,test_acc,test_nll
0,0.007589,0.000678,0.002389,10000.0,0.0,1001.0,2079.274698,0.0,0.9320,0.246327,0.9392,0.214758
1,0.000001,0.000001,0.100000,10000.0,1.0,1001.0,6055.884249,0.0,0.9300,0.359137,0.9392,0.214758
2,0.002775,0.001468,0.000655,10000.0,2.0,1001.0,7919.535531,0.0,0.9195,0.224003,0.9283,0.217172
3,0.000138,0.010000,0.000741,10000.0,3.0,1001.0,9832.100755,0.0,0.9210,0.220109,0.9309,0.210503
4,0.010000,0.010000,0.000100,10000.0,4.0,1001.0,11791.489255,0.0,0.8090,0.836178,0.9309,0.210503
...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.000001,0.010000,0.009679,10000.0,11.0,2001.0,26379.588150,2.0,0.9365,0.210918,0.9506,0.178149
96,0.000001,0.000014,0.020833,10000.0,0.0,2001.0,2018.297087,3.0,0.9465,0.249594,0.9508,0.239866
97,0.004370,0.010000,0.000100,10000.0,1.0,2001.0,6174.177397,3.0,0.8005,0.845951,0.9508,0.239866
98,0.000001,0.000001,0.012291,10000.0,2.0,2001.0,8248.324189,3.0,0.9430,0.237979,0.9479,0.244793
