In [1]:
import os
import math
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [5]:
################################################################################
def check_epochs(df, n, batch_size=128, steps=6000, drop_last=True):
    """Tell whether ``df`` has exactly the number of rows expected for a full run.

    The expected row count is ``int(steps / batches_per_epoch)``, where
    ``batches_per_epoch`` is ``n / batch_size`` rounded down when ``drop_last``
    is truthy and rounded up otherwise. (Presumably the training script logs
    one row per epoch, so a match means the run finished -- confirm against
    the script that writes the CSV.)

    Args:
        df: Object exposing ``.shape``; only ``df.shape[0]`` is read.
        n: Number of training examples.
        batch_size: Examples per batch.
        steps: Total number of steps in a complete run.
        drop_last: Whether a trailing partial batch is discarded (floor)
            or counted (ceil).

    Returns:
        ``True`` when ``df.shape[0]`` equals the expected epoch count.

    Raises:
        ZeroDivisionError: If the batch count is zero (``n < batch_size``
            with ``drop_last``) or ``batch_size`` is zero.
    """
    rounding = math.floor if drop_last else math.ceil
    batches_per_epoch = rounding(n / batch_size)
    expected_rows = int(steps / batches_per_epoch)
    return df.shape[0] == expected_rows

def print_job(alpha, beta, dataset, dataset_dir, experiments_dir, lr_0, method,
              model, model_arch, n, prior_dir, prior_type, random_state, save, temps, tune):
    """Print the command line for one training run, unless that run already finished.

    The run is identified by ``model_name`` (built from ``model``, ``alpha``,
    ``beta``, ``lr_0``, ``n`` and ``random_state``). If
    ``{experiments_dir}/{model_name}.csv`` exists and ``check_epochs`` accepts
    it, nothing is printed. Otherwise the command is printed, wrapped in single
    quotes and indented by four spaces.

    Args:
        alpha, beta, lr_0, n, random_state: Hyperparameters forwarded as
            ``--name=value`` flags and embedded in ``model_name``.
        dataset, dataset_dir, experiments_dir, method, model, model_arch,
        prior_dir, prior_type: Forwarded as double-quoted ``--name="value"`` flags.
        save: Truthy to append ``--save``.
        temps: Accepted but currently NOT forwarded; the original ``--temps``
            flag was commented out. NOTE(review): retraining callers pass the
            selected temperature here -- confirm that dropping it is intended.
        tune: Truthy to append ``--tune``. It also changes the training-set
            size used for the completion check to ``n - int(n / 5)``
            (presumably a held-out validation fifth -- confirm).

    Returns:
        None. The only effect is the optional ``print``.
    """
    model_name = f"{model}_alpha={alpha}_beta={beta}_lr_0={lr_0}_n={n}_random_state={random_state}"
    results_path = f"{experiments_dir}/{model_name}.csv"

    if os.path.exists(results_path):
        history = pd.read_csv(results_path)
        n_train = n
        if tune:
            n_train = n - int((1/5) * n)
        # The batch size is capped at the data size so a small dataset still
        # yields at least one batch per epoch.
        if check_epochs(history, n_train, batch_size=min(128, n_train), steps=6000, drop_last=True):
            return

    arguments = [
        "python ../src/main_image_classifiers_clml.py",
        f"--alpha={alpha}",
        "--batch_size=128",
        f"--beta={beta}",
        f"--dataset=\"{dataset}\"",
        f"--dataset_dir=\"{dataset_dir}\"",
        f"--experiments_dir=\"{experiments_dir}\"",
        "--la_batch_size=64",
        f"--lr_0={lr_0}",
        f"--method=\"{method}\"",
        f"--model=\"{model}\"",
        f"--model_arch=\"{model_arch}\"",
        f"--model_name=\"{model_name}\"",
        f"--n={n}",
        "--num_workers=0",
        f"--prior_dir=\"{prior_dir}\"",
        f"--prior_type=\"{prior_type}\"",
        f"--random_state={random_state}",
    ]
    if save:
        arguments.append("--save")
    if tune:
        arguments.append("--tune")

    # Every flag, including the last one, is followed by a single space.
    command = " ".join(arguments) + " "

    print(f"    '{command}'")

In [3]:
# Sweep configuration for CIFAR-10 / ConvNeXt-Tiny / CLML.
alphas = [0.01, 0.001, 0.0001, 1e-05, 1e-06, 0.0]
betas = [0.01, 0.001, 0.0001, 1e-05, 1e-06, 0.0]
dataset = "CIFAR-10"
dataset_dir = "{home_dir}/CIFAR-10"
tuned_experiments_dir = "{home_dir}/data-emphasized-ELBo/experiments/tuned_CIFAR-10_ConvNeXt-Tiny_CLML"
retrained_experiments_dir = "{home_dir}/data-emphasized-ELBo/experiments/retrained_CIFAR-10_ConvNeXt-Tiny_CLML"
lr_0s = [0.1, 0.01, 0.001, 0.0001]
method = "CLML"
model = "l2-sp"
model_arch = "ConvNeXt-Tiny"
ns = [1000]
prior_dir = "{home_dir}/convnext_tiny_torchvision"
prior_type = "convnext_tiny_torchvision"
random_states = [1001, 2001, 3001]
temps = [1.0, 0.1, 0.01, 0.001, 0.0001]

# Stage 1 (tuning jobs) is disabled in this cell. To emit tuning jobs instead,
# replace the loop body below with:
#print_job(alpha, beta, dataset, dataset_dir, tuned_experiments_dir, lr_0, method, model, model_arch, n, prior_dir, prior_type, random_state, True, temps, True)

# Stage 2: for every tuning run that has finished, print the retraining job.
for alpha, beta, lr_0, n, random_state in itertools.product(alphas, betas, lr_0s, ns, random_states):
    model_name = f"{model}_alpha={alpha}_beta={beta}_lr_0={lr_0}_n={n}_random_state={random_state}"
    tuned_csv = f"{tuned_experiments_dir}/{model_name}.csv"
    if not os.path.exists(tuned_csv):
        continue

    temp_df = pd.read_csv(tuned_csv)
    # Tuning runs are checked against n minus int(n/5) examples.
    n_train = n - int((1/5) * n)
    if not check_epochs(temp_df, n_train, batch_size=min(128, n_train), steps=6000, drop_last=True):
        continue

    # Take the temperature from the last logged row of the tuning run.
    best_temp = [temp_df.val_or_test_temp.values[-1]]
    print_job(
        alpha=alpha,
        beta=beta,
        dataset=dataset,
        dataset_dir=dataset_dir,
        experiments_dir=retrained_experiments_dir,
        lr_0=lr_0,
        method=method,
        model=model,
        model_arch=model_arch,
        n=n,
        prior_dir=prior_dir,
        prior_type=prior_type,
        random_state=random_state,
        save=True,
        temps=best_temp,
        tune=False,
    )


    'python ../src/main_image_classifiers_clml.py --alpha=0.0 --batch_size=128 --beta=1e-05 --dataset="CIFAR-10" --dataset_dir="{home_dir}/CIFAR-10" --experiments_dir="{home_dir}/data-emphasized-ELBo/experiments/retrained_CIFAR-10_ConvNeXt-Tiny_CLML" --la_batch_size=64 --lr_0=0.1 --method="CLML" --model="l2-sp" --model_arch="ConvNeXt-Tiny" --model_name="l2-sp_alpha=0.0_beta=1e-05_lr_0=0.1_n=1000_random_state=2001" --n=1000 --num_workers=0 --prior_dir="{home_dir}/convnext_tiny_torchvision" --prior_type="convnext_tiny_torchvision" --random_state=2001 --save --temps 1.0 '
    'python ../src/main_image_classifiers_clml.py --alpha=0.0 --batch_size=128 --beta=1e-05 --dataset="CIFAR-10" --dataset_dir="{home_dir}/CIFAR-10" --experiments_dir="{home_dir}/data-emphasized-ELBo/experiments/retrained_CIFAR-10_ConvNeXt-Tiny_CLML" --la_batch_size=64 --lr_0=0.1 --method="CLML" --model="l2-sp" --model_arch="ConvNeXt-Tiny" --model_name="l2-sp_alpha=0.0_beta=1e-05_lr_0=0.1_n=1000_random_state=3001" --n=10

In [6]:
# Sweep configuration for Flower-102 / ConvNeXt-Tiny / CLML.
alphas = [0.01, 0.001, 0.0001, 1e-05, 1e-06, 0.0]
betas = [0.01, 0.001, 0.0001, 1e-05, 1e-06, 0.0]
dataset = "Flower-102"
dataset_dir = "{home_dir}/Flower-102"
tuned_experiments_dir = "{home_dir}/data-emphasized-ELBo/experiments/tuned_Flower-102_ConvNeXt-Tiny_CLML"
retrained_experiments_dir = "{home_dir}/data-emphasized-ELBo/experiments/retrained_Flower-102_ConvNeXt-Tiny_CLML"
lr_0s = [0.1, 0.01, 0.001, 0.0001]
method = "CLML"
model = "l2-sp"
model_arch = "ConvNeXt-Tiny"
ns = [510]
prior_dir = "{home_dir}/convnext_tiny_torchvision"
prior_type = "convnext_tiny_torchvision"
random_states = [1001, 2001, 3001]
temps = [1.0, 0.1, 0.01, 0.001, 0.0001]

# Stage 1: print one tuning job per hyperparameter combination.
for alpha, beta, lr_0, n, random_state in itertools.product(alphas, betas, lr_0s, ns, random_states):
    print_job(
        alpha=alpha,
        beta=beta,
        dataset=dataset,
        dataset_dir=dataset_dir,
        experiments_dir=tuned_experiments_dir,
        lr_0=lr_0,
        method=method,
        model=model,
        model_arch=model_arch,
        n=n,
        prior_dir=prior_dir,
        prior_type=prior_type,
        random_state=random_state,
        save=True,
        temps=temps,
        tune=True,
    )

    # Stage 2 (retraining jobs) is disabled for this dataset. To enable it,
    # comment out the call above and uncomment the lines below.
    #model_name = f"{model}_alpha={alpha}_beta={beta}_lr_0={lr_0}_n={n}_random_state={random_state}"
    #if os.path.exists(f"{tuned_experiments_dir}/{model_name}.csv"):
    #    temp_df = pd.read_csv(f"{tuned_experiments_dir}/{model_name}.csv")
    #    n_train = n - int((1/5) * n)
    #    if check_epochs(temp_df, n_train, batch_size=min(128, n_train), steps=6000, drop_last=True):
    #        best_temp = [temp_df.val_or_test_temp.values[-1]]
    #        print_job(alpha, beta, dataset, dataset_dir, retrained_experiments_dir, lr_0, method, model, model_arch, n, prior_dir, prior_type, random_state, True, best_temp, False)


    'python ../src/main_image_classifiers_clml.py --alpha=0.01 --batch_size=128 --beta=0.01 --dataset="Flower-102" --dataset_dir="{home_dir}/Flower-102" --experiments_dir="{home_dir}/data-emphasized-ELBo/experiments/tuned_Flower-102_ConvNeXt-Tiny_CLML" --la_batch_size=64 --lr_0=0.1 --method="CLML" --model="l2-sp" --model_arch="ConvNeXt-Tiny" --model_name="l2-sp_alpha=0.01_beta=0.01_lr_0=0.1_n=510_random_state=1001" --n=510 --num_workers=0 --prior_dir="{home_dir}/convnext_tiny_torchvision" --prior_type="convnext_tiny_torchvision" --random_state=1001 --save --tune '
    'python ../src/main_image_classifiers_clml.py --alpha=0.01 --batch_size=128 --beta=0.01 --dataset="Flower-102" --dataset_dir="{home_dir}/Flower-102" --experiments_dir="{home_dir}/data-emphasized-ELBo/experiments/tuned_Flower-102_ConvNeXt-Tiny_CLML" --la_batch_size=64 --lr_0=0.1 --method="CLML" --model="l2-sp" --model_arch="ConvNeXt-Tiny" --model_name="l2-sp_alpha=0.01_beta=0.01_lr_0=0.1_n=510_random_state=2001" --n=510 --

In [5]:
alphas = [0.01, 0.001, 0.0001, 1e-05, 1e-06, 0.0]
betas = [0.01, 0.001, 0.0001, 1e-05, 1e-06, 0.0]
model = 'l2-sp'
dataset_dir = '{home_dir}/CIFAR-10'
experiments_dir = '{home_dir}/data-emphasized-ELBo/experiments/tuned_CIFAR-10_ConvNeXt-Tiny_CLML'
lr_0s = [0.1, 0.01, 0.001, 0.0001]
ns = [1000]
random_states = [1001, 2001, 3001]

columns = ['criterion', 'model_name', 'n', 'random_state', 'runtime', 'val_acc', 'val_nll', 'val_bma_acc', 'val_clml']
tuned_df = pd.DataFrame(columns=columns)

for alpha, beta, lr_0, n, random_state in itertools.product(alphas, betas, lr_0s, ns, random_states):
    try:
        model_name = f'{model}_alpha={alpha}_beta={beta}_lr_0={lr_0}_n={n}_random_state={random_state}'
        temp_df = pd.read_csv(f"{experiments_dir}/{model_name}.csv")
        tuned_n = n - int((1/5) * n)
        if check_epochs(temp_df, tuned_n, batch_size=min(128, tuned_n), steps=6000):
            row = [model, model_name, n, random_state, temp_df['train_sec/epoch'].sum(), temp_df.val_or_test_acc.values[-1], temp_df.val_or_test_nll.values[-1], temp_df.val_or_test_bma_acc.values[-1], temp_df.val_or_test_clml.values[-1]]
            tuned_df.loc[len(tuned_df)] = row
    except:
        pass

experiments_dir = '{home_dir}/data-emphasized-ELBo/experiments/retrained_CIFAR-10_ConvNeXt-Tiny_CLML'

columns = ['criterion', 'model_name', 'n', 'random_state', 'runtime', 'test_acc', 'test_nll', 'test_bma_acc', 'test_clml']
retrained_df = pd.DataFrame(columns=columns)

for index, row in tuned_df.iterrows():
    try:
        temp_df = pd.read_csv(f"{experiments_dir}/{row.model_name}.csv")
        if check_epochs(temp_df, row.n, batch_size=min(128, row.n), steps=6000):
            row = [row.criterion, row.model_name, row.n, row.random_state, temp_df['train_sec/epoch'].sum(), temp_df.val_or_test_acc.values[-1], temp_df.val_or_test_nll.values[-1], temp_df.val_or_test_bma_acc.values[-1], temp_df.val_or_test_clml.values[-1]]
            retrained_df.loc[len(retrained_df)] = row
    except:
        pass
    
retrained_df

Unnamed: 0,criterion,model_name,n,random_state,runtime,test_acc,test_nll,test_bma_acc,test_clml
0,l2-sp,l2-sp_alpha=0.01_beta=0.01_lr_0=0.1_n=1000_ran...,1000,1001,3579.360843,0.938,0.213469,0.9378,-0.212732
1,l2-sp,l2-sp_alpha=0.01_beta=0.01_lr_0=0.1_n=1000_ran...,1000,2001,3662.673018,0.9393,0.224194,0.9376,-0.276654
2,l2-sp,l2-sp_alpha=0.01_beta=0.01_lr_0=0.1_n=1000_ran...,1000,3001,3637.66543,0.9403,0.205864,0.9408,-0.208108
3,l2-sp,l2-sp_alpha=0.01_beta=0.01_lr_0=0.01_n=1000_ra...,1000,1001,3614.541176,0.9373,0.234695,0.9373,-0.213469
4,l2-sp,l2-sp_alpha=0.01_beta=0.01_lr_0=0.01_n=1000_ra...,1000,2001,3657.110261,0.9344,0.254649,0.9345,-0.212593
5,l2-sp,l2-sp_alpha=0.01_beta=0.01_lr_0=0.01_n=1000_ra...,1000,3001,3616.054446,0.9337,0.239611,0.9341,-0.349767
6,l2-sp,l2-sp_alpha=0.01_beta=0.01_lr_0=0.001_n=1000_r...,1000,1001,3642.455976,0.9103,0.295979,0.9104,-0.374055
7,l2-sp,l2-sp_alpha=0.01_beta=0.01_lr_0=0.001_n=1000_r...,1000,2001,3736.441835,0.8997,0.345413,0.9002,-0.387552
8,l2-sp,l2-sp_alpha=0.01_beta=0.01_lr_0=0.001_n=1000_r...,1000,3001,3592.374528,0.912,0.297442,0.9089,-0.830878
9,l2-sp,l2-sp_alpha=0.01_beta=0.01_lr_0=0.0001_n=1000_...,1000,1001,3653.160328,0.7943,0.820949,0.7919,-6.300895


In [6]:
# Keep only the tuning runs that also have a collected retrained run, so both
# tables describe the same set of model names. Re-running this cell is a no-op.
has_retrained_run = tuned_df["model_name"].isin(retrained_df["model_name"].values)
tuned_df = tuned_df.loc[has_retrained_run]
tuned_df

Unnamed: 0,criterion,model_name,n,random_state,runtime,val_acc,val_nll,val_bma_acc,val_clml
0,l2-sp,l2-sp_alpha=0.01_beta=0.01_lr_0=0.1_n=1000_ran...,1000,1001,3731.23478,0.95,0.15727,0.95,-0.204632
1,l2-sp,l2-sp_alpha=0.01_beta=0.01_lr_0=0.1_n=1000_ran...,1000,2001,3620.225999,0.94,0.234847,0.94,-0.297838
2,l2-sp,l2-sp_alpha=0.01_beta=0.01_lr_0=0.1_n=1000_ran...,1000,3001,3627.440226,0.97,0.09821,0.97,-0.172571
3,l2-sp,l2-sp_alpha=0.01_beta=0.01_lr_0=0.01_n=1000_ra...,1000,1001,3632.139241,0.955,0.178018,0.955,-0.21373
4,l2-sp,l2-sp_alpha=0.01_beta=0.01_lr_0=0.01_n=1000_ra...,1000,2001,3604.982917,0.925,0.276001,0.925,-0.227632
5,l2-sp,l2-sp_alpha=0.01_beta=0.01_lr_0=0.01_n=1000_ra...,1000,3001,3601.768231,0.95,0.11807,0.965,-0.407486
6,l2-sp,l2-sp_alpha=0.01_beta=0.01_lr_0=0.001_n=1000_r...,1000,1001,3594.383706,0.9,0.41233,0.9,-0.494209
7,l2-sp,l2-sp_alpha=0.01_beta=0.01_lr_0=0.001_n=1000_r...,1000,2001,3589.903927,0.885,0.38737,0.885,-0.435058
8,l2-sp,l2-sp_alpha=0.01_beta=0.01_lr_0=0.001_n=1000_r...,1000,3001,3641.977676,0.93,0.182748,0.935,-0.999154
9,l2-sp,l2-sp_alpha=0.01_beta=0.01_lr_0=0.0001_n=1000_...,1000,1001,3608.73675,0.73,0.941149,0.73,-6.773576


In [7]:
# Number of collected runs per random seed; unequal counts indicate seeds with
# missing or unfinished runs.
tuned_df.random_state.value_counts()

random_state
1001    15
2001    15
3001    14
Name: count, dtype: int64

In [8]:
# Write both result tables to the working directory, without the row index.
tuned_df.to_csv(path_or_buf='tuned_CIFAR-10_ConvNeXt-Tiny_CLML2.csv', index=False)
retrained_df.to_csv(path_or_buf='retrained_CIFAR-10_ConvNeXt-Tiny_CLML2.csv', index=False)