In [1]:
import os
import math
import itertools
import numpy as np
import pandas as pd

In [2]:
def check_epochs(df, n, batch_size=128, steps=6000):
    """Report whether a log has exactly the expected number of rows.

    The expected count is ``steps`` divided by the number of whole batches
    that fit in ``n`` samples, truncated to an integer.

    Args:
        df: Object exposing ``shape``; ``shape[0]`` is compared against the
            expected count (presumably one logged row per epoch -- confirm
            against the training script).
        n: Number of samples used to derive the batches per epoch.
        batch_size: Samples per batch; a trailing partial batch is not counted.
        steps: Step budget that is spread over the batches per epoch.

    Returns:
        True when ``df.shape[0]`` equals the expected count, False otherwise.

    Raises:
        ZeroDivisionError: If ``n`` is smaller than ``batch_size`` (zero
            whole batches).
    """
    batches_per_epoch = math.floor(n / batch_size)
    expected_epochs = int(steps / batches_per_epoch)
    return df.shape[0] == expected_epochs

In [3]:
# Print the launch command for every 'l2-zero' tuning run whose CSV log is
# missing or does not have the expected number of rows.
criterion = 'l2-zero'
alphas = [0.01, 0.001, 0.0001, 1e-05, 1e-06, 0.0]
lr_0s = [0.1, 0.01, 0.001, 0.0001]
ns = [510, 1020]
random_states = [1001, 2001, 3001]
dataset_directory = '/cluster/tufts/hugheslab/eharve06/Flowers_102'
experiments_directory = '/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/tuned_Flowers_102'

# Starts at -1, so the printed total is (number of commands emitted) - 1.
count = -1
for alpha, lr_0, n, random_state in itertools.product(alphas, lr_0s, ns, random_states):
    model_name = f'{criterion}_alpha={alpha}_lr_0={lr_0}_n={n}_random_state={random_state}'
    csv_path = f'{experiments_directory}/{model_name}.csv'
    if os.path.exists(csv_path):
        temp_df = pd.read_csv(csv_path)
        # n minus one fifth of n (truncated); presumably the training split
        # left after a validation hold-out -- confirm against main_Flowers_102.py.
        tuned_n = n - int((1/5) * n)
        run_is_complete = check_epochs(temp_df, tuned_n, batch_size=min(128, tuned_n), steps=6000)
    else:
        # No log at all: the run still has to be launched.
        run_is_complete = False
    if run_is_complete:
        continue
    count += 1
    print(f'    "python ../src/main_Flowers_102.py --alpha={alpha} --batch_size=128 --criterion=\'{criterion}\' --dataset_directory=\'{dataset_directory}\' --experiments_directory=\'{experiments_directory}\' --lr_0={lr_0} --model_name=\'{model_name}\' --n={n} --num_workers=0 --random_state={random_state} --tune"')
print(count)

-1


In [4]:
# Print the launch command for every 'l2-sp' tuning run whose CSV log is
# missing or does not have the expected number of rows.
criterion = 'l2-sp'
alphas = [0.01, 0.001, 0.0001, 1e-05, 1e-06, 0.0]
betas = [0.01, 0.001, 0.0001, 1e-05, 1e-06, 0.0]
lr_0s = [0.1, 0.01, 0.001, 0.0001]
ns = [510, 1020]
random_states = [1001, 2001, 3001]
dataset_directory = '/cluster/tufts/hugheslab/eharve06/Flowers_102'
experiments_directory = '/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/tuned_Flowers_102'

# Starts at -1, so the printed total is (number of commands emitted) - 1.
count = -1
for alpha, beta, lr_0, n, random_state in itertools.product(alphas, betas, lr_0s, ns, random_states):
    model_name = f'{criterion}_alpha={alpha}_beta={beta}_lr_0={lr_0}_n={n}_random_state={random_state}'
    csv_path = f'{experiments_directory}/{model_name}.csv'
    if os.path.exists(csv_path):
        temp_df = pd.read_csv(csv_path)
        # n minus one fifth of n (truncated); presumably the training split
        # left after a validation hold-out -- confirm against main_Flowers_102.py.
        tuned_n = n - int((1/5) * n)
        run_is_complete = check_epochs(temp_df, tuned_n, batch_size=min(128, tuned_n), steps=6000)
    else:
        # No log at all: the run still has to be launched.
        run_is_complete = False
    if run_is_complete:
        continue
    count += 1
    print(f'    "python ../src/main_Flowers_102.py --alpha={alpha} --beta={beta} --batch_size=128 --criterion=\'{criterion}\' --dataset_directory=\'{dataset_directory}\' --experiments_directory=\'{experiments_directory}\' --lr_0={lr_0} --model_name=\'{model_name}\' --n={n} --num_workers=0 --random_state={random_state} --tune"')
print(count)

-1


In [5]:
# Print the launch command for every 'ptyl' tuning run (resnet50_ssl_prior)
# whose CSV log is missing or does not have the expected number of rows.
criterion = 'ptyl'
betas = [0.01, 0.001, 0.0001, 1e-05, 1e-06, 0.0]
# Ten log-spaced values from 1e0 to 1e9.
lambds = np.logspace(start=0, stop=9, num=10)
lr_0s = [0.1, 0.01, 0.001, 0.0001]
ns = [510, 1020]
random_states = [1001, 2001, 3001]
dataset_directory = '/cluster/tufts/hugheslab/eharve06/Flowers_102'
experiments_directory = '/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/tuned_Flowers_102_SSL'
prior_directory = '/cluster/tufts/hugheslab/eharve06/resnet50_ssl_prior'
prior_type = 'resnet50_ssl_prior'

# Starts at -1, so the printed total is (number of commands emitted) - 1.
count = -1
for beta, lambd, lr_0, n, random_state in itertools.product(betas, lambds, lr_0s, ns, random_states):
    model_name = f'{criterion}_beta={beta}_lambd={lambd}_lr_0={lr_0}_n={n}_random_state={random_state}'
    csv_path = f'{experiments_directory}/{model_name}.csv'
    if os.path.exists(csv_path):
        temp_df = pd.read_csv(csv_path)
        # n minus one fifth of n (truncated); presumably the training split
        # left after a validation hold-out -- confirm against main_Flowers_102.py.
        tuned_n = n - int((1/5) * n)
        run_is_complete = check_epochs(temp_df, tuned_n, batch_size=min(128, tuned_n), steps=6000)
    else:
        # No log at all: the run still has to be launched.
        run_is_complete = False
    if run_is_complete:
        continue
    count += 1
    print(f'    "python ../src/main_Flowers_102.py --beta={beta} --batch_size=128 --criterion=\'{criterion}\' --dataset_directory=\'{dataset_directory}\' --experiments_directory=\'{experiments_directory}\' --lambd={lambd} --lr_0={lr_0} --model_name=\'{model_name}\' --n={n} --num_workers=0 --prior_directory=\'{prior_directory}\' --prior_type=\'{prior_type}\' --random_state={random_state} --tune"')
print(count)

    "python ../src/main_Flowers_102.py --beta=0.01 --batch_size=128 --criterion='ptyl' --dataset_directory='/cluster/tufts/hugheslab/eharve06/Flowers_102' --experiments_directory='/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/tuned_Flowers_102_SSL' --lambd=100000000.0 --lr_0=0.01 --model_name='ptyl_beta=0.01_lambd=100000000.0_lr_0=0.01_n=510_random_state=3001' --n=510 --num_workers=0 --prior_directory='/cluster/tufts/hugheslab/eharve06/resnet50_ssl_prior' --prior_type='resnet50_ssl_prior' --random_state=3001 --tune"
    "python ../src/main_Flowers_102.py --beta=0.01 --batch_size=128 --criterion='ptyl' --dataset_directory='/cluster/tufts/hugheslab/eharve06/Flowers_102' --experiments_directory='/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/tuned_Flowers_102_SSL' --lambd=100000000.0 --lr_0=0.01 --model_name='ptyl_beta=0.01_lambd=100000000.0_lr_0=0.01_n=1020_random_state=1001' --n=1020 --num_workers=0 --prior_directory='/cluster/tufts/hugheslab/eha

199


In [6]:
# Print the launch command for every 'ptyl' tuning run (no prior arguments)
# whose CSV log is missing or does not have the expected number of rows.
criterion = 'ptyl'
betas = [0.01, 0.001, 0.0001, 1e-05, 1e-06, 0.0]
# Ten log-spaced values from 1e0 to 1e9.
lambds = np.logspace(start=0, stop=9, num=10)
lr_0s = [0.1, 0.01, 0.001, 0.0001]
ns = [510, 1020]
random_states = [1001, 2001, 3001]
dataset_directory = '/cluster/tufts/hugheslab/eharve06/Flowers_102'
experiments_directory = '/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/tuned_Flowers_102'

# Starts at -1, so the printed total is (number of commands emitted) - 1.
count = -1
for beta, lambd, lr_0, n, random_state in itertools.product(betas, lambds, lr_0s, ns, random_states):
    model_name = f'{criterion}_beta={beta}_lambd={lambd}_lr_0={lr_0}_n={n}_random_state={random_state}'
    csv_path = f'{experiments_directory}/{model_name}.csv'
    if os.path.exists(csv_path):
        temp_df = pd.read_csv(csv_path)
        # n minus one fifth of n (truncated); presumably the training split
        # left after a validation hold-out -- confirm against main_Flowers_102.py.
        tuned_n = n - int((1/5) * n)
        run_is_complete = check_epochs(temp_df, tuned_n, batch_size=min(128, tuned_n), steps=6000)
    else:
        # No log at all: the run still has to be launched.
        run_is_complete = False
    if run_is_complete:
        continue
    count += 1
    print(f'    "python ../src/main_Flowers_102.py --beta={beta} --batch_size=128 --criterion=\'{criterion}\' --dataset_directory=\'{dataset_directory}\' --experiments_directory=\'{experiments_directory}\' --lambd={lambd} --lr_0={lr_0} --model_name=\'{model_name}\' --n={n} --num_workers=0 --random_state={random_state} --tune"')
print(count)

-1


In [7]:
# For 'l2-zero': pick, per (criterion, n, random_state), the tuning run with
# the lowest final validation NLL, then collect the final metrics of the
# retrained run that has the same model name.
alphas = [0.01, 0.001, 0.0001, 1e-05, 1e-06, 0.0]
criterion = 'l2-zero'
dataset_directory = '/cluster/tufts/hugheslab/eharve06/Flowers_102'
experiments_directory = '/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/tuned_Flowers_102'
lr_0s = [0.1, 0.01, 0.001, 0.0001]
ns = [510, 1020]
random_states = [1001, 2001, 3001]

columns = ['criterion', 'model_name', 'n', 'random_state', 'val_acc', 'val_nll']
# Collect plain records and build the frame once; enlarging a DataFrame one
# row at a time with .loc copies the frame on every insertion.
tuned_records = []
for alpha, lr_0, n, random_state in itertools.product(alphas, lr_0s, ns, random_states):
    model_name = f'{criterion}_alpha={alpha}_lr_0={lr_0}_n={n}_random_state={random_state}'
    # No existence check here: a missing tuning log makes pd.read_csv raise.
    temp_df = pd.read_csv(f'{experiments_directory}/{model_name}.csv')
    # The last logged row supplies the run's final metrics.
    tuned_records.append([criterion, model_name, n, random_state, temp_df.val_or_test_acc.values[-1], temp_df.val_or_test_nll.values[-1]])
tuned_df = pd.DataFrame(tuned_records, columns=columns)

# Keep only the row with the smallest val_nll in each group.
min_indices = tuned_df.groupby(['criterion', 'n', 'random_state'])['val_nll'].idxmin()
tuned_df = tuned_df.loc[min_indices]

experiments_directory = '/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/retrained_Flowers_102'

columns = ['criterion', 'model_name', 'n', 'random_state', 'test_acc', 'test_nll']
retrained_records = []
for index, tuned_row in tuned_df.iterrows():
    retrained_path = f'{experiments_directory}/{tuned_row.model_name}.csv'
    # Selected configurations without a retrained log are silently skipped.
    if os.path.exists(retrained_path):
        temp_df = pd.read_csv(retrained_path)
        retrained_records.append([tuned_row.criterion, tuned_row.model_name, tuned_row.n, tuned_row.random_state, temp_df.val_or_test_acc.values[-1], temp_df.val_or_test_nll.values[-1]])
retrained_df = pd.DataFrame(retrained_records, columns=columns)

retrained_df

Unnamed: 0,criterion,model_name,n,random_state,test_acc,test_nll
0,l2-zero,l2-zero_alpha=0.0_lr_0=0.1_n=510_random_state=...,510,1001,0.859594,0.675128
1,l2-zero,l2-zero_alpha=1e-05_lr_0=0.1_n=510_random_stat...,510,2001,0.865044,0.631728
2,l2-zero,l2-zero_alpha=1e-05_lr_0=0.1_n=510_random_stat...,510,3001,0.866699,0.631672
3,l2-zero,l2-zero_alpha=1e-06_lr_0=0.1_n=1020_random_sta...,1020,1001,0.923864,0.380184
4,l2-zero,l2-zero_alpha=1e-06_lr_0=0.1_n=1020_random_sta...,1020,2001,0.927144,0.347217
5,l2-zero,l2-zero_alpha=0.0001_lr_0=0.1_n=1020_random_st...,1020,3001,0.932412,0.322754


In [8]:
# Collapse retrained_df to one row per (criterion, n); every other column
# becomes a tuple of the grouped values.
grouped_df = retrained_df.groupby(['criterion', 'n']).agg(lambda x: tuple(x))

# Add <metric>_mean/_std/_min/_max columns computed over each tuple.
# np.std is called with its default arguments.
columns = ['test_acc', 'test_nll']
summary_stats = [('mean', np.mean), ('std', np.std), ('min', np.min), ('max', np.max)]
for column in columns:
    for suffix, stat in summary_stats:
        grouped_df[f'{column}_{suffix}'] = grouped_df[column].apply(lambda item: stat(item))
grouped_df = grouped_df.reset_index()
grouped_df

Unnamed: 0,criterion,n,model_name,random_state,test_acc,test_nll,test_acc_mean,test_acc_std,test_acc_min,test_acc_max,test_nll_mean,test_nll_std,test_nll_min,test_nll_max
0,l2-zero,510,(l2-zero_alpha=0.0_lr_0=0.1_n=510_random_state...,"(1001, 2001, 3001)","(0.8595935106277466, 0.8650438189506531, 0.866...","(0.6751280320207529, 0.6317284799010232, 0.631...",0.863779,0.003036,0.859594,0.866699,0.646176,0.020472,0.631672,0.675128
1,l2-zero,1020,(l2-zero_alpha=1e-06_lr_0=0.1_n=1020_random_st...,"(1001, 2001, 3001)","(0.9238643050193788, 0.9271442294120787, 0.932...","(0.3801838651372868, 0.3472168642901048, 0.322...",0.927807,0.003521,0.923864,0.932412,0.350052,0.023531,0.322754,0.380184


In [9]:
# Test-accuracy summary (mean, min, max) per (criterion, n).
accuracy_columns = ['criterion', 'n', 'test_acc_mean', 'test_acc_min', 'test_acc_max']
grouped_df[accuracy_columns]

Unnamed: 0,criterion,n,test_acc_mean,test_acc_min,test_acc_max
0,l2-zero,510,0.863779,0.859594,0.866699
1,l2-zero,1020,0.927807,0.923864,0.932412


In [10]:
# Test-NLL summary (mean, min, max) per (criterion, n).
nll_columns = ['criterion', 'n', 'test_nll_mean', 'test_nll_min', 'test_nll_max']
grouped_df[nll_columns]

Unnamed: 0,criterion,n,test_nll_mean,test_nll_min,test_nll_max
0,l2-zero,510,0.646176,0.631672,0.675128
1,l2-zero,1020,0.350052,0.322754,0.380184


In [11]:
# Print the launch command (without --tune) for every 'l2-sp' run in the
# retrained directory whose CSV log is missing or does not have the expected
# number of rows.
criterion = 'l2-sp'
alphas = [0.01, 0.001, 0.0001, 1e-05, 1e-06, 0.0]
betas = [0.01, 0.001, 0.0001, 1e-05, 1e-06, 0.0]
lr_0s = [0.1, 0.01, 0.001, 0.0001]
ns = [510, 1020]
random_states = [1001, 2001, 3001]
dataset_directory = '/cluster/tufts/hugheslab/eharve06/Flowers_102'
experiments_directory = '/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/retrained_Flowers_102'

# Starts at -1, so the printed total is (number of commands emitted) - 1.
count = -1
for alpha, beta, lr_0, n, random_state in itertools.product(alphas, betas, lr_0s, ns, random_states):
    model_name = f'{criterion}_alpha={alpha}_beta={beta}_lr_0={lr_0}_n={n}_random_state={random_state}'
    csv_path = f'{experiments_directory}/{model_name}.csv'
    if os.path.exists(csv_path):
        temp_df = pd.read_csv(csv_path)
        # The full n is used here (no hold-out reduction as in the tuning cells).
        run_is_complete = check_epochs(temp_df, n, batch_size=min(128, n), steps=6000)
    else:
        # No log at all: the run still has to be launched.
        run_is_complete = False
    if run_is_complete:
        continue
    count += 1
    print(f'    "python ../src/main_Flowers_102.py --alpha={alpha} --beta={beta} --batch_size=128 --criterion=\'{criterion}\' --dataset_directory=\'{dataset_directory}\' --experiments_directory=\'{experiments_directory}\' --lr_0={lr_0} --model_name=\'{model_name}\' --n={n} --num_workers=0 --random_state={random_state}"')

print(count)

-1


In [12]:
# For 'l2-sp': pick, per (criterion, n, random_state), the tuning run with
# the lowest final validation NLL, then collect the final metrics of the
# retrained run that has the same model name.
alphas = [0.01, 0.001, 0.0001, 1e-05, 1e-06, 0.0]
betas = [0.01, 0.001, 0.0001, 1e-05, 1e-06, 0.0]
criterion = 'l2-sp'
dataset_directory = '/cluster/tufts/hugheslab/eharve06/Flowers_102'
experiments_directory = '/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/tuned_Flowers_102'
lr_0s = [0.1, 0.01, 0.001, 0.0001]
ns = [510, 1020]
random_states = [1001, 2001, 3001]

columns = ['criterion', 'model_name', 'n', 'random_state', 'val_acc', 'val_nll']
# Collect plain records and build the frame once; enlarging a DataFrame one
# row at a time with .loc copies the frame on every insertion.
tuned_records = []
for alpha, beta, lr_0, n, random_state in itertools.product(alphas, betas, lr_0s, ns, random_states):
    model_name = f'{criterion}_alpha={alpha}_beta={beta}_lr_0={lr_0}_n={n}_random_state={random_state}'
    # No existence check here: a missing tuning log makes pd.read_csv raise.
    temp_df = pd.read_csv(f'{experiments_directory}/{model_name}.csv')
    # The last logged row supplies the run's final metrics.
    tuned_records.append([criterion, model_name, n, random_state, temp_df.val_or_test_acc.values[-1], temp_df.val_or_test_nll.values[-1]])
tuned_df = pd.DataFrame(tuned_records, columns=columns)

# Keep only the row with the smallest val_nll in each group.
min_indices = tuned_df.groupby(['criterion', 'n', 'random_state'])['val_nll'].idxmin()
tuned_df = tuned_df.loc[min_indices]

experiments_directory = '/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/retrained_Flowers_102'

columns = ['criterion', 'model_name', 'n', 'random_state', 'test_acc', 'test_nll']
retrained_records = []
for index, tuned_row in tuned_df.iterrows():
    retrained_path = f'{experiments_directory}/{tuned_row.model_name}.csv'
    # Selected configurations without a retrained log are silently skipped.
    if os.path.exists(retrained_path):
        temp_df = pd.read_csv(retrained_path)
        retrained_records.append([tuned_row.criterion, tuned_row.model_name, tuned_row.n, tuned_row.random_state, temp_df.val_or_test_acc.values[-1], temp_df.val_or_test_nll.values[-1]])
retrained_df = pd.DataFrame(retrained_records, columns=columns)

retrained_df

Unnamed: 0,criterion,model_name,n,random_state,test_acc,test_nll
0,l2-sp,l2-sp_alpha=1e-06_beta=1e-06_lr_0=0.1_n=510_ra...,510,1001,0.853202,0.675011
1,l2-sp,l2-sp_alpha=0.0_beta=0.0_lr_0=0.1_n=510_random...,510,2001,0.866876,0.629629
2,l2-sp,l2-sp_alpha=0.0001_beta=0.0_lr_0=0.1_n=510_ran...,510,3001,0.863317,0.634219
3,l2-sp,l2-sp_alpha=0.0_beta=0.0_lr_0=0.1_n=1020_rando...,1020,1001,0.931542,0.347675
4,l2-sp,l2-sp_alpha=0.0_beta=1e-05_lr_0=0.1_n=1020_ran...,1020,2001,0.930701,0.330521
5,l2-sp,l2-sp_alpha=1e-05_beta=0.0001_lr_0=0.1_n=1020_...,1020,3001,0.933074,0.315685


In [13]:
# Collapse retrained_df to one row per (criterion, n); every other column
# becomes a tuple of the grouped values.
grouped_df = retrained_df.groupby(['criterion', 'n']).agg(lambda x: tuple(x))

# Add <metric>_mean/_std/_min/_max columns computed over each tuple.
# np.std is called with its default arguments.
columns = ['test_acc', 'test_nll']
summary_stats = [('mean', np.mean), ('std', np.std), ('min', np.min), ('max', np.max)]
for column in columns:
    for suffix, stat in summary_stats:
        grouped_df[f'{column}_{suffix}'] = grouped_df[column].apply(lambda item: stat(item))
grouped_df = grouped_df.reset_index()
grouped_df

Unnamed: 0,criterion,n,model_name,random_state,test_acc,test_nll,test_acc_mean,test_acc_std,test_acc_min,test_acc_max,test_nll_mean,test_nll_std,test_nll_min,test_nll_max
0,l2-sp,510,(l2-sp_alpha=1e-06_beta=1e-06_lr_0=0.1_n=510_r...,"(1001, 2001, 3001)","(0.8532021045684814, 0.8668757677078247, 0.863...","(0.6750107475100997, 0.6296289651056399, 0.634...",0.861132,0.005792,0.853202,0.866876,0.646286,0.020398,0.629629,0.675011
1,l2-sp,1020,(l2-sp_alpha=0.0_beta=0.0_lr_0=0.1_n=1020_rand...,"(1001, 2001, 3001)","(0.9315415620803832, 0.9307012557983398, 0.933...","(0.347674924849946, 0.3305205590603899, 0.3156...",0.931772,0.000982,0.930701,0.933074,0.331294,0.013071,0.315685,0.347675


In [14]:
# Test-accuracy summary (mean, min, max) per (criterion, n).
accuracy_columns = ['criterion', 'n', 'test_acc_mean', 'test_acc_min', 'test_acc_max']
grouped_df[accuracy_columns]

Unnamed: 0,criterion,n,test_acc_mean,test_acc_min,test_acc_max
0,l2-sp,510,0.861132,0.853202,0.866876
1,l2-sp,1020,0.931772,0.930701,0.933074


In [15]:
# Test-NLL summary (mean, min, max) per (criterion, n).
nll_columns = ['criterion', 'n', 'test_nll_mean', 'test_nll_min', 'test_nll_max']
grouped_df[nll_columns]

Unnamed: 0,criterion,n,test_nll_mean,test_nll_min,test_nll_max
0,l2-sp,510,0.646286,0.629629,0.675011
1,l2-sp,1020,0.331294,0.315685,0.347675


In [17]:
# For 'ptyl' with the *_SSL experiment directories: pick, per
# (criterion, n, random_state), the available tuning run with the lowest
# final validation NLL, then collect the final metrics of the retrained run
# that has the same model name.
betas = [0.01, 0.001, 0.0001, 1e-05, 1e-06, 0.0]
criterion = 'ptyl'
dataset_directory = '/cluster/tufts/hugheslab/eharve06/Flowers_102'
experiments_directory = '/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/tuned_Flowers_102_SSL'
# Ten log-spaced values from 1e0 to 1e9.
lambds = np.logspace(start=0, stop=9, num=10)
lr_0s = [0.1, 0.01, 0.001, 0.0001]
ns = [510, 1020]
random_states = [1001, 2001, 3001]

columns = ['criterion', 'model_name', 'n', 'random_state', 'val_acc', 'val_nll']
# Collect plain records and build the frame once; enlarging a DataFrame one
# row at a time with .loc copies the frame on every insertion.
tuned_records = []
for beta, lambd, lr_0, n, random_state in itertools.product(betas, lambds, lr_0s, ns, random_states):
    model_name = f'{criterion}_beta={beta}_lambd={lambd}_lr_0={lr_0}_n={n}_random_state={random_state}'
    tuned_path = f'{experiments_directory}/{model_name}.csv'
    # Tuning runs without a log are skipped, so the selection below only
    # considers the runs that exist on disk.
    if os.path.exists(tuned_path):
        temp_df = pd.read_csv(tuned_path)
        # The last logged row supplies the run's final metrics.
        tuned_records.append([criterion, model_name, n, random_state, temp_df.val_or_test_acc.values[-1], temp_df.val_or_test_nll.values[-1]])
tuned_df = pd.DataFrame(tuned_records, columns=columns)

# Keep only the row with the smallest val_nll in each group.
min_indices = tuned_df.groupby(['criterion', 'n', 'random_state'])['val_nll'].idxmin()
tuned_df = tuned_df.loc[min_indices]

experiments_directory = '/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/retrained_Flowers_102_SSL'

columns = ['criterion', 'model_name', 'n', 'random_state', 'test_acc', 'test_nll']
retrained_records = []
for index, tuned_row in tuned_df.iterrows():
    retrained_path = f'{experiments_directory}/{tuned_row.model_name}.csv'
    # Selected configurations without a retrained log are silently skipped.
    if os.path.exists(retrained_path):
        temp_df = pd.read_csv(retrained_path)
        retrained_records.append([tuned_row.criterion, tuned_row.model_name, tuned_row.n, tuned_row.random_state, temp_df.val_or_test_acc.values[-1], temp_df.val_or_test_nll.values[-1]])
retrained_df = pd.DataFrame(retrained_records, columns=columns)

retrained_df

Unnamed: 0,criterion,model_name,n,random_state,test_acc,test_nll
0,ptyl,ptyl_beta=0.0_lambd=100.0_lr_0=0.1_n=510_rando...,510,1001,0.809471,0.859306
1,ptyl,ptyl_beta=0.0_lambd=100.0_lr_0=0.1_n=510_rando...,510,2001,0.820477,0.851634
2,ptyl,ptyl_beta=0.0001_lambd=100.0_lr_0=0.1_n=510_ra...,510,3001,0.827764,0.808247
3,ptyl,ptyl_beta=0.001_lambd=100000000.0_lr_0=0.1_n=1...,1020,1001,0.896939,0.492196
4,ptyl,ptyl_beta=0.0001_lambd=10.0_lr_0=0.1_n=1020_ra...,1020,2001,0.901413,0.461228
5,ptyl,ptyl_beta=0.0001_lambd=100.0_lr_0=0.1_n=1020_r...,1020,3001,0.892305,0.513435


In [18]:
# Collapse retrained_df to one row per (criterion, n); every other column
# becomes a tuple of the grouped values.
grouped_df = retrained_df.groupby(['criterion', 'n']).agg(lambda x: tuple(x))

# Add <metric>_mean/_std/_min/_max columns computed over each tuple.
# np.std is called with its default arguments.
columns = ['test_acc', 'test_nll']
summary_stats = [('mean', np.mean), ('std', np.std), ('min', np.min), ('max', np.max)]
for column in columns:
    for suffix, stat in summary_stats:
        grouped_df[f'{column}_{suffix}'] = grouped_df[column].apply(lambda item: stat(item))
grouped_df = grouped_df.reset_index()
grouped_df

Unnamed: 0,criterion,n,model_name,random_state,test_acc,test_nll,test_acc_mean,test_acc_std,test_acc_min,test_acc_max,test_nll_mean,test_nll_std,test_nll_min,test_nll_max
0,ptyl,510,(ptyl_beta=0.0_lambd=100.0_lr_0=0.1_n=510_rand...,"(1001, 2001, 3001)","(0.8094708919525146, 0.8204765319824219, 0.827...","(0.8593063579656345, 0.8516342300323566, 0.808...",0.819237,0.007519,0.809471,0.827764,0.839729,0.022481,0.808247,0.859306
1,ptyl,1020,(ptyl_beta=0.001_lambd=100000000.0_lr_0=0.1_n=...,"(1001, 2001, 3001)","(0.896939218044281, 0.9014132618904114, 0.8923...","(0.4921957659278386, 0.4612277829759083, 0.513...",0.896886,0.003719,0.892305,0.901413,0.488953,0.021436,0.461228,0.513435


In [19]:
# Test-accuracy summary (mean, min, max) per (criterion, n).
accuracy_columns = ['criterion', 'n', 'test_acc_mean', 'test_acc_min', 'test_acc_max']
grouped_df[accuracy_columns]

Unnamed: 0,criterion,n,test_acc_mean,test_acc_min,test_acc_max
0,ptyl,510,0.819237,0.809471,0.827764
1,ptyl,1020,0.896886,0.892305,0.901413


In [20]:
# Test-NLL summary (mean, min, max) per (criterion, n).
nll_columns = ['criterion', 'n', 'test_nll_mean', 'test_nll_min', 'test_nll_max']
grouped_df[nll_columns]

Unnamed: 0,criterion,n,test_nll_mean,test_nll_min,test_nll_max
0,ptyl,510,0.839729,0.808247,0.859306
1,ptyl,1020,0.488953,0.461228,0.513435


In [18]:
# For 'ptyl' with the non-SSL experiment directories: pick, per
# (criterion, n, random_state), the available tuning run with the lowest
# final validation NLL, then collect the final metrics of the retrained run
# that has the same model name.
betas = [0.01, 0.001, 0.0001, 1e-05, 1e-06, 0.0]
criterion = 'ptyl'
dataset_directory = '/cluster/tufts/hugheslab/eharve06/Flowers_102'
experiments_directory = '/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/tuned_Flowers_102'
# Ten log-spaced values from 1e0 to 1e9.
lambds = np.logspace(start=0, stop=9, num=10)
lr_0s = [0.1, 0.01, 0.001, 0.0001]
ns = [510, 1020]
random_states = [1001, 2001, 3001]

columns = ['criterion', 'model_name', 'n', 'random_state', 'val_acc', 'val_nll']
# Collect plain records and build the frame once; enlarging a DataFrame one
# row at a time with .loc copies the frame on every insertion.
tuned_records = []
for beta, lambd, lr_0, n, random_state in itertools.product(betas, lambds, lr_0s, ns, random_states):
    model_name = f'{criterion}_beta={beta}_lambd={lambd}_lr_0={lr_0}_n={n}_random_state={random_state}'
    tuned_path = f'{experiments_directory}/{model_name}.csv'
    # Tuning runs without a log are skipped, so the selection below only
    # considers the runs that exist on disk.
    if os.path.exists(tuned_path):
        temp_df = pd.read_csv(tuned_path)
        # The last logged row supplies the run's final metrics.
        tuned_records.append([criterion, model_name, n, random_state, temp_df.val_or_test_acc.values[-1], temp_df.val_or_test_nll.values[-1]])
tuned_df = pd.DataFrame(tuned_records, columns=columns)

# Keep only the row with the smallest val_nll in each group.
min_indices = tuned_df.groupby(['criterion', 'n', 'random_state'])['val_nll'].idxmin()
tuned_df = tuned_df.loc[min_indices]

experiments_directory = '/cluster/tufts/hugheslab/eharve06/data-emphasized-ELBo/experiments/retrained_Flowers_102'

columns = ['criterion', 'model_name', 'n', 'random_state', 'test_acc', 'test_nll']
retrained_records = []
for index, tuned_row in tuned_df.iterrows():
    retrained_path = f'{experiments_directory}/{tuned_row.model_name}.csv'
    # Selected configurations without a retrained log are silently skipped.
    if os.path.exists(retrained_path):
        temp_df = pd.read_csv(retrained_path)
        retrained_records.append([tuned_row.criterion, tuned_row.model_name, tuned_row.n, tuned_row.random_state, temp_df.val_or_test_acc.values[-1], temp_df.val_or_test_nll.values[-1]])
retrained_df = pd.DataFrame(retrained_records, columns=columns)

retrained_df

Unnamed: 0,criterion,model_name,n,random_state,test_acc,test_nll
0,ptyl,ptyl_beta=1e-05_lambd=10000.0_lr_0=0.1_n=510_r...,510,1001,0.857836,0.675746
1,ptyl,ptyl_beta=0.0_lambd=100000.0_lr_0=0.1_n=510_ra...,510,2001,0.86601,0.630066
2,ptyl,ptyl_beta=1e-06_lambd=1000.0_lr_0=0.1_n=510_ra...,510,3001,0.865124,0.631908
3,ptyl,ptyl_beta=0.0_lambd=100000000.0_lr_0=0.1_n=102...,1020,1001,0.931298,0.335795
4,ptyl,ptyl_beta=1e-05_lambd=100000000.0_lr_0=0.1_n=1...,1020,2001,0.931068,0.323245
5,ptyl,ptyl_beta=0.0001_lambd=100000.0_lr_0=0.1_n=102...,1020,3001,0.925769,0.327369


In [19]:
# Collapse retrained_df to one row per (criterion, n); every other column
# becomes a tuple of the grouped values.
grouped_df = retrained_df.groupby(['criterion', 'n']).agg(lambda x: tuple(x))

# Add <metric>_mean/_std/_min/_max columns computed over each tuple.
# np.std is called with its default arguments.
columns = ['test_acc', 'test_nll']
summary_stats = [('mean', np.mean), ('std', np.std), ('min', np.min), ('max', np.max)]
for column in columns:
    for suffix, stat in summary_stats:
        grouped_df[f'{column}_{suffix}'] = grouped_df[column].apply(lambda item: stat(item))
grouped_df = grouped_df.reset_index()
grouped_df

Unnamed: 0,criterion,n,model_name,random_state,test_acc,test_nll,test_acc_mean,test_acc_std,test_acc_min,test_acc_max,test_nll_mean,test_nll_std,test_nll_min,test_nll_max
0,ptyl,510,(ptyl_beta=1e-05_lambd=10000.0_lr_0=0.1_n=510_...,"(1001, 2001, 3001)","(0.8578360080718994, 0.8660104274749756, 0.865...","(0.6757463747349225, 0.6300655637877993, 0.631...",0.86299,0.003663,0.857836,0.86601,0.645907,0.021113,0.630066,0.675746
1,ptyl,1020,(ptyl_beta=0.0_lambd=100000000.0_lr_0=0.1_n=10...,"(1001, 2001, 3001)","(0.9312978386878968, 0.9310680627822876, 0.925...","(0.33579462177171, 0.3232451387625941, 0.32736...",0.929378,0.002554,0.925769,0.931298,0.328803,0.005223,0.323245,0.335795


In [20]:
# Test-accuracy summary (mean, min, max) per (criterion, n).
accuracy_columns = ['criterion', 'n', 'test_acc_mean', 'test_acc_min', 'test_acc_max']
grouped_df[accuracy_columns]

Unnamed: 0,criterion,n,test_acc_mean,test_acc_min,test_acc_max
0,ptyl,510,0.86299,0.857836,0.86601
1,ptyl,1020,0.929378,0.925769,0.931298


In [21]:
# Test-NLL summary (mean, min, max) per (criterion, n).
nll_columns = ['criterion', 'n', 'test_nll_mean', 'test_nll_min', 'test_nll_max']
grouped_df[nll_columns]

Unnamed: 0,criterion,n,test_nll_mean,test_nll_min,test_nll_max
0,ptyl,510,0.645907,0.630066,0.675746
1,ptyl,1020,0.328803,0.323245,0.335795
