In [8]:
import os
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes
from mpl_toolkits.axes_grid1.inset_locator import mark_inset

In [9]:
def get_df(path):
    df = pd.read_csv(path, index_col='Unnamed: 0')
    return df

def get_val_nll(df):
    return df.val_or_test_nll.values[-1]

def get_last_epoch(df):
    return df.iloc[-1]

In [10]:
def get_best_hyperparameters(experiments_path, lr_0s, ns, random_states, weight_decays, prior_scales=None):
    if prior_scales is None:
        columns = ['lr_0', 'n', 'random_state', 'weight_decay']
        df = pd.DataFrame(columns=columns)
        for n, random_state in itertools.product(ns, random_states):
            best_val_nll = np.inf
            best_hyperparameters = None
            for lr_0, weight_decay in itertools.product(lr_0s, weight_decays):
                model_name = 'nonlearned_lr_0={}_n={}_random_state={}_weight_decay={}'\
                .format(lr_0, n, random_state, weight_decay)
                path =  '{}/{}.csv'.format(experiments_path, model_name)
                val_nll = get_val_nll(get_df(path))
                if val_nll < best_val_nll: best_val_nll = val_nll; best_hyperparameters = [lr_0, n, random_state, weight_decay]
            df.loc[df.shape[0]] = best_hyperparameters
    else:
        columns = ['lr_0', 'n', 'prior_scale', 'random_state', 'weight_decay']
        df = pd.DataFrame(columns=columns)
        for n, random_state in itertools.product(ns, random_states):
            best_val_nll = np.inf
            best_hyperparameters = None
            for lr_0, prior_scale, weight_decay in itertools.product(lr_0s, prior_scales, weight_decays):
                model_name = 'learned_lr_0={}_n={}_prior_scale={}_random_state={}_weight_decay={}'\
                .format(lr_0, n, prior_scale, random_state, weight_decay)
                path =  '{}/{}.csv'.format(experiments_path, model_name)
                val_nll = get_val_nll(get_df(path))
                if val_nll < best_val_nll: best_val_nll = val_nll; best_hyperparameters = [lr_0, n,prior_scale, random_state, weight_decay]
            df.loc[df.shape[0]] = best_hyperparameters
    return df

In [11]:
experiments_path = '/cluster/tufts/hugheslab/eharve06/bdl-transfer-learning/experiments/tuned_CIFAR-10'
lr_0s = np.logspace(-1, -4, num=4)
ns = [10, 100, 1000, 10000, 50000]
random_states = [1001, 2001, 3001]
weight_decays = np.append(np.logspace(-2, -6, num=5), 0)
hyperparameters_df = get_best_hyperparameters(experiments_path, lr_0s, ns, random_states, weight_decays)
hyperparameters_df

Unnamed: 0,lr_0,n,random_state,weight_decay
0,0.0001,10.0,1001.0,0.0001
1,0.01,10.0,2001.0,0.001
2,0.001,10.0,3001.0,0.01
3,0.0001,100.0,1001.0,1e-05
4,0.0001,100.0,2001.0,0.0001
5,0.0001,100.0,3001.0,0.001
6,0.01,1000.0,1001.0,0.001
7,0.01,1000.0,2001.0,0.001
8,0.01,1000.0,3001.0,1e-05
9,0.01,10000.0,1001.0,0.001


In [12]:
experiments_path = '/cluster/tufts/hugheslab/eharve06/bdl-transfer-learning/experiments/retrained_CIFAR-10'

columns = ['n', 'random_state', 'method', 'test_acc', 'test_loss', 'test_nll', 
           'test_prior', 'train_acc', 'train_loss', 'train_nll', 'train_prior']
results_df = pd.DataFrame(columns=columns)

for row_index, row in hyperparameters_df.iterrows():
    model_name = 'nonlearned_lr_0={}_n={}_random_state={}_weight_decay={}'\
    .format(row.lr_0, int(row.n), int(row.random_state), row.weight_decay)
    path =  '{}/{}.csv'.format(experiments_path, model_name)
    last_epoch = get_last_epoch(get_df(path))
    results_row = [int(row.n), int(row.random_state), 'nonlearned', 
                   last_epoch.val_or_test_acc, last_epoch.val_or_test_loss, 
                   last_epoch.val_or_test_nll, last_epoch.val_or_test_prior, 
                   last_epoch.train_acc, last_epoch.train_loss, 
                   last_epoch.train_nll, last_epoch.train_prior]
    results_df.loc[results_df.shape[0]] = results_row
# TODO: If more seeds are added average over seeds
results_df = results_df.groupby(['n', 'method']).agg(lambda x: tuple(x))
columns = ['test_acc', 'train_acc']
for column in columns:
    results_df['{}_mean'.format(column)] = results_df[column].apply(lambda item: np.mean(item))
    results_df['{}_std'.format(column)] = results_df[column].apply(lambda item: np.std(item))
results_df = results_df.reset_index()

AttributeError: 'Series' object has no attribute 'val_or_test_acc'

In [None]:
results_df

In [None]:
def color_ax(ax, color='gray'):
    ax.grid()
    ax.spines['bottom'].set_color(color)
    ax.spines['top'].set_color(color)
    ax.spines['left'].set_color(color)
    ax.spines['right'].set_color(color)
    ax.tick_params(which='both', color=color)
    

nonlearned_ns = np.array(results_df[results_df.method=='nonlearned'].n.values)
tripled_nonlearned_ns = [n for n in nonlearned_ns for _ in range(3)]
nonlearned_test_acc = np.array(sum(results_df[results_df.method=='nonlearned'].test_acc.values, ()))
nonlearned_test_acc_mean = np.array(results_df[results_df.method=='nonlearned'].test_acc_mean)
print(nonlearned_test_acc_mean)

fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(4, 3), dpi=300)
# Reported values in Pre-Train Your Loss
ax.plot([10, 100, 1000, 10000, 50000], [75.9, 49.3, 26.8, 8.9, 4.2], label='SGD Learned Prior', color='#1f77b4')
ax.errorbar([10, 100, 1000, 10000, 50000], [75.9, 49.3, 26.8, 8.9, 4.2], yerr=[2.1, 1.9, 1.1, 0.3, 0.1], capsize=3, color='#1f77b4')

ax.plot(nonlearned_ns, 100*(1-nonlearned_test_acc_mean), label='SGD Transfer Init', color='#9467bd')
ax.scatter(tripled_nonlearned_ns, 100*(1-nonlearned_test_acc), color='#9467bd')
# Zoomed section
axins = zoomed_inset_axes(ax, zoom=25, loc='upper right')
axins.plot([10, 100, 1000, 10000, 50000], [75.9, 49.3, 26.8, 8.9, 4.2], label='SGD Learned Prior', color='#1f77b4')
axins.errorbar([10, 100, 1000, 10000, 50000], [75.9, 49.3, 26.8, 8.9, 4.2], yerr=[2.1, 1.9, 1.1, 0.3, 0.1], capsize=3, color='#1f77b4')
axins.plot(nonlearned_ns, 100*(1-nonlearned_test_acc_mean), color='#9467bd')
axins.scatter(tripled_nonlearned_ns, 100*(1-nonlearned_test_acc), color='#9467bd')
axins.set_xlim(49000, 51000)
axins.set_ylim(3, 5)
axins.set_xscale('log')
color_ax(axins)
mark_inset(ax, axins, loc1=3, loc2=4, color='gray')
# Formatting
ax.set_xscale('log')
color_ax(ax)
ax.set_xticks([1e1, 1e2, 1e3, 1e4, 1e5])
ax.set_yticks([15, 40, 65, 90])
ax.set_xlabel('Number of training samples (log-scale)')
ax.set_ylabel('Test Error')
ax.legend(loc='lower left')
plt.show()