In [None]:
import sys
# Add the parent directory (resolved relative to the current working
# directory) to the module search path so modules located there can be imported.
sys.path.append("./../")

In [None]:
import os
import json
import glob

import numpy as np
import pandas as pd

from tbparse import SummaryReader
import matplotlib.pyplot as plt
# The bundled 'seaborn' style was renamed to 'seaborn-v0_8' in matplotlib 3.6
# and the old alias was later removed; prefer the new name and only fall back
# to the legacy one on older matplotlib versions.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

In [None]:
def extract_results(model_dir):
    """
    Collect the test metrics logged for one trained model folder.

    Reads ``<model_dir>/config.json`` for the run's hyper-parameters and the
    first TensorBoard event file matching
    ``<model_dir>/tblog/events.out.tfevents*.1`` for the ``test_*`` scalars,
    taking the first logged value of each scalar.

    Args:
        model_dir: Path to a model folder containing ``config.json`` and a
            ``tblog`` sub-folder.

    Returns:
        dict with keys ``lam_sl``, ``ds_size``, ``ds_corr``, ``acc``, ``ece``,
        ``f1`` and ``loss``, or ``None`` (after printing a "Skipping" message)
        when no matching event file exists. ``ds_size`` / ``ds_corr`` default
        to ``'Full'`` / ``'identity'`` when absent from the config, matching
        ``extract_best_results``.

    Raises:
        OSError: if ``config.json`` cannot be opened.
        KeyError: if the config has no ``method_params.lam_sl`` or
            ``ds_params`` entry.
    """

    # Get config (context manager so the file handle is always closed)
    with open(os.path.join(model_dir, 'config.json'), 'r') as config_file:
        config = json.load(config_file)

    # Get test events file -- per the original author's note, the *.1 file is
    # the one written by the test run. The directory part is escaped so glob
    # metacharacters in model_dir are taken literally, and matches are sorted
    # so the chosen file does not depend on filesystem listing order.
    tblog_dir = glob.escape(os.path.join(model_dir, "tblog"))
    matches = sorted(glob.glob(os.path.join(tblog_dir, "events.out.tfevents*.1")))
    if not matches:
        print("Skipping", model_dir)
        return None
    tb_log = SummaryReader(matches[0], pivot=True).scalars

    # Extract key points
    ds_params = config['ds_params']
    return {
        'lam_sl': config['method_params']['lam_sl'],
        'ds_size': ds_params.get('size', 'Full'),
        'ds_corr': ds_params.get('corruption', 'identity'),
        'acc': tb_log.test_acc.values[0],
        'ece': tb_log.test_ece.values[0],
        'f1': tb_log.test_f1.values[0],
        'loss': tb_log.test_loss.values[0],
    }

In [None]:
def extract_best_results(model_dir):
    """
    Collect the validation metrics at the best-``val_f1`` step of one model.

    Reads ``<model_dir>/config.json`` for the run's hyper-parameters and the
    first TensorBoard event file matching
    ``<model_dir>/tblog/events.out.tfevents*.0`` for the ``val_*`` scalars.
    The row(s) with the maximum ``val_f1`` are selected (ECE is deliberately
    not used for model selection) and the first such row is reported.

    Args:
        model_dir: Path to a model folder containing ``config.json`` and a
            ``tblog`` sub-folder.

    Returns:
        dict with keys ``lam_sl``, ``ds_size``, ``ds_corr``, ``acc``, ``ece``,
        ``f1`` and ``loss``, or ``None`` when the run is skipped: no matching
        event file (prints "Skipping <model_dir>"), or ``val_f1`` is missing
        or has no usable maximum (prints ``model_dir``). ``ds_size`` /
        ``ds_corr`` default to ``'Full'`` / ``'identity'`` when absent.

    Raises:
        OSError: if ``config.json`` cannot be opened.
        KeyError: if the config has no ``method_params.lam_sl`` or
            ``ds_params`` entry.
    """
    # Get config (context manager so the file handle is always closed)
    with open(os.path.join(model_dir, 'config.json'), 'r') as config_file:
        config = json.load(config_file)

    # Get the events file holding the val_* scalars: the *.0 file (the *.1 file
    # is the one from the test run). The directory part is escaped so glob
    # metacharacters in model_dir are taken literally, and matches are sorted
    # so the chosen file does not depend on filesystem listing order.
    tblog_dir = glob.escape(os.path.join(model_dir, "tblog"))
    matches = sorted(glob.glob(os.path.join(tblog_dir, "events.out.tfevents*.0")))
    if not matches:
        print("Skipping", model_dir)
        return None
    tb_log = SummaryReader(matches[0], pivot=True).scalars

    # Get the best point in term of val_f1 (dont look at ECE).
    # `except Exception` keeps the best-effort skip for runs without usable
    # val_f1 data while no longer swallowing KeyboardInterrupt / SystemExit.
    try:
        best = tb_log[tb_log.val_f1 == tb_log.val_f1.max()]
    except Exception:
        print(model_dir)
        return None
    if best.empty:
        # e.g. every val_f1 entry is NaN, so nothing equals the maximum
        print(model_dir)
        return None

    # Extract key points
    ds_params = config['ds_params']
    return {
        'lam_sl': config['method_params']['lam_sl'],
        'ds_size': ds_params.get('size', 'Full'),
        'ds_corr': ds_params.get('corruption', 'identity'),
        'acc': best.val_acc.values[0],
        'ece': best.val_ece.values[0],
        'f1': best.val_f1.values[0],
        'loss': best.val_loss.values[0],
    }

In [None]:
# Experiment selection.
#   rootdir   -- folder whose entries are listed as experiment directories.
#   model_str -- name of the sub-folder looked up inside each experiment
#                directory; its entries are the individual model folders.
# The commented-out pairs below are alternative selections; enable exactly one
# rootdir / model_str pair at a time.
rootdir = "./../zoo/abl-alpha100-uniform/"
model_str = "LeNet"

# rootdir = "./../zoo/abl-alpha100-uniform-1000steps/"
# model_str = "ConvNet"

# rootdir = "./../zoo/abl-alpha100-unibin/"
# model_str = "VGG11"

In [None]:
# Collect every model folder found at <rootdir>/<experiment>/<model_str>/<run>.
modeldirs = []
# Sorted listings keep the run order reproducible across filesystems.
for exp_name in sorted(os.listdir(rootdir)):
    model_root = os.path.join(rootdir, exp_name, model_str)
    # Skip stray files in rootdir (e.g. .DS_Store) and experiments that have no
    # <model_str> sub-folder instead of crashing in os.listdir.
    if not os.path.isdir(model_root):
        continue
    for run_name in sorted(os.listdir(model_root)):
        run_dir = os.path.join(model_root, run_name)
        # Only directories can hold config.json / tblog, so ignore plain files.
        if os.path.isdir(run_dir):
            modeldirs.append(run_dir)

In [None]:
# One row per model folder; folders for which the extractor returned None
# (skipped runs) are dropped. Swap extract_best_results for extract_results to
# tabulate the test metrics instead of the best validation point.
df_results = pd.DataFrame(
    [metrics for metrics in map(extract_best_results, modeldirs) if metrics]
)

In [None]:
# Show the per-run metrics table (one row per model folder that was not skipped).
df_results

In [None]:
# Aggregate the per-run metrics into
#   agg_results[corruption][dataset size] -> DataFrame with one row per lam_sl
# holding, for each metric, the mean over runs and the standard error
# (sample standard deviation divided by sqrt(number of runs)).
agg_results = {}

for corr_str, corr_runs in df_results.groupby('ds_corr'):
    per_size = {}

    for ds_size, size_runs in corr_runs.groupby('ds_size'):
        rows = []

        for lam_sl, runs in size_runs.groupby('lam_sl'):
            root_n = np.sqrt(len(runs))

            # Insertion order fixes the column order:
            # lam_sl, acc_mean, acc_err, ece_mean, ece_err, f1_*, loss_*
            row = {'lam_sl': lam_sl}
            for metric in ('acc', 'ece', 'f1', 'loss'):
                row[metric + '_mean'] = runs[metric].mean()
                row[metric + '_err'] = runs[metric].std() / root_n

            rows.append(row)

        per_size[ds_size] = pd.DataFrame(rows)

    agg_results[corr_str] = per_size

In [None]:
# agg_results = []

# gdf = df_results.groupby('lam_sl')
# for lam_sl, _df in gdf:
#     n = np.sqrt(_df.shape[0])
    
#     d = {
#         'lam_sl': lam_sl,
#         'acc_mean': _df.acc.mean(),
#         'acc_err': _df.acc.std() / n,
#         'ece_mean': _df.ece.mean(),
#         'ece_err': _df.ece.std() / n,
#         'f1_mean': _df.f1.mean(),
#         'f1_err': _df.f1.std() / n,
#         'loss_mean': _df.loss.mean(),
#         'loss_err': _df.loss.std() / n,
#     }
    
#     agg_results.append(d)

# df_agg = pd.DataFrame(agg_results)

In [None]:
# Show the nested summary: corruption -> dataset size -> per-lam_sl DataFrame.
agg_results

In [None]:
# Hard-coded single-row summary (same columns as the per-size aggregate
# tables) that the plotting cells use as the "MFVI" reference.
# NOTE(review): the provenance of these numbers is not recorded in this notebook.
mfvi_results = pd.DataFrame(
    [
        {
            'lam_sl': 0.0,
            'acc_mean': 0.8082399964332581,
            'acc_err': 0.0018123474110814655,
            'ece_mean': 0.04666510298848152,
            'ece_err': 0.005045091822872807,
            'f1_mean': 0.808240008354187,
            'f1_err': 0.0018123547451123277,
            'loss_mean': 0.8273083090782165,
            'loss_err': 0.030945931769120333,
        }
    ],
    index=[0],
)

In [None]:
# One figure per corruption type: accuracy (left) and ECE (right) against
# lam_sl, with one curve per dataset size, a +/- standard-error band around
# each curve, and the MFVI reference drawn as a dotted horizontal line.
for corr_str, per_size in agg_results.items():
    fig, ax = plt.subplots(1, 2, figsize=(24, 6))

    # (axis, metric column prefix, panel title, log-scaled y axis?)
    # F1 / loss panels can be added here together with a wider subplot grid.
    panels = [
        (ax[0], 'acc', 'Accuracy', False),
        (ax[1], 'ece', 'ECE', True),
    ]

    for _ax, metric, title, log_y in panels:
        lam_lo, lam_hi = np.inf, -np.inf

        for ds_size, df_agg in per_size.items():
            mean = df_agg[metric + '_mean']
            err = df_agg[metric + '_err']

            _ax.plot(df_agg.lam_sl, mean, label=ds_size)
            _ax.fill_between(df_agg.lam_sl, mean + err, mean - err, alpha=0.5)

            # Track the overall lam_sl range covered by the curves on this panel
            lam_lo = min(lam_lo, df_agg.lam_sl.min())
            lam_hi = max(lam_hi, df_agg.lam_sl.max())

        # Draw the reference line once per panel (not once per dataset size)
        # so the legend carries a single "MFVI" entry.
        _ax.hlines(mfvi_results[metric + '_mean'].values[0], lam_lo, lam_hi,
                   ls=':', label="MFVI")

        _ax.set_xscale('log')
        if log_y:
            _ax.set_yscale('log')
        _ax.set_xlabel('lam_sl')
        _ax.set_title(title)
        _ax.legend()

    fig.suptitle("Corruption - {}".format(corr_str))

## Metrics normalized by the MFVI baseline (one curve per dataset size)

In [None]:
# One figure per corruption type showing each dataset size's mean accuracy
# (left) and mean ECE (right) divided by the corresponding MFVI reference
# value, plotted against lam_sl. Error bands and F1 / loss panels are not drawn.
for corr_str, per_size in agg_results.items():
    fig, ax = plt.subplots(1, 2, figsize=(24, 6))

    for ds_size, df_agg in per_size.items():
        # (axis, metric column prefix, panel title, log-scaled y axis?)
        for _ax, metric, title, log_y in (
            (ax[0], 'acc', 'Accuracy', False),
            (ax[1], 'ece', 'ECE', True),
        ):
            column = metric + '_mean'
            y = df_agg[column] / mfvi_results[column].values[0]

            _ax.plot(df_agg.lam_sl, y, label=ds_size, marker='o')
            _ax.set_xscale('log')
            if log_y:
                _ax.set_yscale('log')
            _ax.set_ylabel('Normalized ' + title)
            _ax.set_title(title)

    for _ax in ax:
        _ax.legend()

    plt.suptitle("Corruption - {}".format(corr_str))

## Print LaTeX table

In [None]:
def print_latex_table(df_agg):
    """
    Print aggregated accuracy / ECE results as LaTeX rows, one column per lam_sl.

    For every corruption and dataset size this prints a
    ``<corruption> <dataset size>`` header followed by three ``&``-separated
    lines: the ``lam_sl`` values, ``acc_mean`` with ``acc_err`` and
    ``ece_mean`` with ``ece_err`` (mean and error formatted to three decimals
    and joined by the LaTeX plus-minus command). A ``------`` line is printed
    after each corruption.

    Args:
        df_agg: Nested mapping ``{corruption: {dataset size: DataFrame}}``
            whose frames provide the columns ``lam_sl``, ``acc_mean``,
            ``acc_err``, ``ece_mean`` and ``ece_err``.

    Returns:
        None; the table is written to standard output.

    NOTE(review): this notebook defines ``print_latex_table`` twice; a later
    cell rebinds the name to a row-per-lam_sl variant.
    """

    def format_lam(value):
        # Exact powers of ten are written as 10^{k}. round() is used instead of
        # int() truncation so a log10 of e.g. -2.9999999 does not become -2.
        # Zero, negative and non-power-of-ten values are printed verbatim
        # rather than raising or being mislabelled as a power of ten.
        if value > 0:
            exponent = int(round(np.log10(value)))
            if np.isclose(value, 10.0 ** exponent, rtol=1e-9, atol=0.0):
                return '$10^{{{}}}$'.format(exponent)
        return '${:g}$'.format(value)

    def format_cells(means, errs):
        # Raw string: '\p' is not a valid escape sequence in a normal literal.
        return ' & '.join(
            r'${:.3f}\pm{:.3f}$'.format(mean, err) for mean, err in zip(means, errs)
        )

    for cor, per_size in df_agg.items():
        for ds, df in per_size.items():
            print(cor, ds)

            # Header line with the lam_sl values
            print(' & '.join(format_lam(v) for v in df.lam_sl.values))

            # Print acc
            print(format_cells(df.acc_mean.values, df.acc_err.values))

            # Print ECE
            print(format_cells(df.ece_mean.values, df.ece_err.values))
        print("------")
            
            

In [None]:
# Print the LaTeX rows for every corruption / dataset size in agg_results.
print_latex_table(agg_results)

In [None]:
def print_latex_table(df_agg):
    """
    Print aggregated accuracy / ECE results as LaTeX rows, one row per lam_sl.

    For every corruption and dataset size this prints a
    ``<corruption> <dataset size>`` header followed by one line per ``lam_sl``
    containing three ``&``-separated cells -- the ``lam_sl`` value,
    ``acc_mean`` with ``acc_err`` and ``ece_mean`` with ``ece_err`` (mean and
    error formatted to three decimals and joined by the LaTeX plus-minus
    command) -- terminated by a LaTeX row break. A ``--------`` line is
    printed after each corruption.

    Args:
        df_agg: Nested mapping ``{corruption: {dataset size: DataFrame}}``
            whose frames provide the columns ``lam_sl``, ``acc_mean``,
            ``acc_err``, ``ece_mean`` and ``ece_err``.

    Returns:
        None; the table is written to standard output.

    NOTE(review): this notebook defines ``print_latex_table`` twice; this
    definition rebinds the name used by an earlier column-per-lam_sl variant.
    """

    def format_lam(value):
        # Exact powers of ten are written as 10^{k}. round() is used instead of
        # int() truncation so a log10 of e.g. -2.9999999 does not become -2.
        # Zero, negative and non-power-of-ten values are printed verbatim
        # rather than raising or being mislabelled as a power of ten.
        if value > 0:
            exponent = int(round(np.log10(value)))
            if np.isclose(value, 10.0 ** exponent, rtol=1e-9, atol=0.0):
                return '$10^{{{}}}$'.format(exponent)
        return '${:g}$'.format(value)

    # Raw string: '\p' is not a valid escape sequence in a normal literal.
    cell = r'${:.3f}\pm{:.3f}$'

    for cor, per_size in df_agg.items():
        for ds, df in per_size.items():
            print(cor, ds)
            for row in df.itertuples(index=False):
                cells = [
                    format_lam(row.lam_sl),
                    cell.format(row.acc_mean, row.acc_err),
                    cell.format(row.ece_mean, row.ece_err),
                ]
                # ' \\\\' is a space followed by the two-backslash LaTeX row break
                print(' & '.join(cells) + ' \\\\')
        print("--------")
            

In [None]:
# Print the LaTeX rows for every corruption / dataset size in agg_results.
print_latex_table(agg_results)