In [1]:
import sys
sys.path.append("./../")

In [2]:
import os
import json
import glob

import numpy as np
import pandas as pd

from tbparse import SummaryReader
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed the bundled 'seaborn' style to 'seaborn-v0_8' and 3.8
# removed the old name; use the new name whenever this Matplotlib provides it.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

2022-04-30 16:03:53.686449: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-04-30 16:03:53.686505: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [3]:
def extract_results(model_dir):
    """
    Get the test metrics recorded for a given model folder.

    Reads ``<model_dir>/config.json`` and the first TensorBoard events file
    matching ``<model_dir>/tblog/events.out.tfevents*.1``.

    Args:
        model_dir: Path to one model folder.

    Returns:
        A dict with keys ``lam_sl``, ``ds_size``, ``ds_corr``, ``acc``,
        ``ece``, ``f1`` and ``loss``, or ``None`` (after printing a
        "Skipping" notice) when no matching events file exists.
    """

    # Get config (context manager so the file handle is closed immediately)
    config_json = os.path.join(model_dir, 'config.json')
    with open(config_json, 'r') as config_file:
        config = json.load(config_file)

    # Get test events file
    tblog_dir = os.path.join(model_dir, "tblog")
    events_file = glob.glob(tblog_dir + "/events.out.tfevents*.1") # *.1 will be from test
    if len(events_file) > 0:
        events_file = events_file[0]
    else:
        print("Skipping", model_dir)
        return None
    tb_log = SummaryReader(events_file, pivot=True).scalars

    # Extract key points. Missing config entries fall back to the same
    # defaults extract_best_results uses, so both extractors accept the
    # same set of config files instead of this one raising KeyError.
    lam_sl = config['method_params'].get('lam_sl', 0.0)
    ds_size = config['ds_params'].get('size', 'Full')
    ds_corr = config['ds_params'].get('corruption', 'identity')
    acc = tb_log.test_acc.values[0]
    ece = tb_log.test_ece.values[0]
    f1 = tb_log.test_f1.values[0]
    loss = tb_log.test_loss.values[0]

    return {
        'lam_sl': lam_sl,
        'ds_size': ds_size,
        'ds_corr': ds_corr,
        'acc': acc,
        'ece': ece,
        'f1': f1,
        'loss': loss
    }

In [8]:
def extract_best_results(model_dir):
    """
    Get the validation metrics at the best ``val_f1`` point of a model folder.

    Reads ``<model_dir>/config.json`` and the first TensorBoard events file
    matching ``<model_dir>/tblog/events.out.tfevents*.0``, then keeps the
    first logged row whose ``val_f1`` equals the maximum ``val_f1``.

    Args:
        model_dir: Path to one model folder.

    Returns:
        A dict with keys ``lam_sl``, ``ds_size``, ``ds_corr``, ``acc``,
        ``ece``, ``f1`` and ``loss``. Config entries that are absent default
        to ``0.0``, ``'Full'`` and ``'identity'`` respectively. Returns
        ``None`` (after printing a notice) when there is no matching events
        file or no usable ``val_f1`` value.
    """
    # Get config (context manager so the file handle is closed immediately)
    config_json = os.path.join(model_dir, 'config.json')
    with open(config_json, 'r') as config_file:
        config = json.load(config_file)

    # Get the events file the val_* scalars are read from
    tblog_dir = os.path.join(model_dir, "tblog")
    events_file = glob.glob(tblog_dir + "/events.out.tfevents*.0") # *.0 here; *.1 is the test log
    if len(events_file) > 0:
        events_file = events_file[0]
    else:
        print("Skipping", model_dir)
        return None
    tb_log = SummaryReader(events_file, pivot=True).scalars

    # Get the best point in terms of val_f1 (don't look at ECE)
    try:
        best_rows = tb_log[tb_log.val_f1 == tb_log.val_f1.max()]
    except Exception:
        # e.g. the log has no val_f1 column. Unlike a bare `except:`, this
        # still lets KeyboardInterrupt / SystemExit propagate.
        print(model_dir)
        return None
    if best_rows.empty:
        # val_f1 was all-NaN: nothing compares equal to the (NaN) maximum,
        # so there is no best row to report.
        print(model_dir)
        return None

    # Extract key points
    lam_sl = config['method_params'].get('lam_sl', 0.0)
    ds_size = config['ds_params'].get('size', 'Full')
    ds_corr = config['ds_params'].get('corruption', 'identity')
    acc = best_rows.val_acc.values[0]
    ece = best_rows.val_ece.values[0]
    f1 = best_rows.val_f1.values[0]
    loss = best_rows.val_loss.values[0]

    return {
        'lam_sl': lam_sl,
        'ds_size': ds_size,
        'ds_corr': ds_corr,
        'acc': acc,
        'ece': ece,
        'f1': f1,
        'loss': loss
    }

In [9]:
# Name of the model sub-folder looked up inside every experiment directory
# when collecting run folders; swap the commented line to switch models.
# model_str = "LeNet"
model_str = "VGG11"

In [10]:
# rootdir = "./../zoo/abl-alpha100-uniform/"
rootdir = "./../zoo/abl-alpha100-unibin-mfvi/"

# Collect every run folder laid out as <rootdir>/<experiment>/<model_str>/<run>.
# Both listings are sorted because os.listdir returns entries in arbitrary
# order, which would make the row order of the results table vary per machine.
modeldirs = []
for expname in sorted(os.listdir(rootdir)):
    expdir = os.path.join(rootdir, expname)
    arch_dir = os.path.join(expdir, model_str)
    if not os.path.isdir(arch_dir):
        # Stray files under rootdir, or experiments without this model.
        continue
    modeldirs.extend(
        os.path.join(arch_dir, run_name) for run_name in sorted(os.listdir(arch_dir))
    )

In [11]:
# df_results = pd.DataFrame(filter(None, [extract_results(mdir) for mdir in modeldirs]))
# One row per model directory; directories whose extraction returned a falsy
# result (i.e. were skipped) are dropped.
df_results = pd.DataFrame(
    [row for row in map(extract_best_results, modeldirs) if row]
)

In [12]:
# Show the collected metrics: one row per model directory that yielded results.
df_results

Unnamed: 0,lam_sl,ds_size,ds_corr,acc,ece,f1,loss
0,0.0,Full,identity,0.8123,0.037324,0.8123,0.731209
1,0.0,Full,identity,0.8127,0.052904,0.8127,0.866989
2,0.0,Full,identity,0.807,0.037957,0.807,0.792517
3,0.0,Full,identity,0.8038,0.041678,0.8038,0.834357
4,0.0,Full,identity,0.8054,0.063463,0.8054,0.911471


In [13]:
# Nested summary: agg_results[ds_corr][ds_size] is a DataFrame with one row per
# lam_sl, holding the mean of each metric and its standard error
# (sample std divided by sqrt of the number of runs in the group).
agg_results = {}

for corr_str, corr_df in df_results.groupby('ds_corr'):
    agg_results[corr_str] = per_size = {}

    for ds_size, size_df in corr_df.groupby('ds_size'):
        rows = []

        for lam_sl, lam_df in size_df.groupby('lam_sl'):
            root_n = np.sqrt(len(lam_df))  # sqrt of the run count, not the count itself

            row = {'lam_sl': lam_sl}
            for metric in ('acc', 'ece', 'f1', 'loss'):
                row[metric + '_mean'] = lam_df[metric].mean()
                row[metric + '_err'] = lam_df[metric].std() / root_n
            rows.append(row)

        per_size[ds_size] = pd.DataFrame(rows)

In [13]:
# agg_results = []

# gdf = df_results.groupby('lam_sl')
# for lam_sl, _df in gdf:
#     n = np.sqrt(_df.shape[0])
    
#     d = {
#         'lam_sl': lam_sl,
#         'acc_mean': _df.acc.mean(),
#         'acc_err': _df.acc.std() / n,
#         'ece_mean': _df.ece.mean(),
#         'ece_err': _df.ece.std() / n,
#         'f1_mean': _df.f1.mean(),
#         'f1_err': _df.f1.std() / n,
#         'loss_mean': _df.loss.mean(),
#         'loss_err': _df.loss.std() / n,
#     }
    
#     agg_results.append(d)

# df_agg = pd.DataFrame(agg_results)

In [16]:
# Inspect the aggregated metrics for ds_corr == 'identity' and ds_size == 'Full'.
agg_results['identity']['Full'].to_dict()

{'lam_sl': {0: 0.0},
 'acc_mean': {0: 0.8082399964332581},
 'acc_err': {0: 0.0018123474110814655},
 'ece_mean': {0: 0.04666510298848152},
 'ece_err': {0: 0.005045091822872807},
 'f1_mean': {0: 0.808240008354187},
 'f1_err': {0: 0.0018123547451123277},
 'loss_mean': {0: 0.8273083090782165},
 'loss_err': {0: 0.030945931769120333}}