# Extract OOD test results

In [1]:
import sys
sys.path.append("./../")

In [2]:
import os
import json
import glob
import pickle

import numpy as np
import pandas as pd

from tbparse import SummaryReader
import matplotlib.pyplot as plt
plt.style.use('seaborn')

2022-05-14 12:06:58.524292: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-05-14 12:06:58.524364: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [3]:
def extract_results(model_dir):
    """
        Get OOD metrics from model dir
    """

    # Get config
    config_json = os.path.join(model_dir, 'config.json')
    config = json.load(open(config_json, 'r'))
    
    # Extract config values
    method = config['method']
    lam_sl = config['method_params'].get('lam_sl', 0.0)
    ds_size = config['ds_params'].get('size', 'Full')
    
    
    results = None
    
    # Get OOD result files
    ood_result_files = glob.glob(model_dir + "/ece_results_*.pkl")
    
    # Get results
    for rfile in ood_result_files:
        filename = os.path.basename(rfile)
        # Get corruption name from file name
        corr_name = ' '.join(filename.split('_')[2:])[:-4]
        with open(rfile, 'rb') as f:
            logs = pickle.load(f)[0]
            r = {
                'method': method,
                'lam_sl': lam_sl,
                'ds_size': ds_size,
                'corruption': corr_name,
                'ece': logs['ece_uncal'],
                'acc': logs['acc']
            }
            
            if results is not None:
                results.append(r)
            else:
                results = [r]
    
    return results

## Model dirs

In [4]:
models_root = "./../zoo/abl-alpha100-uniform-lenet/BinaryMNISTC-1000-53-identity/LeNet"
elbo_models_root = "./../zoo/bmnist53-mfvi/BinaryMNISTC-1000-53-identity/LeNet"

## S-ELBO results

In [5]:
model_dirs = list(map(lambda d: os.path.join(models_root, d), os.listdir(models_root)))

In [6]:
results = []
for _m in model_dirs:
    results.extend(extract_results(_m))

## EBLO results

In [7]:
model_dirs = list(map(lambda d: os.path.join(elbo_models_root, d), os.listdir(elbo_models_root)))

In [8]:
for _m in model_dirs:
    results.extend(extract_results(_m))
df_results = pd.DataFrame(results)

In [9]:
df_results

Unnamed: 0,method,lam_sl,ds_size,corruption,ece,acc
0,sl,0.00001,1000,impulse noise,0.010992,0.978970
1,sl,0.00001,1000,stripe,0.007142,0.986330
2,sl,0.00001,1000,glass blur,0.008654,0.971083
3,sl,0.00001,1000,fog,0.176194,0.985279
4,sl,0.00001,1000,motion blur,0.021237,0.960568
...,...,...,...,...,...,...
635,mfvi,0.00000,1000,dotted line,0.007210,0.988433
636,mfvi,0.00000,1000,shear,0.008604,0.971609
637,mfvi,0.00000,1000,spatter,0.005627,0.986856
638,mfvi,0.00000,1000,brightness,0.048618,0.984227


In [10]:
metrics_summ = df_results.groupby('lam_sl').agg(
    n = pd.NamedAgg(column='acc', aggfunc='count'),
    acc_mean = pd.NamedAgg(column='acc', aggfunc='mean'),
    acc_err = pd.NamedAgg(column='acc', aggfunc=lambda x: np.std(x) / np.sqrt(x.shape[0])),
    ece_mean = pd.NamedAgg(column='ece', aggfunc='mean'),
    ece_err = pd.NamedAgg(column='ece', aggfunc=lambda x: np.std(x) / np.sqrt(x.shape[0])),
)

In [11]:
gdf_corr = df_results.groupby('corruption')

In [12]:
rdfs = []
for k, _df in gdf_corr:
#     r1 = _df.groupby('lam_sl').agg({'corruption': 'first','ece': 'mean'}).sort_values(by='ece').reset_index()
    r1 = _df.groupby('lam_sl').agg({'corruption': 'first','ece': 'mean'}).reset_index()
    r1['rank'] = r1.ece.rank()
    rdfs.append(r1)

df_ranked = pd.concat(rdfs)
# df_ranked.reset_index(inplace=True)


In [13]:
metrics_summ

Unnamed: 0_level_0,n,acc_mean,acc_err,ece_mean,ece_err
lam_sl,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.0,80,0.955402,0.007034,0.039848,0.00685
1e-06,80,0.956677,0.006728,0.041804,0.006339
1e-05,80,0.958511,0.00676,0.034882,0.006245
0.0001,80,0.956217,0.007054,0.035206,0.006181
0.001,80,0.949974,0.007288,0.068309,0.007376
0.01,80,0.944013,0.007323,0.166911,0.007886
0.1,80,0.925572,0.008871,0.271303,0.008639
1.0,80,0.863275,0.012229,0.247753,0.008362


In [14]:
df_rank_results = df_ranked.groupby('lam_sl').agg(
    ece_mean = pd.NamedAgg(column='ece', aggfunc='mean'),
    rank_mean = pd.NamedAgg(column='rank', aggfunc='mean'),
    rank_err = pd.NamedAgg(column='rank', aggfunc=lambda x: np.std(x) / np.sqrt(x.shape[0]))
)

In [15]:
df_final = metrics_summ.merge(df_rank_results, on='lam_sl')

## Printout final results

In [16]:
df_final

Unnamed: 0_level_0,n,acc_mean,acc_err,ece_mean_x,ece_err,ece_mean_y,rank_mean,rank_err
lam_sl,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0.0,80,0.955402,0.007034,0.039848,0.00685,0.039848,2.9375,0.271983
1e-06,80,0.956677,0.006728,0.041804,0.006339,0.041804,3.9375,0.16462
1e-05,80,0.958511,0.00676,0.034882,0.006245,0.034882,2.5,0.330719
0.0001,80,0.956217,0.007054,0.035206,0.006181,0.035206,1.875,0.449609
0.001,80,0.949974,0.007288,0.068309,0.007376,0.068309,4.75,0.257694
0.01,80,0.944013,0.007323,0.166911,0.007886,0.166911,5.875,0.195156
0.1,80,0.925572,0.008871,0.271303,0.008639,0.271303,7.125,0.35217
1.0,80,0.863275,0.012229,0.247753,0.008362,0.247753,7.0,0.433013
