# Extract OOD test results

In [1]:
import sys
sys.path.append("./../")

In [2]:
import os
import json
import glob
import pickle

import numpy as np
import pandas as pd

In [3]:
def extract_results(model_dir):
    """
        Get OOD metrics from model dir.

        Reads ``config.json`` inside ``model_dir`` for the run settings, then
        loads every ``ece_results_*.pkl`` file located directly in
        ``model_dir`` and builds one record per file.

        Args:
            model_dir: Path of a model directory containing ``config.json``
                and zero or more ``ece_results_<corruption>.pkl`` files.

        Returns:
            A list of dicts with the keys ``method``, ``lam_sl``, ``ds_size``,
            ``corruption``, ``ece`` and ``acc``. The list is empty (never
            ``None``) when no result file is found, so callers can always
            pass it to ``list.extend``.

        Raises:
            FileNotFoundError: If ``config.json`` does not exist.
            KeyError: If a required key is missing from the config or from
                a result file.
    """

    # Get config; the context manager closes the file handle deterministically
    config_json = os.path.join(model_dir, 'config.json')
    with open(config_json, 'r') as f:
        config = json.load(f)

    # Extract config values (optional entries fall back to defaults)
    method = config['method']
    lam_sl = config['method_params'].get('lam_sl', 0.0)
    ds_size = config['ds_params'].get('size', 'Full')

    results = []

    # Get OOD result files
    ood_result_files = glob.glob(model_dir + "/ece_results_*.pkl")

    # Get results
    for rfile in ood_result_files:
        filename = os.path.basename(rfile)
        # Get corruption name from file name:
        # "ece_results_glass_blur.pkl" -> "glass blur"
        corr_name = ' '.join(filename.split('_')[2:])[:-4]
        # NOTE: unpickling can execute arbitrary code; only load result files
        # from a trusted source.
        with open(rfile, 'rb') as f:
            # The metrics mapping is the first element of the pickled object
            logs = pickle.load(f)[0]

        results.append({
            'method': method,
            'lam_sl': lam_sl,
            'ds_size': ds_size,
            'corruption': corr_name,
            'ece': logs['ece_uncal'],
            'acc': logs['acc']
        })

    return results

## Model dirs

In [4]:
# Root directories of the trained models to summarise. Alternative
# configurations are kept commented out below.
# NOTE: `models_root` is commented out in every configuration, so the
# S-ELBO cells that read it fail with NameError until one is enabled;
# only `elbo_models_root` is currently defined.
# LeNet + 1000
# models_root = "./../zoo/abl-alpha100-uniform-lenet/BinaryMNISTC-1000-53-identity/LeNet"
# elbo_models_root = "./../zoo/bmnist53-mfvi/BinaryMNISTC-1000-53-identity/LeNet"
elbo_models_root = "./../zoo/bmnist53-ls/BinaryMNISTC-1000-53-identity/LeNet/"

# # LeNet + 10000
# models_root = "./../zoo/abl-alpha100-uniform-lenet/BinaryMNISTC-10000-53-identity/LeNet"
# elbo_models_root = "./../zoo/bmnist53-mfvi/BinaryMNISTC-10000-53-identity/LeNet"

# # ConvNet + 1000
# models_root = "./../zoo/abl-alpha100-uniform-convnet/BinaryMNISTC-1000-53-identity/ConvNet"
# elbo_models_root = "./../zoo/bmnist53-mfvi/BinaryMNISTC-1000-53-identity/ConvNet"

# # ConvNet + 10000
# models_root = "./../zoo/abl-alpha100-uniform-convnet/BinaryMNISTC-10000-53-identity/ConvNet"
# elbo_models_root = "./../zoo/bmnist53-mfvi/BinaryMNISTC-10000-53-identity/ConvNet"

## S-ELBO results

In [5]:
# Every entry directly under models_root is treated as one model directory.
model_dirs = [os.path.join(models_root, entry) for entry in os.listdir(models_root)]

NameError: name 'models_root' is not defined

In [6]:
# Start the shared record list, then add the records of each model directory.
results = list()
for _m in model_dirs:
    results += extract_results(_m)

NameError: name 'model_dirs' is not defined

## ELBO results

In [7]:
# Every entry directly under elbo_models_root is treated as one model directory.
model_dirs = [os.path.join(elbo_models_root, entry) for entry in os.listdir(elbo_models_root)]

In [8]:
# Add the records of each model directory to the shared list, echoing the
# directory being processed, then tabulate everything collected so far.
for _m in model_dirs:
    print(_m)
    results += extract_results(_m)
df_results = pd.DataFrame(data=results)

./../zoo/bmnist53-ls/BinaryMNISTC-1000-53-identity/LeNet/ls-sz1000-4-20220731135504
./../zoo/bmnist53-ls/BinaryMNISTC-1000-53-identity/LeNet/ls-sz1000-5-20220731140041
./../zoo/bmnist53-ls/BinaryMNISTC-1000-53-identity/LeNet/ls-sz1000-3-20220731134728
./../zoo/bmnist53-ls/BinaryMNISTC-1000-53-identity/LeNet/ls-sz1000-2-20220731133423
./../zoo/bmnist53-ls/BinaryMNISTC-1000-53-identity/LeNet/ls-sz1000-1-20220731132131


In [9]:
df_results

Unnamed: 0,method,lam_sl,ds_size,corruption,ece,acc
0,ls,0.0,1000,impulse noise,0.010152,0.987382
1,ls,0.0,1000,stripe,0.059883,0.927445
2,ls,0.0,1000,glass blur,0.021346,0.972135
3,ls,0.0,1000,fog,0.047986,0.984753
4,ls,0.0,1000,motion blur,0.015569,0.975815
...,...,...,...,...,...,...
75,ls,0.0,1000,dotted line,0.010035,0.987907
76,ls,0.0,1000,shear,0.027581,0.967928
77,ls,0.0,1000,spatter,0.012282,0.986330
78,ls,0.0,1000,brightness,0.009223,0.988959


In [10]:
# Per-lam_sl summary over all records: record count, mean accuracy/ECE and
# their error, computed as std / sqrt(n).
metrics_summ = df_results.groupby('lam_sl').agg(
    n=('acc', 'count'),
    acc_mean=('acc', 'mean'),
    acc_err=('acc', lambda col: np.std(col) / np.sqrt(len(col))),
    ece_mean=('ece', 'mean'),
    ece_err=('ece', lambda col: np.std(col) / np.sqrt(len(col))),
)

In [11]:
# Group the records by corruption name; each group is ranked on its own below.
gdf_corr = df_results.groupby('corruption')

In [12]:
# For each corruption, average acc/ece per lam_sl and rank the lam_sl values
# within that corruption (rank 1 = lowest ECE / highest accuracy).
rdfs = []
for _, corr_df in gdf_corr:
    per_lam = (
        corr_df.groupby('lam_sl')
        .agg({'corruption': 'first', 'acc': 'mean', 'ece': 'mean'})
        .reset_index()
    )
    per_lam = per_lam.assign(
        ece_rank=per_lam['ece'].rank(ascending=True),
        acc_rank=per_lam['acc'].rank(ascending=False),
    )
    rdfs.append(per_lam)

# Stack the per-corruption rankings into one frame.
df_ranked = pd.concat(rdfs)
# df_ranked.reset_index(inplace=True)


In [13]:
metrics_summ

Unnamed: 0_level_0,n,acc_mean,acc_err,ece_mean,ece_err
lam_sl,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.0,80,0.955054,0.007182,0.038909,0.006679


In [14]:
# Average the per-corruption ranks for each lam_sl, with the error computed
# as std / sqrt(n).
df_rank_results = df_ranked.groupby('lam_sl').agg(
    ece_rank_mean=('ece_rank', 'mean'),
    ece_rank_err=('ece_rank', lambda col: np.std(col) / np.sqrt(len(col))),
    acc_rank_mean=('acc_rank', 'mean'),
    acc_rank_err=('acc_rank', lambda col: np.std(col) / np.sqrt(len(col))),
)

In [15]:
# Join the metric summary with the rank summary on their shared lam_sl key.
df_final = pd.merge(metrics_summ, df_rank_results, on='lam_sl')

## Print out final results

In [16]:
df_final

Unnamed: 0_level_0,n,acc_mean,acc_err,ece_mean,ece_err,ece_rank_mean,ece_rank_err,acc_rank_mean,acc_rank_err
lam_sl,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.0,80,0.955054,0.007182,0.038909,0.006679,1.0,0.0,1.0,0.0


Print out LaTeX table

In [17]:
# Emit one LaTeX table row fragment per lam_sl value: accuracy (mean \pm err),
# mean accuracy rank, ECE (mean \pm err) and mean ECE rank.
# Raw strings are used because "\p" in a normal string literal is an invalid
# escape sequence, which newer Python versions warn about; the printed text
# is unchanged.
for row in df_final.itertuples():
    print(
#         "${:.0e}$".format(row.Index),
        r"& ${:.3f} \pm {:.3f}$".format(row.acc_mean, row.acc_err),
#         r"& ${:.2f} \pm {:.2f}$".format(row.acc_rank_mean, row.acc_rank_err),
        r"& ${:.2f}$".format(row.acc_rank_mean),
        r"& ${:.3f} \pm {:.3f}$".format(row.ece_mean, row.ece_err),
#         r"& ${:.2f} \pm {:.2f}$".format(row.ece_rank_mean, row.ece_rank_err)
        r"& ${:.2f}$".format(row.ece_rank_mean)
    )

& $0.955 \pm 0.007$ & $1.00$ & $0.039 \pm 0.007$ & $1.00$
