In [1]:
import sys, os; sys.path.append(os.path.abspath("../"));
from importlib import reload
import pandas as pd, numpy as np, torch
import matplotlib.pyplot as plt

import utils.utils as utils
import persist_to_disk as ptd; ptd.config.set_project_path(os.path.abspath("../"))
import pipeline.main
import pipeline.evaluate as peval
import data.dataloader as dld
import _settings

DATASET = _settings.ImageNet1K_NAME
NCLASS = 1000
NBINS = 20
DNN = 'Inception'
datakwargs={'resize_for_pretrained_model': True}
%matplotlib inline

# Key passed as the first argument to pipeline.main.get_calibrated_preds.
key = 'inception_resnet_v2'


def _make_split(seed):
    """Map a repetition index to the ``resplit_seed`` used for that repetition.

    This is the identity mapping; it is kept as a named function (rather than
    a lambda bound to a name) so every cell derives its resplit seed the same
    way and the mapping can be changed in one place.

    Args:
        seed: the loop index of the repetition.

    Returns:
        The value to pass as ``resplit_seed`` (here, ``seed`` unchanged).
    """
    return seed


# Passed as the "key" entry of kernel_kwargs / proj_kwargs in the KCal cell.
trained_key = 'ProjectionTrainer-20220113_025401Skip-ELU'

# Columns, in display order, selected for every summary table.
metrics = ['acc', 'ece_adapt', 'cecet_adapt', 'brier_top1', 'ece', 'cecet', 'KCE']

# Un-calibrated

In [5]:
# Evaluate the 'uncal' baseline once per resplit seed (0-9).
uncal_res = {}
for seed in range(10):
    tP_, tY_ = pipeline.main.get_calibrated_preds(
        key, DATASET, dld.VALID, dld.TEST, datakwargs,
        baseline='uncal',
        resplit_seed=_make_split(seed),
    )
    uncal_res[seed] = peval.routine(tP_, tY_, nbins=NBINS)

# After the transpose there is one row per seed; rows 1-2 of describe() are mean and std.
pd.DataFrame(uncal_res).T.describe().iloc[1:3].reindex(columns=metrics)

Unnamed: 0,acc,ece_adapt,cecet_adapt,brier_top1,ece,cecet,KCE
mean,80.444,3.208745,2.956612,0.111465,3.063977,3.704882,3.117072
std,0.241698,0.152408,0.024836,0.001361,0.128136,0.02559,0.117706


# Temperature Scaling

In [6]:
# Evaluate the 'ts' baseline once per resplit seed (0-9).
tscale_res = {}
for seed in range(10):
    tP_, tY_ = pipeline.main.get_calibrated_preds(
        key, DATASET, dld.VALID, dld.TEST, datakwargs,
        baseline='ts',
        resplit_seed=_make_split(seed),
    )
    tscale_res[seed] = peval.routine(tP_, tY_, nbins=NBINS)

# After the transpose there is one row per seed; rows 1-2 of describe() are mean and std.
pd.DataFrame(tscale_res).T.describe().iloc[1:3].reindex(columns=metrics)

Unnamed: 0,acc,ece_adapt,cecet_adapt,brier_top1,ece,cecet,KCE
mean,80.444,3.515509,3.251207,0.112008,3.259488,3.988511,3.309896
std,0.241698,0.131307,0.067915,0.001486,0.134696,0.067078,0.128743


# IMAX

In [8]:

# Evaluate the 'imax' baseline once per resplit seed (0-9).
imax_res = {}
for seed in range(10):
    tP_imax, tY_ = pipeline.main.get_calibrated_preds(
        key, DATASET, dld.VALID, dld.TEST, datakwargs,
        resplit_seed=_make_split(seed),
        baseline='imax',
    )
    # Pass nbins explicitly, as the other baseline cells do, so the binned
    # metrics do not silently depend on peval.routine's default.
    imax_res[seed] = peval.routine(tP_imax, tY_, nbins=NBINS)

# After the transpose there is one row per seed; rows 1-2 of describe() are mean and std.
pd.DataFrame(imax_res).T.describe().iloc[1:3].reindex(columns=metrics)

Unnamed: 0,acc,ece_adapt,cecet_adapt,brier_top1,ece,cecet,KCE
mean,80.3428,7.968129,2.815225,0.119262,8.050602,3.289669,7.314129
std,0.27725,0.353495,0.19128,0.001823,0.324979,0.207192,0.341781


# KCal

In [9]:
# Evaluate KCal with the trained kernel/projection once per resplit seed (0-9).
tkcalp_res = {}
for seed in range(10):
    # Without a `baseline` argument the call result is used as a DataFrame:
    # its first NCLASS columns are read as predictions and column 'y' as labels.
    res_df_ = pipeline.main.get_calibrated_preds(
        key, DATASET, datakwargs=datakwargs,
        kernel_name='trained', kernel_kwargs={"key": trained_key, 'dataset': DATASET},
        proj_name='trained', proj_kwargs={"key": trained_key, 'dataset': DATASET},
        cal_kwargs={'fit_bw_Fold': 20, 'fit_loss': 'log'},
        resplit_seed=_make_split(seed),
    )
    tP_kercal = res_df_.iloc[:, :NCLASS].values
    # Pass nbins explicitly, as the baseline cells do, so the binned metrics
    # do not silently depend on peval.routine's default.
    tkcalp_res[seed] = peval.routine(tP_kercal, tY=res_df_['y'].values, nbins=NBINS)

# After the transpose there is one row per seed; rows 1-2 of describe() are mean and std.
pd.DataFrame(tkcalp_res).T.describe().iloc[1:3].reindex(columns=metrics)

Unnamed: 0,acc,ece_adapt,cecet_adapt,brier_top1,ece,cecet,KCE
mean,79.6372,1.426648,1.938358,0.111354,1.303055,2.401503,1.689666
std,0.240237,0.339148,0.044391,0.000968,0.278238,0.03701,0.327941


## Other baselines
Running the cells below requires the official code of the respective baseline methods.

In [7]:
# Dir Cal: evaluate the 'dircal' baseline once per resplit seed (0-9).
# NOTE(review): the reload looks like a development leftover — confirm before removing.
reload(pipeline.main)

dircal_res = {}
for seed in range(10):
    tP_, tY_ = pipeline.main.get_calibrated_preds(
        key, DATASET, dld.VALID, dld.TEST, datakwargs,
        baseline='dircal',
        resplit_seed=_make_split(seed),
    )
    dircal_res[seed] = peval.routine(tP_, tY_, nbins=NBINS)

# After the transpose there is one row per seed; rows 1-2 of describe() are mean and std.
pd.DataFrame(dircal_res).T.describe().iloc[1:3].reindex(columns=metrics)

Unnamed: 0,acc,ece_adapt,cecet_adapt,brier_top1,ece,cecet,KCE
mean,79.55,4.297083,5.601277,0.120317,4.263899,6.111681,4.231659
std,0.244355,0.678765,0.23117,0.002142,0.743292,0.219505,0.715662


In [2]:
import pickle

# Evaluate spline-calibration outputs that were pickled to WORKSPACE/_spline_cal,
# one file per resplit seed (0-9).
_res_dir = os.path.join(_settings.WORKSPACE, '_spline_cal')
spline_res = {}
for seed in range(10):
    # Only the labels are taken from the pipeline; its predictions are discarded.
    _, tY_ = pipeline.main.get_calibrated_preds(
        key, DATASET, dld.VALID, dld.TEST, datakwargs,
        baseline='uncal', resplit_seed=_make_split(seed),
    )
    # Open via a context manager so the file handle is closed after loading.
    # NOTE: pickle.load can execute arbitrary code — only load files you produced yourself.
    with open(os.path.join(_res_dir, f"{DATASET}-{DNN}_{seed}.p"), 'rb') as fin:
        tP_ = pickle.load(fin)
    # Divide each row by its sum so every row sums to 1.
    tP_ = tP_ / tP_.sum(1)[:, np.newaxis]
    spline_res[seed] = peval.routine(tP_, tY_, nbins=NBINS)

# After the transpose there is one row per seed; rows 1-2 of describe() are mean and std.
pd.DataFrame(spline_res).T.describe().iloc[1:3].reindex(columns=metrics)

gen output on the fly:   0%|          | 0/782 [00:00<?, ?it/s]

inception_resnet_v2 64 val -1


gen output on the fly: 100%|██████████| 782/782 [00:00<00:00, 1841.15it/s]


Unnamed: 0,acc,ece_adapt,cecet_adapt,brier_top1,ece,cecet,KCE
mean,80.2212,1.097821,2.17216,0.106796,1.129224,2.803959,1.218672
std,0.269294,0.204781,0.062903,0.001253,0.151107,0.073033,0.144816


In [3]:
import pickle
from scipy.special import softmax

# Evaluate outputs that were pickled to WORKSPACE/_iop_cal, one file per
# resplit seed (0-9).
_res_dir = os.path.join(_settings.WORKSPACE, '_iop_cal')
iop_res = {}
for seed in range(10):
    # Only the labels are taken from the pipeline; its predictions are discarded.
    _, tY_ = pipeline.main.get_calibrated_preds(
        key, DATASET, dld.VALID, dld.TEST, datakwargs,
        baseline='uncal', resplit_seed=_make_split(seed),
    )
    # Open via a context manager so the file handle is closed after loading.
    # NOTE: pickle.load can execute arbitrary code — only load files you produced yourself.
    with open(os.path.join(_res_dir, f"{DATASET}-{DNN}_{seed}.p"), 'rb') as fin:
        tS_ = pickle.load(fin)
    # The pickled values are passed through a softmax along axis 1 before evaluation.
    tP_ = softmax(tS_, 1)
    iop_res[seed] = peval.routine(tP_, tY_, nbins=NBINS)

# After the transpose there is one row per seed; rows 1-2 of describe() are mean and std.
pd.DataFrame(iop_res).T.describe().iloc[1:3].reindex(columns=metrics)

Unnamed: 0,acc,ece_adapt,cecet_adapt,brier_top1,ece,cecet,KCE
mean,80.444,1.312066,2.296213,0.106944,1.376053,2.925208,1.447235
std,0.241698,0.470618,0.136683,0.001326,0.464009,0.163021,0.401803


In [4]:
import pickle
# NOTE(review): softmax is not used in this cell.
from scipy.special import softmax

# Evaluate outputs that were pickled to WORKSPACE/_gp_cal, one file per
# resplit seed (0-9). The loaded arrays are evaluated as-is.
_res_dir = os.path.join(_settings.WORKSPACE, '_gp_cal')
gp_res = {}
for seed in range(10):
    # Only the labels are taken from the pipeline; its predictions are discarded.
    _, tY_ = pipeline.main.get_calibrated_preds(
        key, DATASET, dld.VALID, dld.TEST, datakwargs,
        baseline='uncal', resplit_seed=_make_split(seed),
    )
    # Open via a context manager so the file handle is closed after loading.
    # NOTE: pickle.load can execute arbitrary code — only load files you produced yourself.
    with open(os.path.join(_res_dir, f"{DATASET}-{DNN}_{seed}.p"), 'rb') as fin:
        tP_ = pickle.load(fin)
    gp_res[seed] = peval.routine(tP_, tY_, nbins=NBINS)

# After the transpose there is one row per seed; rows 1-2 of describe() are mean and std.
pd.DataFrame(gp_res).T.describe().iloc[1:3].reindex(columns=metrics)

Unnamed: 0,acc,ece_adapt,cecet_adapt,brier_top1,ece,cecet,KCE
mean,80.4424,0.871503,2.417215,0.106725,0.945057,3.045649,1.093771
std,0.239059,0.123274,0.059628,0.00123,0.164667,0.077953,0.140132
