In [1]:
import sys, os; sys.path.append(os.path.abspath("../"));
from importlib import reload
import pickle

import matplotlib.pyplot as plt
import pandas as pd, numpy as np, torch
import seaborn as sns

import _settings
from _settings import CIFAR10_NAME, CIFAR100_NAME, SVHN_NAME, IIICSup_NAME, ECG_NAME, ISRUC_NAME
import utils.utils as utils
import persist_to_disk as ptd
ptd.config.set_project_path(os.path.abspath("../"))
import pipeline.main
import pipeline.evaluate as peval
import data.dataloader as dld


%matplotlib inline

# Bin count forwarded as `nbins` to every peval.routine(...) call in this notebook.
# NOTE(review): presumably the number of bins for the binned calibration metrics (ECE etc.) -- confirm in pipeline.evaluate.
NBINS = 20

In [2]:
# Experiment selection: base network architecture and dataset under evaluation.
DNN = 'ViT'
DATASET = CIFAR10_NAME
# Split-variant suffix: '-pat' or '' for healthcare datasets.
suffix = ''
# Number of classes for the chosen dataset, as reported by the dataloader module.
NCLASS = dld.get_nclasses(DATASET)

In [4]:
# Number of folds in cross-validation for bandwidth selection; handed to the pipeline
# through cal_kwargs={'fit_bw_Fold': ...} when calibrated predictions are requested.
fit_bw_Fold = 20


def _fixed_split(val, test):
    """Return a ``seed -> resplit spec`` function with fixed 'val' / 'test' entries.

    The produced dict ``{"seed": seed, 'val': val, 'test': test}`` is what gets passed
    as ``resplit_seed`` to the pipeline.
    NOTE(review): the meaning of 'val' / 'test' (patient counts vs. percentages) is
    decided inside the pipeline and is not visible here -- confirm before changing.
    """
    def _make_split(seed):
        return {"seed": seed, 'val': val, 'test': test}
    return _make_split


# Resolve, for the selected DATASET / suffix:
#   _make_split : maps a seed to the `resplit_seed` argument of the pipeline
#   datakwargs  : dataset keyword arguments forwarded to the pipeline
#   fit_bw_Fold : possibly overridden fold count (see ISRUC-pat below)
if DATASET in {CIFAR10_NAME, CIFAR100_NAME, SVHN_NAME}:
    # Image datasets: the resplit spec is the bare seed; `suffix` is not consulted.
    def _make_split(seed):
        return seed
    datakwargs = {'resize_for_pretrained_model': True}
elif suffix == '-pat':
    if DATASET == IIICSup_NAME:
        _make_split = _fixed_split(val=20, test=80)
        datakwargs = {'majority_only': True}
    else:
        # Only IIICSup and ISRUC have a '-pat' configuration in this notebook.
        assert DATASET == ISRUC_NAME, (
            f"suffix '-pat' is only configured for {IIICSup_NAME!r} and {ISRUC_NAME!r}, "
            f"got DATASET={DATASET!r}"
        )
        _make_split = _fixed_split(val=6, test=24)
        # This is 6 only for ISRUC-pat as it contains only 6 patients in the calibration set.
        fit_bw_Fold = 6
        datakwargs = {}
else:
    assert suffix == '', f"suffix must be '' or '-pat', got {suffix!r}"
    _make_split = _fixed_split(val=5, test=95)
    if DATASET == IIICSup_NAME:
        datakwargs = {'majority_only': True, 'iid': True}
    else:
        datakwargs = {'iid': True}

In [3]:
# Look up the stored keys for the selected (dataset, architecture) pair.
# `key` is passed to the pipeline as the trained base DNN key.
key = _settings._TRAINED_KEYS[DATASET, DNN]
# `kernelkey` is passed as the "key" of both the trained kernel and the trained projection.
kernelkey = _settings._KERNEL_KEYS[DATASET, DNN]

# Uncalibrated

In [5]:
# Uncalibrated baseline: evaluate the base model's predictions on 10 resplits (seeds 0-9).
uncal_res = {}
for seed in range(10):
    # With baseline='uncal' the pipeline result is unpacked into two values that are
    # handed straight to the evaluation routine (predictions first, labels second).
    tP_, tY_ = pipeline.main.get_calibrated_preds(
        key, DATASET, dld.VALID, dld.TEST, datakwargs,
        baseline='uncal',
        resplit_seed=_make_split(seed),
    )
    uncal_res[seed] = peval.routine(tP_, tY_, nbins=NBINS)
# After the transpose there is one row per seed and one column per metric.
# Select the summary rows by label rather than by position, so the displayed table
# cannot silently switch to other statistics if the describe() layout differs.
pd.DataFrame(uncal_res).T.describe().loc[['mean', 'std']]

Unnamed: 0,ece,ece_adapt,acc,cecet,cecet_adapt,brier_top1,brier,SKCE,KCE,NLLTorch
mean,9.168949,9.152733,98.94,3.42039,3.193899,0.017551,0.002708,0.122841,9.178768,0.124184
std,0.046673,0.048856,0.052915,0.013774,0.008351,0.000342,7.3e-05,0.006669,0.046179,0.001407


# KCal

In [6]:
# KCal: evaluate kernel-calibrated predictions on the same 10 resplits (seeds 0-9).
# The trained kernel and the trained projection are both addressed by `kernelkey`.
tkcalp_res = {}
for seed in range(10):
    trained_kwargs = {"key": kernelkey, 'dataset': DATASET}
    res_df_ = pipeline.main.get_calibrated_preds(
        key, DATASET,
        datakwargs=datakwargs,
        kernel_name='trained', kernel_kwargs=dict(trained_kwargs),
        proj_name='trained', proj_kwargs=dict(trained_kwargs),
        cal_kwargs={'fit_bw_Fold': fit_bw_Fold, 'fit_loss': 'log'},
        resplit_seed=_make_split(seed),
    )
    # Here the pipeline result is used as a DataFrame: the first NCLASS columns are taken
    # as the calibrated predictions and the 'y' column as the labels.
    tP_kercal = res_df_.iloc[:, :NCLASS].values
    tkcalp_res[seed] = peval.routine(tP_kercal, tY=res_df_['y'].values, nbins=NBINS)
# After the transpose there is one row per seed and one column per metric.
# Select the summary rows by label rather than by position, so the displayed table
# cannot silently switch to other statistics if the describe() layout differs.
pd.DataFrame(tkcalp_res).T.describe().loc[['mean', 'std']]

Unnamed: 0,ece,ece_adapt,acc,cecet,cecet_adapt,brier_top1,brier,SKCE,KCE,NLLTorch
mean,0.408738,0.396804,98.984,1.184326,0.737753,0.007497,0.001543,0.149565,0.770383,0.033477
std,0.10423,0.053051,0.087458,0.077218,0.071671,0.000494,0.000105,0.007519,0.073389,0.001874
