In [1]:
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from utils.training import numpy_metrics

In [3]:
def softmax(x, axis=1):
    """Numerically stable softmax of `x` along `axis`.

    Parameters
    ----------
    x : array_like
        Unnormalised scores (logits).
    axis : int, default 1
        Axis along which the outputs are normalised to sum to one.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as `x` whose entries along `axis` are
        non-negative and sum to one.
    """
    x = np.asarray(x)
    # Softmax is invariant to adding a constant along `axis`; subtracting the
    # maximum keeps every exponent <= 0, so np.exp cannot overflow to inf
    # (which previously produced inf / inf = nan for large logits).
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)

In [4]:
def ece(outputs, labels, name="", bins=np.arange(0.1, 1.05, 0.05), print_name=False):
    """Compute and print the expected calibration error (ECE).

    The confidence of each prediction is the maximum of `outputs` along
    axis 1 and the predicted class is the corresponding argmax.  Predictions
    are grouped into confidence bins; ECE is the sum over bins of
    |mean confidence - accuracy| weighted by the fraction of predictions that
    fall into the bin.

    Parameters
    ----------
    outputs : array_like or None
        Per-class scores with the class dimension on axis 1.  If None, no
        value is computed and "-" is printed.
    labels : array_like
        Ground-truth class indices, comparable element-wise with
        `np.argmax(outputs, 1)`.
    name : str, default ""
        Label printed before the value when `print_name` is true.
    bins : sequence of float
        Bin edges.  Bins are half-open intervals (lower, upper], so a
        confidence equal to the lowest edge, or outside the edge range, is
        not counted.
        NOTE(review): the default is built with a float-step `np.arange`,
        whose last edge is subject to rounding; pass explicit edges
        (e.g. from `np.linspace`) if the exact end point matters.
    print_name : bool, default False
        Whether to print `name` followed by ": " before the value.

    Returns
    -------
    float or None
        The ECE, or None when `outputs` is None.  The value (or "-") is
        always printed, formatted to three decimals.
    """
    if outputs is None:
        ece_value = None
    else:
        confidences = np.max(outputs, 1)
        predictions = np.argmax(outputs, 1)
        accuracies = predictions == labels

        ece_value = 0.0
        for bin_lower, bin_upper in zip(bins[:-1], bins[1:]):
            # Half-open (lower, upper] bins: with strict inequalities on both
            # sides a confidence lying exactly on an edge (for instance a
            # saturated confidence of 1.0) belonged to no bin and was dropped.
            in_bin = (confidences > bin_lower) & (confidences <= bin_upper)
            prop_in_bin = in_bin.mean()
            if prop_in_bin > 0:
                accuracy_in_bin = accuracies[in_bin].mean()
                avg_confidence_in_bin = confidences[in_bin].mean()
                # |confidence - accuracy| weighted by the bin's share of samples.
                ece_value += np.abs(avg_confidence_in_bin - accuracy_in_bin) * prop_in_bin

    if print_name:
        print(name, end=": ")
    if ece_value is not None:
        print("%.3f" % ece_value)
    else:
        print("-")
    return ece_value

In [5]:
# Saved test-set prediction archives, grouped by method (one entry per run).
dropout_locs = [
    'test_preds/drop_v2/preds.npz',
    'test_preds/drop_v3/preds.npz',
]
swagdiag_locs = [
    'test_preds/swag_diag/preds.npz',
    'test_preds/swag_diag_v2/preds.npz',
    'test_preds/swag_diag_v3/preds.npz',
]
swag_locs = [
    'test_preds/swag_scale_05/preds.npz',
    'test_preds/swag_scale_05_v2/preds.npz',
    'test_preds/swag_scale_05_v3/preds.npz',
]
swa_locs = [
    'test_preds/sgd/swa_preds.npz',
    'test_preds/sgd/swa_1000_v2.npz',
    'test_preds/sgd/swa_1000_v3.npz',
]
sgd_locs = [
    'test_preds/sgd/sgd_preds.npz',
    'test_preds/sgd/sgd_1000_v2.npz',
    'test_preds/sgd/sgd_1000_v3.npz',
]


In [12]:
def compute_mIOU_acc(loc, output_probs = True):
    """Load saved predictions from `loc` and report accuracy, mIOU and ECE.

    Parameters
    ----------
    loc : str
        Path to an `.npz` archive containing a 'targets' array and either a
        'predictions' array (used when `output_probs` is true) or a 'preds'
        array (used otherwise).
    output_probs : bool, default True
        If true, 'predictions' is used as-is.  If false, 'preds' is passed
        through `softmax` along axis 1 first (presumably because it holds
        logits -- confirm against the script that wrote the archive).

    Returns
    -------
    tuple
        `(acc, mIOU, ece_val)`; the same values are printed together with
        `loc`.  `ece` additionally prints its own value.
    """
    # np.load on an .npz returns an NpzFile that keeps the archive open;
    # the context manager closes the handle once the arrays are read.
    with np.load(loc) as npz_arr:
        if output_probs:
            pred_probs = npz_arr['predictions']
        else:
            pred_probs = softmax(npz_arr['preds'], 1)
        targets = npz_arr['targets']

    # numpy_metrics is defined in utils.training; I and U are presumably the
    # per-class intersection and union counts -- TODO confirm.
    I, U, acc = numpy_metrics(pred_probs, targets)
    mIOU = np.mean(I / U)

    ece_val = ece(pred_probs, targets, print_name=False)
    print(acc, mIOU, ece_val, loc)
    return acc, mIOU, ece_val
    

In [13]:
# Report accuracy / mIOU / ECE for each dropout prediction archive.
for dropout_path in dropout_locs:
    compute_mIOU_acc(dropout_path)

0.040
0.8405242312840742 0.5430310970106214 0.040217998904142724 test_preds/drop_v2/preds.npz
0.040
0.8582369743529388 0.5730634901020127 0.040161222809092276 test_preds/drop_v3/preds.npz


In [15]:
# Report accuracy / mIOU / ECE for each SWAG-diagonal prediction archive.
for swagdiag_path in swagdiag_locs:
    compute_mIOU_acc(swagdiag_path)

0.085
0.90122923316984 0.5963490447443337 0.08527136570525475 test_preds/swag_diag/preds.npz
0.089
0.897311416764532 0.5836220246543136 0.0885331741435351 test_preds/swag_diag_v2/preds.npz
0.089
0.89395838742497 0.5794386135626851 0.08909606774016379 test_preds/swag_diag_v3/preds.npz


In [16]:
# Report accuracy / mIOU / ECE for each SWAG prediction archive.
for swag_path in swag_locs:
    compute_mIOU_acc(swag_path)

0.087
0.9012262630531142 0.5970193596489862 0.0873648097786202 test_preds/swag_scale_05/preds.npz
0.091
0.8968514360785554 0.5835723035447599 0.09120995396217654 test_preds/swag_scale_05_v2/preds.npz
0.091
0.894291608694508 0.5806040031650241 0.09107189907951052 test_preds/swag_scale_05_v3/preds.npz


In [18]:
# Report accuracy / mIOU / ECE for each SGD prediction archive.
# output_probs=False makes compute_mIOU_acc read the 'preds' entry and apply softmax.
# NOTE: the recorded run of this cell stopped with FileNotFoundError on
# 'test_preds/sgd/sgd_1000_v3.npz', so only the first two archives have results.
for sgd_path in sgd_locs:
    compute_mIOU_acc(sgd_path, output_probs=False)

0.056
0.9055111600586347 0.6170782768632335 0.05566461245871768 test_preds/sgd/sgd_preds.npz
0.081
0.9065664296177767 0.6324223625404213 0.08127851358270965 test_preds/sgd/sgd_1000_v2.npz


FileNotFoundError: [Errno 2] No such file or directory: 'test_preds/sgd/sgd_1000_v3.npz'

In [None]:
# Report accuracy / mIOU / ECE for each SWA prediction archive.
# output_probs=False makes compute_mIOU_acc read the 'preds' entry and apply softmax.
for swa_path in swa_locs:
    compute_mIOU_acc(swa_path, output_probs=False)