# Confidence Calibration

In our scoring application, we use the confidence (predicted probability) of the positive class as the score of a sample. Since deep models tend to be overconfident, the raw positive-class probability does not give a reasonable score directly.  
Following [On Calibration of Modern Neural Networks](https://arxiv.org/pdf/1706.04599.pdf), we calibrate the network's confidence by minimizing the NLL of the correct class with respect to a single temperature parameter.  
The implementation is based on [temperature_scaling](https://github.com/gpleiss/temperature_scaling).  

In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import os
import sys
import time

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from sklearn import metrics
from matplotlib import pyplot as plt

from temperature_scaling import ModelWithTemperature

from dataset import *
from resnet import *
from classifier import *

In [2]:
# consts
# Run identification: TRAIN_NAME, TRAIN_ID and EPOCH together select the
# checkpoint to load ('./models/<TRAIN_NAME>_<TRAIN_ID>/<EPOCH, zero-padded to 3>.pth')
# and name the calibrated output file ('<TRAIN_NAME>-<TRAIN_ID>_<EPOCH>_cc.pth').
TRAIN_NAME = 'merge'
TRAIN_ID = '03'
EPOCH = 1

# data consts
# Previously used data root, kept for reference:
# ROOT_PATH = '/home/xd/data/chromo/confi-cali/'
# Root directory handed to the validation dataset (machine-specific absolute path).
ROOT_PATH = '/media/nvme/chromo-merge-class/original'
NUM_CLASSES = 2 # foreground + 1 background class; passed to resnet50(num_classes=...)
# Images are resized to INPUT_SIZE x INPUT_SIZE by the validation transform.
INPUT_SIZE = 512
# Batch size and worker-process count for the validation DataLoader.
BATCH_SIZE = 64
NUM_WORKERS = 16

# trainer consts
# Device string given to torch.device(); the model is moved onto it before calibration.
DEVICE = 'cuda:0'

# calibrator consts
# Passed as `lr` to ModelWithTemperature; presumably the step size of the
# temperature optimizer - TODO confirm against the local temperature_scaling module.
LR = 1e-4

In [3]:
# Validation data: preprocessing transform -> dataset -> loader.

# Preprocessing applied to each sample, in order: convert to a PIL image,
# apply the project's PadSquare transform, resize to INPUT_SIZE x INPUT_SIZE,
# convert to a tensor and normalize per channel (the mean/std values match the
# standard ImageNet channel statistics).
val_trans = transforms.Compose([
    transforms.ToPILImage(),
    PadSquare(),
    transforms.Resize((INPUT_SIZE, INPUT_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Alternative data source, currently disabled:
#   val_dataset = ConfiCaliDataset(
#       ROOT_PATH,
#       image_ext='.png',
#       transform=val_trans
#   )

# Dataset used for calibration, built in non-training mode.
val_dataset = ChromoDataset(ROOT_PATH, training=False, image_ext='.png', transform=val_trans)

# Batches are served in dataset order (no shuffling, no custom batch sampler).
val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    # batch_sampler=val_sampler,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)

In [4]:
# load trained model
# Builds the network, restores the weights of the selected training checkpoint
# and puts the model on DEVICE in evaluation mode.
device = torch.device(DEVICE)

# NOTE(review): pretrained=True is kept as-is, but every tensor is replaced by
# the load_state_dict() call below (strict by default for nn.Module), so the
# pretrained initialisation is presumably redundant here - confirm against the
# project's resnet module before switching it off.
model = resnet50(pretrained=True, num_classes=NUM_CLASSES)

checkpoint_path = os.path.join(
    './models',
    '{}_{}'.format(TRAIN_NAME, TRAIN_ID),
    '{:0>3d}.pth'.format(EPOCH)
)
# NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
# The checkpoint is mapped to CPU first and moved to the target device afterwards.
cp_state_dict = torch.load(checkpoint_path, map_location='cpu')

# Checkpoints saved from a model wrapped in nn.DataParallel (or
# DistributedDataParallel) carry a leading 'module.' on every key. Strip that
# prefix per key, and only when it is really the leading component; keys
# without the prefix are passed through unchanged. (A plain substring test for
# 'module' on the first key would also fire on keys such as
# 'features.module.0.weight' and then drop the wrong component.)
new_state_dict = {
    (key[len('module.'):] if key.startswith('module.') else key): value
    for key, value in cp_state_dict.items()
}
model.load_state_dict(new_state_dict)

model = model.to(device)
# Evaluation mode: calibration must see inference-time behaviour of the network.
model.eval()

print('loaded from: {}'.format(checkpoint_path))

loaded from: ./models/merge_03/001.pth


In [5]:
# calibration
# Wrap the trained model with a temperature parameter and tune that
# temperature on the validation loader, timing the whole step.
# time.perf_counter() is used instead of time.time(): it is monotonic, so the
# measured duration cannot be distorted by system clock adjustments.
tik = time.perf_counter()

scaled_model = ModelWithTemperature(model, lr=LR)
scaled_model.set_temperature(val_loader)

tok = time.perf_counter()
# Elapsed time in seconds.
print('calibration duration: {}'.format(tok - tik))

Before temperature - NLL: 0.203, ECE: 0.014
Optimal temperature: 1.188
After temperature - NLL: 0.199, ECE: 0.008
calibration duration: 143.56323075294495


In [6]:
# Persist the calibrated model's state dict. The file is written relative to
# the current working directory and named after the training run and epoch.
save_path = '{}-{}_{}_cc.pth'.format(TRAIN_NAME, TRAIN_ID, EPOCH)

calibrated_state = scaled_model.state_dict()
torch.save(calibrated_state, save_path)

print('saved to: {}'.format(save_path))

saved to: merge-03_1_cc.pth
