# Confidence Calibration

In our scoring application, we use the confidence (probability) of the positive class as the score of a sample. Since deep models tend to be overconfident, the raw positive-class probability does not give a reasonable score directly.  
Following [On Calibration of Modern Neural Networks](https://arxiv.org/pdf/1706.04599.pdf), we calibrate the network's confidence by minimizing the NLL of the correct class with respect to a single temperature parameter.  
The implementation uses [temperature_scaling](https://github.com/gpleiss/temperature_scaling).  

In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import os
import sys

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from sklearn import metrics
from matplotlib import pyplot as plt

from temperature_scaling import ModelWithTemperature

from dataset import *
from resnet import *
from classifier import *

In [2]:
# consts
# Identify which training run / epoch to calibrate; together they select the
# checkpoint file ./models/<TRAIN_NAME>_<TRAIN_ID>/<EPOCH as 3 digits>.pth.
TRAIN_NAME = 'uda'
TRAIN_ID = '13'
EPOCH = 99

# data consts
# Root directory of the calibration dataset. The historical location is kept as
# the default, but it can be overridden without editing the notebook by setting
# the CONFI_CALI_ROOT environment variable before starting the kernel.
ROOT_PATH = os.environ.get('CONFI_CALI_ROOT', '/home/xd/data/chromo/confi-cali/')
NUM_CLASSES = 2  # foreground + 1 background class
INPUT_SIZE = 512  # images are resized to INPUT_SIZE x INPUT_SIZE
BATCH_SIZE = 64
NUM_WORKERS = 16  # DataLoader worker processes

# trainer consts
DEVICE = 'cuda:0'  # device string handed to torch.device()


In [3]:
# val_loader
# Deterministic preprocessing: array/tensor -> PIL image, pad to a square,
# resize to the network input size, convert to a tensor and normalize with the
# mean/std values below.
val_trans = transforms.Compose([
    transforms.ToPILImage(),
    PadSquare(),
    transforms.Resize((INPUT_SIZE,) * 2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Dataset of '.png' images under ROOT_PATH, preprocessed with `val_trans`.
val_dataset = ConfiCaliDataset(ROOT_PATH, image_ext='.png', transform=val_trans)

# Fixed sample order (no shuffling); pinned memory for the host-to-device copy.
val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)

In [4]:
# load trained model
# Build the network, restore the weights of the selected training run/epoch,
# move the model to DEVICE and switch it to inference mode.
device = torch.device(DEVICE)

# NOTE(review): the checkpoint loaded below replaces the weights, so
# `pretrained=True` looks redundant here -- confirm against resnet.resnet50.
model = resnet50(pretrained=True, num_classes=NUM_CLASSES)

checkpoint_path = os.path.join(
    './models',
    '{}_{}'.format(TRAIN_NAME, TRAIN_ID),
    '{:0>3d}.pth'.format(EPOCH),
)
# Deserialize onto the CPU first; the model is moved to `device` afterwards.
cp_state_dict = torch.load(checkpoint_path, map_location='cpu')

# Checkpoints saved from a DataParallel-wrapped model carry a 'module.' prefix
# on their keys. Detect that exact prefix (not just the substring 'module'
# somewhere in the first key) and strip it only from the keys that have it, so
# parameter names that merely contain 'module' are never truncated and an empty
# state dict no longer raises IndexError before load_state_dict can report it.
dp_prefix = 'module.'
if any(key.startswith(dp_prefix) for key in cp_state_dict):
    new_state_dict = {
        (key[len(dp_prefix):] if key.startswith(dp_prefix) else key): value
        for key, value in cp_state_dict.items()
    }
    model.load_state_dict(new_state_dict)
else:
    model.load_state_dict(cp_state_dict)

model = model.to(device)
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [5]:
# calibration
# Wrap the trained network and fit its temperature on `val_loader`. The call
# prints NLL and ECE before and after the fit, plus the fitted temperature
# (see the cell output).
# NOTE(review): the recorded run reports an optimal temperature of ~208.8 and a
# post-calibration NLL of 0.693 (~= ln 2). With NUM_CLASSES = 2 this suggests
# the scaled probabilities are close to uniform (0.5 / 0.5), i.e. the resulting
# scores would be nearly uninformative -- verify the labels and data used for
# calibration before relying on this temperature.

scaled_model = ModelWithTemperature(model)
scaled_model.set_temperature(val_loader)

Before temperature - NLL: 1.498, ECE: 0.432
Optimal temperature: 208.753
After temperature - NLL: 0.693, ECE: 0.074


ModelWithTemperature(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(