In [None]:
#!pip install -U --no-cache-dir gdown --pre

In [1]:
# Scratch arithmetic; the notebook does not say what 128 and 5040 stand for — TODO confirm or remove this cell.
128/5040

0.025396825396825397

In [None]:
import os
import json
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.image import imread


In [None]:
# Download data of the imagenet-resnet152 model and the human readable labels for the imagenet dataset

# Fetch and unpack the data archive only when ../data is missing.
# NOTE: the exit codes of these shell commands are not checked, so a failed
# download only shows up later when np.load / os.listdir cannot find the files.
if not os.path.exists('../data'):
    os.system('gdown 1h7S6N_Rx7gdfO3ZunzErZy6H7620EbZK -O ../data.tar.gz')
    os.system('tar -xf ../data.tar.gz -C ../')
    os.system('rm ../data.tar.gz')
# Fetch the human-readable class names once; `!wget` is an IPython shell escape,
# so this cell only runs inside a notebook/IPython kernel.
if not os.path.exists('../data/imagenet/human_readable_labels.json'):
    !wget -nv -O ../data/imagenet/human_readable_labels.json -L https://raw.githubusercontent.com/anishathalye/imagenet-simple-labels/master/imagenet-simple-labels.json

data = np.load('../data/imagenet/imagenet-resnet152.npz') # archive read below through its 'smx' and 'labels' entries
example_paths = os.listdir('../data/imagenet/examples') # file names of the example images
smx = data['smx'] # Softmax output of the model
labels = data['labels'].astype(int) # Real labels of the images, cast to int so they can be used as column indices


In [None]:
n = 1000 # Number of calibration points
confiable_value = 0.75 # Desired coverage level (this is 1 - alpha)
alpha = 1 - confiable_value # Miscoverage level; 1-alpha is the desired coverage

From experimenting, I found that values of $\alpha$ up to $0.25$ give a good estimate.

In [None]:
# Boolean mask with exactly n True entries (calibration rows) followed by False
# entries (validation rows), shuffled in place so the split is random.
# No seed is set here, so the split differs from run to run.
idx = np.array([True] * n + [False] * (smx.shape[0] - n), dtype=bool)
np.random.shuffle(idx)

# Calibration rows are where the mask is True; validation rows are the complement.
cal_smx = smx[idx, :]
val_smx = smx[~idx, :]
cal_labels = labels[idx]
val_labels = labels[~idx]


# Conformal Prediction

In [None]:
# Step 1: conformal score of each calibration point is one minus the softmax
# value the model assigns to that point's true label.
true_class_smx = cal_smx[np.arange(n), cal_labels]
cal_scores = 1 - true_class_smx

# Step 2: adjusted quantile level ceil((n+1)(1-alpha))/n; method='higher'
# returns an actual calibration score rather than an interpolated value.
rank = np.ceil((n + 1) * (1 - alpha))
q_level = rank / n
qhat = np.quantile(cal_scores, q_level, method='higher')

# Step 3: a class enters the prediction set when its softmax value is at least 1 - qhat.
prediction_sets = val_smx >= (1 - qhat)


In [None]:
# Fraction of validation points whose true label is inside their prediction set.
val_rows = np.arange(len(prediction_sets))
label_is_covered = prediction_sets[val_rows, val_labels]
empirical_coverage = label_is_covered.mean()
print(f"The empirical coverage is: {empirical_coverage}")


In [None]:
# Show ten random example images together with their conformal prediction sets.
with open('../data/imagenet/human_readable_labels.json') as f:
    label_strings = np.array(json.load(f))

example_paths =os.listdir('../data/imagenet/examples')
for i in range(10):
    rand_path = np.random.choice(example_paths)
    img = imread('../data/imagenet/examples/' + rand_path )
    # The file name stem is parsed as an integer and used as the row index into smx.
    img_index = int(rand_path.split('.')[0])
    # Use the same inclusive rule (>=) that defines `prediction_sets`, so a class whose
    # softmax value equals 1-qhat exactly is displayed as part of the set as well.
    prediction_set = smx[img_index] >= 1-qhat
    plt.figure()
    plt.imshow(img)
    plt.axis('off')
    plt.show()
    # Boolean mask over classes -> human-readable class names.
    print(f"The prediction set is: {list(label_strings[prediction_set])}")


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import beta, betabinom
from scipy.optimize import brentq
import itertools
import seaborn as sns

In [None]:
# Plot the Beta(n + 1 - l, l) density, with l = floor((n + 1) * alpha), for several
# calibration-set sizes n; the figure title labels this the distribution of coverage.
ns = [100, 1000, 10000]
alpha = 0.1

sns.set_palette('pastel')
plt.figure()
ax = plt.gca()

# The evaluation grid is the same for every n, so build it once.
x = np.linspace(0.825, 0.975, 1000)
for i, n in enumerate(ns):
  l = np.floor((n + 1) * alpha)
  a, b = n + 1 - l, l
  rv = beta(a, b)
  ax.plot(x, rv.pdf(x), lw=3, label=f'n={n}')

# Dashed reference line at the nominal level 1 - alpha.
ax.vlines(
  1 - alpha,
  ymin=0,
  ymax=150,
  color='#888888',
  linestyles='dashed',
  label=r'$1-\alpha$',
)
sns.despine(top=True, right=True)
plt.yticks([])
plt.legend()
plt.title('Distribution of coverage (infinite validation set)')
plt.tight_layout()
plt.show()

In [None]:
alpha = 0.1
epsilons = [0.1,0.05,0.01,0.005,0.001]
for epsilon in epsilons:
  def _condition(n):
    l = np.floor((n+1)*alpha)
    a = n + 1 - l
    b = l
    if (beta.ppf(0.05, a, b) < 1-alpha-epsilon) or (beta.ppf(0.95, a, b) > 1-alpha+epsilon):
      return -1
    else:
      return 1

  print(int(np.ceil(brentq(_condition,np.ceil(1/alpha),100000000000))))

In [None]:
import os
import json
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.image import imread
from scipy.optimize import brentq
!pip install -U --no-cache-dir gdown --pre

In [None]:
# Fetch and unpack the data archive when ../data is missing, then read the COCO arrays.
if not os.path.exists('../data'):
    for shell_command in (
        'gdown 1h7S6N_Rx7gdfO3ZunzErZy6H7620EbZK -O ../data.tar.gz',
        'tar -xf ../data.tar.gz -C ../',
        'rm ../data.tar.gz',
    ):
        # Exit codes are not inspected, matching the best-effort download used elsewhere in this notebook.
        os.system(shell_command)

data = np.load('../data/coco/coco-tresnetxl.npz')
example_paths = os.listdir('../data/coco/examples')

# 'sgmd' holds the sigmoid scores; 'labels' and 'example_indexes' are read as stored.
sgmd, labels, example_indexes = (
    data[key] for key in ('sgmd', 'labels', 'example_indexes')
)

In [None]:
# Problem setup
n=4952  # number of calibration points
alpha = 0.05 # target level for the false negative rate (the calibration below aims at an FNR of about alpha, not 1-alpha)

def false_negative_rate(prediction_set, gt_labels):
    """Return one minus the mean per-row fraction of true labels that were predicted.

    Both arguments are multiplied elementwise and summed along axis 1, so they
    are expected to be equally shaped 2-D arrays (rows = examples).
    A row of `gt_labels` that sums to zero produces a division by zero.
    """
    hits_per_row = np.sum(prediction_set * gt_labels, axis=1)
    positives_per_row = np.sum(gt_labels, axis=1)
    recall_per_row = hits_per_row / positives_per_row
    return 1 - np.mean(recall_per_row)

In [None]:
# Randomly split the sigmoid scores and labels into calibration and validation sets.
# The mask has exactly n True entries (calibration); it is shuffled in place and
# no seed is set here, so the split differs from run to run.
idx = np.array([True] * n + [False] * (sgmd.shape[0] - n), dtype=bool)
np.random.shuffle(idx)

cal_sgmd = sgmd[idx, :]
val_sgmd = sgmd[~idx, :]
cal_labels = labels[idx]
val_labels = labels[~idx]

In [None]:
# Run the conformal risk control procedure
def lamhat_threshold(lam):
    """Return the calibration FNR at threshold `lam` minus the adjusted risk level.

    Conformal risk control for a loss bounded by 1 requires
    n/(n+1) * FNR_cal(lam) + 1/(n+1) <= alpha, i.e.
    FNR_cal(lam) <= (n+1)/n * alpha - 1/n.
    The correction term is therefore 1/n (not 1/(n+1)), which also matches the
    threshold used in the polyp-segmentation section of this notebook.
    """
    calibration_fnr = false_negative_rate(cal_sgmd>=lam, cal_labels)
    adjusted_level = (n+1)/n*alpha - 1/n
    return calibration_fnr - adjusted_level

# Find the threshold in [0, 1] where the calibration FNR crosses the adjusted level.
lamhat = brentq(lamhat_threshold, 0, 1)
# A label is predicted when its sigmoid score is at least lamhat.
prediction_sets = val_sgmd >= lamhat

In [None]:
# Report the empirical FNR of the prediction sets on the validation split,
# together with the calibrated threshold lamhat.
print(f"The empirical FNR is: {false_negative_rate(prediction_sets, val_labels)} and the threshold value is: {lamhat}")

In [None]:
# Show ten random example images together with their predicted label sets.
label_strings = np.load('../data/coco/human_readable_labels.npy')

example_paths =os.listdir('../data/coco/examples')
for i in range(10):
    rand_path = np.random.choice(example_paths)
    img = imread('../data/coco/examples/' + rand_path )
    # The file name stem is parsed as an integer and used as the row index into sgmd.
    img_index = int(rand_path.split('.')[0])
    # lamhat is a threshold on the sigmoid scores themselves: the calibrated sets are
    # `val_sgmd >= lamhat`, so the displayed set must use the same rule
    # (not the `> 1 - threshold` form used for softmax conformal scores).
    prediction_set = sgmd[img_index] >= lamhat
    plt.figure()
    plt.imshow(img)
    plt.axis('off')
    plt.show()
    # Boolean mask over labels -> human-readable label names.
    print(f"The prediction set is: {list(label_strings[prediction_set])}")

In [None]:
import os
import json
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.image import imread
from scipy.optimize import brentq
from skimage.transform import rescale, resize
!pip install -U --no-cache-dir gdown --pre

In [None]:
# Fetch and unpack the data archive when ../data is missing, then read the polyp arrays.
if not os.path.exists('../data'):
    for shell_command in (
        'gdown 1h7S6N_Rx7gdfO3ZunzErZy6H7620EbZK -O ../data.tar.gz',
        'tar -xf ../data.tar.gz -C ../',
        'rm ../data.tar.gz',
    ):
        # Exit codes are not inspected, matching the best-effort download used elsewhere in this notebook.
        os.system(shell_command)

data = np.load('../data/polyps/polyps-pranet.npz')
example_paths = os.listdir('../data/polyps/examples')

# 'sgmd' holds the sigmoid scores; the 'targets' entry is used as the ground-truth masks.
sgmd, gt_masks, example_indexes = (
    data[key] for key in ('sgmd', 'targets', 'example_indexes')
)

In [None]:
# Problem setup
n=500 # number of calibration points
alpha = 0.1 # target level for the false negative rate (the calibration below aims at an FNR of about alpha, not 1-alpha)

def false_negative_rate(pred_masks, true_masks):
    """Return one minus the mean per-mask fraction of true pixels that were predicted.

    Both arguments are multiplied elementwise and reduced over axis 1 twice, so
    they are expected to be equally shaped 3-D arrays (first axis = examples).
    A ground-truth mask that sums to zero produces a division by zero.
    """
    overlap = pred_masks * true_masks
    hits_per_mask = overlap.sum(axis=1).sum(axis=1)
    positives_per_mask = true_masks.sum(axis=1).sum(axis=1)
    recall_per_mask = hits_per_mask / positives_per_mask
    return 1 - recall_per_mask.mean()

In [None]:
# Randomly split the sigmoid scores and ground-truth masks into calibration and
# validation sets. The mask has exactly n True entries (calibration); it is shuffled
# in place and no seed is set here, so the split differs from run to run.
idx = np.array([True] * n + [False] * (sgmd.shape[0] - n), dtype=bool)
np.random.shuffle(idx)

cal_sgmd = sgmd[idx, :]
val_sgmd = sgmd[~idx, :]
cal_gt_masks = gt_masks[idx]
val_gt_masks = gt_masks[~idx]

In [None]:
# Conformal risk control: calibrate the mask threshold on the calibration split.
def lamhat_threshold(lam):
    """Return the calibration FNR at threshold `lam` minus the adjusted level (n+1)/n*alpha - 1/n."""
    calibration_fnr = false_negative_rate(cal_sgmd >= lam, cal_gt_masks)
    adjusted_level = (n + 1) / n * alpha - 1 / n
    return calibration_fnr - adjusted_level

# Find the threshold in [0, 1] where the calibration FNR crosses the adjusted level.
lamhat = brentq(lamhat_threshold, 0, 1)
# A pixel is predicted when its sigmoid score is at least lamhat.
predicted_masks = val_sgmd >= lamhat

In [None]:
# Report the empirical FNR of the predicted masks on the validation split,
# together with the calibrated threshold lamhat.
print(f"The empirical FNR is: {false_negative_rate(predicted_masks, val_gt_masks)} and the threshold value is: {lamhat}")

In [None]:
# Show ten random examples: input image, predicted mask and ground-truth mask side by side.
for i in range(10):
    rand_idx = np.random.choice(example_indexes)
    img = imread('../data/polyps/examples/' + str(rand_idx) + '.jpg')
    gt_mask = imread('../data/polyps/examples/' + str(rand_idx) + '_gt_mask.jpg')
    # Threshold with the same inclusive rule (>=) that defines `predicted_masks`, then
    # resize the mask to the image's height and width for display.
    predicted_mask = resize(sgmd[rand_idx] >= lamhat, (img.shape[0],img.shape[1]), anti_aliasing=False)
    fig, axs = plt.subplots(1,3,figsize=(8.64,4.76))
    axs[0].imshow(img)
    axs[0].axis('off')
    axs[1].imshow(predicted_mask, cmap='gray')
    axs[1].axis('off')
    axs[2].imshow(gt_mask, cmap='gray')
    axs[2].axis('off')
    # Column titles are only drawn on the first row of figures.
    if i == 0:
        axs[0].set_title('input')
        axs[1].set_title('predicted mask')
        axs[2].set_title('ground truth mask')
    plt.show()

In [1]:
def calculador_erro(nominal, medido):
    """Return the signed error of `medido` relative to `nominal`, in percent.

    Computed as (medido - nominal) / nominal * 100, so the result is negative
    when the measured value is below the nominal one. A nominal value of 0
    leads to a division by zero.
    """
    relative_deviation = (medido - nominal) / nominal
    return relative_deviation * 100

# Example: percentage error of a measured value against its nominal value.
nominal, medido = 10.1, 9.96
erro = calculador_erro(nominal=nominal, medido=medido)
print(f"Erro: {erro:.2f}%")

Erro: -1.39%
