In [1]:
import os
import math
import itertools
import numpy as np
import pandas as pd
# PyTorch
import torch
# Matplotlib
import matplotlib.pyplot as plt
plt.rcParams.update({'font.size': 12})

In [2]:
import sys
sys.path.append('../src/')
%load_ext autoreload
%autoreload 2
# Importing our custom module(s)
import metrics
import models
import priors
import utils

In [3]:
# Root of the repository checkout. The default is the original cluster
# location; set EXTRAPOLATION_REPO_PATH to run this notebook against another
# checkout without editing the cell.
repo_path = os.environ.get(
    'EXTRAPOLATION_REPO_PATH',
    '/cluster/home/eharve06/extrapolating-classifier-accuracy-to-bigger-datasets',
)
# Directory the experiment CSV files are loaded from.
experiments_path = os.path.join(repo_path, 'experiments')
# Directories the saved '.pt' model files are loaded from. The second one
# presumably holds models fitted without priors (going by its name) -- confirm.
models_path = os.path.join(repo_path, 'models')
models_without_priors_path = os.path.join(repo_path, 'models_without_priors')

In [4]:
def grouped_mean_auroc(df, group_size=3, random_state=None):
    """Shuffle the rows of ``df`` and average ``test_auroc`` within groups.

    The rows are shuffled, cut into consecutive chunks of ``group_size`` rows
    and the ``test_auroc`` entries of every chunk are averaged element-wise.
    The caller's DataFrame is not modified.

    Args:
        df: DataFrame with a ``test_auroc`` column. All entries must convert
            to arrays of one common shape (a scalar, or e.g. a sequence with
            one value per label).
        group_size: Number of rows averaged per group. Defaults to 3.
        random_state: Forwarded to ``DataFrame.sample``. Pass an int to make
            the shuffle reproducible; ``None`` (default) leaves it unseeded.

    Returns:
        numpy.ndarray whose first axis indexes the groups; any remaining axes
        follow the shape of a single ``test_auroc`` entry.

    Raises:
        ValueError: If ``group_size`` is smaller than 1 or ``df`` has no rows.

    Note:
        When the number of rows is not a multiple of ``group_size`` the last
        group is averaged over the rows it actually contains. Pass a frame
        whose length is a multiple of ``group_size`` if every mean must be
        based on the same number of rows.
    """
    if group_size < 1:
        raise ValueError('group_size must be a positive integer, got {}'.format(group_size))
    if len(df) == 0:
        raise ValueError('df has no rows to group')
    shuffled = df.sample(frac=1.0, random_state=random_state)
    # One entry per row; an entry may itself be a per-label sequence.
    test_aurocs = np.array(shuffled.test_auroc.tolist())
    # Average chunk by chunk instead of stacking all groups into one
    # rectangular array, so a short trailing group cannot break the stacking.
    group_starts = range(0, len(test_aurocs), group_size)
    mean_test_aurocs = np.array(
        [test_aurocs[start:start + group_size].mean(axis=0) for start in group_starts]
    )
    return mean_test_aurocs

def coverage_ninety_five(model_objects, size, test_auroc, lower_percentile=0.1, upper_percentile=0.9):
    """Coverage of a GP model's predictive interval at ``size``.

    The GP model and its likelihood are evaluated at ``size``; the predictive
    mean and standard deviation are turned into lower/upper bounds with
    ``priors.truncated_normal_uncertainty`` (truncation limits 0.0 and 1.0),
    and ``metrics.coverage`` is applied to ``test_auroc`` and those bounds.

    Args:
        model_objects: Sequence whose first item is the model. For
            ``models.GPPowerLaw`` / ``models.GPArctan`` exactly one further
            item, the likelihood, is expected.
        size: Value the model is evaluated at (repeated 100 times as input).
        test_auroc: Observed test AUROCs, passed on to ``metrics.coverage``.
        lower_percentile: Fifth positional argument handed to
            ``priors.truncated_normal_uncertainty``. Defaults to 0.1.
        upper_percentile: Sixth positional argument handed to
            ``priors.truncated_normal_uncertainty``. Defaults to 0.9.

    Returns:
        The result of ``metrics.coverage`` for the two GP model types, or
        ``None`` for any other model type (no coverage is computed for them).
    """
    model, *likelihood_objects = model_objects
    # Exact type match on purpose: only these two model classes are handled.
    if type(model) not in (models.GPPowerLaw, models.GPArctan):
        return None
    likelihood, = likelihood_objects
    with torch.no_grad():
        # NOTE(review): all 100 inputs are identical, so the 100 predictions
        # should be too -- presumably sized for metrics.coverage; confirm.
        predictions = likelihood(model(size*torch.ones(100)))
    loc = predictions.mean.numpy()
    scale = predictions.stddev.numpy()
    # NOTE(review): the function name and the printed label say "95%", yet the
    # defaults forwarded here are 0.1 / 0.9. If these are the interval's
    # quantile levels this is an 80% interval -- confirm against
    # priors.truncated_normal_uncertainty before quoting the numbers as 95%.
    lower, upper = priors.truncated_normal_uncertainty(0.0, 1.0, loc, scale, lower_percentile, upper_percentile)
    return metrics.coverage(test_auroc, lower, upper)

In [5]:
def coverage_rates(experiments_path, dataset_name, label_name, models_path, model_name, n_resamples=500):
    """Print mean and std of the interval coverage at 5k, 10k and 20k.

    Loads '<dataset_name>_long_range.csv', averages ``test_auroc`` over the
    rows sharing the same ``n``, and uses the result to load the saved model
    '<dataset_name>_<label_name>_<model_name>.pt' from ``models_path``. The
    coverage is then computed ``n_resamples`` times on freshly shuffled groups
    of the '<dataset_name>_5k.csv', '_10k.csv' and '_20k.csv' experiments, and
    one summary line per size is printed.

    Args:
        experiments_path: Directory containing the experiment CSV files.
        dataset_name: Prefix of the CSV and model filenames.
        label_name: Label to evaluate; must be one of the names listed in
            ``labels`` below. Its position inside its group selects the
            column of ``test_auroc`` that is evaluated.
        models_path: Directory containing the saved '.pt' model files.
        model_name: Model identifier passed to ``utils.load_model`` and used
            in the model filename.
        n_resamples: Number of shuffle-and-evaluate repetitions. Defaults to
            500.

    Returns:
        None. The model filename and the summary lines are printed.

    Raises:
        AssertionError: If ``label_name`` is not a known label.
        ValueError: If no coverage can be computed for the loaded model
            (``coverage_ninety_five`` returned ``None``).

    Note:
        ``grouped_mean_auroc`` is called without a seed, so the printed
        numbers vary slightly between runs.
    """
    # Get label index
    label_index = None
    labels = [['Atelectasis', 'Effusion', 'Infiltration'], ['Bacterial', 'Viral'],
              ['Normal', 'Benign', 'Malignant'], ['PLAX', 'PSAX', 'A4C', 'A2C'],
              ['Alzheimer’s'], ['WMD', 'CBI']]
    for label_group in labels:
        if label_name in label_group:
            label_index = label_group.index(label_name)
            break  # label names are unique across the groups above
    assert label_index is not None, 'label not found in labels_list'

    filename = '{}_long_range.csv'.format(dataset_name)
    df = utils.load_experiment(os.path.join(experiments_path, filename))
    # Take mean of each random seed at each dataset size
    df = df.groupby('n').agg(lambda x: list(x))
    df.test_auroc = df.test_auroc.apply(lambda x: np.mean(x, axis=0))
    df.random_state = df.random_state.apply(lambda x: 'mean')
    df = df.reset_index()

    sizes = [5000, 10000, 20000]
    # '<dataset>_5k.csv', '<dataset>_10k.csv', '<dataset>_20k.csv'
    size_dfs = [
        utils.load_experiment(
            os.path.join(experiments_path, '{}_{}k.csv'.format(dataset_name, size // 1000))
        )
        for size in sizes
    ]

    X_train, y_train, _, _ = utils.split_df(df, index=label_index)
    model_filename = '{}_{}_{}.pt'.format(dataset_name, label_name, model_name)
    print(model_filename)
    model_filepath = os.path.join(models_path, model_filename)
    model_objects = utils.load_model(model_name, model_filepath, X_train, y_train)

    coverages = {size: [] for size in sizes}
    for _ in range(n_resamples):
        # Reshuffle every experiment before evaluating any of them.
        test_aurocs = [grouped_mean_auroc(size_df) for size_df in size_dfs]
        for size, test_auroc in zip(sizes, test_aurocs):
            coverage_95 = coverage_ninety_five(model_objects, size, test_auroc[:, label_index])
            if coverage_95 is None:
                # Fail right away instead of letting None reach np.mean below.
                raise ValueError('no coverage available for model_name={!r}'.format(model_name))
            coverages[size].append(coverage_95)
    for size in sizes:
        mean_coverage = 100 * np.mean(coverages[size])
        std_coverage = 100 * np.std(coverages[size])
        # Raw string: '\p' and '\%' are not valid escape sequences.
        print(r'{} 95% coverage at {}k: ${:.1f}\pm{:.1f}\%$'.format(label_name, size // 1000, mean_coverage, std_coverage))

In [6]:
# ChestX-ray14 / Atelectasis: both GP models from models_path, then the
# GP power law stored under models_without_priors_path.
for checkpoint_dir, gp_name in [
    (models_path, 'GPPowerLaw'),
    (models_path, 'GPArctan'),
    (models_without_priors_path, 'GPPowerLaw'),
]:
    coverage_rates(experiments_path, 'ChestX-ray14', 'Atelectasis', checkpoint_dir, gp_name)

ChestX-ray14_Atelectasis_GPPowerLaw.pt
Atelectasis 95% coverage at 5k: $100.0\pm0.0\%$
Atelectasis 95% coverage at 10k: $100.0\pm0.1\%$
Atelectasis 95% coverage at 20k: $99.9\pm0.2\%$
ChestX-ray14_Atelectasis_GPArctan.pt
Atelectasis 95% coverage at 5k: $97.9\pm1.2\%$
Atelectasis 95% coverage at 10k: $77.7\pm2.7\%$
Atelectasis 95% coverage at 20k: $29.6\pm2.9\%$
ChestX-ray14_Atelectasis_GPPowerLaw.pt
Atelectasis 95% coverage at 5k: $80.4\pm2.7\%$
Atelectasis 95% coverage at 10k: $39.6\pm2.9\%$
Atelectasis 95% coverage at 20k: $12.1\pm2.4\%$


In [7]:
# ChestX-ray14 / Effusion: both GP models from models_path, then the
# GP power law stored under models_without_priors_path.
for checkpoint_dir, gp_name in [
    (models_path, 'GPPowerLaw'),
    (models_path, 'GPArctan'),
    (models_without_priors_path, 'GPPowerLaw'),
]:
    coverage_rates(experiments_path, 'ChestX-ray14', 'Effusion', checkpoint_dir, gp_name)

ChestX-ray14_Effusion_GPPowerLaw.pt
Effusion 95% coverage at 5k: $67.5\pm2.7\%$
Effusion 95% coverage at 10k: $9.0\pm2.1\%$
Effusion 95% coverage at 20k: $0.1\pm0.3\%$
ChestX-ray14_Effusion_GPArctan.pt
Effusion 95% coverage at 5k: $61.9\pm3.0\%$
Effusion 95% coverage at 10k: $6.9\pm1.9\%$
Effusion 95% coverage at 20k: $0.1\pm0.2\%$
ChestX-ray14_Effusion_GPPowerLaw.pt
Effusion 95% coverage at 5k: $0.0\pm0.1\%$
Effusion 95% coverage at 10k: $0.0\pm0.0\%$
Effusion 95% coverage at 20k: $0.0\pm0.0\%$


In [8]:
# ChestX-ray14 / Infiltration: both GP models from models_path, then the
# GP power law stored under models_without_priors_path.
for checkpoint_dir, gp_name in [
    (models_path, 'GPPowerLaw'),
    (models_path, 'GPArctan'),
    (models_without_priors_path, 'GPPowerLaw'),
]:
    coverage_rates(experiments_path, 'ChestX-ray14', 'Infiltration', checkpoint_dir, gp_name)

ChestX-ray14_Infiltration_GPPowerLaw.pt
Infiltration 95% coverage at 5k: $99.4\pm0.8\%$
Infiltration 95% coverage at 10k: $99.1\pm0.9\%$
Infiltration 95% coverage at 20k: $97.8\pm1.3\%$
ChestX-ray14_Infiltration_GPArctan.pt
Infiltration 95% coverage at 5k: $0.0\pm0.0\%$
Infiltration 95% coverage at 10k: $0.0\pm0.0\%$
Infiltration 95% coverage at 20k: $0.0\pm0.0\%$
ChestX-ray14_Infiltration_GPPowerLaw.pt
Infiltration 95% coverage at 5k: $44.6\pm2.9\%$
Infiltration 95% coverage at 10k: $13.1\pm2.4\%$
Infiltration 95% coverage at 20k: $1.0\pm1.0\%$


In [9]:
# TMED-2 / PLAX: both GP models from models_path, then the GP power law
# stored under models_without_priors_path.
for checkpoint_dir, gp_name in [
    (models_path, 'GPPowerLaw'),
    (models_path, 'GPArctan'),
    (models_without_priors_path, 'GPPowerLaw'),
]:
    coverage_rates(experiments_path, 'TMED-2', 'PLAX', checkpoint_dir, gp_name)

TMED-2_PLAX_GPPowerLaw.pt
PLAX 95% coverage at 5k: $100.0\pm0.1\%$
PLAX 95% coverage at 10k: $100.0\pm0.0\%$
PLAX 95% coverage at 20k: $100.0\pm0.0\%$
TMED-2_PLAX_GPArctan.pt
PLAX 95% coverage at 5k: $3.9\pm1.6\%$
PLAX 95% coverage at 10k: $0.1\pm0.2\%$
PLAX 95% coverage at 20k: $0.0\pm0.0\%$
TMED-2_PLAX_GPPowerLaw.pt
PLAX 95% coverage at 5k: $100.0\pm0.0\%$
PLAX 95% coverage at 10k: $100.0\pm0.0\%$
PLAX 95% coverage at 20k: $100.0\pm0.0\%$


In [10]:
# TMED-2 / PSAX: both GP models from models_path, then the GP power law
# stored under models_without_priors_path.
for checkpoint_dir, gp_name in [
    (models_path, 'GPPowerLaw'),
    (models_path, 'GPArctan'),
    (models_without_priors_path, 'GPPowerLaw'),
]:
    coverage_rates(experiments_path, 'TMED-2', 'PSAX', checkpoint_dir, gp_name)

TMED-2_PSAX_GPPowerLaw.pt
PSAX 95% coverage at 5k: $100.0\pm0.0\%$
PSAX 95% coverage at 10k: $100.0\pm0.0\%$
PSAX 95% coverage at 20k: $100.0\pm0.2\%$
TMED-2_PSAX_GPArctan.pt
PSAX 95% coverage at 5k: $100.0\pm0.2\%$
PSAX 95% coverage at 10k: $99.2\pm0.8\%$
PSAX 95% coverage at 20k: $89.5\pm2.4\%$
TMED-2_PSAX_GPPowerLaw.pt
PSAX 95% coverage at 5k: $100.0\pm0.0\%$
PSAX 95% coverage at 10k: $100.0\pm0.0\%$
PSAX 95% coverage at 20k: $100.0\pm0.2\%$


In [11]:
# TMED-2 / A4C: both GP models from models_path, then the GP power law
# stored under models_without_priors_path.
for checkpoint_dir, gp_name in [
    (models_path, 'GPPowerLaw'),
    (models_path, 'GPArctan'),
    (models_without_priors_path, 'GPPowerLaw'),
]:
    coverage_rates(experiments_path, 'TMED-2', 'A4C', checkpoint_dir, gp_name)

TMED-2_A4C_GPPowerLaw.pt
A4C 95% coverage at 5k: $99.9\pm0.2\%$
A4C 95% coverage at 10k: $99.4\pm0.7\%$
A4C 95% coverage at 20k: $94.2\pm1.8\%$
TMED-2_A4C_GPArctan.pt
A4C 95% coverage at 5k: $0.3\pm0.6\%$
A4C 95% coverage at 10k: $0.1\pm0.2\%$
A4C 95% coverage at 20k: $0.0\pm0.2\%$
TMED-2_A4C_GPPowerLaw.pt
A4C 95% coverage at 5k: $99.9\pm0.3\%$
A4C 95% coverage at 10k: $99.4\pm0.7\%$
A4C 95% coverage at 20k: $94.0\pm1.8\%$


In [12]:
# TMED-2 / A2C: both GP models from models_path, then the GP power law
# stored under models_without_priors_path.
for checkpoint_dir, gp_name in [
    (models_path, 'GPPowerLaw'),
    (models_path, 'GPArctan'),
    (models_without_priors_path, 'GPPowerLaw'),
]:
    coverage_rates(experiments_path, 'TMED-2', 'A2C', checkpoint_dir, gp_name)

TMED-2_A2C_GPPowerLaw.pt
A2C 95% coverage at 5k: $62.2\pm2.8\%$
A2C 95% coverage at 10k: $96.6\pm1.6\%$
A2C 95% coverage at 20k: $100.0\pm0.2\%$
TMED-2_A2C_GPArctan.pt
A2C 95% coverage at 5k: $0.0\pm0.0\%$
A2C 95% coverage at 10k: $0.0\pm0.0\%$
A2C 95% coverage at 20k: $0.0\pm0.0\%$
TMED-2_A2C_GPPowerLaw.pt
A2C 95% coverage at 5k: $57.3\pm2.9\%$
A2C 95% coverage at 10k: $95.2\pm1.8\%$
A2C 95% coverage at 20k: $100.0\pm0.2\%$
