In [5]:
import pickle, sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from scipy import stats, integrate
from tqdm import tqdm_notebook

dropout_list = ["00", "05", "10", "15", "20", "25", "30"]

In [6]:
def calculate_density(percentile, standard_errors, residuals):
    num_within_quantile = 0
    for se, resid in zip(standard_errors, residuals):
        norm = stats.norm(loc=0, scale=se)
        lower_bound = norm.ppf(0.5-percentile/2)
        upper_bound = norm.ppf(0.5+percentile/2)
        if lower_bound <= resid <= upper_bound:
            num_within_quantile += 1
    density = num_within_quantile / len(residuals)
    return density

predicted_pi = np.linspace(0, 1, 100)

## Opening Pickle Files

In [16]:
def openPickleOld(dropout, version=103119, model='ensemble'):
    if model == 'ensemble':
        with open('CGCNN_ensemble/%s/assess_ensemble_plots_d%s.pkl' % (version, dropout), 
                  'rb') as file:
            datapack = pickle.load(file)
            
        predy = datapack[0]
        predx = datapack[1]

    elif model == 'standalone':
        with open('CGCNN/single_cgcnn_d%s.pkl' % dropout, 'rb') as file:
            datapack = pickle.load(file)

        predy = datapack[0].T
        predx = datapack[1]

    ymean = predy.mean(axis=1)
    res = ymean - predx.reshape(-1)
    std = predy.std(axis=1)

    # Calculate the error metrics
    mae  = mean_absolute_error(predx, ymean)
    rmse = np.sqrt(mean_squared_error(predx, ymean))
    r2   = r2_score(predx, ymean)
    
    marpd = np.abs(2 * res /
                (np.abs(ymean) + np.abs(predx.reshape(-1)))).mean() * 100
    corr = np.corrcoef(predx.reshape(-1), ymean)[0, 1]
    
    return {'predy': predy, 
            'predx': predx, 
            'ymean': ymean, 
            'res': res, 
            'std': std, 
            'mae': mae,
            'rmse': rmse,
            'r2': r2, 
            'marpd': marpd, 
            'corr': corr,
            'dropout': dropout}

In [18]:
ens_table = np.array([openPickleOld(d) for d in dropout_list])
ens_metrics_table = np.array([[dct['mae'], dct['rmse'], 
                               dct['r2'], dct['marpd'], 
                               dct['corr']] for dct in ens_table])

ens_metrics_table = np.around(ens_metrics_table, decimals=4)
np.savetxt("Plots/ens_dropout_metrics.csv", ens_metrics_table, delimiter=",")

nn100_table = np.array([openPickleOld(d, model="standalone") for d in dropout_list])
nn100_metrics_table = np.array([[dct['mae'], dct['rmse'], 
                               dct['r2'], dct['marpd'], 
                               dct['corr']] for dct in nn100_table])
nn100_metrics_table = np.around(nn100_metrics_table, decimals=4)
np.savetxt("Plots/nn100_dropout_metrics.csv", nn100_metrics_table, delimiter=",")

In [19]:
ens_metrics_table

array([[ 0.1638,  0.3343,  0.7581, 48.4074,  0.8713],
       [ 0.1607,  0.3243,  0.7723, 47.5289,  0.8791],
       [ 0.166 ,  0.3379,  0.7529, 48.3299,  0.8682],
       [ 0.1625,  0.3274,  0.768 , 48.2561,  0.8769],
       [ 0.1628,  0.3373,  0.7538, 48.8142,  0.8688],
       [ 0.1695,  0.3439,  0.744 , 50.438 ,  0.8636],
       [ 0.1661,  0.3364,  0.7551, 49.6521,  0.8693]])

In [20]:
import multiprocessing as mp

def observed_pi_calc(std, res, dropout):
    return [calculate_density(quantile, std, res)
            for quantile in tqdm_notebook(predicted_pi, desc='Calibration - Case with %s%% dropout' % dropout)]
    
def observed_pi_calc_parallel(table):
    observed_pi_input = np.array([[dct['std'], dct['res'], dct['dropout']]
                                  for dct in table])

    pool = mp.Pool(processes=len(table))
    out  = pool.starmap(observed_pi_calc, observed_pi_input)
    
    return out

In [None]:
ens_observed_pi = observed_pi_calc_parallel(ens_table)
nn100_observed_pi = observed_pi_calc_parallel(nn100_table)


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """


HBox(children=(IntProgress(value=0, description='Calibration - Case with 05% dropout', style=ProgressStyle(des…

HBox(children=(IntProgress(value=0, description='Calibration - Case with 00% dropout', style=ProgressStyle(des…

HBox(children=(IntProgress(value=0, description='Calibration - Case with 30% dropout', style=ProgressStyle(des…

HBox(children=(IntProgress(value=0, description='Calibration - Case with 20% dropout', style=ProgressStyle(des…

HBox(children=(IntProgress(value=0, description='Calibration - Case with 15% dropout', style=ProgressStyle(des…

HBox(children=(IntProgress(value=0, description='Calibration - Case with 25% dropout', style=ProgressStyle(des…

HBox(children=(IntProgress(value=0, description='Calibration - Case with 10% dropout', style=ProgressStyle(des…