In [2]:
%%javascript
// Set the notebook output area's auto-scroll threshold to 9999.
// NOTE(review): presumably this keeps long cell outputs (the result tables below)
// from being collapsed into a scroll box -- confirm against the notebook front-end in use.
IPython.OutputArea.auto_scroll_threshold = 9999;

<IPython.core.display.Javascript object>

In [3]:
import sys
sys.path.append('../../')

from matplotlib import pyplot
import numpy as np
import pandas
from matplotlib import pyplot as plt
from scipy.stats import rankdata
from IPython.display import display, HTML

from bayesian_benchmarks.database_utils import Database
from bayesian_benchmarks.data import classification_datasets, _ALL_REGRESSION_DATATSETS, _ALL_CLASSIFICATION_DATATSETS
# Single lookup table covering both task types: start from a copy of the regression
# registry, then layer the classification registry on top (classification entries
# win if a key appears in both).
ALL_DATATSETS = dict(_ALL_REGRESSION_DATATSETS)
ALL_DATATSETS.update(_ALL_CLASSIFICATION_DATATSETS)
from bayesian_benchmarks.data import regression_datasets


In [4]:
def rankarray(A):
    """Rank the entries of every row of ``A`` independently.

    Each row is passed to ``scipy.stats.rankdata`` with its default settings and
    the per-row rank vectors are stacked into a single NumPy array.

    Args:
        A: iterable of rows (e.g. a 2-D array or a list of lists).

    Returns:
        np.ndarray holding one rank vector per row of ``A``.
    """
    return np.array([rankdata(row) for row in A])


def read_regression_classification(fs, models_names, datasets, task, db_loc='../results/results.db',
                                   max_abs=1000, verbose=True):
    """Collect per-dataset, per-model results from the results database into tables.

    For every dataset and every (model, name) pair, the rows returned by
    ``db.read(task, fs, {'model': model, 'dataset': dataset})`` are averaged per
    field and formatted as ``'mean(std)'`` strings.

    Args:
        fs: list of result field names to read; one table is built per field.
        models_names: sequence of ``(model, name)`` pairs. ``model`` is the value
            used in the database query, ``name`` is the table column label.
        datasets: iterable of dataset keys (must be keys of ``ALL_DATATSETS``).
        task: task identifier passed to ``db.read``; when equal to
            ``'classification'`` an extra ``'K'`` column is added.
        db_loc: path handed to ``Database``.
        max_abs: means with absolute value >= ``max_abs`` (or NaN) are reported as
            ``'nan'`` and excluded from the summary statistics. Defaults to the
            previously hard-coded 1000.
        verbose: when True (default, the previous behaviour) print
            ``model dataset n_rows`` for every non-empty query result.

    Returns:
        ``(results, fields)`` where ``fields`` is the ordered list of table columns
        and ``results[f]`` is ``{'table': {column: list}, 'vals': list of rows}``.
        ``'vals'`` holds the numeric means (NaN where missing/filtered), one row per
        dataset and one entry per model. For fields whose name does not contain
        ``'unnormalized'`` the table gets three extra rows: ``'avg'``, ``'median'``
        and ``'avg rank'`` across datasets.
    """
    column_names = [m[1] for m in models_names]
    is_classification = task == 'classification'
    fields = ['dataset', 'N', 'D'] + (['K'] if is_classification else []) + column_names

    # One independent table (column -> list of cells) and value matrix per field.
    results = {f: {'table': {field: [] for field in fields}, 'vals': []} for f in fs}

    with Database(db_loc) as db:

        for dataset in datasets:
            # Dataset description columns are repeated in every field's table.
            for f in fs:
                table = results[f]['table']
                info = ALL_DATATSETS[dataset]
                table['dataset'].append(dataset[:10])  # names are truncated to 10 chars
                table['N'].append(info.N)
                table['D'].append(info.D)
                if is_classification:
                    table['K'].append(info.K)

            row = {f: [] for f in fs}
            for model, name in models_names:
                res = db.read(task, fs, {'model': model,
                                         'dataset': dataset})

                if len(res) == 0:
                    # No stored runs for this model/dataset: blank cell, NaN value.
                    for f in fs:
                        results[f]['table'][name].append('')
                        row[f].append(np.nan)
                else:
                    if verbose:
                        print('{} {} {}'.format(model, dataset, len(res)))
                    for i, f in enumerate(fs):
                        # Column i of every returned row corresponds to field fs[i].
                        L = [float(l[i]) for l in res]
                        m = np.average(L)
                        std = np.std(L) if len(L) > 1 else np.nan
                        if -max_abs < m < max_abs:
                            r = '{:.3f}({:.3f})'.format(m, std)
                            row[f].append(m)
                        else:
                            # Out-of-range (or NaN) means are reported as 'nan'.
                            r = 'nan'
                            row[f].append(np.nan)

                        results[f]['table'][name].append(r)

            for f in fs:
                results[f]['vals'].append(row[f])

    for f in fs:
        if 'unnormalized' in f:
            continue

        vals = np.array(results[f]['vals'])
        if vals.ndim < 2:
            # No dataset rows were collected: the column-wise statistics would
            # collapse to scalars and could not be zipped with the model names.
            continue

        # NOTE(review): how rankdata treats NaN entries depends on the SciPy
        # version / nan_policy -- confirm the 'avg rank' row when results are missing.
        summaries = [
            (np.nanmean(vals, 0), 'avg'),
            (np.nanmedian(vals, 0), 'median'),
            (np.nanmean(rankarray(vals), 0), 'avg rank'),
        ]

        table = results[f]['table']
        for stats, label in summaries:
            table['dataset'].append(label)
            table['N'].append('')
            table['D'].append('')
            if is_classification:
                table['K'].append('')
            for value, name in zip(stats, column_names):
                table[name].append('{:.3f}'.format(value))

    return results, fields


In [5]:
# [database model id, column label] pairs for the comparison.
models_names = [
    ['RegNet', 'SGD'],
    ['RegNetNL', 'NL'],
    ['RegNetNL_LP', 'largeNL'],
]

# NOTE: this rebinds `regression_datasets`, replacing the name imported from
# bayesian_benchmarks.data in the imports cell.
regression_datasets = [
    'wilson_elevators',
    'wilson_skillcraft',
    'wilson_pol',
    'wilson_keggdirected',
    'wilson_keggundirected',
    'wilson_protein',
]

# Result fields to read; one table is produced per field.
fs = [
    'test_loglik',
    'test_rmse',
    'test_loglik_unnormalized',
    'test_rmse_unnormalized',
    'test_calibration',
]

results, fields = read_regression_classification(
    fs, models_names, regression_datasets, 'regression',
    db_loc='../tasks/nl_test.db')


RegNet wilson_elevators 20
RegNetNL wilson_elevators 20
RegNetNL_LP wilson_elevators 20
RegNet wilson_skillcraft 20
RegNetNL wilson_skillcraft 20
RegNetNL_LP wilson_skillcraft 20
RegNet wilson_pol 20
RegNetNL wilson_pol 20
RegNetNL_LP wilson_pol 20
RegNet wilson_keggdirected 20
RegNetNL wilson_keggdirected 20
RegNetNL_LP wilson_keggdirected 20
RegNet wilson_keggundirected 20
RegNetNL wilson_keggundirected 20
RegNetNL_LP wilson_keggundirected 20
RegNet wilson_protein 20
RegNetNL wilson_protein 20
RegNetNL_LP wilson_protein 20


In [6]:
# Render one HTML table per reported metric, each preceded by its heading.
# The normalised 'test_loglik' / 'test_rmse' tables are also in `results`
# but are not rendered here.
for heading, metric in [
    ('unnormalized test loglikelihood', 'test_loglik_unnormalized'),
    ('unnormalised test rmse', 'test_rmse_unnormalized'),
    ('test prediction interval coverage', 'test_calibration'),
]:
    print(heading)
    frame = pandas.DataFrame(results[metric]['table'], columns=fields)
    display(HTML(frame.to_html(index=False)))


unnormalized test loglikelihood


dataset,N,D,SGD,NL,largeNL
wilson_ele,16599,18,0.810(0.146),0.685(0.027),0.803(0.042)
wilson_ski,3338,19,-0.172(0.032),-0.103(0.046),-0.050(0.050)
wilson_pol,15000,26,-2.447(0.217),-4.647(0.020),-2.841(0.226)
wilson_keg,48827,20,0.235(1.876),-0.625(0.059),0.664(0.072)
wilson_keg,63608,27,0.700(0.062),-1.191(0.111),0.675(0.048)
wilson_pro,45730,9,-0.613(0.036),-0.654(0.025),-0.619(0.013)


unnormalised test rmse


dataset,N,D,SGD,NL,largeNL
wilson_ele,16599,18,0.093(0.005),0.121(0.003),0.097(0.003)
wilson_ski,3338,19,0.279(0.013),0.268(0.012),0.253(0.011)
wilson_pol,15000,26,3.051(0.469),25.240(0.516),4.099(1.272)
wilson_keg,48827,20,0.145(0.101),0.450(0.029),0.122(0.008)
wilson_keg,63608,27,0.142(0.038),0.801(0.082),0.121(0.005)
wilson_pro,45730,9,0.446(0.011),0.461(0.010),0.445(0.005)


test prediction interval coverage


dataset,N,D,SGD,NL,largeNL
wilson_ele,16599.0,18.0,0.855(0.037),0.927(0.006),0.870(0.013)
wilson_ski,3338.0,19.0,0.977(0.009),0.951(0.014),0.937(0.012)
wilson_pol,15000.0,26.0,0.939(0.018),0.987(0.003),0.921(0.019)
wilson_keg,48827.0,20.0,0.966(0.003),0.992(0.002),0.958(0.008)
wilson_keg,63608.0,27.0,0.962(0.011),0.936(0.010),0.965(0.003)
wilson_pro,45730.0,9.0,0.916(0.007),0.919(0.003),0.920(0.004)
avg,,,0.936,0.952,0.929
median,,,0.951,0.944,0.929
avg rank,,,1.833,2.333,1.833


In [5]:
# [database model id, column label] pairs for the comparison.
models_names = [
    ['RegNet', 'SGD'],
    ['RegNetpcavi', 'PCA+VI'],
    ['RegNetfreq_dirvi', 'FD+VI'],
    ['RegNetpcalow_rank_gaussian', 'PCA+SWAG'],
    ['RegNetfreq_dirlow_rank_gaussian', 'FD+SWAG'],
]

# NOTE: this rebinds `regression_datasets`, replacing the name imported from
# bayesian_benchmarks.data in the imports cell.
regression_datasets = [
    'wilson_elevators',
    'wilson_keggdirected',
    'wilson_keggundirected',
    'wilson_protein',
]

# Result fields to read; one table is produced per field.
fs = [
    'test_loglik',
    'test_rmse',
    'test_loglik_unnormalized',
    'test_rmse_unnormalized',
    'test_calibration',
]

results, fields = read_regression_classification(
    fs, models_names, regression_datasets, 'regression',
    db_loc='../tasks/het_db_results.db')

RegNet wilson_elevators 5
RegNetpcavi wilson_elevators 4
RegNetfreq_dirvi wilson_elevators 4
RegNetpcalow_rank_gaussian wilson_elevators 5
RegNetfreq_dirlow_rank_gaussian wilson_elevators 5
RegNet wilson_keggdirected 5
RegNetpcavi wilson_keggdirected 4
RegNetfreq_dirvi wilson_keggdirected 4
RegNetpcalow_rank_gaussian wilson_keggdirected 5
RegNetfreq_dirlow_rank_gaussian wilson_keggdirected 5
RegNet wilson_keggundirected 5
RegNetpcavi wilson_keggundirected 4
RegNetfreq_dirvi wilson_keggundirected 5
RegNetpcalow_rank_gaussian wilson_keggundirected 5
RegNetfreq_dirlow_rank_gaussian wilson_keggundirected 5
RegNet wilson_protein 6
RegNetpcavi wilson_protein 5
RegNetfreq_dirvi wilson_protein 4
RegNetpcalow_rank_gaussian wilson_protein 6
RegNetfreq_dirlow_rank_gaussian wilson_protein 5


In [6]:
# Render one HTML table per reported metric, each preceded by its heading.
# The calibration table is given a heading too, so that all three tables are
# identifiable in the cell output.
for heading, metric in [
    ('unnormalized test loglikelihood', 'test_loglik_unnormalized'),
    ('unnormalised test rmse', 'test_rmse_unnormalized'),
    ('test prediction interval coverage', 'test_calibration'),
]:
    print(heading)
    frame = pandas.DataFrame(results[metric]['table'], columns=fields)
    display(HTML(frame.to_html(index=False)))


unnormalized test loglikelihood


dataset,N,D,SGD,PCA+VI,FD+VI,PCA+SWAG,FD+SWAG
wilson_ele,16599,18,0.760(0.069),0.934(0.038),0.957(0.021),0.981(0.027),0.961(0.024)
wilson_keg,48827,20,0.666(0.022),0.666(0.021),0.599(0.017),0.414(0.012),0.422(0.011)
wilson_keg,63608,27,0.711(0.064),0.711(0.067),0.713(0.061),0.721(0.053),0.720(0.055)
wilson_pro,45730,9,-0.597(0.022),-0.588(0.024),-0.604(0.028),-0.593(0.027),-0.604(0.032)


unnormalised test rmse


dataset,N,D,SGD,PCA+VI,FD+VI,PCA+SWAG,FD+SWAG
wilson_ele,16599,18,0.094(0.003),0.090(0.001),0.090(0.001),0.091(0.002),0.091(0.002)
wilson_keg,48827,20,0.122(0.003),0.123(0.003),0.125(0.004),0.134(0.005),0.133(0.005)
wilson_keg,63608,27,0.118(0.007),0.118(0.008),0.118(0.007),0.118(0.007),0.118(0.007)
wilson_pro,45730,9,0.441(0.008),0.443(0.010),0.445(0.012),0.444(0.011),0.446(0.013)


dataset,N,D,SGD,PCA+VI,FD+VI,PCA+SWAG,FD+SWAG
wilson_ele,16599.0,18.0,0.841(0.017),0.879(0.009),0.888(0.009),0.907(0.011),0.898(0.007)
wilson_keg,48827.0,20.0,0.964(0.002),0.965(0.002),0.970(0.002),0.981(0.001),0.980(0.001)
wilson_keg,63608.0,27.0,0.962(0.012),0.961(0.013),0.961(0.012),0.967(0.002),0.967(0.002)
wilson_pro,45730.0,9.0,0.920(0.005),0.929(0.004),0.925(0.004),0.929(0.003),0.926(0.003)
avg,,,0.922,0.934,0.936,0.946,0.943
median,,,0.941,0.945,0.943,0.948,0.946
avg rank,,,1.500,2.500,2.500,4.750,3.750


In [8]:
# [database model id, column label] pairs for the comparison.
models_names = [
    ['RegNet', 'SGD'],
    ['RegNetpcaess', 'PCA+ESS'],
    ['RegNetpcavi', 'PCA+VI'],
    ['RegNetpcalow_rank_gaussian', 'PCA+SWAG'],
]

# NOTE: this rebinds `regression_datasets`, replacing the name imported from
# bayesian_benchmarks.data in the imports cell.
regression_datasets = [
    'wilson_elevators',
    'wilson_keggdirected',
    'wilson_keggundirected',
    'wilson_protein',
]

# Result fields to read; one table is produced per field.
fs = [
    'test_loglik',
    'test_rmse',
    'test_loglik_unnormalized',
    'test_rmse_unnormalized',
    'test_calibration',
]

results, fields = read_regression_classification(
    fs, models_names, regression_datasets, 'regression',
    db_loc='../tasks/het_db_results.db')

RegNet wilson_elevators 10
RegNetpcaess wilson_elevators 10
RegNetpcavi wilson_elevators 10
RegNetpcalow_rank_gaussian wilson_elevators 10
RegNet wilson_keggdirected 10
RegNetpcaess wilson_keggdirected 10
RegNetpcavi wilson_keggdirected 10
RegNetpcalow_rank_gaussian wilson_keggdirected 10
RegNet wilson_keggundirected 10
RegNetpcaess wilson_keggundirected 10
RegNetpcavi wilson_keggundirected 10
RegNetpcalow_rank_gaussian wilson_keggundirected 10
RegNet wilson_protein 10
RegNetpcaess wilson_protein 10
RegNetpcavi wilson_protein 10
RegNetpcalow_rank_gaussian wilson_protein 10


In [9]:
# Render one HTML table per reported metric, each preceded by its heading.
# The calibration table is given a heading too, so that all three tables are
# identifiable in the cell output.
for heading, metric in [
    ('unnormalized test loglikelihood', 'test_loglik_unnormalized'),
    ('unnormalised test rmse', 'test_rmse_unnormalized'),
    ('test prediction interval coverage', 'test_calibration'),
]:
    print(heading)
    frame = pandas.DataFrame(results[metric]['table'], columns=fields)
    display(HTML(frame.to_html(index=False)))

unnormalized test loglikelihood


dataset,N,D,SGD,PCA+ESS,PCA+VI,PCA+SWAG
wilson_ele,16599,18,0.838(0.108),0.970(0.041),0.934(0.038),0.981(0.030)
wilson_keg,48827,20,0.667(0.022),0.662(0.017),0.666(0.018),0.415(0.013)
wilson_keg,63608,27,0.719(0.046),0.726(0.032),0.720(0.043),0.725(0.038)
wilson_pro,45730,9,-0.604(0.027),-0.577(0.021),-0.591(0.025),-0.604(0.031)


unnormalised test rmse


dataset,N,D,SGD,PCA+ESS,PCA+VI,PCA+SWAG
wilson_ele,16599,18,0.092(0.003),0.090(0.002),0.090(0.002),0.090(0.002)
wilson_keg,48827,20,0.121(0.003),0.122(0.003),0.123(0.003),0.134(0.005)
wilson_keg,63608,27,0.125(0.024),0.125(0.023),0.125(0.023),0.125(0.023)
wilson_pro,45730,9,0.443(0.009),0.440(0.007),0.444(0.009),0.447(0.011)


dataset,N,D,SGD,PCA+ESS,PCA+VI,PCA+SWAG
wilson_ele,16599.0,18.0,0.857(0.031),0.893(0.017),0.878(0.010),0.904(0.012)
wilson_keg,48827.0,20.0,0.965(0.002),0.966(0.003),0.965(0.003),0.982(0.003)
wilson_keg,63608.0,27.0,0.962(0.012),0.965(0.009),0.962(0.012),0.965(0.009)
wilson_pro,45730.0,9.0,0.917(0.007),0.928(0.007),0.926(0.007),0.924(0.007)
avg,,,0.925,0.938,0.933,0.944
median,,,0.940,0.946,0.944,0.944
avg rank,,,1.250,3.250,2.000,3.500


In [6]:
import pickle

In [11]:
# Persist whatever `results` currently holds (i.e. the output of the most recently
# executed read_regression_classification cell) for later figure generation.
# NOTE(review): this is an absolute, machine-specific path; the cell fails on any
# machine where the directory does not exist.
output_path = '/home/wesley/Documents/Papers/subspace_deeplearning/figs/data/heteroscedastic_regression.pkl'
with open(output_path, 'wb') as out_file:
    pickle.dump(results, out_file, protocol=pickle.HIGHEST_PROTOCOL)