In [81]:
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats
from scipy.special import logsumexp
import pandas as pd
import seaborn as sns
# Use seaborn's 'paper' plotting context with fonts scaled by a factor of 1.3.
sns.set_context('paper', font_scale=1.3)
# Bind the first three colours of seaborn's 'Set1' palette to short names.
red, blue, green = sns.color_palette('Set1', 3)

import os
from datetime import datetime, timedelta

from rakott.mpl import fig_panel_labels, fig_xlabel, fig_ylabel, savefig_bbox

from inference import find_start_day
from ppc import load_data

def load_chain(job_id, country, burn_fraction=0.6, folder=None):
    """Load the post-burn-in log-likelihoods of every chain for one job and country.

    Reads ``<folder>/<job_id>/inference/<country>.npz`` and uses two of its
    arrays: ``params`` (only its first entry, the number of steps per chain)
    and ``logliks``.

    Parameters
    ----------
    job_id : str
        Name of the job sub-folder.
    country : str
        Base name of the ``.npz`` file (without extension).
    burn_fraction : float, optional
        Fraction of the steps at the start of each chain to discard.
    folder : str, optional
        Root folder holding the job sub-folders. Defaults to the
        notebook-level ``output_folder`` variable.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(nchains, nsteps - int(nsteps*burn_fraction))``.
    """
    if folder is None:
        # Fall back to the notebook-level setting (looked up at call time).
        folder = output_folder
    fname = os.path.join(folder, job_id, 'inference', '{}.npz'.format(country))
    # NpzFile keeps the archive open until closed; read what is needed and
    # release the file handle right away.
    with np.load(fname) as inference_data:
        # Cast so that a float-typed `params` array still yields a valid
        # reshape dimension.
        nsteps = int(inference_data['params'][0])
        logliks = inference_data['logliks']
    nchains = logliks.size // nsteps
    # Assumes the flat array stores each chain's steps contiguously -- TODO confirm
    # against the code that writes the file.
    logliks = logliks.reshape(nchains, nsteps)
    nburn = int(nsteps*burn_fraction)
    return logliks[:, nburn:]

def inliers(logliks, PLOT=False):
    """Flag the chains whose mean log-likelihood is not an outlier.

    A chain is an inlier when its mean log-likelihood lies within three
    sample standard deviations (``ddof=1``) of the mean over all chains.

    Parameters
    ----------
    logliks : numpy.ndarray
        2-D array with one row per chain.
    PLOT : bool, optional
        If true, plot every 1000th log-likelihood of each chain on the
        current matplotlib axes, inliers in black and outliers in red.

    Returns
    -------
    numpy.ndarray
        Boolean array with one entry per chain, ``True`` for inliers.
    """
    chain_mean_loglik = logliks.mean(axis=1)
    mean_mean_loglik = chain_mean_loglik.mean()
    if chain_mean_loglik.size > 1:
        std_mean_loglik = chain_mean_loglik.std(ddof=1)
    else:
        # The ddof=1 standard deviation of a single value is NaN, which would
        # mark the only chain as an outlier; treat the spread as zero instead.
        std_mean_loglik = 0.0
    # `<=` rather than `<`: when all chain means coincide the spread is zero
    # and every chain must still count as an inlier.
    idx = np.abs(chain_mean_loglik - mean_mean_loglik) <= 3*std_mean_loglik
    if PLOT:
        if idx.any():
            lines = plt.plot(logliks[idx, ::1000].T, '.k')
            # Label one line only so the legend shows a single entry per group.
            lines[0].set_label('inliers')
        if (~idx).any():
            lines = plt.plot(logliks[~idx, ::1000].T, '.r')
            lines[0].set_label('outliers')
        plt.ylabel('Log-likelihood')
        plt.legend()
    return idx

def WAIC(logliks):
    """Compute two WAIC variants from per-sample log-likelihoods.

    Chains flagged as outliers by ``inliers`` are dropped first. All
    remaining values are then pooled: ``llpd`` is the log of their mean
    likelihood, and the two penalty terms are

    * ``p1 = 2 * (llpd - mean(logliks))``
    * ``p2 = var(logliks, ddof=1)``

    Parameters
    ----------
    logliks : numpy.ndarray
        2-D array of log-likelihoods with one row per chain.

    Returns
    -------
    tuple of float
        ``(-2*(llpd - p1), -2*(llpd - p2))``.
    """
    kept = logliks[inliers(logliks)]
    n_samples = kept.size
    # Log of the average likelihood, evaluated in log space.
    llpd = logsumexp(kept) - np.log(n_samples)
    penalty_mean = 2*(llpd - kept.mean())
    penalty_var = kept.var(ddof=1)
    waic_mean = -2*(llpd - penalty_mean)
    waic_var = -2*(llpd - penalty_var)
    return waic_mean, waic_var

In [82]:
# Inference jobs to compare; they are relabelled 'Free', 'No' and 'Fixed'
# when the results table is built.
job_ids = ['2020-05-14-n1-normal-1M', '2020-05-14-n1-notau-1M', '2020-05-15-n1-fixed-tau-1M']
# One inference file per entry is expected under each job folder.
countries = 'Austria Belgium Denmark France Germany Italy Norway Spain Sweden Switzerland United_Kingdom Wuhan'.split(' ')
# Root folder of the inference outputs. Set the EFFECTIVENPI_OUTPUT environment
# variable to run the notebook on another machine without editing this cell;
# the original location is kept as the default.
output_folder = os.environ.get(
    'EFFECTIVENPI_OUTPUT',
    r'/Users/yoavram/Library/Mobile Documents/com~apple~CloudDocs/EffectiveNPI-Data/output',
)

In [83]:
# Compute both WAIC variants for every (country, job) pair and collect them
# as one record per pair.
results = []
for country in countries:
    for job_id in job_ids:
        # load_chain builds the file path itself, so none is constructed here.
        logliks = load_chain(job_id, country)
        waic1, waic2 = WAIC(logliks)
        results.append(dict(
            country=country,
            job_id=job_id,
            WAIC1=waic1,
            WAIC2=waic2
        ))

In [160]:
# Short display names for the three inference jobs.
model_names = {
    '2020-05-14-n1-normal-1M': 'Free',
    '2020-05-14-n1-notau-1M': 'No',
    '2020-05-15-n1-fixed-tau-1M': 'Fixed',
}
df = pd.DataFrame(results)
df['job_id'] = df['job_id'].replace(model_names)
df = df.rename(columns={'country':'Country', 'job_id':'Model'})
# Present country names with spaces instead of underscores.
df['Country'] = df['Country'].str.replace('_', ' ', regex=False)
df['Country'] = df['Country'].replace('Wuhan', 'Wuhan China')
df.head()

Unnamed: 0,Country,Model,WAIC1,WAIC2
0,Austria,Free,26.791404,28.401017
1,Austria,No,38.765399,39.703423
2,Austria,Fixed,25.592279,26.678583
3,Belgium,Free,29.118949,30.62063
4,Belgium,No,27.995771,28.804464


In [161]:
# Reshape to one row per country, with (WAIC variant, model) column pairs.
df = df.pivot(index='Country', columns='Model')
df

Unnamed: 0_level_0,WAIC1,WAIC1,WAIC1,WAIC2,WAIC2,WAIC2
Model,Fixed,Free,No,Fixed,Free,No
Country,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Austria,25.592279,26.791404,38.765399,26.678583,28.401017,39.703423
Belgium,28.190326,29.118949,27.995771,29.383127,30.62063,28.804464
Denmark,33.650988,34.969676,48.660911,38.561618,37.336706,49.630238
France,47.770211,47.749859,70.374954,49.896301,49.598799,72.172448
Germany,213.527954,156.852629,308.844802,214.948668,158.901193,310.650453
Italy,299.763164,230.995553,429.764433,301.394116,233.072121,433.423936
Norway,32.404837,33.640207,36.682201,34.039545,36.072503,37.540257
Spain,58.578385,58.049608,140.185261,59.92756,59.542263,141.962792
Sweden,23.51263,24.089671,27.462311,25.932879,25.910516,28.351759
Switzerland,72.817315,70.322332,98.509943,74.896038,72.965631,99.650838


In [162]:
# Discard the WAIC1 columns, then remove the outer column level so that the
# remaining columns are labelled by model name only.
df = df.drop(columns='WAIC1').droplevel(level=0, axis=1)
df.head()

Model,Fixed,Free,No
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Austria,26.678583,28.401017,39.703423
Belgium,29.383127,30.62063,28.804464
Denmark,38.561618,37.336706,49.630238
France,49.896301,49.598799,72.172448
Germany,214.948668,158.901193,310.650453


In [163]:
# Rows in which the 'Free' model attains the smallest value of the row.
idx = df['Free']==df.min(axis=1)
# Render 'Free' as two-decimal strings, wrapped in LaTeX \textbf{} where it
# is the row minimum. The column is rebuilt in a single assignment: writing
# strings into the float column piecewise through .loc is an
# incompatible-dtype setitem, which pandas has deprecated.
df['Free'] = [
    '\\textbf{'+'{:.2f}'.format(x)+'}' if is_min else '{:.2f}'.format(x)
    for x, is_min in zip(df['Free'], idx)
]
df.head()

Model,Fixed,Free,No
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Austria,26.678583,28.40,39.703423
Belgium,29.383127,30.62,28.804464
Denmark,38.561618,\textbf{37.34},49.630238
France,49.896301,\textbf{49.60},72.172448
Germany,214.948668,\textbf{158.90},310.650453


In [164]:
# `index` is a boolean flag; the label for the index column is passed through
# `index_label`. Float columns are written with two decimals.
df.to_csv('../figures/Table-WAIC.csv', index=True, index_label='Country', escapechar='@', float_format="%.2f")