# Make Tables for Paper

In [1]:
import os
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from zedstat.textable import textable


DATA_DIR = 'results/enet_who_comparison/'
OUT_DIR = 'tables/'
os.makedirs(OUT_DIR, exist_ok=True)
FILES = ['north_h1n1', 'south_h1n1', 'north_h3n2', 'south_h3n2']

NORTH_YEARS = []
for i in np.arange(3, 24):
    YEAR = ''
    if i < 10:
        YEAR += '200' + str(i)
    else:
        YEAR += ('20' + str(i))
    if i + 1 < 10:
        YEAR += '-0' + str(i + 1)
    else:
        YEAR += '-' + str(i + 1)
    NORTH_YEARS.append(YEAR)
        
SOUTH_YEARS = []
for i in np.arange(3, 24):
    if i < 10:
        SOUTH_YEARS.append('200' + str(i))
    else:
        SOUTH_YEARS.append('20' + str(i))

## Two-Cluster Enet vs. WHO Recommendations 

In [30]:
dfs = []
for FILE in FILES:
    # Two-cluster
    df = pd.read_csv(DATA_DIR + FILE + '.csv')
    # Single-cluster
    df2 = pd.read_csv(DATA_DIR + FILE + '_single_cluster.csv')
    df['name_single'] = df2['name']
    df['ha_enet_error_single'] = df2['ha_enet_error']
    # Select columns
    df = df[['name_who','name_0','name_1','name_single','ha_who_error','ha_enet_error', 'ha_enet_error_single']]
    if FILE[:5] == 'north':
        df['Season'] = NORTH_YEARS
    else:
        df['Season'] = SOUTH_YEARS
    df = df.set_index('Season')
    df = df.rename(columns={'name_who':'WHO Recommendation',
                            'name_0':'Enet Recommendation, Cluster 1',
                            'name_1':'Enet Recommendation, Cluster 2',
                            'name_single':'Enet Recommendation, Single Cluster',
                            'ha_who_error':'WHO Error',
                            'ha_enet_error':'Enet Error',
                            'ha_enet_error_single':'Enet Error Single'})
    dfs.append(df)
    textable(df, tabname = OUT_DIR + FILE + '.tex', FORMAT='%1.1f')
dfs[0].round(2)

Unnamed: 0_level_0,WHO Recommendation,"Enet Recommendation, Cluster 1","Enet Recommendation, Cluster 2","Enet Recommendation, Single Cluster",WHO Error,Enet Error,Enet Error Single
Season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2003-04,A/New Caledonia/20/99,A/Memphis/1/2001,A/HaNoi/2704/2002,A/Memphis/1/2001,8.29,5.29,11.29
2004-05,A/New Caledonia/20/99,A/Memphis/1/2001,A/Memphis/1/2001,A/Memphis/1/2001,7.43,9.71,9.71
2005-06,A/New Caledonia/20/99,A/Malaysia/25862/2003,A/Malaysia/25862/2003,A/Malaysia/25862/2003,10.56,6.38,6.38
2006-07,A/New Caledonia/20/99,A/Yazd/144/2006,A/Malaysia/30025/2004,A/New York/230/2003,10.42,5.67,6.37
2007-08,A/Solomon Islands/3/2006,A/New York/1050/2006,A/Incheon/2647/2007,A/New York/1050/2006,10.55,11.17,13.56
2008-09,A/Brisbane/59/2007,A/England/545/2007,A/Hong_Kong/2613/2007,A/England/545/2007,28.9,27.76,28.5
2009-10,A/Brisbane/59/2007,A/Hawaii/02/2008,A/Hong_Kong/H090-751-V3,A/Hawaii/02/2008,426.16,2.37,426.15
2010-11,A/California/7/2009,A/OKINAWA/283/2009,A/Qingdao/FF86/2009,A/OKINAWA/283/2009,10.58,7.49,7.79
2011-12,A/California/7/2009,A/Florida/14/2010,A/Taiwan/66179/2010,A/Florida/14/2010,12.79,7.61,7.63
2012-13,A/California/7/2009,A/England/WTSI1769/2010,A/Mexico/3723/2011,A/Singapore/GP2892/2010,12.65,8.33,5.36


## Enet Improvement Table

In [2]:
subtype = ['H1N1']*4 + ['H3N2']*4
hemisphere = 2*(['North']*2 + ['South']*2)
clusters = 4*[1, 2]

who_err_20 = [] # 2 decade error
enet_err_20 = [] 
who_err_10 = [] # 1 decade error
enet_err_10 = [] 

print('Averages omitting 2009-10 H1N1 pandemic season:\n')
for i in range(0, 4):
    for cluster in ['_single_cluster', '']:
        df = pd.read_csv(DATA_DIR + FILES[i] + cluster + '.csv')[:-1]
        WHO = 'ha_who_error'
        ENET = 'ha_enet_error'
        # WHO
        who_err_20.append(np.mean(df[WHO]))
        who_err_10.append(np.mean(df[WHO][-10:]))
        # Enet
        enet_err_20.append(np.mean(df[ENET]))
        enet_err_10.append(np.mean(df[ENET][-10:]))
        # Print results omitting 2009-10 H1N1 pandemic season
        if 'h1n1' in FILES[i]:
            who_no_pandemic = np.mean(df[WHO].drop(6))
            enet_no_pandemic = np.mean(df[ENET].drop(6))
            improvement_no_pandemic = 100 * (who_no_pandemic - enet_no_pandemic) / who_no_pandemic
            print(FILES[i] + cluster)
            print(f'\tWHO: {who_no_pandemic:.3f}')
            print(f'\tEnet: {enet_no_pandemic:.3f}')
            print(f'\tEnet: {improvement_no_pandemic:.3f}')

improvement_20 = 100 * (np.array(who_err_20) - np.array(enet_err_20)) / np.array(enet_err_20)
improvement_10 = 100 * (np.array(who_err_10) - np.array(enet_err_10)) / np.array(enet_err_10)

enet_improvement = pd.DataFrame({'Subtype':subtype,
                                 'Hemisphere':hemisphere,
                                 'Clusters':clusters,
                                 'WHO Error (2 decade)':who_err_20,
                                 'Enet Error (2 decade)':enet_err_20,
                                 'Improvement (2 decade) (%)':improvement_20,
                                 'WHO Error (1 decade)':who_err_10,
                                 'Enet Error (1 decade)':enet_err_10,
                                 'Improvement (1 decade) (%)':improvement_10})
enet_improvement.to_csv(DATA_DIR + 'enet_improvement.csv', index=False)
enet_improvement = enet_improvement.set_index('Subtype')
textable(enet_improvement, tabname = OUT_DIR + 'enet_improvement.tex', FORMAT='%1.2f')
enet_improvement.round(2)

Averages omitting 2009-10 H1N1 pandemic season:

north_h1n1_single_cluster
	WHO: 13.154
	Enet: 9.419
	Enet: 28.396
north_h1n1
	WHO: 13.154
	Enet: 8.457
	Enet: 35.706
south_h1n1_single_cluster
	WHO: 12.759
	Enet: 8.573
	Enet: 32.808
south_h1n1
	WHO: 12.759
	Enet: 7.236
	Enet: 43.287


Unnamed: 0_level_0,Hemisphere,Clusters,WHO Error (2 decade),Enet Error (2 decade),Improvement (2 decade) (%),WHO Error (1 decade),Enet Error (1 decade),Improvement (1 decade) (%)
Subtype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
H1N1,North,1,33.8,30.26,11.73,13.78,8.24,67.27
H1N1,North,2,33.8,8.15,314.64,13.78,7.13,93.29
H1N1,South,1,32.22,28.25,14.07,13.47,8.35,61.31
H1N1,South,2,32.22,26.93,19.64,13.47,7.26,85.59
H3N2,North,1,11.18,7.86,42.13,10.94,9.53,14.72
H3N2,North,2,11.18,6.54,70.86,10.94,7.29,49.93
H3N2,South,1,11.08,9.07,22.11,10.49,9.88,6.22
H3N2,South,2,11.08,7.65,44.94,10.49,8.55,22.73


## Enet Improvement over Random Strain Table

In [4]:
subtype = ['H1N1']*2 + ['H3N2']*2
hemisphere = 2*(['North'] + ['South'])

random_err_20 = [] # 2 decade error
enet_err_20 = [] 
random_err_10 = [] # 1 decade error
enet_err_10 = [] 

print('Averages omitting 2009-10 H1N1 pandemic season:\n')
for i in range(0, 4):
    df = pd.read_csv(DATA_DIR + FILES[i] + '_single_cluster_random.csv')[:-1]
    RANDOM = 'ha_random_error'
    ENET = 'ha_enet_error'
    # Random
    random_err_20.append(np.mean(df[RANDOM]))
    random_err_10.append(np.mean(df[RANDOM][-10:]))
    # Enet
    enet_err_20.append(np.mean(df[ENET]))
    enet_err_10.append(np.mean(df[ENET][-10:]))
    # Print results omitting 2009-10 H1N1 pandemic season
    if 'h1n1' in FILES[i]:
        random_no_pandemic = np.mean(df[RANDOM].drop(6))
        enet_no_pandemic = np.mean(df[ENET].drop(6))
        improvement_no_pandemic = 100 * (random_no_pandemic - enet_no_pandemic) / random_no_pandemic
        print(FILES[i])
        print(f'\tRandom: {random_no_pandemic:.3f}')
        print(f'\tEnet: {enet_no_pandemic:.3f}')
        print(f'\tEnet: {improvement_no_pandemic:.3f}')

improvement_20 = 100 * (np.array(random_err_20) - np.array(enet_err_20)) / np.array(enet_err_20)
improvement_10 = 100 * (np.array(random_err_10) - np.array(enet_err_10)) / np.array(enet_err_10)

enet_improvement = pd.DataFrame({'Subtype':subtype,
                                 'Hemisphere':hemisphere,
                                 'Random Error (2 decade)':random_err_20,
                                 'Enet Error (2 decade)':enet_err_20,
                                 'Improvement (2 decade) (%)':improvement_20,
                                 'Random Error (1 decade)':random_err_10,
                                 'Enet Error (1 decade)':enet_err_10,
                                 'Improvement (1 decade) (%)':improvement_10})
enet_improvement.to_csv(DATA_DIR + 'enet_improvement_random.csv', index=False)
enet_improvement = enet_improvement.set_index('Subtype')
textable(enet_improvement, tabname = OUT_DIR + 'enet_improvement_random.tex', FORMAT='%1.2f')
enet_improvement.round(2)

Averages omitting 2009-10 H1N1 pandemic season:

north_h1n1
	Random: 13.396
	Enet: 9.419
	Enet: 29.692
south_h1n1
	Random: 15.884
	Enet: 8.573
	Enet: 46.029


ValueError: All arrays must be of the same length