In [1]:
import pandas as pd
import numpy as np
from datetime import timedelta
import pylab as plt
%matplotlib inline

import seaborn as sns

## Combine, add a zed column, and plot (if needed)

In [2]:
def combine(csv_ours, csv_otherTeams, zed_func, plot=False, target=None):
    """Merge one of our forecast runs with other teams' forecasts and score ``zed``.

    Our CSV is read with its first column as the index, reduced to the columns
    whose name ends in ``'1'``, and re-indexed with weekly dates: row ``i`` is
    stamped ``2020-04-11 + 7 * (i + 1)`` days.  It is then outer-joined onto the
    other teams' CSV (first column parsed as dates) and ``zed_func`` is applied
    to the joined frame to fill a new ``zed`` column.

    Parameters
    ----------
    csv_ours : str or path-like
        CSV of our run.  Must provide a ``grd1`` column, which is the reference
        every error is measured against (presumably the ground truth -- confirm),
        plus whatever columns ``zed_func`` reads.
    csv_otherTeams : str or path-like
        CSV with a date index and at least the columns
        ``'Columbia-UNC-SurvCon'`` and ``'QJHong-Encounter'``.
    zed_func : callable
        Called as ``zed_func(df_combined)``; must return one value per row.
    plot : bool, default False
        If true, draw the error distributions and the forecast curves.
    target : str, optional
        Quantity name used in the plot titles only.  When omitted, a
        module-level ``target`` variable is used if one exists (this keeps
        existing notebook calls working); otherwise it is left out of the title.

    Returns
    -------
    list of float
        Mean absolute error against ``grd1`` over 2020-07-25 .. 2020-10-31, in
        the order ``['Columbia-UNC-SurvCon', 'QJHong-Encounter', 'zed']``.
        A NaN anywhere in the window makes that column's error NaN.
    """
    df_ours = pd.read_csv(csv_ours, index_col=0)
    dates = pd.date_range(start='2020-04-11', freq='7D', periods=len(df_ours))
    # Shift the weekly stamps forward by one week before joining.
    dates_1 = dates + timedelta(days=7)
    df_otherTeams = pd.read_csv(csv_otherTeams, index_col=0, parse_dates=True)

    # Keep only the columns whose name ends in '1'.
    cols_ours = [x for x in df_ours.columns if x.endswith('1')]
    df_ours = df_ours[cols_ours].copy()
    df_ours.index = dates_1

    df_combined = df_otherTeams.join(df_ours, how='outer')

    #========================== add the zed column =======================START
    df_combined['zed'] = zed_func(df_combined)
    #========================== add the zed column =======================END

    eval_cols = ['Columbia-UNC-SurvCon', 'QJHong-Encounter', 'zed']
    df_tmp = df_combined.loc['2020-07-25':'2020-10-31', :]

    if plot:
        if target is None:
            # Backward compatibility: older notebook cells relied on a global.
            target = globals().get('target')
        title_target = f'{target} ' if target else ''
        fig, ax = plt.subplots(1, 1, figsize=(8, 6))

    errs = []
    for col in eval_cols:
        # Signed absolute-scale error; the metric below is its mean magnitude.
        err_vec = (df_tmp[col] - df_tmp['grd1']).values
        err = np.abs(err_vec).mean()
        errs.append(err)
        if plot:
            # NOTE(review): distplot is deprecated in recent seaborn releases;
            # kept so the rendered figure does not change.
            sns.distplot(err_vec, ax=ax, label=f'{col}\nmae={err:.2e}')

    if plot:
        ax.legend()
        ax.set_title(f'One-Week-Ahead {title_target}Prediction Error', fontsize=15)

        # Second figure: the forecast curves themselves, not their errors.
        fig, ax = plt.subplots(1, 1, figsize=(15, 7.5))
        df_combined['grd1'].plot(ax=ax, linewidth=5)
        df_combined[['Columbia-UNC-SurvCon', 'QJHong-Encounter']].plot(ax=ax, marker='v')
        df_combined[['zed']].plot(ax=ax, marker='o', style='--', linewidth=2, ms=5)
        ax.set_title(f'One-Week-Ahead {title_target}Prediction', fontsize=15)

    return errs

## Functions to get the zed columns 
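Add new `zed` functions here. Each one receives the combined DataFrame and must return one value per row for the `zed` column.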

In [3]:
def zed_func_1(x, c=.5, gamma=0.000065):
    """Weighted blend of ``ET1``, ``RF1`` and ``TF1``, scaled by its own row-to-row change.

    The blend is ``(ET1 + RF1 + c * TF1) / (2 + c)``.  Each row is then
    multiplied by ``exp(-gamma * delta)``, where ``delta`` is the blend's change
    from the previous row (0 for the first row and wherever the difference is
    NaN).  With a positive ``gamma`` an increase is scaled down and a decrease
    is scaled up.

    Parameters
    ----------
    x : pandas.DataFrame
        Must contain the columns ``ET1``, ``RF1`` and ``TF1``.
    c : float
        Weight of ``TF1`` relative to the other two columns.
    gamma : float
        Strength of the exponential scaling.

    Returns
    -------
    pandas.Series
        One value per row of ``x``, on the same index.
    """
    zed = (x['ET1'] + x['RF1'] + c * x['TF1']) / (2 + c)
    step = zed.diff().fillna(0).values
    # One vectorized call instead of a Python-level loop over the array.
    damping = np.exp(-gamma * step)
    return zed * damping

def zed_func_2(x, c=.5, d=2.6):
    """Combine a smoothed blend of the forecasts with the previous row's ``grd1``.

    The blend ``(ET1 + RF1 + c * TF1) / (2 + c)`` is smoothed with an
    exponentially weighted mean (``alpha=.6``).  The result is
    ``d * smoothed + (1 - d) * lagged``, where ``lagged`` is ``grd1`` moved down
    by one row with its first value repeated in the first position.

    Parameters
    ----------
    x : pandas.DataFrame
        Must contain the columns ``ET1``, ``RF1``, ``TF1`` and ``grd1``.
    c : float
        Weight of ``TF1`` relative to the other two columns.
    d : float
        Weight of the smoothed blend; ``grd1`` from the previous row gets ``1 - d``.

    Returns
    -------
    pandas.Series
        One value per row of ``x``, on the same index.
    """
    blend = (x['ET1'] + x['RF1'] + c * x['TF1']) / (2 + c)
    smoothed = blend.ewm(alpha=.6).mean()

    reference = x['grd1'].values
    # Shift down one row; the first row has no predecessor, so it reuses itself.
    lagged = np.concatenate(([reference[0]], reference[:-1]))

    return d * smoothed + (1 - d) * lagged

def zed_func_3(x, c=.5):
    """Blend of the forecasts corrected by the two previous rows of ``grd1``.

    The blend is ``(ET1 + RF1 + c * TF1) / (2 + c)``.  The first two rows are
    returned as the plain blend.  From the third row on, row ``i`` is
    ``grd1[i - 1] - .5 * grd1[i - 2] + .5 * blend[i]``.

    Frames with fewer than two rows are handled: the result simply has as many
    entries as ``x`` has rows.

    Parameters
    ----------
    x : pandas.DataFrame
        Must contain the columns ``ET1``, ``RF1``, ``TF1`` and ``grd1``.
    c : float
        Weight of ``TF1`` relative to the other two columns.

    Returns
    -------
    list
        One value per row of ``x``, in row order.
    """
    zed = ((x['ET1'] + x['RF1'] + c * x['TF1']) / (2 + c)).values
    grd = x['grd1'].values

    # Slicing (not indexing) keeps this safe when x has 0 or 1 rows.
    results = list(zed[:2])
    # Rows 2..n-1 at once: grd[1:-1] is grd[i - 1], grd[:-2] is grd[i - 2].
    results.extend(grd[1:-1] - .5 * grd[:-2] + .5 * zed[2:])
    return results

## Run through all run results to test zed functions

In [4]:
# Score every zed function on every forecast run, then summarise the zed error
# (mean and standard deviation across runs) with one column per function.
target = 'death'
N_RUNS = 20  # runs are stored as ..._tweak_00.csv through ..._tweak_19.csv
EVAL_COLS = ['Columbia-UNC-SurvCon', 'QJHong-Encounter', 'zed']  # order returned by combine()
zed_funcs = [zed_func_1, zed_func_2, zed_func_3]

# The comparison file depends only on the target, so build its path once.
csv_otherTeams = f'../CovidTeam_results/forecast_otherTeam_1-wk-ahead-inc-{target}.csv'

dfs = []
for f, zed_func in enumerate(zed_funcs):
    E = []
    for i in range(N_RUNS):
        idx = str(i).zfill(2)
        csv_ours = f'results_forecast/forecast_US_{target}_tweak_{idx}.csv'
        errs = combine(csv_ours, csv_otherTeams, zed_func=zed_func)
        E.append(errs)

    df_tmp = pd.DataFrame(
        data=E,
        index=range(N_RUNS),
        columns=EVAL_COLS,
    ).describe().loc[['mean', 'std'], ['zed']]
    # Labels are zero-based positions in zed_funcs: func0 is zed_func_1, etc.
    df_tmp.columns = [f'func{f}']

    dfs.append(df_tmp)

pd.concat(dfs, axis=1)

Unnamed: 0,func0,func1,func2
mean,449.53885,609.756863,563.469667
std,16.871623,49.877527,11.039252
