# Bayesian Forecaster

#### Imports

In [26]:
import numpy as np
import pymc as pm
import pytensor.tensor as pt
import pandas as pd
import arviz as az

#### data setup

In [27]:
# Load the raw network array and reverse its axis order, so that what was the
# last axis on disk becomes the row axis used below.
data_directory = '../data/experiment_small/'
data = np.load(data_directory + 'network_params/data_network.npy')
data = np.transpose(data, (2, 1, 0))

# Collapse the two trailing axes into a single column axis (two columns per
# entry of axis 1), then drop the final 3*365 rows.
# NOTE(review): this comment used to say "exclude the last year", but the slice
# removes 3*365 rows (three years if there is one row per day) -- confirm
# which of the two is intended.
data = data.reshape(len(data), 2 * len(data[0]))
data = data[:-3*365, :]

# The model works on first differences between consecutive rows.
diffs = np.diff(data, axis=0)
T, D = diffs.shape

# Read one hospital name per line. Plain file I/O is used instead of
# np.loadtxt(..., delimiter='\n') because NumPy >= 1.23 rejects a newline as
# the delimiter, and the names contain spaces so whitespace splitting is not an
# option. Blank lines are skipped; unlike loadtxt, '#' is not treated as a
# comment marker.
with open(data_directory + 'network_params/hospitals.txt') as names_file:
    hospital_list = np.array(
        [line.rstrip('\r\n') for line in names_file if line.strip()],
        dtype=str,
    )

# Column order matches the reshape above: "<name> supply", "<name> demand" for
# each hospital in turn.
df_cols = np.array(
    [f'{name} {kind}' for name in hospital_list for kind in ('supply', 'demand')]
)
df = pd.DataFrame(diffs, columns=df_cols)

df

Unnamed: 0,Orange County supply,Orange County demand,Los Angeles supply,Los Angeles demand,Alta Bates supply,Alta Bates demand,Oakland supply,Oakland demand
0,5.269148,-2.146400,6.420427,-2.615462,1.428242,-0.581811,0.733457,-0.298784
1,-0.194062,5.754776,-0.236411,7.011886,-0.052597,1.559839,-0.027011,0.801030
2,4.743011,6.515198,5.778534,7.938270,1.285468,1.765898,0.660131,0.906852
3,6.743011,-2.519152,8.215959,-3.069363,1.827692,-0.682785,0.938575,-0.350640
4,2.301498,-2.136676,2.803628,-2.602916,0.623677,-0.579046,0.320281,-0.297352
...,...,...,...,...,...,...,...,...
1700,-0.866539,-2.080897,-1.055859,-2.535404,-0.234885,-0.564013,-0.120620,-0.289641
1701,-1.241602,-4.885025,-1.512805,-5.952142,-0.336518,-1.324084,-0.172818,-0.679958
1702,-1.896454,7.244912,-2.310643,8.827497,-0.514024,1.963706,-0.263970,1.008430
1703,-4.976423,0.571552,-6.063481,0.696358,-1.348839,0.154928,-0.692670,0.079561


#### constructing the BVAR model

In [28]:
def ar_update(beta,n,h,df):
    """Build the autoregressive part of the VAR mean for every equation.

    For each row ``t >= h`` of ``df`` and each equation ``i < n`` the result is

        sum_{j=1..h} sum_k beta[i, j-1, k] * df.values[t-j, k]

    The whole thing is evaluated as one matrix product between a lagged design
    matrix and the flattened coefficients, instead of creating ``n * h``
    separate multiply/sum nodes in a Python loop.

    Parameters
    ----------
    beta : tensor-like
        Coefficients indexed as ``beta[equation, lag, column]``; the last axis
        must have one entry per column of ``df``.
    n : int
        Number of equations (leading entries of ``beta``) to evaluate.
    h : int
        Number of lags; must be at least 1.
    df : pandas.DataFrame
        Series to regress on, one row per time step.

    Returns
    -------
    tensor-like of shape ``(len(df) - h, n)``
        Row ``r`` corresponds to ``df.values[h + r]``.

    Raises
    ------
    ValueError
        If ``h`` is smaller than 1.
    """
    if h < 1:
        raise ValueError("ar_update needs at least one lag (h >= 1)")

    values = df.values
    # Column block j holds the series shifted back by j+1 rows, so that every
    # block lines up row-for-row with values[h:].
    lagged = np.concatenate(
        [values[h - (j + 1):-(j + 1)] for j in range(h)],
        axis=1,
    )
    # Flatten (lag, column) per equation; row-major order gives the same
    # lag-major column layout as `lagged`.
    coefs = beta[:n, :h].reshape((n, -1))
    return pm.math.dot(lagged, coefs.T)

def bvar_model(h,df,priors):
    """Build a Bayesian VAR(h) model on ``df`` and sample its posterior.

    The first ``h`` rows of ``df`` are used only as lagged regressors; rows
    ``h:`` are the observations.

    Parameters
    ----------
    h : int
        Number of lags.
    df : pandas.DataFrame
        One column per series, one row per time step.
    priors : dict
        Must provide ``priors["beta"]["mu"]``, ``priors["beta"]["sigma"]``,
        ``priors["alpha"]["mu"]``, ``priors["alpha"]["sigma"]`` and
        ``priors["noise"]["sigma"]``.

    Returns
    -------
    (model, trace)
        The ``pm.Model`` and whatever ``pm.sample`` returns for it.
    """
    coords = {
        "lags": np.arange(h)+1,
        "vars": df.columns.tolist(),
        "eqs": df.columns.tolist(),
        "time": df.index[h:].tolist(),
    }

    with pm.Model(coords=coords) as model:
        # NOTE(review): ar_update indexes beta as [equation, lag, regressor],
        # i.e. the axis labelled "vars" here selects the equation and "eqs" the
        # regressor. Both coords carry the same labels, so this only affects how
        # the axes are named in the trace; it is left unchanged in case other
        # cells select on these names.
        beta = pm.Normal(
            "beta",
            mu=priors["beta"]["mu"],
            sigma=priors["beta"]["sigma"],
            dims=("vars", "lags", "eqs"),
        )
        alpha = pm.Normal(
            "alpha",
            mu=priors["alpha"]["mu"],
            sigma=priors["alpha"]["sigma"],
            dims=("vars",),
        )
        data_obs = pm.Data(
            "data_obs",
            df.values[h:],
            dims=("time", "vars"),
            mutable=True
        )

        n = df.shape[1]
        betaX = ar_update(beta, n, h, df)
        # betaX has one row per time step and one column per series, so it needs
        # both dim labels; recent PyMC releases raise when the number of labels
        # does not match the tensor's ndim.
        betaX = pm.Deterministic(
            "betaX",
            betaX,
            dims=("time", "vars"),
        )
        mu = alpha + betaX
        sigma = pm.HalfNormal(
            "noise",
            sigma=priors["noise"]["sigma"],
            dims=("vars",),
        )
        obs = pm.Normal(
            "obs",
            mu=mu,
            sigma=sigma,
            observed=data_obs,
            dims=("time", "vars"),
        )
        trace = pm.sample(chains=4, random_seed=42, target_accept=0.95)
    return model, trace

#### results

In [None]:
# Number of lags passed to the VAR model.
# NOTE(review): presumably one week of daily rows -- confirm the sampling
# interval of the data.
h = 7

# Prior hyper-parameters: Normal(mu, sigma) for the lag coefficients ("beta")
# and intercepts ("alpha"), HalfNormal(sigma) for the observation noise.
priors = dict(
    beta=dict(mu=0.0, sigma=5.0),
    alpha=dict(mu=0.0, sigma=5.0),
    noise=dict(sigma=0.5),
)

# Builds the model and runs the sampler.
model, trace = bvar_model(h=h, df=df, priors=priors)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [beta, alpha, noise]


In [22]:
# Optional: uncomment to render the model graph.
# pm.model_to_graphviz(model)

# Posterior summary for the intercepts, lag coefficients and noise scales.
# NOTE(review): in the recorded output r_hat is well above 1 (up to 2.91) and
# ess_bulk is in the single digits for several parameters, so that run should
# not be read as converged.
az.summary(
    trace,
    var_names=["alpha", "beta", "noise"],
)

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
alpha[Orange County supply],-0.545,0.566,-1.349,0.350,0.258,0.195,5.0,17.0,2.22
alpha[Orange County demand],-3.942,7.055,-20.200,1.108,3.398,2.587,5.0,11.0,2.91
alpha[Los Angeles supply],-1.046,2.642,-8.000,2.028,1.008,0.745,7.0,19.0,1.75
alpha[Los Angeles demand],4.213,6.783,-2.327,20.696,3.153,2.388,5.0,11.0,2.60
alpha[Alta Bates supply],-0.694,2.740,-8.515,2.917,0.894,0.678,8.0,21.0,1.97
...,...,...,...,...,...,...,...,...,...
noise[Los Angeles demand],4.652,0.075,4.523,4.791,0.012,0.008,43.0,228.0,1.08
noise[Alta Bates supply],1.074,0.020,1.038,1.111,0.003,0.002,43.0,47.0,1.06
noise[Alta Bates demand],1.029,0.018,0.998,1.062,0.003,0.002,42.0,252.0,1.09
noise[Oakland supply],0.549,0.008,0.532,0.563,0.002,0.001,19.0,43.0,1.18
