### Demonstration of Fully Bayesian Gaussian Process Regression (GPR) with multi-unit time-series data

The dataset used in this demo is the physical integrity rights index (PIRI) data from Strezhnev, Anton, Judith G. Kelley, and Beth A. Simmons. 2021. “Testing for Negative Spillovers: Is Promoting Human Rights Really Part of the ‘Problem’?” International Organization 75(1):71–102.

#### Going Fully Bayesian - Sampling Hyperparameters with NUTS

So far, our inference has relied on a single set of hyperparameters optimized using empirical Bayes. However, this does not account for the *uncertainty associated with the hyperparameters*, which might lead to model misspecification. Hence, we may adopt a fully Bayesian inference strategy that adds another layer of prior structure on the hyperparameters; the parameters specifying the shapes of those priors are sometimes referred to as *hyper-hyperparameters*.

GPyTorch integrates with Pyro, a probabilistic programming language designed for reasoning about probability and uncertainty in large-scale machine learning research, to sample GP hyperparameters and perform fully Bayesian inference. Here we follow the [GPyTorch documentation](https://docs.gpytorch.ai/en/stable/examples/01_Exact_GPs/GP_Regression_Fully_Bayesian.html) to demonstrate how to sample hyperparameters with the No-U-Turn Sampler ([NUTS](https://www.jmlr.org/papers/volume15/hoffman14a/hoffman14a.pdf)).

In [1]:
# load gpytorch and other packages
import torch
import numpy as np
import pandas as pd
import gpytorch
from scipy.stats import norm
from typing import Optional, Tuple
from matplotlib import pyplot as plt
from gpytorch.means import LinearMean
from gpytorch.likelihoods import GaussianLikelihood
from gpytorch.kernels import ScaleKernel, RBFKernel

# load pyro packages
import pyro
from pyro.infer.mcmc import NUTS, MCMC
import os
# A run is treated as a smoke test when the ``CI`` environment variable is
# set; smoke tests use 2 samples and 2 warm-up steps instead of 100 each.
smoke_test = "CI" in os.environ
if smoke_test:
    num_samples, warmup_steps = 2, 2
else:
    num_samples, warmup_steps = 100, 100
from gpytorch.priors import UniformPrior

#### Load and setup data

In [2]:
def load_PIRI_data(url="https://raw.githubusercontent.com/yahoochen97/GP_gradient/main/hb_data_complete.csv"):
    """Load the PIRI panel and convert it into tensors for GP regression.

    Args:
        url: Location of the CSV file; anything ``pandas.read_csv`` accepts
            (URL or local path). Defaults to the copy in the GP_gradient
            repository.

    Returns:
        A tuple ``(x, y, unit_means, data, countries, years)``:

        * ``x``: float64 tensor with one row per observation and 10 columns:
          0 year index, 1 country index, 2 AIShame (treatment indicator),
          3 cat_rat, 4 ccpr_rat, 5 democratic, 6 log_gdppc, 7 log_pop,
          8 Civilwar2, 9 War.
        * ``y``: float64 tensor holding the ``PIRILead1`` column.
        * ``unit_means``: float64 tensor with the mean of ``y`` per country,
          ordered like ``countries``.
        * ``data``: the filtered ``DataFrame``.
        * ``countries``: sorted list of country codes; position = country index.
        * ``years``: sorted array of years; position = year index.
    """
    data = pd.read_csv(url, index_col=[0])

    # New Zealand and the Netherlands are dropped (their PIRI is all zero).
    data = data.loc[~data['country'].isin(['N-ZEAL', 'NETHERL'])]

    countries = sorted(data.country.unique())
    # Sort the years as well, so the year index is chronological no matter
    # how the rows of the CSV happen to be ordered.
    years = np.sort(data.year.unique())
    n = len(countries)

    country_index = {country: idx for idx, country in enumerate(countries)}
    year_index = {year: idx for idx, year in enumerate(years)}

    covariate_columns = [
        "AIShame", "cat_rat", "ccpr_rat", "democratic",
        "log_gdppc", "log_pop", "Civilwar2", "War",
    ]

    # Allocate in float64 from the start: filling a float32 buffer and
    # up-casting afterwards would truncate the covariates to single precision.
    # NOTE: the current-year PIRI is deliberately not a column of x.
    x = torch.zeros(data.shape[0], 10, dtype=torch.double)
    x[:, 0] = torch.as_tensor([year_index[year] for year in data.year])
    x[:, 1] = torch.as_tensor([country_index[country] for country in data.country])
    for column_id, column in enumerate(covariate_columns, start=2):
        x[:, column_id] = torch.as_tensor(data[column].to_numpy())
    y = torch.as_tensor(data.PIRILead1.to_numpy()).double()

    # Per-country average outcome.
    unit_means = torch.zeros(n, dtype=torch.double)
    for i in range(n):
        unit_means[i] = y[x[:, 1] == i].mean()

    return x, y, unit_means, data, countries, years

# Load the panel once; the model-building cells below read these module-level names.
train_x, train_y, unit_means, data, countries, years = load_PIRI_data()

#### Customization of mean and kernel

In [3]:
class ConstantVectorMean(gpytorch.means.mean.Mean):
    """Mean function that looks up one learnable constant per integer id.

    Args:
        d: Number of constants stored in the ``constantvector`` parameter.
        prior: Optional prior registered on ``constantvector``.
        batch_shape: Leading batch dimensions of the parameter.
        **kwargs: Accepted and ignored.
    """

    def __init__(self, d=1, prior=None, batch_shape=torch.Size(), **kwargs):
        super().__init__()
        self.batch_shape = batch_shape
        # All constants start at zero.
        initial_constants = torch.zeros(*batch_shape, d)
        self.register_parameter(
            name="constantvector",
            parameter=torch.nn.Parameter(initial_constants),
        )
        if prior is None:
            return
        self.register_prior("constantvector_prior", prior, "constantvector")

    def forward(self, input):
        # The input is cast to int and flattened, then used as a list of
        # indices into the parameter: one constant per input entry.
        ids = input.int().reshape((-1,)).tolist()
        return self.constantvector[ids]
    
class MaskMean(gpytorch.means.mean.Mean):
    """Apply a base mean function to a subset of the input columns.

    Args:
        base_mean: Mean module evaluated on the selected columns.
        active_dims: Index (or indices) along the last input dimension that
            are passed to ``base_mean``. ``None`` (the default) passes the
            input through unchanged.
        **kwargs: Accepted and ignored.
    """

    def __init__(
        self,
        base_mean: gpytorch.means.mean.Mean,
        active_dims: Optional[Tuple[int, ...]] = None,
        **kwargs,
    ):
        super().__init__()
        if active_dims is not None:
            if not torch.is_tensor(active_dims):
                active_dims = torch.tensor(active_dims, dtype=torch.long)
            # ``index_select`` is documented for a 1-D index; a bare int such
            # as ``active_dims=1`` would otherwise stay a 0-dim tensor.
            active_dims = active_dims.reshape(-1)
        self.active_dims = active_dims
        self.base_mean = base_mean

    def forward(self, x, **params):
        """Evaluate ``base_mean`` on the active columns of ``x``."""
        if self.active_dims is None:
            # No mask configured: previously this crashed inside index_select.
            return self.base_mean.forward(x, **params)
        # ``active_dims`` is a plain attribute (not a buffer), so it does not
        # follow the module across devices; align it with the input here.
        columns = self.active_dims.to(x.device)
        return self.base_mean.forward(x.index_select(-1, columns), **params)

#### Build GPR model for multi-unit time-series data

In [4]:
# model specification: PIRI gp model with unit trends
# x_it : AIShame + cat_rat + ccpr_rat 
#            + democratic + log(gdppc) + log(pop) 
#            + Civilwar2 + War 
# y_i(t) ~ u_i(t) + f(x_{it}) + ε
# f(x_{it}) ~ GP(0, K_x)
# u_i(t) ~ GP(b_i, K_t)
# Fit an OLS regression with year and country dummies plus current PIRI.
# Its coefficients on the eight covariates are collected in ``x_weights``,
# which later seeds the weights of the GP's linear mean.
import statsmodels.formula.api as sm

ols_formula = 'PIRILead1 ~ AIShame  + cat_rat + ccpr_rat \
            + democratic + log_gdppc + log_pop \
            + Civilwar2 + War + C(year) + C(country) + PIRI'
lm = sm.ols(ols_formula, data).fit()

coefs = lm.params.to_dict()
covariate_names = [
    "AIShame",
    "cat_rat",
    "ccpr_rat",
    "democratic",
    "log_gdppc",
    "log_pop",
    "Civilwar2",
    "War",
]
# Same order as the covariate columns 2..9 of the design tensor.
x_weights = [coefs.get(name) for name in covariate_names]

class GPModel(gpytorch.models.PyroGP):
    """Pyro-compatible GP for the multi-unit PIRI panel.

    The prior mean is a per-country constant plus a linear term in the eight
    covariates (input columns 2-9). The prior covariance is a sum of scaled
    RBF kernels: one on (year x country), one on the treatment column 2, one
    for each of columns 6 and 7, and one for each of columns 3, 4, 5, 8, 9.
    Column indices follow the layout built in ``load_PIRI_data``.

    Args:
        train_x: Training inputs; also used as the inducing points.
        train_y: Training targets; only their count is used here.
        likelihood: Likelihood module attached to the model.
    """

    def __init__(self, train_x, train_y, likelihood):
        n_obs = train_y.numel()

        # Variational pieces required by PyroGP: one inducing point per
        # training observation, located at the training inputs.
        q_u = gpytorch.variational.CholeskyVariationalDistribution(
            num_inducing_points=n_obs,
        )
        strategy = gpytorch.variational.VariationalStrategy(self, train_x, q_u)

        super().__init__(
            strategy,
            likelihood,
            num_data=n_obs,
            name_prefix="simple_regression_model",
        )
        self.likelihood = likelihood

        # Constant country-level mean, looked up by the country id in column 1.
        n_units = train_x[:, 1].unique().size()[0]
        self.mean_module = MaskMean(
            active_dims=1,
            base_mean=ConstantVectorMean(d=n_units),
        )
        # Linear mean over the covariates in columns 2..9 (no intercept).
        self.x_mean_module = MaskMean(
            active_dims=list(range(2, 10)),
            base_mean=LinearMean(input_size=8, bias=False),
        )
        # Year kernel * country kernel.
        self.unit_covar_module = ScaleKernel(
            RBFKernel(active_dims=0) * RBFKernel(active_dims=1)
        )
        # One scaled RBF kernel per column 6 and 7.
        self.x_covar_module = torch.nn.ModuleList(
            [ScaleKernel(RBFKernel(active_dims=col)) for col in (6, 7)]
        )
        # One scaled RBF kernel per column 3, 4, 5, 8 and 9 (these are
        # kernels, not means).
        self.binary_covar_module = torch.nn.ModuleList(
            [ScaleKernel(RBFKernel(active_dims=col)) for col in (3, 4, 5, 8, 9)]
        )
        # Kernel on the treatment indicator in column 2.
        self.effect_covar_module = ScaleKernel(RBFKernel(active_dims=2))

    def forward(self, x):
        """Return the GP prior at ``x`` as a multivariate normal."""
        prior_mean = self.mean_module(x) + self.x_mean_module(x)

        unit_part = self.unit_covar_module(x)
        effect_part = self.effect_covar_module(x)
        prior_covar = unit_part + effect_part
        for kernel in self.x_covar_module:
            prior_covar += kernel(x)
        for kernel in self.binary_covar_module:
            prior_covar += kernel(x)

        return gpytorch.distributions.MultivariateNormal(prior_mean, prior_covar)

#### Hyperparameter tuning and training of GPR model

In [5]:
likelihood = GaussianLikelihood()
model = GPModel(train_x, train_y, likelihood).double()


def _as_double(value):
    """Wrap an initial value in a float64 tensor, matching the ``.double()`` model."""
    return torch.tensor(value, dtype=torch.double)


# Initial hyperparameter values. Every value is created in float64: integer
# literals such as ``torch.tensor(6)`` would be int64 tensors and Python
# floats would become float32, neither of which matches the model's dtype.
hypers = {
    # per-country means of the outcome and OLS coefficients as mean warm start
    'mean_module.base_mean.constantvector': unit_means,
    'x_mean_module.base_mean.weights': _as_double(x_weights),
    'likelihood.noise_covar.noise': _as_double(0.25),
    'unit_covar_module.base_kernel.kernels.0.lengthscale': _as_double(6.0),
    # very short lengthscale on the country id; presumably so that different
    # countries are (almost) uncorrelated - confirm
    'unit_covar_module.base_kernel.kernels.1.lengthscale': _as_double(0.01),
    'unit_covar_module.outputscale': _as_double(4.0),
    'x_covar_module.0.outputscale': _as_double(1.0),
    'x_covar_module.1.outputscale': _as_double(1.0),
}
n_binary = len(model.binary_covar_module)
hypers.update({
    f'binary_covar_module.{i}.base_kernel.lengthscale': _as_double(0.01)
    for i in range(n_binary)
})
hypers.update({
    f'binary_covar_module.{i}.outputscale': _as_double(1.0)
    for i in range(n_binary)
})
hypers.update({
    'effect_covar_module.base_kernel.lengthscale': _as_double(0.01),
    'effect_covar_module.outputscale': _as_double(1.0),
})

model = model.initialize(**hypers)

We register hyperpriors on the existing GP model and likelihood using the `register_prior()` function.

In [6]:
def _uniform_prior(low, high):
    """Build a ``UniformPrior`` whose bounds are float64 tensors.

    The model and the training data are cast to double, while a prior built
    from plain Python numbers uses torch's default dtype. Mixing the two is
    the likely cause of the dtype assertion raised when NUTS is run on this
    model - TODO confirm on the installed GPyTorch/Pyro versions.
    """
    return UniformPrior(
        torch.tensor(low, dtype=torch.double),
        torch.tensor(high, dtype=torch.double),
    )


# Year x country kernel: output scale and the year lengthscale get priors.
model.unit_covar_module.register_prior("outputscale_prior", _uniform_prior(1.0, 9.0), "outputscale")
model.unit_covar_module.base_kernel.kernels[0].register_prior("lengthscale_prior", _uniform_prior(1.0, 12.0), "lengthscale")
# Kernels in x_covar_module: output scale and lengthscale.
for kernel in model.x_covar_module:
    kernel.register_prior("outputscale_prior", _uniform_prior(0.1, 4.0), "outputscale")
    kernel.base_kernel.register_prior("lengthscale_prior", _uniform_prior(0.1, 4.0), "lengthscale")
# Kernels in binary_covar_module and the treatment kernel: output scale only,
# so their lengthscales receive no prior here.
for kernel in model.binary_covar_module:
    kernel.register_prior("outputscale_prior", _uniform_prior(0.1, 4.0), "outputscale")
model.effect_covar_module.register_prior("outputscale_prior", _uniform_prior(0.1, 4.0), "outputscale")
# Observation noise.
likelihood.register_prior("noise_prior", _uniform_prior(0.01, 1.0), "noise")

Define the Pyro model and run NUTS.

In [7]:
# Put the model and likelihood in training mode before sampling.
model.train()
likelihood.train()


def pyro_model(x, y):
    """Pyro model: sample hyperparameters from their priors and condition on ``y``.

    Args:
        x: Training inputs.
        y: Observed targets.

    Returns:
        ``y`` unchanged.
    """
    # NUTS relies on exact, deterministic log-probabilities and gradients, so
    # all three approximate code paths are switched off, as in the GPyTorch
    # fully Bayesian example this notebook follows. The original passed
    # (1, 1, 1), which turns them on.
    with gpytorch.settings.fast_computations(
        covar_root_decomposition=False, log_prob=False, solves=False
    ):
        sampled_model = model.pyro_sample_from_prior()
        output = sampled_model.likelihood(sampled_model(x))
        pyro.sample("obs", output, obs=y)
    return y


nuts_kernel = NUTS(pyro_model)
mcmc_run = MCMC(nuts_kernel, num_samples=num_samples, warmup_steps=warmup_steps, disable_progbar=smoke_test)
mcmc_run.run(train_x, train_y)

torch.linalg.solve_triangular has its arguments reversed and does not return a copy of one of the inputs.
X = torch.triangular_solve(B, A).solution
should be replaced with
X = torch.linalg.solve_triangular(A, B). (Triggered internally at  /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/native/BatchLinearAlgebra.cpp:2189.)
  res = torch.triangular_solve(right_tensor, self.evaluate(), upper=self.upper).solution


RuntimeError: !(has_different_input_dtypes && !config.promote_inputs_to_common_dtype_ && (has_undefined_outputs || config.enforce_safe_casting_to_output_ || config.cast_common_dtype_to_outputs_)) INTERNAL ASSERT FAILED at "/Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/TensorIterator.cpp":407, please report a bug to PyTorch. 

In [18]:
import os
import tqdm
# Smoke tests (``CI`` set in the environment) use 2 iterations with a single
# particle; otherwise 200 iterations with 256 particles.
smoke_test = "CI" in os.environ
if smoke_test:
    num_iter, num_particles = 2, 1
else:
    num_iter, num_particles = 200, 256


def train(lr=0.01):
    """Fit the variational GP with Pyro's stochastic variational inference.

    Runs ``num_iter`` SVI steps on ``train_x`` / ``train_y`` with a
    ``Trace_ELBO`` objective that uses ``num_particles`` vectorized particles.

    Args:
        lr: Adam learning rate. It was previously ignored in favour of a
            hard-coded 0.1; it is now passed to the optimizer.
    """
    optimizer = pyro.optim.Adam({"lr": lr})
    elbo = pyro.infer.Trace_ELBO(num_particles=num_particles, vectorize_particles=True, retain_graph=True)
    svi = pyro.infer.SVI(model.model, model.guide, optimizer, elbo)
    model.train()

    for _ in range(num_iter):
        model.zero_grad()
        svi.step(train_x, train_y)


# Pass 0.1 explicitly: that is the step size this cell effectively used
# while ``lr`` was being ignored.
train(lr=0.1)

torch.linalg.solve_triangular has its arguments reversed and does not return a copy of one of the inputs.
X = torch.triangular_solve(B, A).solution
should be replaced with
X = torch.linalg.solve_triangular(A, B). (Triggered internally at  /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/native/BatchLinearAlgebra.cpp:2189.)
  res = torch.triangular_solve(right_tensor, self.evaluate(), upper=self.upper).solution
