Demonstration of GPR for PIRI data with first-order autoregression

In [29]:
# load packages
import torch
import pandas as pd
import gpytorch
from typing import Optional, Tuple
from matplotlib import pyplot as plt
from gpytorch.means import LinearMean
from gpytorch.likelihoods import GaussianLikelihood
from gpytorch.kernels import ScaleKernel, RBFKernel
from gpytorch.kernels.kernel import Kernel
from gpytorch.lazy import InterpolatedLazyTensor
from gpytorch.utils.broadcasting import _mul_broadcast_shape

Implement the constant-vector mean module and the masking mean module

In [30]:
class ConstantVectorMean(gpytorch.means.mean.Mean):
    """Mean function that looks up one learnable constant per integer id.

    The module owns a parameter ``constantvector`` of shape
    ``(*batch_shape, d)`` initialised to zeros.  ``forward`` treats its input
    as integer ids and returns the matching entries of that vector.

    Args:
        d: number of distinct ids (length of the constant vector).
        prior: optional prior registered on ``constantvector`` under the
            name ``"mean_prior"``.
        batch_shape: leading batch dimensions of the parameter.
        **kwargs: accepted for signature compatibility; not used.
    """
    def __init__(self, d=1, prior=None, batch_shape=torch.Size(), **kwargs):
        super().__init__()
        self.batch_shape = batch_shape
        self.register_parameter(
            name="constantvector",
            parameter=torch.nn.Parameter(torch.zeros(*batch_shape, d)),
        )
        if prior is not None:
            self.register_prior("mean_prior", prior, "constantvector")

    def forward(self, input):
        """Return the constant associated with each id in ``input``.

        ``input`` is truncated to integers and flattened to one dimension.
        The lookup is done with a tensor index on the last dimension of
        ``constantvector``, which avoids a round-trip through a Python list
        and leaves any leading batch dimensions of the parameter intact.
        """
        ids = input.long().reshape(-1)
        return self.constantvector[..., ids]
    
class MaskMean(gpytorch.means.mean.Mean):
    """Apply a base mean to a subset of the input columns.

    Args:
        base_mean: mean module evaluated on the selected columns.
        active_dims: index (or indices) along the last input dimension that
            are passed to ``base_mean``.  ``None`` passes the input through
            unchanged.
        **kwargs: accepted for signature compatibility; not used.
    """
    def __init__(
        self,
        base_mean: gpytorch.means.mean.Mean,
        active_dims: Optional[Tuple[int, ...]] = None,
        **kwargs,
    ):
        super().__init__()
        if active_dims is not None:
            # Normalise ints, sequences and tensors to a 1-D long tensor, the
            # index form documented for torch.index_select.
            active_dims = torch.as_tensor(active_dims, dtype=torch.long).reshape(-1)
        self.active_dims = active_dims
        self.base_mean = base_mean

    def forward(self, x, **params):
        """Evaluate ``base_mean`` on the active columns of ``x``."""
        if self.active_dims is None:
            # No mask configured: behave like the wrapped mean.
            return self.base_mean.forward(x, **params)
        # The index is a plain attribute (not a buffer), so it does not follow
        # the module across devices; move it next to the data before use.
        columns = self.active_dims.to(x.device)
        return self.base_mean.forward(x.index_select(-1, columns), **params)


load data

In [31]:
def load_PIRI_data(path="hb_data_complete.csv"):
    """Load the PIRI panel and build double-precision model inputs.

    Rows for the countries ``N-ZEAL`` and ``NETHERL`` are dropped (their PIRI
    is all zero).

    Args:
        path: CSV file to read; its first column is used as the index.

    Returns:
        x: ``(rows, 11)`` double tensor with columns
            0 year id, 1 country id, 2 AIShame (treatment indicator),
            3 cat_rat, 4 ccpr_rat, 5 democratic, 6 log_gdppc, 7 log_pop,
            8 Civilwar2, 9 War, 10 PIRI.
        y: double tensor holding ``PIRILead1``.
        data: the filtered DataFrame.
        countries: sorted list of the remaining country codes.
        years: the distinct years, in order of first appearance.
    """
    # read data
    data = pd.read_csv(path, index_col=[0])

    # PIRI is all zero for New Zealand and the Netherlands
    data = data.loc[~data['country'].isin(['N-ZEAL','NETHERL'])]

    countries = sorted(data.country.unique())
    years = data.year.unique()

    # integer ids used by the unit / time fixed effects
    country_dict = {country: idx for idx, country in enumerate(countries)}
    year_dict = {year: idx for idx, year in enumerate(years)}

    # Allocate in double precision from the start: filling a float32 buffer
    # and upcasting afterwards rounds every covariate to single precision.
    x = torch.zeros(data.shape[0], 11, dtype=torch.double)
    x[:, 0] = torch.as_tensor(list(map(year_dict.get, data.year)))
    x[:, 1] = torch.as_tensor(list(map(country_dict.get, data.country)))

    # remaining columns are copied from the frame, in this fixed order
    covariate_columns = [
        'AIShame', 'cat_rat', 'ccpr_rat', 'democratic', 'log_gdppc',
        'log_pop', 'Civilwar2', 'War', 'PIRI',
    ]
    for col_idx, col_name in enumerate(covariate_columns, start=2):
        x[:, col_idx] = torch.as_tensor(data[col_name].to_numpy())

    y = torch.as_tensor(data.PIRILead1.to_numpy()).double()

    return x, y, data, countries, years

# Build the model inputs once; later cells reuse train_x / train_y for fitting
# and countries / years for the parameter count in the BIC.
train_x, train_y, data, countries, years = load_PIRI_data()

Build GPR model with first-order autoregression

In [32]:
# PIRI baseline model with first-order autoregression
# PIRILead1 ~ AIShame + PIRI + cat_rat + ccpr_rat 
#            + democratic + log(gdppc) + log(pop) 
#            + Civilwar2 + War + as.factor(year) + as.factor(country)

class GPModel(gpytorch.models.ExactGP):
    """Exact GP with unit/time fixed-effect means and additive RBF kernels.

    Mean: one constant per value of column 1 plus one constant per value of
    column 0.  Covariance: a sum of scaled RBF kernels, each acting on a
    single input column (2 for the treatment indicator; 6, 7, 10; and
    3, 4, 5, 8, 9).

    Args:
        train_x: training inputs; columns 0 and 1 are read to count the
            distinct time and unit ids.
        train_y: training targets.
        likelihood: likelihood passed on to ``ExactGP``.
        ard_num_dim: accepted but not used.
    """
    def __init__(self, train_x, train_y, likelihood, ard_num_dim=None):
        super().__init__(train_x, train_y, likelihood)

        # time and unit fixed effects: one constant per distinct id
        self.n = train_x[:, 1].unique().size(0)
        self.T = train_x[:, 0].unique().size(0)
        self.i_mean_module = MaskMean(
            base_mean=ConstantVectorMean(d=self.n), active_dims=1
        )
        self.t_mean_module = MaskMean(
            base_mean=ConstantVectorMean(d=self.T), active_dims=0
        )

        # covariate effects: one scaled RBF kernel per input column
        x_dims = [6, 7, 10]
        binary_dims = [3, 4, 5, 8, 9]
        self.x_covar_module = torch.nn.ModuleList(
            [ScaleKernel(RBFKernel(active_dims=dim)) for dim in x_dims]
        )
        self.binary_covar_module = torch.nn.ModuleList(
            [ScaleKernel(RBFKernel(active_dims=dim)) for dim in binary_dims]
        )

        # treatment (AIShame) effect
        self.effect_covar_module = ScaleKernel(RBFKernel(active_dims=2))

    def forward(self, x):
        """Return the prior ``MultivariateNormal`` at inputs ``x``."""
        mean_x = self.i_mean_module(x) + self.t_mean_module(x)

        # start from the treatment kernel, then add every covariate kernel
        covar_x = self.effect_covar_module(x)
        covariate_kernels = list(self.x_covar_module) + list(self.binary_covar_module)
        for kernel in covariate_kernels:
            covar_x += kernel(x)

        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

initialize model

In [16]:
likelihood = GaussianLikelihood()
model = GPModel(train_x, train_y, likelihood).double()

# initial hyperparameter values, keyed by the module path of each setting
hypers = {'likelihood.noise_covar.noise': torch.tensor(0.25)}

# output scale of each kernel in x_covar_module
for k in range(3):
    hypers[f'x_covar_module.{k}.outputscale'] = torch.tensor(0.25)

# short lengthscale for each kernel in binary_covar_module
for k in range(5):
    hypers[f'binary_covar_module.{k}.base_kernel.lengthscale'] = torch.tensor(0.01)

# treatment-effect kernel
hypers['effect_covar_module.base_kernel.lengthscale'] = torch.tensor(0.01)
hypers['effect_covar_module.outputscale'] = torch.tensor(0.25)

model.initialize(**hypers)

GPModel(
  (likelihood): GaussianLikelihood(
    (noise_covar): HomoskedasticNoise(
      (raw_noise_constraint): GreaterThan(1.000E-04)
    )
  )
  (i_mean_module): MaskMean(
    (base_mean): ConstantVectorMean()
  )
  (t_mean_module): MaskMean(
    (base_mean): ConstantVectorMean()
  )
  (x_covar_module): ModuleList(
    (0): ScaleKernel(
      (base_kernel): RBFKernel(
        (raw_lengthscale_constraint): Positive()
      )
      (raw_outputscale_constraint): Positive()
    )
    (1): ScaleKernel(
      (base_kernel): RBFKernel(
        (raw_lengthscale_constraint): Positive()
      )
      (raw_outputscale_constraint): Positive()
    )
    (2): ScaleKernel(
      (base_kernel): RBFKernel(
        (raw_lengthscale_constraint): Positive()
      )
      (raw_outputscale_constraint): Positive()
    )
  )
  (binary_covar_module): ModuleList(
    (0): ScaleKernel(
      (base_kernel): RBFKernel(
        (raw_lengthscale_constraint): Positive()
      )
      (raw_outputscale_constraint):

train the model by optimizing its hyperparameters

In [33]:
# train model
model.train()
likelihood.train()

torch.manual_seed(12345)

# Keep the lengthscales of the binary-covariate kernels and of the treatment
# (effect) kernel fixed at their current values; every other parameter,
# including the unit/time constant means, is optimized.
all_params = set(model.parameters())
frozen_params = [
    kernel.base_kernel.raw_lengthscale for kernel in model.binary_covar_module
]
frozen_params.append(model.effect_covar_module.base_kernel.raw_lengthscale)

# Filter by identity while walking model.parameters() so the optimizer gets
# its parameters in a deterministic order (a set difference does not).
frozen_ids = {id(param) for param in frozen_params}
final_params = [
    param for param in model.parameters() if id(param) not in frozen_ids
]
optimizer = torch.optim.Adam(final_params, lr=0.1)

# "Loss" for GPs - the marginal log likelihood
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

training_iter = 50
for i in range(training_iter):
    # Zero gradients from previous iteration
    optimizer.zero_grad()
    # Output from model
    output = model(train_x)
    # Calc loss and backprop gradients
    loss = -mll(output, train_y)
    loss.backward()
    print('Iter %d/%d - Loss: %.3f '  % (
        i + 1, training_iter, loss.item()
    ))
    optimizer.step()

Iter 1/50 - Loss: 2.546 
Iter 2/50 - Loss: 2.392 
Iter 3/50 - Loss: 2.263 
Iter 4/50 - Loss: 2.157 
Iter 5/50 - Loss: 2.070 
Iter 6/50 - Loss: 1.998 
Iter 7/50 - Loss: 1.940 
Iter 8/50 - Loss: 1.893 
Iter 9/50 - Loss: 1.854 
Iter 10/50 - Loss: 1.823 
Iter 11/50 - Loss: 1.798 
Iter 12/50 - Loss: 1.777 
Iter 13/50 - Loss: 1.761 
Iter 14/50 - Loss: 1.747 
Iter 15/50 - Loss: 1.736 
Iter 16/50 - Loss: 1.726 
Iter 17/50 - Loss: 1.719 
Iter 18/50 - Loss: 1.712 
Iter 19/50 - Loss: 1.706 
Iter 20/50 - Loss: 1.701 
Iter 21/50 - Loss: 1.697 
Iter 22/50 - Loss: 1.693 
Iter 23/50 - Loss: 1.689 
Iter 24/50 - Loss: 1.686 
Iter 25/50 - Loss: 1.683 
Iter 26/50 - Loss: 1.680 
Iter 27/50 - Loss: 1.677 
Iter 28/50 - Loss: 1.674 
Iter 29/50 - Loss: 1.671 
Iter 30/50 - Loss: 1.668 
Iter 31/50 - Loss: 1.666 
Iter 32/50 - Loss: 1.664 
Iter 33/50 - Loss: 1.661 
Iter 34/50 - Loss: 1.659 
Iter 35/50 - Loss: 1.657 
Iter 36/50 - Loss: 1.655 
Iter 37/50 - Loss: 1.653 
Iter 38/50 - Loss: 1.651 
Iter 39/50 - Loss: 1.

generate posterior of PIRI effects

In [34]:
# Snapshot the trained hyperparameters. state_dict() hands back tensors that
# share storage with the live parameters, so each entry is cloned; otherwise
# zeroing the output scales below would overwrite the snapshot as well.
full_model_state = {
    name: value.clone() for name, value in model.state_dict().items()
}

# Mean and covariance of the observations under the fitted model.
# NOTE(review): this assumes the model is still in training mode (no .eval()
# call is visible before this cell), so model(train_x) is the prior at the
# training inputs and the posterior is formed by hand below - confirm.
with torch.no_grad(), gpytorch.settings.fast_pred_var():
    out = likelihood(model(train_x))
    mu_f = out.mean
    V = out.covariance_matrix
    L = torch.linalg.cholesky(V, upper=False)

# Isolate the treatment-effect kernel by switching off every covariate kernel,
# then always restore the trained values so the model stays usable and this
# cell can be re-run.
try:
    with torch.no_grad(), gpytorch.settings.fast_pred_var():
        for kernel in model.x_covar_module:
            kernel.outputscale = 0
        for kernel in model.binary_covar_module:
            kernel.outputscale = 0
        effect_covar = model(train_x).covariance_matrix
finally:
    model.load_state_dict(full_model_state)

# get posterior effect mean: K_e V^{-1} (y - mu), with V = L L^T
alpha = torch.linalg.solve(L.t(),torch.linalg.solve(L,train_y-mu_f))
tmp = torch.linalg.solve(L, effect_covar)
post_effect_mean = effect_covar @ alpha
# get posterior effect covariance: K_e - K_e V^{-1} K_e
post_effect_covar = effect_covar - tmp.t() @ tmp

# difference of the average posterior effect between AIShame == 1 and == 0 rows
effect = post_effect_mean[train_x[:,2]==1].mean() - post_effect_mean[train_x[:,2]==0].mean()
# NOTE(review): this is the square root of the average pointwise posterior
# variance, not the standard deviation of the difference in group means -
# confirm it is the intended uncertainty summary.
effect_std = post_effect_covar.diag().mean().sqrt()

n_obs = train_x.size()[0]
# `loss` is the value from the last training iteration; detach it so the
# reported numbers do not carry an autograd graph
final_loss = loss.detach()
BIC = (6+5+1+len(years)+len(countries)+1)*torch.log(torch.tensor(n_obs)) + 2*final_loss*n_obs
print("effect: {:0.3f} +- {:0.3f}\n".format(effect, effect_std))
print("model evidence: {:0.3f} \n".format(-final_loss*n_obs))
print("BIC: {:0.3f} \n".format(BIC))

effect: 0.235 +- 0.037

model evidence: -3507.045 

BIC: 8309.840 

