#### Electoral parties

In [23]:
# load gpytoch and other packages
import torch
import numpy as np
import pandas as pd
import gpytorch
from scipy.stats import norm
from matplotlib import pyplot as plt
from gpytorch.means import ZeroMean, LinearMean
from gpytorch.likelihoods import GaussianLikelihood
from gpytorch.kernels import ScaleKernel, RBFKernel
from datetime import datetime

torch.set_default_dtype(torch.float64)
torch.manual_seed(12345)

<torch._C.Generator at 0x7fd69db045a0>

In [24]:
# read data
data = pd.read_stata("./data/rep_clark_2006a.dta")
data = data[["enep1", "eneg", "logmag", "uppertier", "enpres", "proximity1"]]

# train_x: eneg, logmag, uppertier, enpres, proximity1
train_x = torch.tensor((data.eneg.values, data.logmag.values, data.uppertier.values,\
    data.enpres.values, data.proximity1.values, data.eneg.values*data.logmag.values,\
    data.uppertier.values*data.eneg.values, data.proximity1.values*data.eneg.values)).t().double()
train_y = torch.tensor(data.enep1.values).double()

In [25]:
from gpytorch.models import ExactGP

class GPModel(ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = LinearMean(input_size=train_x.size(1), bias=True)
        self.covar_module = ScaleKernel(RBFKernel(ard_num_dims=train_x.size(1)))

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

In [26]:
likelihood = GaussianLikelihood()
model = GPModel(train_x, train_y, likelihood).double()

# train model
model.train()
likelihood.train()

all_params = set(model.parameters())
optimizer = torch.optim.Adam(all_params, lr=0.1)

# "Loss" for GPs - the marginal log likelihood
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

training_iter = 500
for i in range(training_iter):
    optimizer.zero_grad()
    output = model(train_x)
    loss = -mll(output, train_y)
    loss.backward()
    if i % 50 == 0:
        print('Iter %d/%d - Loss: %.3f '  % (
            i , training_iter, loss.item()
        ))
    optimizer.step()

Iter 0/500 - Loss: 200.988 
Iter 50/500 - Loss: 2.575 
Iter 100/500 - Loss: 1.867 
Iter 150/500 - Loss: 1.710 
Iter 200/500 - Loss: 1.640 
Iter 250/500 - Loss: 1.602 
Iter 300/500 - Loss: 1.575 
Iter 350/500 - Loss: 1.554 
Iter 400/500 - Loss: 1.525 
Iter 450/500 - Loss: 1.505 


In [27]:
model.eval()
likelihood.eval()

# number of empirically sample 
n_samples = 100

with gpytorch.settings.fast_pred_var():
    test_x = train_x.clone().detach().requires_grad_(True)
    observed_pred = likelihood(model(test_x))
    x_grad = torch.autograd.grad(observed_pred.mean.sum(), test_x, retain_graph=True)[0]

    sampled_pred = observed_pred.rsample(torch.Size([n_samples]))
    sampled_dydtest_x = torch.stack([torch.autograd.grad(pred.sum(),\
                                     test_x, retain_graph=True)[0] for pred in sampled_pred])
    



In [32]:
est_std = np.sqrt(sampled_dydtest_x.mean(1).var(0) + \
                  sampled_dydtest_x.var(1).mean(0)).round(decimals=5)
covariate_names = ["eneg", "logmag", "uppertier", "enpres", "proximity1",\
                   "logmag:eneg","uppertier:eneg","proximity1:eneg"]
results = pd.DataFrame({"x": covariate_names, \
                        'est_mean': x_grad.mean(axis=0),
                        'est_std': est_std})
results["t"] = results['est_mean'].values/results['est_std'].values
results["pvalue"] = 1 - norm.cdf(np.abs(results["t"].values))
print(results)

                 x  est_mean   est_std         t        pvalue
0             eneg -1.835348  14.27166 -0.128601  4.488367e-01
1           logmag  0.283421   0.23720  1.194860  1.160709e-01
2        uppertier -0.032908   0.05732 -0.574119  2.829436e-01
3           enpres  0.496657   0.28705  1.730209  4.179645e-02
4       proximity1 -0.727712   0.11130 -6.538296  3.111178e-11
5      logmag:eneg  0.224086   0.22973  0.975434  1.646726e-01
6   uppertier:eneg  0.058874   0.08464  0.695576  2.433472e-01
7  proximity1:eneg -0.046701   0.18241 -0.256024  3.989660e-01


In [33]:
model.eval()
likelihood.eval()

BIC = (5+5+1+1)*torch.log(torch.tensor(train_x.size()[0])) + 2*loss*train_x.size()[0]
print("model evidence: {:0.3f} \n".format(-loss*train_x.size()[0]))
print("BIC: {:0.3f} \n".format(BIC))

model evidence: -725.102 

BIC: 1524.462 



In [34]:
import statsmodels.formula.api as sm
lm = sm.ols('enep1 ~ eneg+logmag+uppertier+enpres+proximity1+\
        logmag:eneg+uppertier:eneg+proximity1:enpres', data).fit()
print(lm.summary())

                            OLS Regression Results                            
Dep. Variable:                  enep1   R-squared:                       0.397
Model:                            OLS   Adj. R-squared:                  0.387
Method:                 Least Squares   F-statistic:                     39.28
Date:                Tue, 31 Oct 2023   Prob (F-statistic):           5.38e-48
Time:                        14:17:50   Log-Likelihood:                -826.23
No. Observations:                 487   AIC:                             1670.
Df Residuals:                     478   BIC:                             1708.
Df Model:                           8                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
Intercept             2.9157      0.17