#### Exile on Online Opposition

Examine the effect of exile on percentage of tweets on repression of Venezuela government.

ESBERG, J., & SIEGEL, A. (2023). How Exile Shapes Online Opposition: Evidence from Venezuela. American Political Science Review, 117(4), 1361-1378.

In [9]:
# load gpytoch and other packages
import torch
import numpy as np
import pandas as pd
import gpytorch
from scipy.stats import norm
from matplotlib import pyplot as plt
from gpytorch.means import ZeroMean, LinearMean
from gpytorch.likelihoods import GaussianLikelihood
from gpytorch.kernels import ScaleKernel, RBFKernel
from datetime import datetime

from gpytorch.means import Mean
from gpytorch.models import ApproximateGP
from gpytorch.variational import CholeskyVariationalDistribution
from gpytorch.variational import VariationalStrategy
from typing import Optional, Tuple
from torch.utils.data import TensorDataset, DataLoader

torch.set_default_dtype(torch.float64)
torch.manual_seed(12345)

num_inducing = 3000
batch_size = 256
num_epochs = 50

we build a Gaussian process regression model

In [10]:
def diff_month(d1, d2):
    d1 = datetime.strptime(d1,"%Y-%m-%d")
    d2 = datetime.strptime(d2,"%Y-%m-%d")
    return (d1.year - d2.year) * 12 + d1.month - d2.month

def to_month(d1):
    return datetime(2013 + int(d1 / 12), ((1 +d1) % 12) + 1, 1)

class ConstantVectorMean(gpytorch.means.mean.Mean):
    def __init__(self, d=1, prior=None, batch_shape=torch.Size(), **kwargs):
        super().__init__()
        self.batch_shape = batch_shape
        self.register_parameter(name="constantvector",\
                 parameter=torch.nn.Parameter(torch.zeros(*batch_shape, d)))
        if prior is not None:
            self.register_prior("mean_prior", prior, "constantvector")

    def forward(self, input):
        return self.constantvector[input.int().reshape((-1,)).tolist()]
    
class MaskMean(gpytorch.means.mean.Mean):
    def __init__(
        self,
        base_mean: gpytorch.means.mean.Mean,
        active_dims: Optional[Tuple[int, ...]] = None,
        **kwargs,
    ):
        super().__init__()
        if active_dims is not None and not torch.is_tensor(active_dims):
            active_dims = torch.tensor(active_dims, dtype=torch.long)
        self.active_dims = active_dims
        self.base_mean = base_mean
    
    def forward(self, x, **params):
        return self.base_mean.forward(x.index_select(-1, self.active_dims), **params)

class GPModel(ApproximateGP):
    def __init__(self, inducing_points, unit_num):
        self.unit_num = unit_num
        variational_distribution = CholeskyVariationalDistribution(inducing_points.size(0))
        variational_strategy = VariationalStrategy(self, inducing_points, variational_distribution, learn_inducing_locations=False)
        super(GPModel, self).__init__(variational_strategy)

        # linear mean
        self.mean_module = LinearMean(input_size=(2), bias=False)
        self.unit_mean = torch.nn.ModuleList([LinearMean(input_size=(1),bias=True) for _ in range(unit_num)])
        self.covar_module = ScaleKernel(RBFKernel(ard_num_dims=(2), active_dims=[2,3]))
        self.t_covar_module = ScaleKernel(RBFKernel(active_dims=[0])*RBFKernel(active_dims=[1]))
        self.g_covar_module = ScaleKernel(RBFKernel(active_dims=[1]))

    def forward(self, x):
        mean_x = self.mean_module(x[:,2:]) 
        for i in range(self.unit_num):
            mean_x[x[:,0]==i] += self.unit_mean[i](x[i,1].reshape((-1,1)))
        covar_x =  self.covar_module(x) + self.t_covar_module(x)  + self.g_covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

load data and train

In [11]:
# read data
Y_name = "perc_repression" # "perc_harsh_criticism"
data = pd.read_csv("./data/exile.csv")
data = data[[Y_name, "tweeted_exile", "month","num_tweets", "actor.id"]]

# xs: unit id, month, log_num_tweets, tweeted_exile
xs = data.month.apply(lambda x: diff_month(x,"2013-01-01"))
xs = torch.tensor(np.array([data["actor.id"].astype('category').cat.codes.values.reshape((-1,)),\
            xs.values.reshape((-1,)),
            np.log(data.num_tweets.values+1).reshape((-1,)), \
            data['tweeted_exile'].values.reshape((-1,))]).T)
# xs = torch.cat((xs, (xs[:, 1] * xs[:, -1]).reshape(-1,1)), dim=1)
ys = torch.tensor(data[Y_name].values).double()

to_unit = dict(enumerate(data["actor.id"].astype('category').cat.categories))
del data

# define inducing points and learn
inducing_points = xs[np.random.choice(xs.size(0),num_inducing,replace=False),:]
# inducing_points = xs[xs[:,1] % 10==0]
model = GPModel(inducing_points=inducing_points, unit_num=xs[:,0].unique().size()[0]).double()
likelihood = GaussianLikelihood().double()
del inducing_points

train_dataset = TensorDataset(xs, ys)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

hypers = {
'mean_module.weights': torch.tensor([0, 5]),
'covar_module.outputscale': 9,
'covar_module.base_kernel.lengthscale': torch.std(xs[:,2:4],axis=0),
't_covar_module.base_kernel.kernels.1.lengthscale': torch.tensor([12]),
't_covar_module.outputscale': 4,
'g_covar_module.base_kernel.lengthscale': torch.tensor([24]),
'g_covar_module.outputscale': 9
}    

model = model.initialize(**hypers)

# initialize model parameters
model.t_covar_module.base_kernel.kernels[0].raw_lengthscale.requires_grad_(False)
model.t_covar_module.base_kernel.kernels[0].lengthscale = 0.01
# model.t_covar_module.base_kernel.kernels[1].raw_lengthscale.requires_grad_(False)
# model.covar_module.base_kernel.raw_lengthscale.requires_grad_(False)
likelihood.noise = 9.

# train model
model.train()
likelihood.train()

optimizer = torch.optim.Adam([
    {'params': list(set(model.parameters()) \
                - {model.t_covar_module.base_kernel.kernels[0].raw_lengthscale,\
                })},
    {'params': likelihood.parameters()},
], lr=0.1)

# "Loss" for GPs
mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=ys.size(0))

for i in range(num_epochs):
    for j, (x_batch, y_batch) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(x_batch)
        loss = -mll(output, y_batch)
        loss.backward()
        optimizer.step()
        if j % 50 == 0:
            print('Epoch %d Iter %d - Loss: %.3f' % (i + 1, j+1, loss.item()))


Epoch 1 Iter 1 - Loss: 203.004


KeyboardInterrupt: 

generate predictive values

In [None]:
# set model and likelihood to evaluation mode
model.eval()
likelihood.eval()

with torch.no_grad(), gpytorch.settings.fast_pred_var():
    out = model(xs)
    mll.combine_terms = True
    loss = -mll(out, ys)
    mu_f = out.mean.numpy()
    lower, upper = out.confidence_region()

# store results
results = pd.DataFrame({"gpr_mean":mu_f})
results['true_y'] = ys
results['gpr_lwr'] = lower
results['gpr_upr'] = upper
results['month'] = np.array([to_month(x) for x in xs[:,1].numpy().astype(int)])
results['unit'] = np.array([to_unit[x] for x in xs[:,0].numpy().astype(int)])
results['exile'] = xs[:,3].numpy().astype(int)

test_x0 = xs.clone().detach().requires_grad_(False)
test_x0[:,3] = 0

# in eval mode the forward() function returns posterioir
with torch.no_grad(), gpytorch.settings.fast_pred_var():
    out0 = model(test_x0)
    lower, upper = out0.confidence_region()

results['cf'] = out0.mean.numpy()
results['cf_lower'] = lower
results['cf_upper'] = upper

if Y_name == "perc_harsh_criticism":
    abbr = "crit"
else:
    abbr = "repr"
results.to_csv("./results/exile_{}_fitted_gpr.csv".format(abbr),index=False) #save to file


In [None]:
model.eval()
likelihood.eval()

# copy training tesnor to test tensors and set exile to 1 and 0
test_x1 = xs.clone().detach().requires_grad_(False)
test_x1[:,3] = 1
test_x0 = xs.clone().detach().requires_grad_(False)
test_x0[:,3] = 0

# in eval mode the forward() function returns posterioir
with torch.no_grad(), gpytorch.settings.fast_pred_var():
    out = model(xs)
    mll.combine_terms = False
    loss, _ , _ = mll(out, ys)
    loss = -loss*out.event_shape[0]
    out1 = model(test_x1)
    out0 = model(test_x0)

# compute ATE and its uncertainty
effect = out1.mean.numpy()[xs[:,3]==1].mean() - out0.mean.numpy()[xs[:,3]==1].mean()
effect_std = np.sqrt((out1.variance.detach().numpy()[xs[:,3]==1].mean()\
                    +out0.variance.detach().numpy()[xs[:,3]==1].mean()))
BIC = (3+2+1)*\
    torch.log(torch.tensor(xs.size()[0])) + 2*loss # *xs.size(0)/batch_size
print("ATE: {:0.3f} +- {:0.3f}\n".format(effect, effect_std))
print("model evidence: {:0.3f} \n".format(-loss))
print("BIC: {:0.3f} \n".format(BIC))


ATE: 5.098 +- 2.533

model evidence: -118381.267 

BIC: 236822.707 

tensor([21.6032], grad_fn=<AddBackward0>)
tensor(7.6672, grad_fn=<SoftplusBackward0>)
tensor([[12.8047]], grad_fn=<SoftplusBackward0>)
tensor(3.1736, grad_fn=<SoftplusBackward0>)
tensor([[19.4684]], grad_fn=<SoftplusBackward0>)
tensor(6.1357, grad_fn=<SoftplusBackward0>)
tensor([[4.2738, 0.2695]], grad_fn=<SoftplusBackward0>)
