In [1]:
import math
import tqdm
import torch
import gpytorch
from matplotlib import pyplot as plt

In [2]:
import urllib.request
import os
from scipy.io import loadmat
from math import floor


# this is for running the notebook in our testing framework
smoke_test = ('CI' in os.environ)


# NOTE, CAN NO LONGER DOWNLOAD THIS DATA DIRECTLY FROM GDRIVE LINK
if not smoke_test and not os.path.isfile('./data/elevators.mat'):
    print('Downloading \'elevators\' UCI dataset...')
    urllib.request.urlretrieve(
        'https://drive.google.com/uc?export=download&id=1jhWL3YUHvXIaftia4qeAyDwVxo6j1alk',
        './data/elevators.mat'
    )


if smoke_test:  # this is for running the notebook in our testing framework
    X, y = torch.randn(20, 3), torch.randn(20)
else:
    # using just the first 1200 points
    num_data = 1200
    data = torch.Tensor(loadmat('./data/elevators.mat')['data'])[1000:num_data+1000]
    X = data[:, :-1]
    X = X - X.min(0)[0]
    X = 2 * (X / X.max(0)[0]) - 1
    X = X.nan_to_num(0.)
    y = data[:, -1]


train_n = int(floor(0.8 * len(X)))
train_x = X[:train_n, :].contiguous()
train_y = y[:train_n].contiguous()

test_x = X[train_n:, :].contiguous()
test_y = y[train_n:].contiguous()

if torch.cuda.is_available():
    train_x, train_y, test_x, test_y = train_x.cuda(), train_y.cuda(), test_x.cuda(), test_y.cuda()

In [3]:
data_dim = train_x.size(-1)

class LargeFeatureExtractor(torch.nn.Sequential):
    def __init__(self):
        super(LargeFeatureExtractor, self).__init__()
        self.add_module('linear1', torch.nn.Linear(data_dim, 128))
        # self.add_module('relu1', torch.nn.ReLU())
        # self.add_module('linear2', torch.nn.Linear(1000, 500))
        # self.add_module('relu2', torch.nn.ReLU())
        # self.add_module('linear3', torch.nn.Linear(500, 50))
        # self.add_module('relu3', torch.nn.ReLU())
        # self.add_module('linear4', torch.nn.Linear(50, 3))
        self.add_module('tanh1', torch.nn.Tanh())

feature_extractor = LargeFeatureExtractor()

In [4]:
class GPRTest(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(GPRTest, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel(ard_num_dims=128)
        )

    def forward(self, projected_x):
        mean_x = self.mean_module(projected_x)
        covar_x = self.covar_module(projected_x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

In [5]:
class ModularModel(gpytorch.models.GP):
    def __init__(self, representation, regressor, likelihood, train_y):
        """
            Args:
                - gp (gpytorch.models.ExactGP): A GP that expects to operate on features extracted by a GCN.
                - gcn (torch.nn.Module): Some PyTorch module that extracts graph features.
                - train_x (any input to gcn): The training data as expected by the GCN
                - train_y (torch.Tensor): Training labels
        """
        super(ModularModel, self).__init__()
        self.representation = representation
        self.regressor = regressor
        self.likelihood = likelihood

        self.train_targets = train_y
    
    def forward(self, train_x):
        train_h = self.representation(train_x)
        if self.training:
            self.regressor.set_train_data(train_h, self.train_targets, strict=False)
        return self.regressor(train_h)

In [6]:
class IntegratedModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(IntegratedModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = (
            # gpytorch.kernels.GridInterpolationKernel(
            gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
            # num_dims=128, grid_size=100
        )
        self.feature_extractor = feature_extractor

        # This module will scale the NN features so that they're nice values
        # self.scale_to_bounds = gpytorch.utils.grid.ScaleToBounds(-1., 1.)

    def forward(self, x):
        # We're first putting our data through a deep net (feature extractor)
        projected_x = self.feature_extractor(x)
        # projected_x = self.scale_to_bounds(projected_x)  # Make the NN values "nice"

        mean_x = self.mean_module(projected_x)
        covar_x = self.covar_module(projected_x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

In [7]:
def make_model_and_likelihood(architecture='modular'):
    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    
    if architecture == 'modular':
        gprt = GPRTest(train_x, train_y, likelihood)
        # the requirement to give train_y is unnecessary with a smarter training loop
        model = ModularModel(feature_extractor, gprt, likelihood, train_y)
    else:
        model = IntegratedModel(train_x, train_y, likelihood)

    if torch.cuda.is_available():
        model = model.cuda()
        likelihood = likelihood.cuda()
    
    return model, likelihood

In [8]:
def train_model(model, likelihood, train_x, train_y, smoke_test=False):
    def _train():
        # iterator = tqdm.notebook.tqdm(range(training_iterations))
        for i in range(training_iterations):
            # Zero backprop gradients
            optimizer.zero_grad()
            # Get output from model
            output = model(train_x)
            # Calc loss and backprop derivatives
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()
        return model

    smoke_test = False
    training_iterations = 2 if smoke_test else 60

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()

    # Use the adam optimizer
    optimizer = torch.optim.Adam([
        {'params': model.parameters()},
    ], lr=0.01)

    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    # train
    model = _train()
    
    return model

def test(model):
    from scipy.stats import pearsonr
    model.eval()
    with torch.no_grad(), gpytorch.settings.use_toeplitz(False), gpytorch.settings.fast_pred_var():
        preds = model(test_x)
    mae = torch.mean(torch.abs(preds.mean - test_y)).item()
    corr, _ = pearsonr(preds.mean.tolist(), test_y.tolist())
    return mae, corr

results = []
for architecture in ['modular', 'integrated']:
    for i in tqdm.tqdm(range(4)):
        model, likelihood = make_model_and_likelihood(architecture=architecture)
        model = train_model(model, likelihood, train_x, train_y, smoke_test=False)
        mae, corr = test(model)
        results.append({'architecture': architecture, 'mae': mae, 'corr': corr})

100%|█████████████████████████████████████████████| 4/4 [00:23<00:00,  5.79s/it]
100%|█████████████████████████████████████████████| 4/4 [00:18<00:00,  4.54s/it]


In [9]:
import pandas as pd
pd.DataFrame(results).groupby('architecture').agg(('mean', 'std'))

Unnamed: 0_level_0,mae,mae,corr,corr
Unnamed: 0_level_1,mean,std,mean,std
architecture,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
integrated,0.084036,0.001815,0.90835,0.004538
modular,0.084179,0.002189,0.911018,0.005147
