# In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import sys
import pandas as pd
import urllib.request
import math
import gpytorch
from matplotlib import pyplot as plt
import random
import numpy as np
import urllib.request
import os
from math import floor
import black_box
import tqdm

# In [2]:
# Default width of the raw input features fed to the feature extractor.
data_dim = 102


class FCN(torch.nn.Sequential):
    """Small fully connected feature extractor: Linear -> ReLU -> Linear.

    The sub-modules are registered as ``linear1``, ``relu1`` and ``linear2``.

    Args:
        input_dim: Width of the input features. When ``None`` (the default),
            the module-level ``data_dim`` is read at construction time.
        hidden_dim: Width of the hidden layer.
        output_dim: Width of the produced features.
    """

    def __init__(self, input_dim=None, hidden_dim=50, output_dim=2):
        super().__init__()
        # Resolve the default lazily so a later reassignment of ``data_dim``
        # is still honoured by ``FCN()``.
        if input_dim is None:
            input_dim = data_dim
        self.add_module('linear1', torch.nn.Linear(input_dim, hidden_dim))
        self.add_module('relu1', torch.nn.ReLU())
        self.add_module('linear2', torch.nn.Linear(hidden_dim, output_dim))


# Module-level extractor instance; GPRegressionModel picks up this global.
feature_extractor = FCN()

class GPRegressionModel(gpytorch.models.ExactGP):
    """Exact GP whose kernel operates on neural-network features.

    Inputs are passed through the module-level ``feature_extractor``,
    rescaled by ``ScaleToBounds(-1., 1.)`` and then fed to a constant mean
    and a grid-interpolated, scaled ARD RBF kernel over two dimensions.
    """

    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)

        self.mean_module = gpytorch.means.ConstantMean()

        # Build the kernel inside-out: ARD RBF -> output scale -> grid
        # interpolation over the 2-D feature space.
        rbf = gpytorch.kernels.RBFKernel(ard_num_dims=2)
        scaled_rbf = gpytorch.kernels.ScaleKernel(rbf)
        self.covar_module = gpytorch.kernels.GridInterpolationKernel(
            scaled_rbf,
            num_dims=2,
            grid_size=100,
        )

        # Shared global network (not a per-model copy).
        self.feature_extractor = feature_extractor

        # Rescales the network output so the features are "nice" values.
        self.scale_to_bounds = gpytorch.utils.grid.ScaleToBounds(-1., 1.)

    def forward(self, x):
        """Return the GP prior over ``x`` as a ``MultivariateNormal``."""
        # Deep net first, then rescale its output before the GP sees it.
        features = self.scale_to_bounds(self.feature_extractor(x))

        mean = self.mean_module(features)
        covariance = self.covar_module(features)
        return gpytorch.distributions.MultivariateNormal(mean, covariance)


def train(train_x, train_y, num_device=0, *, training_iterations=60, lr=0.01,
          patience=5):
    """Fit a ``GPRegressionModel`` by minimising the negative exact MLL with Adam.

    Training stops early once the loss has failed to improve on the best
    value seen so far for ``patience`` consecutive iterations.

    Args:
        train_x: Training inputs.
        train_y: Training targets.
        num_device: Index of the CUDA device to use when CUDA is available;
            otherwise training runs on the CPU.
        training_iterations: Maximum number of optimisation steps.
        lr: Learning rate passed to Adam.
        patience: Number of consecutive non-improving iterations that
            triggers early stopping. Must be at least 1.

    Returns:
        A ``(model, likelihood)`` tuple. Both are left in training mode;
        call ``.eval()`` on them before making predictions.

    Raises:
        ValueError: If ``patience`` is smaller than 1.
    """
    if patience < 1:
        raise ValueError(f"patience must be >= 1, got {patience}")

    device = torch.device(f'cuda:{num_device}' if torch.cuda.is_available() else 'cpu')
    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    model = GPRegressionModel(train_x, train_y, likelihood)

    model.to(device)
    likelihood.to(device)

    optimizer = torch.optim.Adam(
        [
            {'params': model.feature_extractor.parameters()},
            {'params': model.covar_module.parameters()},
            {'params': model.mean_module.parameters()},
            {'params': model.likelihood.parameters()},
        ],
        lr=lr,
    )
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    # The data does not change between iterations, so move it only once.
    inputs = train_x.to(device)
    targets = train_y.to(device)

    model.train()
    likelihood.train()

    # Track the best loss as a plain float: keeping the loss tensor itself
    # would keep its autograd graph referenced between iterations.
    best_loss = float('inf')
    stalled_iterations = 0
    for _ in tqdm.tqdm(range(training_iterations)):
        optimizer.zero_grad()
        output = model(inputs)
        loss = -mll(output, targets)

        current_loss = loss.item()
        if current_loss >= best_loss:
            stalled_iterations += 1
        else:
            stalled_iterations = 0
            best_loss = current_loss

        # Stop before taking another step once progress has stalled.
        if stalled_iterations >= patience:
            break

        loss.backward()
        optimizer.step()

    return model, likelihood
