In [None]:
import torch
import matplotlib.pyplot as plt

import online_gp
import gpytorch

import seaborn as sns
# Global seaborn configuration for every figure in the notebook; the three
# seaborn calls below are kept in their original order.
sns.set(font_scale=2.0)

# NOTE(review): `label_fs` is not referenced anywhere else in the visible notebook.
label_fs = 40

sns.set_style('whitegrid')
sns.set_palette("bright")

# Separate 10-colour "Paired" palette, reversed in place; the plotting code
# below indexes into this list to pick curve colours.
palette = sns.color_palette("Paired", 10)
palette.reverse()

# Last expression of the cell: shows the installed torch version as cell output.
torch.__version__

In [None]:
# Override the three palette slots that the plots in this notebook select
# through `col` (0, -2 and -1).
palette[0], palette[-2], palette[-1] = "#57068c", "#28619e", "#3dbbdb"

In [None]:
from online_gp.models import VariationalGPModel

In [None]:
# Shows the filesystem path of the imported gpytorch package as cell output
# (the value is specific to the environment the notebook runs in).
gpytorch.__file__

In [None]:
import pandas as pd

# Fetch the processed FX table from the COGP repository (the CSV has no header row).
fx_rawdata = pd.read_csv(
    'https://raw.githubusercontent.com/trungngv/cogp/master/data/fx/fx2007-processed.csv',
    header=None,
)

# Column 3 is the regression target; the row index serves as the 1-D input.
targets = torch.from_numpy(fx_rawdata[3].values).float()
inputs = torch.arange(0, fx_rawdata[3].shape[0]).view(-1,1).float()

# Keep only the first 60 observations.
inputs = inputs[:60]
targets = targets[:60]

# Standardise targets and inputs with the statistics of the retained subset.
tmean, tstd = targets.mean(), targets.std()
targets = (targets - tmean) / tstd

imean, istd = inputs.mean(), inputs.std()
inputs = (inputs - imean) / istd

In [None]:
def fit_model(mll, model, optimizer, x, y, num_steps=1000, verbose=True):
    """Run ``num_steps`` gradient steps that minimise ``-mll(model(x), y)``.

    Args:
        mll: callable taking ``(model(x), y)`` and returning the objective to maximise.
        model: callable applied to ``x`` on every step.
        optimizer: object exposing ``step()`` and ``zero_grad()``.
        x, y: inputs and targets passed through unchanged on every step.
        num_steps: number of optimisation steps.
        verbose: when true, print the loss roughly ten times over the run
            (every step when ``num_steps`` is 10 or fewer).
    """
    # Logging interval: a tenth of the run, or every step for short runs.
    log_every = num_steps // 10 if num_steps > 10 else 1

    for step_idx in range(num_steps):
        objective = -mll(model(x), y)
        objective.backward()
        optimizer.step()
        # Gradients are cleared after the update, so none remain on exit.
        optimizer.zero_grad()

        if verbose and step_idx % log_every == 0:
            print("Loss: ", objective)

In [None]:
def make_basic_plot(model, x, y, old_x=None, old_y=None, bounds=(-6., 6.), col = -2):
    """Draw the predictive mean and confidence band of ``model`` with the data and inducing points.

    Everything is drawn on the current pyplot axes; the caller creates the
    figure and decides whether to save or show it.

    Args:
        model: variational GP to visualise. It must be callable on an ``(n, 1)``
            tensor, return an object with ``mean`` and ``confidence_region()``,
            and expose ``variational_strategy.inducing_points``.
        x, y: current data, drawn as opaque crosses.
        old_x, old_y: optional earlier data, drawn as faded crosses.
        bounds: ``(start, end)`` of the evaluation grid, optionally followed by
            the number of grid points (100 when omitted).
        col: index into the notebook-level ``palette`` for the curve colour.

    Side effects:
        Switches ``model`` to eval mode.
    """
    model.eval()

    # `torch.linspace` requires an explicit `steps` in current torch releases;
    # 100 was the implicit default this notebook originally relied on.
    grid_args = tuple(bounds)
    if len(grid_args) == 2:
        grid_args = (*grid_args, 100)

    # Use the model that was passed in, not whichever model happens to be
    # bound to a notebook-level name.
    inducing_x = model.variational_strategy.inducing_points.data
    with torch.no_grad():
        test_x = torch.linspace(*grid_args).view(-1,1)
        pred_dist = model(test_x)
        pred_induc = model(inducing_x.view(-1,1))

    plt.plot(test_x, pred_dist.mean, label = "Predictive Mean", color = palette[col], linewidth=4, zorder=3)
    plt.fill_between(
        test_x.view(-1),
        *[bound.detach() for bound in pred_dist.confidence_region()],
        alpha = 0.3,
        color = palette[col],
        zorder=6
    )

    plt.scatter(x, y, color = "#d71e5e", label = "Current Data", marker = "x", s=100, zorder=20)
    plt.scatter(inducing_x, pred_induc.mean.detach(),
            color = "#220337", marker="*", label = "Inducing Points", s=150, zorder=15)
    if old_x is not None:
        plt.scatter(
            old_x, old_y, color = "#d71e5e", marker = "x", alpha = 0.3, s=100, label = "Old Data", zorder=15)

    # No legend is drawn here; callers add one when they want it.
    plt.xlabel("x", fontsize = 16)
    plt.ylabel("y", fontsize = 16)

## Fit in Online Mode


In [None]:
from torch import Tensor
from gpytorch.lazy import LazyTensor
from typing import Union

def _pivoted_cholesky_init(
    train_inputs: Tensor,
    kernel_matrix: Union[Tensor, LazyTensor],
    max_length: int,
    epsilon: float = 1e-10,
) -> Tensor:
    r"""
    A pivoted cholesky initialization method for the inducing points, originally proposed in
    [burt2020svgp] with the algorithm itself coming from [chen2018dpp]. Code is a PyTorch version from
    [chen2018dpp], copied from https://github.com/laming-chen/fast-map-dpp/blob/master/dpp.py.
    Args:
        train_inputs [Tensor]: training inputs
        kernel_matrix [Tensor or Lazy Tensor]: kernel matrix on the training inputs
        max_length [int]: number of inducing points to initialize
        epsilon [float]: numerical jitter for stability.
    """
    # this is numerically equivalent to iteratively performing a pivoted cholesky
    # while storing the diagonal pivots at each iteration
    # TODO: use gpytorch's pivoted cholesky instead once that gets an exposed list
    # TODO: this probably won't work in batch mode.
    item_size = kernel_matrix.shape[-2]
    cis = torch.zeros((max_length, item_size))
    di2s = kernel_matrix.diag()
    selected_items = []
    selected_item = torch.argmax(di2s)
    selected_items.append(selected_item)
    while len(selected_items) < max_length:
        k = len(selected_items) - 1
        ci_optimal = cis[:k, selected_item]
        di_optimal = torch.sqrt(di2s[selected_item])
        elements = kernel_matrix[..., selected_item, :]
        eis = (elements - torch.matmul(ci_optimal, cis[:k, :])) / di_optimal
        cis[k, :] = eis
        di2s = di2s - eis.pow(2.0)
        di2s[selected_item] = -(torch.tensor(float("inf")))
        selected_item = torch.argmax(di2s)
        if di2s[selected_item] < epsilon:
            break
        selected_items.append(selected_item)
    ind_points = train_inputs[torch.stack(selected_items)]
    return ind_points

In [None]:
# Initial batch fit on the first 20 observations: spectral-mixture kernel,
# Gaussian likelihood, and up to 15 inducing points chosen by pivoted Cholesky
# (their locations are not learned).
cm = gpytorch.kernels.SpectralMixtureKernel(num_mixtures=3)
likelihood = gpytorch.likelihoods.GaussianLikelihood()

vargp_model = VariationalGPModel(
    _pivoted_cholesky_init(inputs[:20], cm(inputs[:20]), 15),
    covar_module=cm,
    likelihood=likelihood,
    learn_inducing_locations=False,
    streaming=False,
)
mll = gpytorch.mlls.VariationalELBO(
    model=vargp_model,
    likelihood=likelihood,
    num_data=20,
    beta=1.0,
)

optimizer = torch.optim.Adam(list(vargp_model.parameters()), lr=1e-2)

fit_model(mll, vargp_model, optimizer, inputs[:20], targets[:20])


In [None]:
plt.figure(figsize = (6, 3))
# Model state right after the initial fit on the first 20 observations.
make_basic_plot(vargp_model, inputs[:20], targets[:20], bounds=(-3., 3))
# Saving is disabled here. NOTE(review): the online loop in this section writes
# a file with this exact name on its first iteration (i == 20).
# plt.savefig("./plots/osvgp_pivchol_full_t20.pdf", bbox_inches="tight")

In [None]:
# Bare `%pdb` toggles IPython's automatic post-mortem debugger.
# NOTE(review): the same bare toggle appears again later in the notebook, so
# running both in order restores the previous state.
%pdb

In [None]:
step = 1

for i in range(20, 60, step):
    print("Starting step: ", i)
    next_x, next_y = inputs[i:i+step], targets[i:i+step]

    # Re-select inducing points by pivoted Cholesky over every input seen so
    # far (including the new batch) and pass them to the model's online update.
    with gpytorch.settings.cholesky_jitter(1e-3):
        seen_x = inputs[:(i+step)]
        seen_kernel = vargp_model.covar_module(seen_x).add_jitter(1e-4)
        vargp_model.update_variational_parameters(
            next_x,
            next_y,
            _pivoted_cholesky_init(seen_x, seen_kernel, 15),
        )

    vargp_model.zero_grad()
    vargp_model.train()

    # The objective is rebuilt each iteration and only sees the new batch.
    mll = gpytorch.mlls.VariationalELBO(
        model=vargp_model,
        likelihood=likelihood,
        num_data=step,
        beta=1.0,
        combine_terms=True,
    )

    fit_model(mll, vargp_model, optimizer, next_x, next_y, num_steps=10, verbose=False)

    # Snapshot figures every 20th index and at the final index.
    if i == 59 or i % 20 == 0:
        plt.figure(figsize = (6,3))
        make_basic_plot(
            vargp_model,
            next_x,
            next_y,
            old_x=inputs[:i],
            old_y=targets[:i],
            bounds=(-3., 3.),
        )
        plt.savefig("./plots/osvgp_pivchol_full_t"+str(i)+".pdf", bbox_inches="tight")
        plt.show()
        


## Fit in Online Mode (Coreset-Like)


In [None]:

# Fresh model for the coreset-like variant, fitted on the first 20 observations
# with up to 15 pivoted-Cholesky inducing points (locations not learned).
cm = gpytorch.kernels.SpectralMixtureKernel(num_mixtures=3)
likelihood = gpytorch.likelihoods.GaussianLikelihood()

vargp_model = online_gp.models.VariationalGPModel(
    _pivoted_cholesky_init(inputs[:20], cm(inputs[:20]), 15),
    covar_module=cm,
    likelihood=likelihood,
    learn_inducing_locations=False,
    streaming=False,
)
mll = gpytorch.mlls.VariationalELBO(
    model=vargp_model,
    likelihood=likelihood,
    num_data=20,
    beta=1.0,
)

optimizer = torch.optim.Adam(list(vargp_model.parameters()), lr=1e-2)

fit_model(mll, vargp_model, optimizer, inputs[:20], targets[:20])



In [None]:
plt.figure(figsize=(6,3))
# Model state right after the initial fit on the first 20 observations.
make_basic_plot(vargp_model, inputs[:20], targets[:20], bounds=(-3., 3), col = 0)
# NOTE(review): the online loop in this section saves to this same path on its
# first iteration (i == 20), so this file is overwritten when the loop runs.
plt.savefig("./plots/osvgp_pivchol_partial_t20.pdf", bbox_inches="tight")

In [None]:
# Bare `%pdb` toggles IPython's automatic post-mortem debugger.
# NOTE(review): an identical bare toggle appears earlier in the notebook, so
# this call undoes it when the cells are run in order.
%pdb

In [None]:
step = 1

for i in range(20, 60, step):
    print("Starting step: ", i)
    next_x, next_y = inputs[i:i+step], targets[i:i+step]

    # Candidate set = new batch + current inducing points. The next inducing
    # points are chosen from this set only, not from the full history.
    with gpytorch.settings.cholesky_jitter(1e-3):
        stacked_data = torch.cat((next_x, vargp_model.variational_strategy.inducing_points.detach()))
        candidate_kernel = cm(stacked_data).add_jitter(1e-4)

        vargp_model.update_variational_parameters(
            next_x,
            next_y,
            _pivoted_cholesky_init(stacked_data, candidate_kernel, 15),
        )

    vargp_model.zero_grad()
    vargp_model.train()

    # The objective is rebuilt each iteration and only sees the new batch.
    mll = gpytorch.mlls.VariationalELBO(
        model=vargp_model,
        likelihood=likelihood,
        num_data=step,
        beta=1.0,
        combine_terms=True,
    )

    fit_model(mll, vargp_model, optimizer, next_x, next_y, num_steps=10, verbose=False)

    # Snapshot figures every 20th index and at the final index.
    if i == 59 or i % 20 == 0:
        plt.figure(figsize=(6,3))
        make_basic_plot(
            vargp_model,
            next_x,
            next_y,
            old_x=inputs[:i],
            old_y=targets[:i],
            bounds=(-3., 3.),
            col=0,
        )
        plt.savefig("./plots/osvgp_pivchol_partial_t"+str(i)+".pdf", bbox_inches="tight")
        plt.show()


## Random movement of inducing points?

In [None]:
# Fresh model for the random-replacement variant, fitted on the first 20
# observations with up to 15 pivoted-Cholesky inducing points (locations not learned).
cm = gpytorch.kernels.SpectralMixtureKernel(num_mixtures=3)
likelihood = gpytorch.likelihoods.GaussianLikelihood()

vargp_model = online_gp.models.VariationalGPModel(
    _pivoted_cholesky_init(inputs[:20], cm(inputs[:20]), 15),
    covar_module=cm,
    likelihood=likelihood,
    learn_inducing_locations=False,
    streaming=False,
)
mll = gpytorch.mlls.VariationalELBO(
    model=vargp_model,
    likelihood=likelihood,
    num_data=20,
    beta=1.0,
)

optimizer = torch.optim.Adam(list(vargp_model.parameters()), lr=1e-2)

fit_model(mll, vargp_model, optimizer, inputs[:20], targets[:20])

In [None]:
plt.figure(figsize=(6,3))
# Model state right after the initial fit on the first 20 observations.
make_basic_plot(vargp_model, inputs[:20], targets[:20], bounds=(-3., 3), col = -1)
# NOTE(review): this file uses the prefix "osvgp_none_t", while the online loop
# in this section saves with the prefix "osvgp_pivchol_none_t".
plt.savefig("./plots/osvgp_none_t20.pdf", bbox_inches="tight")

In [None]:
# optimizer = torch.optim.SGD(list(vargp_model.parameters()), lr = 1e-4)


In [None]:

step = 1

for i in range(20, 60, step):
    print("Starting step: ", i)
    next_x = inputs[i:i+step]
    next_y = targets[i:i+step]

    ind_pts = vargp_model.variational_strategy.inducing_points.clone().detach()

    # Evict one inducing point chosen uniformly at random and append the new
    # input. The index range comes from the actual number of inducing points,
    # so every point (first and last included) is equally likely to be dropped
    # and the index is always valid, even if the initialiser returned fewer
    # points than requested.
    rm = torch.randint(ind_pts.shape[0], torch.Size()).item()

    new_inducing = torch.cat((
        ind_pts[:rm],
        ind_pts[rm+1:],
        next_x.view(-1,1),
        ))
    # print(ind_pts.shape, new_inducing.shape, next_x.shape, rm)
    with gpytorch.settings.cholesky_jitter(1e-3):
        vargp_model.update_variational_parameters(
            next_x,
            next_y,
            new_inducing,
        )

    vargp_model.zero_grad()
    vargp_model.train()

    # The objective is rebuilt each iteration and only sees the new batch.
    mll = gpytorch.mlls.VariationalELBO(
        likelihood=likelihood,
        model=vargp_model,
        num_data=step,
        beta = 1.,
        combine_terms=True
    )

    fit_model(mll, vargp_model, optimizer, next_x, next_y, num_steps=10, verbose=False)

    # Snapshot figures every 20th index and at the final index.
    if i % 20 == 0 or i == 59:
        plt.figure(figsize=(6,3))
        make_basic_plot(
            vargp_model,
            next_x,
            next_y,
            old_x=inputs[:i],
            old_y=targets[:i],
            bounds=(-3., 3.),
            col=-1
        )
        plt.savefig("./plots/osvgp_pivchol_none_t"+str(i)+".pdf", bbox_inches="tight")
        plt.show()

In [None]:
# Re-draws the last state using `i`, `next_x` and `next_y` left over from the
# loop in the previous cell, so that cell must have been run first.
make_basic_plot(
    vargp_model, 
    next_x, 
    next_y, 
    old_x=inputs[:i], 
    old_y=targets[:i], 
    bounds=(-3., 3.),
    col=-1
)
# Legend is anchored below the axes, laid out in four columns.
plt.legend(ncol = 4, loc = "upper center", bbox_to_anchor = (0.5, -0.2))
# plt.savefig("plots/legend.pdf", bbox_inches="tight")

In [None]:
# Stand-alone legend figure: the plotted coordinates are random placeholders;
# the labels, colours and markers are what the saved legend shows.
for curve_label, curve_color in (
    ("Resampled O-SGPR", palette[-1]),
    ("Piv. Chol. O-SGPR", palette[0]),
):
    plt.plot(torch.rand(1), torch.rand(1), label = curve_label, color = curve_color, linewidth=4)

# Both data markers share everything except the label and opacity.
data_marker = dict(color = "#d71e5e", marker = "x", s=100, zorder=20)
plt.scatter(torch.rand(1), torch.rand(1), label = "Current Data", **data_marker)
plt.scatter(torch.rand(1), torch.rand(1), label = "Old Data", alpha = 0.3, **data_marker)

plt.scatter(
    torch.rand(1),
    torch.rand(1),
    color = "#220337",
    marker="*",
    label = "Inducing Points",
    s=150,
    zorder=20,
)

plt.legend(ncol = 5, loc = "upper center", bbox_to_anchor = (0.5, -0.2))
plt.savefig("sgpr_legend.pdf", bbox_inches = "tight")