Goal
- Figure out how the convolutional GP is implemented in GPflow and convgp
    - https://github.com/GPflow/GPflow/blob/develop/gpflow/kernels/convolutional.py
    - https://github.com/GPflow/GPflow/blob/1e1de824397c828a47d9eca002251041296c91d4/gpflow/covariances/kufs.py
        - Kuu and Kuf compute `K(Zpatch)` and `K(Zpatch, patches(X))`, respectively!
        - Kuf: the patch-wise responses are averaged over the patches of each image afterwards

In [None]:
import time
import numpy as np
import matplotlib.pyplot as plt

import gpflow
import tensorflow as tf
import tensorflow_probability as tfp

from gpflow import set_trainable
from gpflow.ci_utils import is_continuous_integration

# Global GPflow settings used by every cell below.
gpflow.config.set_default_float(np.float64)
gpflow.config.set_default_jitter(1e-4)
gpflow.config.set_default_summary_fmt("notebook")

# for reproducibility of this notebook:
np.random.seed(123)
tf.random.set_seed(42)

# Shrink the experiment when running under continuous integration.
# NUM_TRAIN_DATA is less than in the original rectangles dataset.
MAXITER, NUM_TRAIN_DATA, NUM_TEST_DATA = (
    (2, 5, 7) if is_continuous_integration() else (100, 100, 300)
)
H, W = 14, 14  # width and height. In the original paper this is 28
IMAGE_SHAPE = [H, W]

In [None]:
def make_rectangle(arr, x0, y0, x1, y1):
    """Draw the outline of an axis-aligned rectangle into `arr`, in place.

    Cells on the outline are set to 1; nothing is returned.

    :param arr: 2-D array indexed as ``arr[row, column]``.
    :param x0: column of the left edge.
    :param y0: row of the top edge.
    :param x1: column of the right edge.
    :param y1: row of the bottom edge.
    """
    # Left and right edges: rows y0 .. y1 - 1.
    for column in (x0, x1):
        arr[y0:y1, column] = 1
    # Top edge stops just short of x1 (the right edge already covers that cell).
    arr[y0, x0:x1] = 1
    # Bottom edge includes the bottom-right corner.
    arr[y1, x0 : x1 + 1] = 1


def make_random_rectangle(arr):
    """Draw a randomly placed rectangle outline into `arr` and return its corners.

    The corners are sampled with ``np.random.randint`` (global NumPy RNG) in the
    order x0, y0, x1, y1; the second corner is at least two cells away from the
    first along each axis.

    :param arr: 2-D array that is modified in place.
    :return: tuple ``(x0, y0, x1, y1)`` of the sampled corner coordinates.
    """
    n_rows, n_cols = arr.shape[0], arr.shape[1]
    left = np.random.randint(1, n_cols - 3)
    top = np.random.randint(1, n_rows - 3)
    right = np.random.randint(left + 2, n_cols - 1)
    bottom = np.random.randint(top + 2, n_rows - 1)
    corners = (left, top, right, bottom)
    make_rectangle(arr, *corners)
    return corners


def make_rectangles_dataset(num, w, h):
    """Build a dataset of random rectangle images with binary labels.

    Each image holds one rectangle outline. The label is 1 when the rectangle's
    vertical extent (``y1 - y0``) exceeds its horizontal extent (``x1 - x0``)
    and 0 otherwise. Squares are rejected and redrawn.

    :param num: number of images to generate.
    :param w: image width in pixels.
    :param h: image height in pixels.
    :return: tuple ``(images, labels)`` with shapes ``(num, w * h)`` and
        ``(num, 1)``, both cast to GPflow's default float type.
    """
    images = np.zeros((num, h, w))
    labels = np.zeros((num, 1))
    for idx in range(num):
        canvas = images[idx]  # view: drawing on it fills `images` in place
        for _ in range(1000):  # Finite number of tries
            left, top, right, bottom = make_random_rectangle(canvas)
            y_extent = bottom - top
            x_extent = right - left
            if y_extent != x_extent:
                labels[idx, 0] = y_extent > x_extent
                break
            # A square carries no label information: wipe it and try again.
            canvas[:, :] = 0
    float_type = gpflow.config.default_float()
    return (
        images.reshape(num, w * h).astype(float_type),
        labels.astype(float_type),
    )

In [None]:
# Build the training and test sets. `make_rectangles_dataset` takes (num, w, h)
# whereas IMAGE_SHAPE is [H, W], so pass the dimensions by keyword to avoid
# silently swapping them if the image ever becomes non-square.
X, Y = data = make_rectangles_dataset(NUM_TRAIN_DATA, w=W, h=H)
Xt, Yt = test_data = make_rectangles_dataset(NUM_TEST_DATA, w=W, h=H)

# Show the first four training images with their labels as titles.
plt.figure(figsize=(8, 3))
for i in range(4):
    plt.subplot(1, 4, i + 1)
    plt.imshow(X[i, :].reshape(*IMAGE_SHAPE))
    plt.title(Y[i, 0])
    

In [None]:
# Baseline: SVGP classifier with a squared-exponential kernel on the raw pixels,
# using a copy of the training inputs as (fixed) inducing points.
rbf_kernel = gpflow.kernels.SquaredExponential()
rbf_likelihood = gpflow.likelihoods.Bernoulli()
rbf_inducing = gpflow.inducing_variables.InducingPoints(X.copy())
rbf_m = gpflow.models.SVGP(rbf_kernel, rbf_likelihood, rbf_inducing)
rbf_training_loss_closure = rbf_m.training_loss_closure(data, compile=True)


def rbf_elbo():
    """Return the negated training loss of the RBF model as a NumPy value."""
    return -rbf_training_loss_closure().numpy()


print("RBF elbo before training: %.4e" % rbf_elbo())

set_trainable(rbf_m.inducing_variable, False)
start_time = time.time()
rbf_optimizer = gpflow.optimizers.Scipy()
res = rbf_optimizer.minimize(
    rbf_training_loss_closure,
    variables=rbf_m.trainable_variables,
    method="l-bfgs-b",
    options={"disp": True, "maxiter": MAXITER},
)
print(f"{res.nfev / (time.time() - start_time):.3f} iter/s")

# Threshold the predictive mean at 0.5 and compare against the labels.
rbf_train_pred = (rbf_m.predict_y(X)[0] > 0.5).numpy().astype("float")
train_acc = np.mean(rbf_train_pred == Y)
rbf_test_pred = (rbf_m.predict_y(Xt)[0] > 0.5).numpy().astype("float")
test_acc = np.mean(rbf_test_pred == Yt)
print(f"Train acc: {train_acc * 100}%\nTest acc : {test_acc*100}%")
print("RBF elbo after training: %.4e" % rbf_elbo())

In [None]:
def f64(x):
    """Return `x` as a float64 NumPy array."""
    return np.array(x, dtype=np.float64)


# NOTE(review): `tfp.bijectors.AffineScalar` is reportedly deprecated in newer
# TensorFlow Probability releases; confirm it exists in the pinned TFP version
# before upgrading.
def positive_with_min():
    """Return a fresh bijector: Softplus followed by a shift of 1e-4."""
    return tfp.bijectors.AffineScalar(shift=f64(1e-4))(tfp.bijectors.Softplus())


def constrained():
    """Return a fresh bijector: Sigmoid followed by scale 100 and shift 1e-4."""
    return tfp.bijectors.AffineScalar(shift=f64(1e-4), scale=f64(100.0))(
        tfp.bijectors.Sigmoid()
    )


def max_abs_1():
    """Return a fresh bijector: Sigmoid followed by scale 4 and shift -2."""
    return tfp.bijectors.AffineScalar(shift=f64(-2.0), scale=f64(4.0))(
        tfp.bijectors.Sigmoid()
    )


colour_channels = 1
patch_shape = [3, 3]
conv_k = gpflow.kernels.Convolutional(gpflow.kernels.SquaredExponential(), IMAGE_SHAPE, patch_shape)
conv_k.base_kernel.lengthscales = gpflow.Parameter(1.0, transform=positive_with_min())
# Weight scale and variance are non-identifiable. We also need to prevent variance from shooting off crazily.
conv_k.base_kernel.variance = gpflow.Parameter(1.0, transform=constrained())
conv_k.weights = gpflow.Parameter(conv_k.weights.numpy(), transform=max_abs_1())
# Inducing patches: every distinct patch that occurs in the training images.
# The flattened patch length comes from the kernel rather than a literal 9, so
# this stays correct if `patch_shape` changes.
conv_f = gpflow.inducing_variables.InducingPatches(
    np.unique(conv_k.get_patches(X).numpy().reshape(-1, conv_k.patch_len), axis=0)
)


In [None]:
# Dump the convolutional kernel's geometry, one attribute per line.
for _attr in ("image_shape", "patch_shape", "base_kernel", "colour_channels"):
    print(getattr(conv_k, _attr))
# Patch length and patch count share a line.
print(conv_k.patch_len, conv_k.num_patches)


In [None]:
# Step-by-step walkthrough of patch extraction, printing the shape after each
# stage. Shape comments show the values observed with 100 images of 14x14.
# conv_k.get_patches(X).shape = TensorShape([100, 144, 9])
image_shape = IMAGE_SHAPE
num_data = tf.shape(X)[0]
print(X.shape)
# (N, H*W)
castX = tf.transpose(tf.reshape(X, [num_data, -1, colour_channels]), [0, 2, 1])
print(castX.shape)
# (N, 1, H*W)
patches = tf.image.extract_patches(
    # (N, H, W, 1)
    tf.reshape(castX, [-1, image_shape[0], image_shape[1], 1], name="rX"),
    [1, patch_shape[0], patch_shape[1], 1],
    [1, 1, 1, 1],
    [1, 1, 1, 1],
    "VALID",
)
print(patches.shape)
# (N, 12, 12, 9)
shp = tf.shape(patches)  # img x out_rows x out_cols
reshaped_patches = tf.reshape(
    patches, [num_data, colour_channels * shp[1] * shp[2], shp[3]]
)
print(reshaped_patches.shape)
# (N, n_patches_per_im, pH*pW)
# (100, 144, 9)

# Distinct patches over the whole training set; the flattened patch length is
# taken from the kernel instead of being hard-coded.
patch_uniq = np.unique(
    conv_k.get_patches(X).numpy().reshape(-1, conv_k.patch_len), axis=0
)
print(patch_uniq.shape)
# (45, 9)


# Plot every patch of the first image on a grid that mirrors the patch layout.
# The grid size and patch shape are read from the data rather than hard-coded.
n_patch_rows, n_patch_cols = patches.shape[1], patches.shape[2]
fig, axs = plt.subplots(n_patch_rows, n_patch_cols, figsize=(10, 10), squeeze=False)
for i in range(n_patch_rows):
    for j in range(n_patch_cols):
        ax = axs[i, j]
        ax.imshow(patches[0, i, j, :].numpy().reshape(patch_shape))
        ax.set_xticks([])
        ax.set_yticks([])

In [None]:
# Step-by-step walkthrough of the convolutional kernel matrix, following the
# method body quoted here for reference:
# conv_k.K
# def K(self, X, X2=None):
#     Xp = self.get_patches(X)  # [N, P, patch_len]
#     Xp2 = Xp if X2 is None else self.get_patches(X2)
#     bigK = self.base_kernel.K(Xp, Xp2)  # [N, num_patches, N, num_patches]
#     W2 = self.weights[:, None] * self.weights[None, :]  # [P, P]
#     W2bigK = bigK * W2[None, :, None, :]
#     return tf.reduce_sum(W2bigK, [1, 3]) / self.num_patches ** 2.0
self = conv_k  # alias so the statements below read like the quoted method body
X2 = X[:50]

# len(X)=N, len(X2)=M
Xp = self.get_patches(X)
Xp2 = Xp if X2 is None else self.get_patches(X2)
print("Xp", Xp.shape, Xp2.shape)  # (N, num_patches, patch_len)
bigK = self.base_kernel.K(Xp, Xp2)
print("bigK", bigK.shape)  # [N, num_patches, M, num_patches]
W2 = self.weights[:, None] * self.weights[None, :]  # [num_patches, num_patches]
print("w^2", W2.shape)  # (144, 144)
W2bigK = bigK * W2[None, :, None, :]  # [N, num_patches, M, num_patches]
print("w^2*K", W2bigK.shape)  # e.g. (100, 144, 50, 144) since X2 = X[:50]
# Range of the weighted patch-to-patch responses, before summing over patches.
print("w^2*K", W2bigK.shape, tf.math.reduce_min(W2bigK).numpy(), tf.math.reduce_max(W2bigK).numpy())
# Sum over both patch axes and normalise by num_patches^2 -> [N, M].
K = tf.reduce_sum(W2bigK, [1, 3]) / self.num_patches ** 2.0
print("K", K.shape, tf.math.reduce_min(K).numpy(), tf.math.reduce_max(K).numpy())

In [None]:
# Step-by-step walkthrough of the inducing-patch / image cross-covariance (Kuf).
# The kernel is referenced by name and the patches are recomputed, so this cell
# does not depend on a global called `self`.
Xp = conv_k.get_patches(X)  # [N, num_patches, patch_len]
bigKzx = conv_k.base_kernel.K(conv_f.Z, Xp)  # presumably [M, N, num_patches] via broadcasting
# Apply the per-patch weights, consistent with the weighted K computation,
# then average over the patches of each image.
K2 = tf.reduce_sum(bigKzx * conv_k.weights, [2]) / conv_k.num_patches
K2

In [None]:
# Convolutional SVGP classifier, training stage 1: the inducing patches, the
# base-kernel variance and the patch weights are all held fixed.
conv_m = gpflow.models.SVGP(conv_k, gpflow.likelihoods.Bernoulli(), conv_f)
set_trainable(conv_m.inducing_variable, False)
set_trainable(conv_m.kernel.base_kernel.variance, False)
set_trainable(conv_m.kernel.weights, False)
conv_training_loss_closure = conv_m.training_loss_closure(data, compile=True)


def conv_elbo():
    """Return the negated training loss of the convolutional model as a NumPy value."""
    return -conv_training_loss_closure().numpy()


print("conv elbo before training: %.4e" % conv_elbo())
start_time = time.time()
res = gpflow.optimizers.Scipy().minimize(
    conv_training_loss_closure,
    variables=conv_m.trainable_variables,
    method="l-bfgs-b",
    # A tenth of the full budget. `maxiter` is an integer option, so use floor
    # division and keep at least one iteration (MAXITER is 2 under CI).
    options={"disp": True, "maxiter": max(1, MAXITER // 10)},
)
print(f"{res.nfev / (time.time() - start_time):.3f} iter/s")

In [None]:
# Training stage 2: release the base-kernel variance and continue optimising
# with the full iteration budget.
set_trainable(conv_m.kernel.base_kernel.variance, True)
conv_optimizer = gpflow.optimizers.Scipy()
res = conv_optimizer.minimize(
    conv_training_loss_closure,
    variables=conv_m.trainable_variables,
    method="l-bfgs-b",
    options={"disp": True, "maxiter": MAXITER},
)

# Threshold the predictive mean at 0.5 and compare against the labels.
conv_train_pred = (conv_m.predict_y(X)[0] > 0.5).numpy().astype("float")
train_acc = np.mean(conv_train_pred == Y)
conv_test_pred = (conv_m.predict_y(Xt)[0] > 0.5).numpy().astype("float")
test_acc = np.mean(conv_test_pred == Yt)
print(f"Train acc: {train_acc * 100}%\nTest acc : {test_acc*100}%")
print("conv elbo after training: %.4e" % conv_elbo())

In [None]:
# Print GPflow's summary of the convolutional SVGP model.
gpflow.utilities.print_summary(conv_m)
