In [9]:
# Copyright (c) 2015-2016, the authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
%matplotlib notebook

import unittest
import deepgp
import GPy
import os
import h5py
import tables

import edward as ed
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from edward.models import Bernoulli, MultivariateNormalTriL, Normal
from edward.util import rbf
from observations import crabs
from sklearn.decomposition import PCA
import scipy

# NOTE(review): "__file__" is a quoted string literal here, not the __file__
# variable, so os.path.dirname() receives a name with no directory part and
# returns "" (an empty, current-directory-relative prefix).  Presumably this
# is a deliberate workaround for __file__ being undefined in a notebook
# kernel -- confirm before "fixing" the quotes.
base_path = os.path.dirname("__file__")

In [16]:
def const(x):
    """Return ``x`` wrapped as a ``tf.float32`` TensorFlow constant."""
    return tf.constant(x, dtype=tf.float32)

# Open the figure that the plt.subplot(...) calls below draw into.
# Only the canvas is created here: pre-populating a 4x8 grid of axes (as
# plt.subplots(4, 8) does) leaves 16 empty axes underneath the 2x4 panels on
# Matplotlib versions that no longer auto-remove overlapped axes.  Every panel
# that is actually used is created on demand by its own plt.subplot call.
plt.figure(figsize=(10,4))

def build_toy_dataset(N, D, K, Ngrid = 10, prior_l=0.01, prior_sig_f=5, l=1, sig_f=2, sig_n=1):
    """Simulate noisy RBF-tuned observations driven by a random latent variable.

    The latent ``z`` is drawn i.i.d. standard normal, each of the ``D``
    observed features is an RBF bump (via ``edward.util.rbf``) centred on a
    regular grid point in latent space, and Gaussian noise scaled to half of
    the largest noiseless response is added.  The same RBF map is also
    evaluated on a regular ``Ngrid x Ngrid`` lattice spanning the sampled
    latent range, giving a noise-free "tuning map" per feature.

    Side effects: draws into the current matplotlib figure (panels 1, 2 and 4
    of a 2x4 layout plus panels 17-32 of a 4x8 layout), prints the maximum
    absolute response before and after adding noise, and consumes numbers from
    NumPy's global RNG.

    Note: the lattice and the RBF centres are built from columns 0 and 1 of
    ``z`` only and are stacked with ``z`` itself, so the code as written
    supports ``K == 2``.

    Args:
        N: number of samples (rows of ``x`` and ``z``).
        D: number of observed features (columns of ``x``).
        K: latent dimensionality (columns of ``z``); see note above.
        Ngrid: number of lattice points per latent axis.
        prior_l: lengthscale of the RBF kernel evaluated on the time grid.
        prior_sig_f: variance of the RBF kernel evaluated on the time grid.
        l: lengthscale of the latent-to-observation RBF map.
        sig_f: variance of the latent-to-observation RBF map.
        sig_n: accepted but not used; the noise scale is fixed at
            ``0.5 * max|x|`` of the noiseless responses.

    Returns:
        Tuple ``(x, xgrid, z, zgrid, tgrid, Kprior)``:
        ``x`` noisy observations, shape (N, D);
        ``xgrid`` noiseless responses on the lattice, shape (Ngrid**2, D);
        ``z`` sampled latents, shape (N, K);
        ``zgrid`` lattice coordinates, shape (Ngrid**2, 2);
        ``tgrid`` N evenly spaced points on [-5, 5], shape (N, 1);
        ``Kprior`` RBF kernel evaluated on ``tgrid`` (only displayed and
        returned; it does not enter the sampling of ``z``).
    """
    tgrid = np.linspace(-5, 5, num=N).reshape(-1,1)

    sess = ed.get_session()
    with sess.as_default():
        Kprior = rbf(const(tgrid),lengthscale=const(prior_l),variance=const(prior_sig_f)).eval()

    # Latents are sampled with identity covariance; the temporal prior is
    # deliberately left out of the covariance (see trailing comment).
    z_mu = np.zeros(N)
    z_cov = 1*np.eye(N)# + Kprior
    z = np.random.multivariate_normal(z_mu, z_cov,K).T

    # Regular lattice covering the bounding box of the sampled latents.
    zgridx,zgridy = np.meshgrid(np.linspace(z[:,0].min(), z[:,0].max(), num=Ngrid),
                                np.linspace(z[:,1].min(), z[:,1].max(), num=Ngrid))
    zgrid = np.vstack((zgridx.reshape(1,-1),zgridy.reshape(1,-1))).T
    # Samples first, lattice second, so both go through the RBF map in one call.
    zall = np.vstack((z,zgrid))

    plt.subplot(2,4,1)
    plt.imshow(Kprior,aspect='auto')
    plt.colorbar()
    plt.subplot(2,4,2)
    plt.plot(z)

    # Centres per axis: smallest integer whose K-th power is at least D.
    # np.linspace requires an integer `num`; np.ceil returns a float, so cast.
    # 1.0 / K keeps true division even without `from __future__ import division`.
    n_ctr = int(np.ceil(D**(1.0/K)))
    ctrsx,ctrsy = np.meshgrid(np.linspace(-2, 2, num=n_ctr),
                              np.linspace(-2, 2, num=n_ctr))
    ctrs = np.vstack((ctrsx.reshape(1,-1),ctrsy.reshape(1,-1))).T
    # The grid may hold more than D points; keep exactly one centre per feature.
    ctrs = ctrs[:D,:]

    with sess.as_default():
        xall = rbf(const(zall),const(ctrs),lengthscale=const(l),variance=const(sig_f)).eval()

    # First N rows belong to the sampled latents, the remainder to the lattice.
    x = xall[:N,:]
    max_x = np.max(abs(x))
    print('before adding noise: ', max_x)
    xgrid = xall[N:,:]
    noise = np.random.randn(x.shape[0],x.shape[1])
    x = x + 0.5*max_x*noise
    print('after adding noise: ', np.max(abs(x)))

    extent = z[:,0].min(), z[:,0].max(), z[:,1].min(), z[:,1].max()
    plt.subplot(2,4,4)
    # np.cov treats each row as a variable, so this is the N x N covariance
    # between samples, not the D x D covariance between features.
    plt.imshow(np.cov(x),aspect='auto')
    plt.colorbar()

    # 16 features chosen at random (with replacement) for the tuning-map panels.
    ii = np.random.randint(0,D,16)

    for i in range(16):
        plt.subplot(4,8,i+17)
        # Row index of the reshaped map increases with the second latent
        # coordinate, so the image origin must be at the bottom for `extent`
        # to line up with the data.
        plt.imshow(xgrid[:,ii[i]].reshape(Ngrid,Ngrid),extent=extent,aspect='auto',origin='lower')
        plt.axis('off')

    return x,xgrid,z,zgrid,tgrid,Kprior

N = 500  # number of data points, number of time points
D = 100  # data dimensionality, number of neurons
K = 2  # latent dimensionality

# Seed NumPy's global RNG before simulating: build_toy_dataset draws the
# latents, the observation noise and the displayed feature indices from
# np.random, so without a fixed seed every run yields a different dataset
# (and a different syn2d.mat).
np.random.seed(0)

x_true, xgrid, z_true, zgrid, tgrid, Kprior_true = build_toy_dataset(N, D, K)
print(x_true.shape,xgrid.shape)
print(z_true.shape,zgrid.shape)

print("Number of data points N={}".format(N))
print("Number of features D={}".format(D))
print("Number of latent dimensions K={}".format(K))

# K-component PCA projection of the observations, used as an initial latent estimate.
pca = PCA(n_components=K)
z_init = pca.fit_transform(x_true)


def align_z(x,z):
    """Map ``x`` onto ``z`` with the least-squares affine transform.

    Both arrays are centred column by column, a linear map from centred ``x``
    to centred ``z`` is fitted with ``np.linalg.lstsq``, and the mapped ``x``
    is shifted back by the column means of ``z`` so the result lives in the
    same coordinates as ``z`` and can be overlaid on it directly.

    Args:
        x: array of shape (n_samples, n_x) to be aligned.
        z: reference array of shape (n_samples, n_z).

    Returns:
        Array of shape (n_samples, n_z): the affine image of ``x`` that is
        closest to ``z`` in the least-squares sense.
    """
    # Per-column means (axis=0).  A bare .mean() would collapse every column
    # into one scalar and leave columns with different offsets uncentred.
    x_mean = x.mean(axis=0)
    z_mean = z.mean(axis=0)
    x_centered = x - x_mean
    wgt = np.linalg.lstsq(x_centered, z - z_mean)[0]
    return np.dot(x_centered, wgt) + z_mean

# Overlay the first 10 rows of the true latents with the first 10 rows of the
# PCA estimate after it has been mapped onto the true latents by align_z.
plt.subplot(2,4,3)
plt.plot(z_true[:10, :])
z_init_aligned = align_z(z_init, z_true)
plt.plot(z_init_aligned[:10, :])

# Write every array of the simulated dataset to a single MATLAB file.
saved_arrays = {
    'x_true': x_true,
    'xgrid': xgrid,
    'z_true': z_true,
    'zgrid': zgrid,
    'tgrid': tgrid,
    'Kprior_true': Kprior_true,
}
scipy.io.savemat('syn2d.mat', mdict=saved_arrays)

<IPython.core.display.Javascript object>

before adding noise:  1.99999
after adding noise:  5.37694989579
(500, 100) (100, 100)
(500, 2) (100, 2)
Number of data points N=500
Number of features D=100
Number of latent dimensions K=2
