This notebook contains notes and miscellaneous experiments related to the goodness-of-fit test.

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
#%config InlineBackend.figure_format = 'pdf'

import autograd.numpy as np

import matplotlib
import matplotlib.pyplot as plt
import kgof.data as data
import kgof.density as density
import kgof.glo as glo
import kgof.goftest as gof
import kgof.kernel as kernel
import kgof.plot as plot
import kgof.util as util

import scipy.stats as stats

In [None]:
# kgof.plot is already available as `plot` from the import cell; this import
# additionally binds the top-level package name `kgof` used on the next line.
import kgof.plot
# Apply the plotting defaults shipped with kgof (the exact settings live in
# kgof.plot and are not visible from this notebook).
kgof.plot.set_default_matplotlib_options()

## Gaussian-Bernoulli RBM Data

Explore what the data from a GB-RBM look like.

In [None]:
def gbrbm_perturb(std_perturb_B, dx=50, dh=10, seed=98):
    """
    Get a Gaussian-Bernoulli RBM problem where the first entry of the B matrix
    (the matrix linking the latent and the observation) is perturbed.

    - std_perturb_B: standard deviation of the Gaussian noise added to
        B[0, 0]. If it is at most 1e-7, B is left unperturbed.
    - dx: observed dimension
    - dh: latent dimension
    - seed: seed used when drawing B, b, c and the perturbation noise

    Return p (density), data source. p is constructed from the unperturbed
    parameters; the data source is constructed from the perturbed B.
    """
    with util.NumpySeedContext(seed=seed):
        # Entries of B are drawn uniformly from {-1.0, +1.0}.
        B = np.random.randint(0, 2, (dx, dh))*2 - 1.0
        b = np.random.randn(dx)
        c = np.random.randn(dh)
        p = density.GaussBernRBM(B, b, c)

        B_perturb = B.copy()
        if std_perturb_B > 1e-7:
            # Draw a scalar (not a length-1 array): assigning a 1-element
            # array to a single matrix entry relies on implicit
            # array-to-scalar conversion, which NumPy has deprecated.
            B_perturb[0, 0] = B_perturb[0, 0] + \
                np.random.randn()*std_perturb_B
        ds = data.DSGaussBernRBM(B_perturb, b, c, burnin=2000)

    return p, ds


In [None]:
# Problem size: observed dimension, latent dimension, and sample size.
dx, dh = 50, 40
n = 1000

# A perturbation std of 0.0 means the data source shares B with the model p.
p, ds = gbrbm_perturb(0.0, dx=dx, dh=dh, seed=78)
dat = ds.sample(n, seed=43)

In [None]:
import pandas as pd

# Number of leading observed variables to show in the scatter matrix.
sub_vars = 4
X = dat.data()
df = pd.DataFrame(X[:, :sub_vars])
# http://stackoverflow.com/questions/7941207/is-there-a-function-to-make-scatterplot-matrices-in-matplotlib
# scatter_matrix lives in pd.plotting; the old pd.tools.plotting location
# is not available in current pandas releases.
Axes = pd.plotting.scatter_matrix(df, alpha=0.2, figsize=(10, 10))

# Enlarge tick labels and axis labels on every panel of the matrix.
fontsize = 24
for item in Axes.ravel():
    plt.setp(item.yaxis.get_majorticklabels(), 'size', fontsize)
    plt.setp(item.xaxis.get_majorticklabels(), 'size', fontsize)
    plt.setp(item.yaxis.get_label(), 'size', fontsize)
    plt.setp(item.xaxis.get_label(), 'size', fontsize)

# The file name records the problem configuration (dx, dh, n come from the
# sampling cell above).
fname = 'rbm_scatter_d{0}_dh{1}_v{2}_n{3}.pdf'.format(dx, dh, sub_vars, n)
plt.savefig(fname, bbox_inches='tight')

## Plot RBM density

In [None]:
# For several seeds: build a 2-d GB-RBM problem, sample from the perturbed
# data source, and draw the sample on top of the model's log density.
for seed in range(1, 5):
    # p is the model density; ds samples from the model with perturbed B.
    p, ds = gbrbm_perturb(np.sqrt(0.1), dx=2, dh=10, seed=seed)
    dat = ds.sample(1000, seed=seed+3)
    X = dat.data()

    # Evaluation grid spanning the bounding box of the sample.
    nx, ny = 50, 50
    xmin, ymin = X.min(axis=0)
    xmax, ymax = X.max(axis=0)
    domx = np.linspace(xmin, xmax, nx)
    domy = np.linspace(ymin, ymax, ny)
    XX, YY = np.meshgrid(domx, domy)

    # Flatten the grid to a list of 2-d points, evaluate, then restore the
    # (ny, nx) grid shape for contour plotting.
    all_points = np.dstack((XX, YY)).reshape(-1, 2)
    flat_den = p.log_den(all_points)
    ZZ = flat_den.reshape(ny, nx)

    fig, ax = plt.subplots(figsize=(8, 5))
    contours = ax.contourf(XX, YY, ZZ, 20)
    fig.colorbar(contours, ax=ax)
    ax.plot(X[:, 0], X[:, 1], 'k.', alpha=0.3)
    ax.set_title('Log density')

## Test DSGaussCosFreqs

In [None]:
# For a couple of seeds: sample from the GaussCosFreqs data source and draw
# the sample on top of the log density of the same model.
for seed in range(1, 3):
    sigma2 = 5
    freqs = np.array([2.0, 1])
    p = density.GaussCosFreqs(sigma2, freqs)
    ds = p.get_datasource()

    dat = ds.sample(1000, seed=seed+3)
    X = dat.data()

    # Evaluation grid spanning the bounding box of the sample.
    nx, ny = 50, 50
    xmin, ymin = X.min(axis=0)
    xmax, ymax = X.max(axis=0)
    domx = np.linspace(xmin, xmax, nx)
    domy = np.linspace(ymin, ymax, ny)
    XX, YY = np.meshgrid(domx, domy)

    # Flatten the grid to a list of 2-d points, evaluate, then restore the
    # (ny, nx) grid shape for contour plotting.
    all_points = np.dstack((XX, YY)).reshape(-1, 2)
    flat_den = p.log_den(all_points)
    ZZ = flat_den.reshape(ny, nx)

    fig, ax = plt.subplots(figsize=(8, 5))
    contours = ax.contourf(XX, YY, ZZ, 20)
    fig.colorbar(contours, ax=ax)
    ax.plot(X[:, 0], X[:, 1], 'k.', alpha=0.3)
    ax.set_title('Log density')

In [None]:
# Parameters of the two GaussCosFreqs densities to compare. They are
# currently identical, so the p and q samples come from the same model.
p_sigma2 = 10
p_freqs = np.array([1.0, 1.0])
q_sigma2 = 10
q_freqs = np.array([1.0, 1.0])

p = density.GaussCosFreqs(p_sigma2, p_freqs)
p_ds = p.get_datasource()
# Bind the second density to q (not p) so that p keeps referring to the
# density built from the p_* parameters.
q = density.GaussCosFreqs(q_sigma2, q_freqs)
q_ds = q.get_datasource()

In [None]:
# Sample size and base seed shared by both draws.
n = 1000
seed = 8

# Sample from the p data source and extract the raw array.
p_dat = p_ds.sample(n, seed=seed)
X = p_dat.data()

# Sample from the q data source, using a different seed than for p.
q_dat = q_ds.sample(n, seed=seed+1)
Y = q_dat.data()

In [None]:
# Overlay the two samples: X (from p_ds) in blue, Y (from q_ds) in red.
fig, ax = plt.subplots(figsize=(8, 8))
ax.plot(X[:, 0], X[:, 1], 'b.', alpha=0.4)
ax.plot(Y[:, 0], Y[:, 1], 'r.', alpha=0.4)

## Permutation

In [None]:
# Number of mixture components.
k = 3
# Draw the mixture parameters under a fixed seed so that this cell gives the
# same result on every run (same seeding helper as in gbrbm_perturb).
with util.NumpySeedContext(seed=29):
    means = np.random.randn(k, 2)
    variances = np.random.rand(k)*4
p = density.IsoGaussianMixture(means, variances)

# Evaluate grad_log on five copies of the 2-d origin.
grad = p.grad_log(np.zeros((5,2)))

In [None]:
# Display the value returned by p.grad_log in the previous cell.
grad