This notebook contains notes and miscellaneous explorations related to the goodness-of-fit test.

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
#%config InlineBackend.figure_format = 'pdf'

import autograd.numpy as np

import matplotlib
import matplotlib.pyplot as plt
import kgof.data as data
import kgof.density as density
import kgof.glo as glo
import kgof.goftest as gof
import kgof.kernel as kernel
import kgof.plot as plot
import kgof.util as util

import scipy.stats as stats

In [None]:
# Call kgof's helper for setting default matplotlib options before any figure
# is drawn; the exact settings it applies are defined in kgof.plot.
import kgof.plot
kgof.plot.set_default_matplotlib_options()

## Gaussian-Bernoulli RBM Data

Explore what the data from a GB-RBM look like.

In [None]:
def gbrbm_perturb(var_perturb_B, dx=50, dh=10):
    """
    Get a Gaussian-Bernoulli RBM problem where the first entry of the B matrix
    (the matrix linking the latent and the observation) is perturbed.

    The density p is built from the unperturbed parameters (B, b, c), while
    the data source is built from (B_perturb, b, c), where B_perturb equals B
    except at entry [0, 0]. With var_perturb_B = 0 the two share identical
    parameters.

    All random draws happen inside util.NumpySeedContext(seed=10)
    (presumably this fixes the NumPy random state for the duration of the
    block -- confirm against kgof.util).

    - var_perturb_B: Gaussian noise variance for perturbing B.
    - dx: observed dimension
    - dh: latent dimension

    Return p (density), data source
    """
    with util.NumpySeedContext(seed=10):
        # randint(0, 2) gives {0, 1}; the affine map turns it into {-1.0, +1.0}.
        B = np.random.randint(0, 2, (dx, dh))*2 - 1.0
        b = np.random.randn(dx)
        c = np.random.randn(dh)
        p = density.GaussBernRBM(B, b, c)

        B_perturb = np.copy(B)
        # Draw a scalar rather than a length-1 array: assigning a shape-(1,)
        # array to a single element relies on NumPy's deprecated implicit
        # size-1-array-to-scalar conversion. randn() consumes the same single
        # normal draw as randn(1), so the perturbation value is unchanged.
        noise = np.random.randn()*np.sqrt(var_perturb_B)
        B_perturb[0, 0] = B_perturb[0, 0] + noise
        ds = data.DSGaussBernRBM(B_perturb, b, c, burnin=500)

    return p, ds

In [None]:
# Perturbation variance 0.0: the noise added to B[0, 0] is zero, so the data
# source uses the same parameters as the density p. Observed dimension is 20,
# latent dimension is 10.
p, ds = gbrbm_perturb(0.0, dx=20, dh=10)
# Draw from the data source with a fixed seed (presumably 600 points --
# confirm the meaning of the first argument against kgof.data).
dat = ds.sample(600, seed=31)

In [None]:
import pandas as pd

# Pairwise scatter plots of the first five columns of the sampled data.
X = dat.data()
df = pd.DataFrame(X[:, :5])
# http://stackoverflow.com/questions/7941207/is-there-a-function-to-make-scatterplot-matrices-in-matplotlib
# pandas.tools.plotting was deprecated in pandas 0.20 and later removed;
# scatter_matrix is exposed as pandas.plotting.scatter_matrix.
axes = pd.plotting.scatter_matrix(df, alpha=0.2, figsize=(10, 10))
# Alternative: plot a single pair of dimensions.
# dimx = 0
# dimy = 4
# plt.plot(X[:, dimx], X[:, dimy], 'k.')

In [None]:
# Presumably the median pairwise distance between the rows of X -- confirm
# against kgof.util.meddistance. The result is displayed as the cell output.
# NOTE(review): dat was requested with 600 as the sample-size argument, so
# subsample=1000 may exceed the number of rows in X -- verify how meddistance
# handles that.
util.meddistance(X, subsample=1000)