Plot the Stein witness function, and the mean/std objective as a function of the test locations.

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
#%config InlineBackend.figure_format = 'pdf'

import kgof
import kgof.data as data
import kgof.density as density
import kgof.goftest as gof
import kgof.kernel as kernel
import kgof.util as util
import matplotlib
import matplotlib.pyplot as plt
import autograd.numpy as np
import scipy.stats as stats

In [None]:
# Global plot styling shared by every figure drawn below:
# a larger font and thicker lines.
font = {'size': 18}
plt.rc('font', **font)
plt.rc('lines', linewidth=2)

# Font type 42 asks the PDF and PS backends to embed TrueType fonts.
plt.rc('pdf', fonttype=42)
plt.rc('ps', fonttype=42)

## Stein witness function

In [None]:
def generic_contourf(p, dat, k, func, title=None):
    """
    Evaluate `func` on a 2d grid of candidate test locations and plot it.

    The grid has 50 x 50 points and covers the bounding box of the data,
    extended on each side by 20% of the data range along that axis.
    A new figure is created containing
        - contour lines of the density of p,
        - a filled contour plot of the values of `func` on the grid,
        - the data points,
        - a red star at the grid point where `func` is the largest.
    The maximizing location is also printed.

    p: the model. Must provide log_normalized_den(X), evaluated here on a
        #candidates x 2 matrix.
    dat: the data. dat.data() should be an n x 2 matrix. 2d data.
    k: a kernel. Passed to `func` unchanged.
    func: (p, dat, k, V) |-> value. A function computing the values to plot.
        Called once for each grid point, with V a 1 x 2 array.
    title: title of the plot. If None, no title is set.

    Return the grid point (array of length 2) at which `func` is maximized.
    """
    # should be an n x 2 matrix. 2d data.
    X = dat.data()
    max0, max1 = np.max(X, 0)
    min0, min1 = np.min(X, 0)

    # total margin added to each axis; half of it goes to each side
    sd0, sd1 = ((max0-min0)*0.4, (max1-min1)*0.4)
    # form a test location grid to try
    nd0 = 50
    nd1 = 50
    loc0_cands = np.linspace(min0-sd0/2, max0+sd0/2, nd0)
    loc1_cands = np.linspace(min1-sd1/2, max1+sd1/2, nd1)
    lloc0, lloc1 = np.meshgrid(loc0_cands, loc1_cands)
    # nd1 x nd0 x 2
    loc3d = np.dstack((lloc0, lloc1))
    # #candidates x 2
    all_loc2s = np.reshape(loc3d, (-1, 2))

    # evaluate the function on each candidate T on the grid. Size = (#candidates, )
    stat_grid = np.array([func(p, dat, k, np.array([T])) for T in all_loc2s])
    stat_grid = np.reshape(stat_grid, (nd1, nd0))

    # density of p on the same grid (exponentiated log density)
    den_grid = np.exp(p.log_normalized_den(all_loc2s))
    den_grid = np.reshape(den_grid, (nd1, nd0))

    plt.figure(figsize=(10, 6))
    # Contour lines of the density of p.
    plt.contour(lloc0, lloc1, den_grid, alpha=0.6)
    # The filled contours are drawn after the density contours so that they
    # are the most recent mappable, i.e., the one a later plt.colorbar()
    # call picks up by default.
    plt.contourf(lloc0, lloc1, stat_grid, cmap=plt.cm.Greys, alpha=0.7)

    if title is not None:
        plt.title(title)

    # plot the data
    plt.plot(X[:, 0], X[:, 1], '.m', markeredgecolor='m', markersize=4, alpha=0.8)

    # the candidate location with the highest value of func
    max_ind = np.argmax(stat_grid.reshape(-1))
    V = all_loc2s[max_ind]
    # Parenthesized so that this line is valid in both Python 2 and Python 3.
    print('V: %s' % V)

    # put a star at the highest location
    plt.plot(V[0], V[1], 'r*', markersize=25)
    return V
 
def func_fssd(p, dat, k, V):
    """
    Compute the FSSD test statistic of the model p on the data dat, using
    the kernel k and the test locations V.

    Has the (p, dat, k, V) |-> value signature expected by generic_contourf.
    The FSSD object is constructed with fixed alpha=0.01, n_simulate=1000
    and seed=11; only its compute_stat() is used.
    """
    test = gof.FSSD(p, k, V, alpha=0.01, n_simulate=1000, seed=11)
    stat = test.compute_stat(dat)
    return stat

In [None]:
# The model p: an isotropic normal in d dimensions with zero mean and
# unit variance.
d = 2
# base seed; the cells below derive their own seeds from it by an offset
seed = 15
variance = 1
mean = np.zeros(d)
isonorm = density.IsotropicNormal(mean, variance)

In [None]:
# Draw the sample: n points from a Gaussian that differs from the model
# only in its mean.
n = 800

# Every coordinate of the mean is shifted by 1. The variance is unchanged.
draw_variance = variance + 0
draw_mean = mean + 1
# presumably util.randn(n, d, seed=...) gives an n x d matrix of standard
# normal draws -- TODO confirm against kgof.util
X = draw_mean + np.sqrt(draw_variance)*util.randn(n, d, seed=seed+3)
dat = data.Data(X)

In [None]:
# Test settings
# number of test locations
J = 1
# passed to FSSD as alpha (presumably the significance level of the test)
alpha = 0.01

# Gaussian kernel whose parameter is the square of util.meddistance on X
# (presumably the median pairwise distance heuristic -- confirm in kgof.util)
sig2 = util.meddistance(X, subsample=1000)**2
k = kernel.KGauss(sig2)

# J random test locations drawn with util.fit_gaussian_draw on the data
V = util.fit_gaussian_draw(X, J, seed=seed+1)
fssd = gof.FSSD(isonorm, k, V, alpha=alpha, n_simulate=2000, seed=2)
# Kept as the last expression so that the test result is shown as the
# output of the cell.
fssd.perform_test(dat)

In [None]:
# The model passed to the plotting calls below is the isotropic normal.
p = isonorm

In [None]:
# Plot the value returned by func_fssd as a function of a single test
# location. The returned maximizer is not stored here.
generic_contourf(p, dat, k, func_fssd)
# The calls below act on the figure that generic_contourf has just created.
plt.title('FSSD Statistic')
plt.colorbar()
plt.grid()

In [None]:
def func_fssd_power_criterion(p, dat, k, V):
    """
    Evaluate the power criterion of FSSD for the model p on the data dat,
    using the kernel k and the test locations V.

    A thin wrapper around gof.FSSD.power_criterion with the
    (p, dat, k, V) |-> value signature expected by generic_contourf.
    """
    criterion = gof.FSSD.power_criterion(p, dat, k, V)
    return criterion
    
# Plot the FSSD power criterion as a function of a single test location.
# The returned maximizer is not stored here.
generic_contourf(p, dat, k, func_fssd_power_criterion)
# The calls below act on the figure that generic_contourf has just created.
plt.title('mean/std')
plt.colorbar()
plt.grid()

In [None]:
def func_fssd_ustat_std(p, dat, k, V):
    """
    Compute the standard deviation of the U-statistic, for the model p on
    the data dat, using the kernel k and the test locations V.

    It is the square root of the variance returned by
    gof.FSSD.ustat_h1_mean_variance on the feature tensor of the data.
    Has the (p, dat, k, V) |-> value signature expected by generic_contourf.
    """
    test = gof.FSSD(p, k, V, alpha=0.01, n_simulate=1000, seed=11)
    features = test.feature_tensor(dat.data())
    # the first returned value (the mean) is not needed here
    _mean_h1, var_h1 = gof.FSSD.ustat_h1_mean_variance(
        features, return_variance=True)
    return np.sqrt(var_h1)
    
# Plot the standard deviation of the U-statistic as a function of a single
# test location. The returned maximizer is not stored here.
generic_contourf(p, dat, k, func_fssd_ustat_std)
# The calls below act on the figure that generic_contourf has just created.
plt.title('U-statistic standard deviation')
plt.colorbar()
plt.grid()