A notebook to test and demonstrate the `MMD test` of Gretton et al. (2012), used here as a goodness-of-fit test. This approach requires the ability to sample from the density `p`.

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
#%config InlineBackend.figure_format = 'pdf'

import freqopttest.tst as tst
import kgof
import kgof.data as data
import kgof.density as density
import kgof.goftest as gof
import kgof.mmd as mgof
import kgof.kernel as ker
import kgof.util as util
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats

In [None]:
# Global matplotlib styling applied to every figure in this notebook.
# Only the font size is set; 'family' ('normal') and 'weight' ('bold')
# are optional extras that are intentionally left at their defaults.
font = dict(size=16)
plt.rc('font', **font)
plt.rc('lines', linewidth=2)

# Use font type 42 for both vector backends (PDF and PostScript output).
for backend in ('pdf', 'ps'):
    matplotlib.rcParams[backend + '.fonttype'] = 42

## MMD test (as a goodness-of-fit test)

In [None]:
# Problem setup shared by the cells below.
seed = 20      # base random seed
d = 1          # length of the mean vector (data dimension)
n = 400        # number of points to sample
alpha = 0.05   # level passed to the tests as `alpha`

# Parameters of the true density p: zero mean, unit variance.
variance = 1
mean = np.zeros((d,))

In [None]:
# Model density p, built from the `mean` and `variance` defined above.
p = density.IsotropicNormal(mean, variance)

# Data source for the observed sample. It starts from a copy of p's mean
# (so editing q_mean never touches `mean`) and is handed q_variance + 1,
# i.e. a variance one larger than the one given to p.
q_variance = variance
q_mean = mean.copy()
# q_mean[0] = 1   # optional: also move the first coordinate of the mean
ds = data.DSIsotropicNormal(q_mean, q_variance + 1)

# Alternative data source (disabled):
# q_means = np.array([ [0], [0]])
# q_variances = np.array([0.01, 1])
# ds = data.DSIsoGaussianMixture(q_means, q_variances, pmix=[0.2, 0.8])

In [None]:
# Draw the observed sample; the seed is offset from the base seed by 2.
dat = ds.sample(n, seed=seed + 2)
X = dat.data()

# Gaussian kernel whose width is set by the median heuristic on X:
# the squared value returned by meddistance is passed to KGauss.
med_dist = util.meddistance(X, subsample=1000)
sig2 = med_dist**2
k = ker.KGauss(sig2)


In [None]:
# Build the MMD goodness-of-fit test for p with kernel k, then run it on the
# sample. The result is left as the last expression so the notebook shows it.
mmd_test = mgof.QuadMMDGof(
    p,
    k,
    n_permute=400,
    alpha=alpha,
    seed=seed,
)
mmd_result = mmd_test.perform_test(dat)
mmd_result

In [None]:
# Report the test decision. print(...) with a single argument gives the same
# output on Python 2 and Python 3; the bare `print '...'` statement used
# previously is a SyntaxError on Python 3.
print('Reject H0?: {0}'.format(mmd_result['h0_rejected']))

# Compare the observed statistic with the simulated statistics stored in the
# test result.
sim_stats = mmd_result['list_permuted_mmd2']
stat = mmd_result['test_stat']

# Uniform weights 1/len(sim_stats) make the bar heights sum to 1, so the
# y-axis shows proportions rather than raw counts.
unif_weights = np.ones_like(sim_stats)/float(len(sim_stats))
plt.hist(sim_stats, label='Simulated', weights=unif_weights)

# Mark the observed statistic with a red star on the x-axis (y = 0).
plt.plot([stat, stat], [0, 0], 'r*', markersize=30, label='Stat')
plt.xlabel('Test statistic')
plt.ylabel('Proportion')
# Trailing semicolon keeps the Legend repr out of the cell output.
plt.legend(loc='best');


## MMD test with parameter search

In [None]:
# MMD goodness-of-fit test with parameter search: no kernel is passed in here.
# tr_proportion=0.2 is forwarded to perform_test (presumably the share of the
# sample used for the search -- confirm against kgof.mmd).
mmd_opt = mgof.QuadMMDGofOpt(
    p,
    n_permute=400,
    alpha=alpha,
    seed=seed,
)
mmd_opt_result = mmd_opt.perform_test(dat, tr_proportion=0.2)
mmd_opt_result