A notebook to demonstrate `cgoftest.KSSDTest` and `cgoftest.FSCDTest`

In [None]:
# Notebook setup: automatically reload edited modules (autoreload mode 2)
# so changes to imported packages are picked up without restarting the kernel,
# and render matplotlib figures inline.
%load_ext autoreload
%autoreload 2
%matplotlib inline
# Optional: uncomment one of the following to switch the inline figure format.
#%config InlineBackend.figure_format = 'svg'
#%config InlineBackend.figure_format = 'pdf'



In [None]:
import torch
import torch.distributions as dists

import kcgof
import kcgof.log as klog
import kcgof.util as util
import kcgof.cdensity as cden
import kcgof.cdata as cdat
import kcgof.cgoftest as cgof
import kcgof.kernel as ker
import kcgof.plot as plot

In [None]:
import matplotlib
import matplotlib.pyplot as plt

# Global plot styling: larger text and thicker lines for all figures below.
font = dict(
    # family='normal',
    # weight='bold',
    size=20,
)
plt.rc('font', **font)
plt.rc('lines', linewidth=2)

# Font type 42 for both the PDF and PS backends.
matplotlib.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
})

## Ordinary least squares with Gaussian noise

$$p(y \mid x) = \mathcal{N}(\text{slope} \cdot x + c,\ \text{variance})$$

### KSSD Test

In [None]:
# Dimension of the conditioning variable x.
dx = 1

# Parameters of the model p(y|x) = N(slope*x + c, variance).
slope = torch.tensor([1.0])
c = 1.0
noise_variance = 1.2

# The conditional probability model p.
p = cden.CDGaussianOLS(
    slope,
    c=c,
    variance=noise_variance,
)

In [None]:
# Generate some toy data from a conditional source (CondSource).
# If the source parameters equal those of the model p, then H0 is true.
# Here the slope is scaled by 1.5, so the data come from an H1 case;
# perturb these parameters to explore other alternatives.
cs = cdat.CSGaussianOLS(slope*1.5, c=c, variance=noise_variance)

n = 300  # sample size
px = dists.Normal(0, 1)  # distribution from which X is drawn

# Seed torch's global RNG before drawing X. Y and the tests below are
# already seeded; without this line X (and hence every downstream result)
# would change on each Restart & Run All.
torch.manual_seed(23)
X = px.sample((n, dx))

# sample Y conditioned on X
Y = cs(X, seed=24)

Plot the data and the model

In [None]:
# Plotting grid: extend the observed range of X and Y by a margin `ep`
# on each side, using 200 grid points per axis.
ep = 0.7
# Convert the extrema to Python floats for both axes so that domX and domY
# are built the same way (the original passed 0-dim tensors for X only).
domX = torch.linspace(torch.min(X).item()-ep, torch.max(X).item()+ep, 200)
domY = torch.linspace(torch.min(Y).item()-ep, torch.max(Y).item()+ep, 200)

plt.figure(figsize=(10, 8))
plot.plot_2d_cond_model(
    p,
    # density of px; the parameter is named `x` to avoid shadowing the data X
    lambda x: torch.exp(px.log_prob(x)),
    X, Y, domX=domX, domY=domY,
    cmap='pink_r', levels=50)
# plt.xlabel('$x$')
# plot.plot_2d_cond_data(X, Y)

In [None]:
# Kernels used by the tests, each constructed with its own sigma2 value.
sigma2_x = 1.0
sigma2_y = 2.0

k = ker.PTKGauss(sigma2=sigma2_x)  # kernel on X
l = ker.PTKGauss(sigma2=sigma2_y)  # kernel on Y

In [None]:
# Build the KSSD test object for model p with kernels k (on X) and l (on Y).
kssdtest = cgof.KSSDTest(
    p, k, l,
    alpha=0.05,
    n_bootstrap=400,
    seed=9,
)

In [None]:
# Run the KSSD test on the sample; also request the simulated statistics,
# which are plotted in the next cell.
result = kssdtest.perform_test(
    X,
    Y,
    return_simulated_stats=True,
)
result

In [None]:
# Histogram of the bootstrapped KSSD statistics, followed by the test outcome.
test_stat = result['test_stat']

fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(result['sim_stats'], density=True, label='Bootstrapped')
# Optional overlay of the observed statistic (left disabled):
# plt.stem([test_stat, test_stat], [0, 0.002], 'r', label='Observed', use_line_collection=True)
ax.set_xlabel('KSSD statistic')
ax.legend()

print('H0 rejected?: {}'.format(result['h0_rejected']))
print('Observed stat: {:.3f}'.format(test_stat))

### FSCD Test

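The Finite Set Conditional Discrepancy (FSCD) test is run on the same model, kernels, and data as the KSSD test above. In addition, it takes a set of $J$ test locations `V`, given as a $J \times d_x$ tensor.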

In [None]:
# Test locations: a J x dx torch tensor (here J = 1, a single location at 1.0).
V = torch.tensor([[1.0]])

# Build the FSCD test object from the model, the two kernels and the locations.
fscdtest = cgof.FSCDTest(
    p, k, l, V,
    alpha=0.05,
    n_bootstrap=400,
    seed=10,
)

In [None]:
# Run the FSCD test on the sample; also request the simulated statistics,
# which are plotted in the next cell.
fscd_result = fscdtest.perform_test(
    X,
    Y,
    return_simulated_stats=True,
)

In [None]:
# Histogram of the bootstrapped FSCD statistics, followed by the test outcome.
test_stat = fscd_result['test_stat']

fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(fscd_result['sim_stats'], density=True, label='Bootstrapped')
# Optional overlay of the observed statistic (left disabled):
# plt.stem([test_stat, test_stat], [0, 0.002], 'r', label='Observed', use_line_collection=True)
ax.set_xlabel('FSCD statistic')
ax.legend()

print('H0 rejected?: {}'.format(fscd_result['h0_rejected']))
print('Observed stat: {:.3f}'.format(test_stat))