Skip to content

Commit

Permalink
Merge pull request #19 from wgurecky/100d_gauss_test
Browse files Browse the repository at this point in the history
100D multivariate normal distribution test
  • Loading branch information
wgurecky authored Jul 18, 2020
2 parents f5c039d + fe658a7 commit 057820d
Show file tree
Hide file tree
Showing 11 changed files with 244 additions and 31 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,5 @@ install:
- pip install coveralls pytest-cov
- pip install -r requirements.txt
- FC=gfortran python setup.py develop
script: pytest --cov=bipymc
script: travis_wait 20 pytest --cov=bipymc -m "not heavy"
after_success: coveralls
26 changes: 2 additions & 24 deletions bipymc/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import h5py
import numpy as np
import scipy.stats as stats
from bipymc.util import var_ball


class McmcChain(object):
Expand All @@ -23,33 +24,10 @@ def __init__(self, theta_0, varepsilon=1e-6, global_id=0, mpi_comm=None, mpi_ran
self.global_id = global_id
theta_0_flat = np.asarray(theta_0).flatten()
self._dim = len(theta_0_flat)
theta_0_flat = np.asarray(theta_0_flat) + self.var_ball(varepsilon, self._dim)
theta_0_flat = np.asarray(theta_0_flat) + var_ball(varepsilon, self._dim)
# init the 2d array of shape (iteration, <theta_vec>)
self._chain = np.array([theta_0_flat])

@staticmethod
def var_ball(varepsilon, dim):
"""!
@brief Draw single sample from tight gaussian ball
"""
var_epsilon = 0.
if np.all(np.asarray(varepsilon) > 0):
var_epsilon = np.random.multivariate_normal(np.zeros(dim),
np.eye(dim) * np.asarray(varepsilon),
size=1)[0]
return var_epsilon

@staticmethod
def var_box(varepsilon, dim):
"""!
@brief Draw single sample from tight uniform box
"""
var_epsilon = 0.
if np.all(np.asarray(varepsilon) > 0):
var_epsilon = np.random.uniform(low=-np.asarray(varepsilon) * np.ones(dim),
high=np.asarray(varepsilon) * np.ones(dim))
return var_epsilon

def set_t_kernel(self, t_kernel):
"""!
@brief Set valid transition kernel
Expand Down
3 changes: 2 additions & 1 deletion bipymc/demc.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from bipymc.samplers import *
from mpi4py import MPI
import sys
from bipymc.util import var_box, var_ball


class DeMcMpi(DeMc):
Expand Down Expand Up @@ -178,7 +179,7 @@ def _update_chain_pool(self, k, c_id, current_chain, prop_chain_pool, prop_chain
# Generate proposal vector
prop_vector = gamma * (mut_a_chain_state - mut_b_chain_state)
prop_vector += current_chain.current_pos
prop_vector += McmcChain.var_ball(epsilon ** 2.0, self.dim)
prop_vector += var_ball(epsilon ** 2.0, self.dim)

# Metropolis ratio
alpha = self._mut_prop_ratio(self._frozen_ln_like_fn,
Expand Down
5 changes: 3 additions & 2 deletions bipymc/dream.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import numpy as np
from bipymc.demc import DeMcMpi
from bipymc.chain import McmcChain
from bipymc.util import var_ball, var_box
from mpi4py import MPI
import sys

Expand Down Expand Up @@ -79,8 +80,8 @@ def _update_chain_pool(self, k, c_id, current_chain, prop_chain_pool, prop_chain
gamma = gamma_base

# Generate proposal vector
eps_u = McmcChain.var_box(u_epsilon, self.dim)
eps_n = McmcChain.var_ball(epsilon ** 2.0, self.dim)
eps_u = var_box(u_epsilon, self.dim)
eps_n = var_ball(epsilon ** 2.0, self.dim)
prop_vector = ((np.ones(self.dim) + eps_u) * gamma * \
np.sum(mut_a_chain_state - mut_b_chain_state, axis=0) + eps_n) * update_dims

Expand Down
3 changes: 2 additions & 1 deletion bipymc/samplers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import scipy.stats as stats
from bipymc.proposals import *
from bipymc.chain import *
from bipymc.util import var_ball, var_box


class McmcSampler(object):
Expand Down Expand Up @@ -282,7 +283,7 @@ def _mcmc_run(self, n, theta_0, varepsilon=1e-6, **kwargs):
# generate proposal vector
prop_vector = gamma * (mut_a_chain.current_pos - mut_b_chain.current_pos)
prop_vector += current_chain.current_pos
prop_vector += McmcChain.var_ball(varepsilon * 1e-3, dim)
prop_vector += var_ball(varepsilon * 1e-3, dim)

# note the probability ratio can be computed in parallel
# accept or reject mutated chain loc
Expand Down
28 changes: 28 additions & 0 deletions bipymc/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from __future__ import print_function, division
import numpy as np


def var_ball(varepsilon, dim):
"""!
@brief Draw single sample from tight gaussian ball
@param varepsilon float or 1d_array of len dim
@param dim dimension of gaussian ball
"""
eps = 0.
if np.all(np.asarray(varepsilon) > 0):
eps = np.random.multivariate_normal(np.zeros(dim),
np.eye(dim) * np.asarray(varepsilon),
size=1)[0]
return eps

def var_box(varepsilon, dim):
"""!
@brief Draw single sample from tight uniform box
@param varepsilon float or 1d_array of len dim
@param dim dimension of uniform distribution
"""
eps = 0.
if np.all(np.asarray(varepsilon) > 0):
eps = np.random.uniform(low=-np.asarray(varepsilon) * np.ones(dim),
high=np.asarray(varepsilon) * np.ones(dim))
return eps
56 changes: 56 additions & 0 deletions bipymc/utils/d100_gauss.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/usr/bin/python
##
# Description: Implements 100d normal dist
##
import numpy as np
from scipy.stats import multivariate_normal
import matplotlib.pyplot as plt


class Gauss_100D(object):
"""
High dimension multivariate normal from LANL DREAM report
"""
def __init__(self, rho=0.5, dim=100):
# rho is pairwise correlation between all rvs
self.mu = np.zeros(dim)
self.var = np.sqrt(np.arange(dim) + 1.0)
self.cov = np.zeros((dim, dim))
for i in range(dim):
for j in range(dim):
if i == j:
self.cov[i][j] = self.var[i] ** 2.0
else:
self.cov[i][j] = self.var[i] * self.var[j] * rho
self.dim = dim
self.rho = rho
self.rv_100d = multivariate_normal(self.mu, self.cov)

def pdf(self, y):
# return self.rv_100d.pdf(y) / 1e-100
return self.rv_100d.pdf(y)

def ln_like(self, y):
assert len(y) == self.dim
return np.log(self.pdf(y))

def rvs(self, n_samples):
rv_samples = self.rv_100d.rvs(size=n_samples)
return rv_samples


if __name__ == "__main__":
d100_gauss = Gauss_100D()
y = d100_gauss.rvs(10)
print(y.shape)

p = d100_gauss.pdf(np.zeros(100))
print(p)
p = d100_gauss.pdf(np.ones(100))
print(p)

ln_p = d100_gauss.ln_like(np.zeros(100))
print(ln_p)
ln_p = d100_gauss.ln_like(np.ones(100))
print(ln_p)
print(np.diag(d100_gauss.cov))
Binary file added doc/images/bimodal_mont.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
24 changes: 24 additions & 0 deletions license.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
Copyright (c) 2018 William Gurecky
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY William Gurecky ''AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL William Gurecky BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
111 changes: 111 additions & 0 deletions tests/test_100dgauss.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#!/usr/bin/python
##
# Description: Tests samplers on a 100d gauss shaped distribution
##
from __future__ import print_function, division
import unittest
import numpy as np
from six import iteritems
from mpi4py import MPI
import matplotlib.pyplot as plt
import time
import pytest
#
from bipymc.utils import banana_rv, dblgauss_rv, d100_gauss
from bipymc.mc_plot import mc_plot
from bipymc.demc import DeMcMpi
from bipymc.dream import DreamMpi
from bipymc.dram import DrMetropolis, Dram
np.random.seed(42)
n_samples = 500000
n_burn = 200000


@pytest.mark.heavy
class TestMcmc100DGauss(unittest.TestCase):
def setUp(self):
"""
Setup the 100D gauss distribution
"""
self.comm = MPI.COMM_WORLD
self.gauss = d100_gauss.Gauss_100D()
self.sampler_dict = {
'demc': (self._setup_demc(self.gauss.ln_like), 200),
'dream': (self._setup_dream(self.gauss.ln_like), 100),
}

if self.comm.rank == 0:
# plot true pdf and true samples
self._plot_gauss()

def test_samplers(self):
"""
Test ability of each mcmc method to draw samples
from the 100d gauss distribution.
"""
global n_burn
global n_samples
for sampler_name, (my_mcmc, n_chains) in iteritems(self.sampler_dict):
t0 = time.time()
my_mcmc.run_mcmc(n_samples)
t1 = time.time()
theta_est, sig_est, chain = my_mcmc.param_est(n_burn=n_burn)
theta_est_, sig_est_, full_chain = my_mcmc.param_est(n_burn=0)

if self.comm.rank == 0:
print("=== " + str(sampler_name) + " ===")
print("Sampler walltime: %d (s)" % int(t1 - t0))
print("Esimated params: %s" % str(theta_est))
print("Estimated params sigma: %s " % str(sig_est))
print("Acceptance fraction: %f" % my_mcmc.acceptance_fraction)
try:
print("P_cr: %s" % str(my_mcmc.p_cr))
except:
pass
y1, y2 = chain[:, 0], chain[:, 1]

if sampler_name == 'demc' or sampler_name == 'dream':
self.assertAlmostEqual(0.0, theta_est[0], delta=0.2)
self.assertAlmostEqual(0.0, theta_est[1], delta=0.2)

# plot mcmc samples
plt.figure()
plt.scatter(y1, y2, s=2, alpha=0.08)
plt.grid(ls='--', alpha=0.5)
plt.xlim(-4, 4)
plt.ylim(-4, 4)
plt.savefig(str(sampler_name) + "_100d_gauss_slice_sample.png")
plt.close()

# plot mcmc chains
"""
mc_plot.plot_mcmc_indep_chains(full_chain, n_chains,
labels=["x1", "x2"],
savefig=str(sampler_name) + "_chains.png",
scatter=True)
"""

def _plot_gauss(self):
n_samples = 10000
y = self.gauss.rvs(n_samples)
y1, y2 = y[:, 0], y[:, 1]
plt.figure()
plt.scatter(y1, y2, s=2, alpha=0.3)
plt.xlim(-4, 4)
plt.ylim(-4, 4)
plt.grid(ls='--', alpha=0.5)
plt.savefig("true_100d_gauss_slice_samples.png")
plt.close()

def _setup_demc(self, log_like_fn, n_chains=200):
theta_0 = np.zeros(100)
my_mcmc = DeMcMpi(log_like_fn, theta_0, n_chains=n_chains, mpi_comm=self.comm)
return my_mcmc

def _setup_dream(self, log_like_fn, n_chains=100):
global n_burn
theta_0 = np.zeros(100)
my_mcmc = DreamMpi(log_like_fn, theta_0, n_chains=n_chains, mpi_comm=self.comm,
n_cr_gen=50, burnin_gen=int(n_burn / n_chains))
return my_mcmc

17 changes: 15 additions & 2 deletions tests/test_dblgauss.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,18 +75,23 @@ def test_samplers(self):

# plot mcmc samples
plt.figure()
plt.scatter(y1, y2, s=2, alpha=0.15)
plt.scatter(y1, y2, s=2, alpha=0.10)
plt.grid(ls='--', alpha=0.5)
plt.xlim(-2, 4)
plt.ylim(-2, 4)
plt.axvline(theta_est[0], -10, 10, c='r')
plt.axhline(theta_est[1], -10, 10, c='r')
plt.xlabel("x1")
plt.ylabel("x2")
plt.title(sampler_name + " " + r"$ \mu=(%0.2f,%0.2f)$" % (theta_est[0], theta_est[1]))
plt.savefig(str(sampler_name) + "_bimodal_gauss_sample.png")
plt.close()

# plot mcmc chains
mc_plot.plot_mcmc_indep_chains(full_chain, n_chains,
labels=["x1", "x2"],
savefig=str(sampler_name) + "_chains.png",
scatter=True)
scatter=True, nburn=int(n_burn))

def _plot_bimodal_gauss(self):
n_samples = 10000
Expand All @@ -96,6 +101,11 @@ def _plot_bimodal_gauss(self):
plt.xlim(-2, 4)
plt.ylim(-2, 4)
plt.grid(ls='--', alpha=0.5)
plt.axvline(1.5, -10, 10, c='r')
plt.axhline(1.5, -10, 10, c='r')
plt.xlabel("x1")
plt.ylabel("x2")
plt.title("True " + r"$ \mu=(%0.2f,%0.2f)$" % (1.5, 1.5))
plt.savefig("true_bimodal_gauss_asamples.png")
plt.close()

Expand All @@ -106,6 +116,9 @@ def _plot_bimodal_gauss(self):
p = self.bimodal_gauss.pdf(y1, y2)
plt.contourf(y1, y2, p)
plt.grid(ls='--', alpha=0.5)
plt.xlabel("x1")
plt.ylabel("x2")
plt.title("True PDF")
plt.colorbar(label='probability density')
plt.savefig("true_bimodal_gauss_pdf.png")
plt.close()
Expand Down

0 comments on commit 057820d

Please sign in to comment.