Skip to content

Commit

Permalink
moving around distribution stuff. more work on SV MCMC
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Mar 14, 2011
1 parent d772f88 commit 6d1577e
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 91 deletions.
74 changes: 63 additions & 11 deletions statlib/distributions.py
@@ -1,14 +1,10 @@
from numpy import sqrt, log, exp, pi
import numpy as np

from scipy.stats import norm, chi2
from scipy.special import gammaln as gamln, gamma as gam
import scipy.stats as stats
import scipy.special as special

## Non-central T distribution

from scipy.stats import rv_continuous
import matplotlib.pyplot as plt
import statlib.plotting as plotting

#-------------------------------------------------------------------------------
# Distribution-related functions
Expand Down Expand Up @@ -53,18 +49,74 @@ def rgamma(a, b, n=None):
Sample from gamma(a, b) distribution using rate parameterization. For
reducing cognitive dissonance moving between R and Python
"""
if n:
return np.random.gamma(a, scale=1./b, size=n)
else:
return np.random.gamma(a, scale=1./b)
return np.random.gamma(a, scale=1./b, size=n)

rnorm = np.random.normal
dnorm = stats.norm.pdf
rtnorm = stats.truncnorm.rvs
rmvnorm = np.random.multivariate_normal

# rmvnorm = np.random.multivariate_normal

def make_t_ci(df, level, scale, alpha=0.10):
sigma = stats.t(df).ppf(1 - alpha / 2)
upper = level + sigma * scale
lower = level - sigma * scale
return lower, upper

def rmvnorm(mu, cov, size=1):
"""
Compute multivariate normal draws from possibly singular covariance matrix
using singular value decomposition (SVD)
"""
# TODO: find reference
U, s, Vh = np.linalg.svd(cov)
cov_sqrt = np.dot(U * np.sqrt(s), Vh)

# N(0, 1) draws
draws = np.random.randn(size, len(cov))
return np.dot(draws, cov_sqrt) + mu


# TODO arbitrary mixture...e.g. betas or gammas

class NormalMixture(object):
"""
Simple class for storing mixture of normals
Parameters
----------
"""
def __init__(self, means, variances, weights):
if not (len(means) == len(variances) == len(weights)):
raise ValueError('inputs must be all same length sequences')

self.means = np.asarray(means)
self.variances = np.asarray(variances)
self.weights = np.asarray(weights)

self.dists = [stats.norm(m, np.sqrt(v))
for m, v in zip(self.means, self.variances)]

def pdf(self, x):
"""
Evaluate mixture probability density function
"""
density = 0
for w, dist in zip(self.weights, self.dists):
density += w * dist.pdf(x)

return density

def plot(self, style='k', ax=None, **plot_kwds):

max_sd = np.sqrt(self.variances.max())

# overkill perhaps
lo = self.means.min() - 4 * max_sd
hi = self.means.max() + 4 * max_sd

if ax is None:
ax = plt.gca()

plotting.plot_support(self.pdf, lo, hi, style=style,
**plot_kwds)
109 changes: 29 additions & 80 deletions statlib/svmodel.py
Expand Up @@ -15,7 +15,8 @@

class SVModel(object):
"""
Univariate stochastic volatility (SV) model using normal approximation
Univariate AR(1) stochastic volatility (SV) model using normal approximation
for observation noise (see notes)
Parameters
----------
Expand Down Expand Up @@ -43,13 +44,15 @@ def __init__(self, data, nu_approx=None):
if nu_approx is None:
nu_approx = get_log_chi2_normal_mix()

# HACK
self.phi_prior = 0.5, 0.1

self.nobs = len(data)
self.data = data
self.nu_approx = nu_approx

# HACK
self.phi_prior = 0.5, 0.1
self.mu_prior = 0, 1
self.v_prior = 1

def sample(self, niter=1000, nburn=0, thin=1):
self._setup_storage(niter, nburn, thin)

Expand All @@ -68,7 +71,7 @@ def sample(self, niter=1000, nburn=0, thin=1):
phi = self._sample_phi(phi, z, mu, v)
mu = self._sample_mu()
v = self._sample_v()
z = self._sample_z()
z = self._sample_z(gamma, phi, v)

if i % thin == 0 and i >= 0:
k = i // thin
Expand Down Expand Up @@ -115,98 +118,44 @@ def _sample_phi(self, phi, z, mu, v):

r = z - mu
sumsq = (r[:-1] ** 2).sum()
A = v * sumsq

prop_mean = ((r[1:] * r[:-1]).sum() / sumsq) / A + prior_m / prior_v
prop_var = 1 / A + 1 / prior_v
proposal = dist.rtrunc_norm(prop_mean, np.sqrt(prop_var), 0, 1)
css_mean = (r[1:] * r[:-1]).sum() / sumsq
css_var = v * sumsq

prop_prec = 1 / css_var + 1 / prior_v
prop_mean = (css_mean / css_var + prior_m / prior_v) / prop_prec

phistar = dist.rtrunc_norm(prop_mean, np.sqrt(1 / prop_prec), 0, 1)

a_prop = 0
a_phi = 0
def accept_factor(phi):
return (np.sqrt(1 - phistar**2) *
np.exp(0.5 * phistar**2 * (z[0] - mu)**2 / v))

if rand() < a_prop / a_phi:
return proposal
if rand() < accept_factor(phistar) / accept_factor(phi):
return phistar
else:
return phi

def _sample_mu(self, gamma, phi, v):
# Invoke Cython code
def _sample_mu(self, z, phi, v):
pass

def _sample_v(self, z, mu, phi):
pass

def _sample_z(self, gamma, phi, v):
# FFBS in Cython
mu = ffbs.univ_sv_ffbs(self.y, phi,
self.nu_approx.means,
self.nu_approx.variances,
gamma, v)

return mu

def _sample_v(self):
pass

def _sample_z(self):
# FFBS
pass

def sample_measure(weights):
return weights.cumsum().searchsorted(rand())

def rmvnorm(mu, cov, size=1):
"""
Compute multivariate normal draws from possibly singular covariance matrix
using singular value decomposition (SVD)
"""
# TODO: find reference
U, s, Vh = np.linalg.svd(cov)
cov_sqrt = np.dot(U * np.sqrt(s), Vh)

# N(0, 1) draws
draws = np.random.randn(size, len(cov))
return np.dot(draws, cov_sqrt) + mu

# TODO arbitrary mixture...e.g. betas or gammas

class NormalMixture(object):
"""
Simple class for storing mixture of some distributions
Parameters
----------
"""
def __init__(self, means, variances, weights):
if not (len(means) == len(variances) == len(weights)):
raise ValueError('inputs must be all same length sequences')

self.means = np.asarray(means)
self.variances = np.asarray(variances)
self.weights = np.asarray(weights)

self.dists = [stats.norm(m, np.sqrt(v))
for m, v in zip(self.means, self.variances)]

def pdf(self, x):
"""
Evaluate mixture probability density function
"""
density = 0
for w, dist in zip(self.weights, self.dists):
density += w * dist.pdf(x)

return density

def plot(self, style='k', ax=None, **plot_kwds):
max_sd = np.sqrt(self.variances.max())

# overkill perhaps
lo = self.means.min() - 4 * max_sd
hi = self.means.max() + 4 * max_sd

if ax is None:
ax = plt.gca()

plotting.plot_support(self.pdf, lo, hi, style=style,
**plot_kwds)

def compare_mix_with_logchi2(draws=5000):
# make a graph showing the mixture approx

plt.figure()

mixture = get_log_chi2_normal_mix()
Expand All @@ -224,7 +173,7 @@ def get_log_chi2_normal_mix():
weights = [0.0073, 0.0000, 0.1056, 0.2575, 0.3400, 0.2457, 0.0440]
means = [-5.7002, -4.9186, -2.6216, -1.1793, -0.3255, 0.2624, 0.7537]
variances = [1.4490, 1.2949, 0.6534, 0.3157, 0.1600, 0.0851, 0.0418]
return NormalMixture(means, variances, weights)
return dist.NormalMixture(means, variances, weights)

if __name__ == '__main__':
import statlib.datasets as ds
Expand Down

0 comments on commit 6d1577e

Please sign in to comment.