moving around distribution stuff. more work on SV MCMC

wesm · Mar 14, 2011 · 6d1577e · 6d1577e
1 parent d772f88
commit 6d1577e
Show file tree

Hide file tree

Showing 2 changed files with 92 additions and 91 deletions.
diff --git a/statlib/distributions.py b/statlib/distributions.py
@@ -1,14 +1,10 @@
-from numpy import sqrt, log, exp, pi
 import numpy as np
 
-from scipy.stats import norm, chi2
-from scipy.special import gammaln as gamln, gamma as gam
 import scipy.stats as stats
 import scipy.special as special
 
-## Non-central T distribution
-
-from scipy.stats import rv_continuous
+import matplotlib.pyplot as plt
+import statlib.plotting as plotting
 
 #-------------------------------------------------------------------------------
 # Distribution-related functions
@@ -53,18 +49,74 @@ def rgamma(a, b, n=None):
     Sample from gamma(a, b) distribution using rate parameterization. For
     reducing cognitive dissonance moving between R and Python
     """
-    if n:
-        return np.random.gamma(a, scale=1./b, size=n)
-    else:
-        return np.random.gamma(a, scale=1./b)
+    return np.random.gamma(a, scale=1./b, size=n)
 
 rnorm = np.random.normal
 dnorm = stats.norm.pdf
 rtnorm = stats.truncnorm.rvs
-rmvnorm = np.random.multivariate_normal
+
+# rmvnorm = np.random.multivariate_normal
 
 def make_t_ci(df, level, scale, alpha=0.10):
     sigma = stats.t(df).ppf(1 - alpha / 2)
     upper = level + sigma * scale
     lower = level - sigma * scale
     return lower, upper
+
+def rmvnorm(mu, cov, size=1):
+    """
+    Compute multivariate normal draws from possibly singular covariance matrix
+    using singular value decomposition (SVD)
+    """
+    # TODO: find reference
+    U, s, Vh = np.linalg.svd(cov)
+    cov_sqrt = np.dot(U * np.sqrt(s), Vh)
+
+    # N(0, 1) draws
+    draws = np.random.randn(size, len(cov))
+    return np.dot(draws, cov_sqrt) + mu
+
+
+# TODO arbitrary mixture...e.g. betas or gammas
+
+class NormalMixture(object):
+    """
+    Simple class for storing mixture of normals
+
+    Parameters
+    ----------
+    """
+    def __init__(self, means, variances, weights):
+        if not (len(means) == len(variances) == len(weights)):
+            raise ValueError('inputs must be all same length sequences')
+
+        self.means = np.asarray(means)
+        self.variances = np.asarray(variances)
+        self.weights = np.asarray(weights)
+
+        self.dists = [stats.norm(m, np.sqrt(v))
+                      for m, v in zip(self.means, self.variances)]
+
+    def pdf(self, x):
+        """
+        Evaluate mixture probability density function
+        """
+        density = 0
+        for w, dist in zip(self.weights, self.dists):
+            density += w * dist.pdf(x)
+
+        return density
+
+    def plot(self, style='k', ax=None, **plot_kwds):
+
+        max_sd = np.sqrt(self.variances.max())
+
+        # overkill perhaps
+        lo = self.means.min() - 4 * max_sd
+        hi = self.means.max() + 4 * max_sd
+
+        if ax is None:
+            ax = plt.gca()
+
+        plotting.plot_support(self.pdf, lo, hi, style=style,
+                              **plot_kwds)
diff --git a/statlib/svmodel.py b/statlib/svmodel.py
@@ -15,7 +15,8 @@
 
 class SVModel(object):
     """
-    Univariate stochastic volatility (SV) model using normal approximation
+    Univariate AR(1) stochastic volatility (SV) model using normal approximation
+    for observation noise (see notes)
 
     Parameters
     ----------
@@ -43,13 +44,15 @@ def __init__(self, data, nu_approx=None):
         if nu_approx is None:
             nu_approx = get_log_chi2_normal_mix()
 
-        # HACK
-        self.phi_prior = 0.5, 0.1
-
         self.nobs = len(data)
         self.data = data
         self.nu_approx = nu_approx
 
+        # HACK
+        self.phi_prior = 0.5, 0.1
+        self.mu_prior = 0, 1
+        self.v_prior = 1
+
     def sample(self, niter=1000, nburn=0, thin=1):
         self._setup_storage(niter, nburn, thin)
 
@@ -68,7 +71,7 @@ def sample(self, niter=1000, nburn=0, thin=1):
             phi = self._sample_phi(phi, z, mu, v)
             mu = self._sample_mu()
             v = self._sample_v()
-            z = self._sample_z()
+            z = self._sample_z(gamma, phi, v)
 
             if i % thin == 0 and i >= 0:
                 k = i // thin
@@ -115,98 +118,44 @@ def _sample_phi(self, phi, z, mu, v):
 
         r = z - mu
         sumsq = (r[:-1] ** 2).sum()
-        A = v * sumsq
 
-        prop_mean = ((r[1:] * r[:-1]).sum() / sumsq) / A + prior_m / prior_v
-        prop_var = 1 / A + 1 / prior_v
-        proposal = dist.rtrunc_norm(prop_mean, np.sqrt(prop_var), 0, 1)
+        css_mean = (r[1:] * r[:-1]).sum() / sumsq
+        css_var = v * sumsq
+
+        prop_prec = 1 / css_var + 1 / prior_v
+        prop_mean = (css_mean / css_var + prior_m / prior_v) / prop_prec
+
+        phistar = dist.rtrunc_norm(prop_mean, np.sqrt(1 / prop_prec), 0, 1)
 
-        a_prop = 0
-        a_phi = 0
+        def accept_factor(phi):
+            return (np.sqrt(1 - phistar**2) *
+                    np.exp(0.5 * phistar**2 * (z[0] - mu)**2 / v))
 
-        if rand() < a_prop / a_phi:
-            return proposal
+        if rand() < accept_factor(phistar) / accept_factor(phi):
+            return phistar
         else:
             return phi
 
-    def _sample_mu(self, gamma, phi, v):
-        # Invoke Cython code
+    def _sample_mu(self, z, phi, v):
+        pass
+
+    def _sample_v(self, z, mu, phi):
+        pass
+
+    def _sample_z(self, gamma, phi, v):
+        # FFBS in Cython
         mu = ffbs.univ_sv_ffbs(self.y, phi,
                                self.nu_approx.means,
                                self.nu_approx.variances,
                                gamma, v)
 
         return mu
 
-    def _sample_v(self):
-        pass
-
-    def _sample_z(self):
-        # FFBS
-        pass
-
 def sample_measure(weights):
     return weights.cumsum().searchsorted(rand())
 
-def rmvnorm(mu, cov, size=1):
-    """
-    Compute multivariate normal draws from possibly singular covariance matrix
-    using singular value decomposition (SVD)
-    """
-    # TODO: find reference
-    U, s, Vh = np.linalg.svd(cov)
-    cov_sqrt = np.dot(U * np.sqrt(s), Vh)
-
-    # N(0, 1) draws
-    draws = np.random.randn(size, len(cov))
-    return np.dot(draws, cov_sqrt) + mu
-
-# TODO arbitrary mixture...e.g. betas or gammas
-
-class NormalMixture(object):
-    """
-    Simple class for storing mixture of some distributions
-
-    Parameters
-    ----------
-    """
-    def __init__(self, means, variances, weights):
-        if not (len(means) == len(variances) == len(weights)):
-            raise ValueError('inputs must be all same length sequences')
-
-        self.means = np.asarray(means)
-        self.variances = np.asarray(variances)
-        self.weights = np.asarray(weights)
-
-        self.dists = [stats.norm(m, np.sqrt(v))
-                      for m, v in zip(self.means, self.variances)]
-
-    def pdf(self, x):
-        """
-        Evaluate mixture probability density function
-        """
-        density = 0
-        for w, dist in zip(self.weights, self.dists):
-            density += w * dist.pdf(x)
-
-        return density
-
-    def plot(self, style='k', ax=None, **plot_kwds):
-        max_sd = np.sqrt(self.variances.max())
-
-        # overkill perhaps
-        lo = self.means.min() - 4 * max_sd
-        hi = self.means.max() + 4 * max_sd
-
-        if ax is None:
-            ax = plt.gca()
-
-        plotting.plot_support(self.pdf, lo, hi, style=style,
-                              **plot_kwds)
-
 def compare_mix_with_logchi2(draws=5000):
     # make a graph showing the mixture approx
-
     plt.figure()
 
     mixture = get_log_chi2_normal_mix()
@@ -224,7 +173,7 @@ def get_log_chi2_normal_mix():
     weights = [0.0073, 0.0000, 0.1056, 0.2575, 0.3400, 0.2457, 0.0440]
     means = [-5.7002, -4.9186, -2.6216, -1.1793, -0.3255, 0.2624, 0.7537]
     variances = [1.4490, 1.2949, 0.6534, 0.3157, 0.1600, 0.0851, 0.0418]
-    return NormalMixture(means, variances, weights)
+    return dist.NormalMixture(means, variances, weights)
 
 if __name__ == '__main__':
     import statlib.datasets as ds