refactoring, moving data around. linmod.py edits

wesm · Mar 1, 2011 · 24e7297 · 24e7297
1 parent 73ecbfd
commit 24e7297
Show file tree

Hide file tree

Showing 19 changed files with 223 additions and 110 deletions.
diff --git a/westharrison/pwch7.py → examples/pw_ch07.py b/westharrison/pwch7.py → examples/pw_ch07.py
diff --git a/westharrison/ch02.py → examples/wh_ch02.py b/westharrison/ch02.py → examples/wh_ch02.py
diff --git a/westharrison/ch03.py → examples/wh_ch03.py b/westharrison/ch03.py → examples/wh_ch03.py
diff --git a/westharrison/ch04.py → examples/wh_ch04.py b/westharrison/ch04.py → examples/wh_ch04.py
diff --git a/westharrison/ch08.py → examples/wh_ch08.py b/westharrison/ch08.py → examples/wh_ch08.py
@@ -4,14 +4,15 @@
 import statlib.dlm as dlm
 reload(dlm)
 from statlib.dlm import *
+from statlib.components import *
 import datasets
 
 gas = datasets.table_81()
 
 p = 12
 var_prior = (1, 0.05)
 model = Polynomial(1) + FullEffectsFourier(12, harmonics=[1, 4])
-model = Polynomial(1) + FullEffectsFourier(12)
+# model = Polynomial(1) + FullEffectsFourier(12)
 k = model.F.shape[1]
 mean_prior = (np.zeros(k), np.eye(k))
 dlm = ConstantDLM(gas, model.F, G=model.G, mean_prior=mean_prior,

diff --git a/westharrison/ch10.py → examples/wh_ch10.py b/westharrison/ch10.py → examples/wh_ch10.py
diff --git a/westharrison/ch12.py → examples/wh_ch12.py b/westharrison/ch12.py → examples/wh_ch12.py
diff --git a/statlib/arma.py b/statlib/arma.py
@@ -6,12 +6,9 @@
 
 from pandas.util.testing import set_trace as st
 
-import scipy.stats as stats
-
 from scikits.statsmodels.tsa.arima import ARMA as sm_arma
-from scikits.statsmodels.tsa.stattools import acf, acovf
+from scikits.statsmodels.tsa.stattools import acf
 from scikits.statsmodels.tools.decorators import cache_readonly
-from scikits.statsmodels.tools.tools import chain_dot as cdot
 
 import scikits.statsmodels.api as sm
 
@@ -92,8 +89,8 @@ def decomp(self):
 
         return waves, mods, decomp
 
-    def plot_acf(self, lags=50):
-        plotting.plot_acf(self.data, lags)
+    def plot_acf(self, lags=50, partial=True):
+        plotting.plot_acf(self.data, lags, partial=partial)
 
 class TestARModel(object):
 
@@ -110,105 +107,6 @@ def _prep_arvars(x, p):
 
     return y, X.T
 
-class BayesLM(object):
-    """
-    Bayesian linear model with independent errors
-
-    Normal-InverseGamma prior
-
-    Parameters
-    ----------
-    Y : ndarray (length n)
-        response variable
-    X : ndarray (n x p)
-        independent variables
-    beta_prior : (mean, cov)
-        Prior for coefficients: Normal(mean, cov)
-    var_prior : (df, df * var_0)
-        Prior for error variance: InverseGamma(df /2, df * var_0 / 2)
-
-    Notes
-    -----
-    y = X'b + e
-    e ~ N(0, v * I_n)
-    b | v ~ N(m_0, v * C_0)
-    v ~ IG(n0 / 2, d0 / 2)
-    """
-    def __init__(self, Y, X, beta_prior, var_prior):
-        self.y = Y
-        self.x = X
-
-        self.nobs = len(Y)
-
-        self.m0, self.c0 = beta_prior
-        self.n0, self.d0 = var_prior
-
-    @cache_readonly
-    def beta_post_params(self):
-        X = self.x
-
-        Q = self.Q
-        Qinv = inv(Q)
-
-        A = cdot(self.c0, X.T, Qinv)
-
-        m = self.m0 + A.dot(self.prior_resid)
-        C = self.c0 - cdot(A, Q, A.T)
-
-        return m, C
-
-    @cache_readonly
-    def Q(self):
-        return cdot(X, self.c0, X.T) + np.eye(self.nobs)
-
-    @cache_readonly
-    def var_post_params(self):
-        post_n = self.nobs + self.n0
-
-        resid = self.prior_resid
-        post_d = np.dot(resid, LA.solve(self.Q, resid)) + self.d0
-
-        return post_n, post_d
-
-    @property
-    def precision_post_dist(self):
-        post_n, post_d = self.var_post_params
-        return stats.gamma(post_n, scale=2. / post_d)
-
-    @property
-    def var_post_dist(self):
-        post_n, post_d = self.var_post_params
-        return stats.invgamma(post_n, scale=post_d / 2.)
-
-    @cache_readonly
-    def prior_resid(self):
-        return self.y - np.dot(self.x, self.m0)
-
-    def sample(self, samples=1000):
-        """
-        Generate Monte Carlo samples of posterior distribution of parameters
-
-        Samples v, then b | v, with standard normal-inverse-gamma full
-        conditional distributions.
-
-        Parameters
-        ----------
-        samples : int
-        """
-        precision_samples = self.precision_post_dist.rvs(samples)
-        var_samples = 1 / precision_samples
-
-        # sample beta condition on v
-        beta_samples = []
-
-        m, C = self.beta_post_params
-
-        for v in var_samples:
-            rv = np.random.multivariate_normal(m, v * C)
-            beta_samples.append(rv)
-
-        return var_samples, np.array(beta_samples)
-
 def ma_coefs(coefs, maxn=10):
     p = len(coefs)
     phis = np.zeros(maxn+1)
@@ -258,6 +156,7 @@ def ar_simulate(phi, s, n, dist='normal'):
     return out
 
 if __name__ == '__main__':
+    from statlib.linmod import BayesLM
     import statlib.datasets as ds
     eeg = ds.eeg_data()
 

diff --git a/westharrison/data/Table3.3.data.txt → statlib/data/Table3.3.data.txt b/westharrison/data/Table3.3.data.txt → statlib/data/Table3.3.data.txt
diff --git a/westharrison/data/eeg.dat → statlib/data/eeg.dat b/westharrison/data/eeg.dat → statlib/data/eeg.dat
diff --git a/westharrison/data/eeg2.dat → statlib/data/eeg2.dat b/westharrison/data/eeg2.dat → statlib/data/eeg2.dat
diff --git a/westharrison/data/table102.csv → statlib/data/table102.csv b/westharrison/data/table102.csv → statlib/data/table102.csv
diff --git a/westharrison/data/table111.csv → statlib/data/table111.csv b/westharrison/data/table111.csv → statlib/data/table111.csv
diff --git a/westharrison/data/table22.csv → statlib/data/table22.csv b/westharrison/data/table22.csv → statlib/data/table22.csv
diff --git a/westharrison/data/table81.csv → statlib/data/table81.csv b/westharrison/data/table81.csv → statlib/data/table81.csv
diff --git a/statlib/linmod.py b/statlib/linmod.py
@@ -0,0 +1,201 @@
+from __future__ import division
+
+from numpy.linalg import inv
+import numpy as np
+import numpy.linalg as LA
+import matplotlib.pyplot as plt
+
+import scipy.stats as stats
+
+from scikits.statsmodels.tools.decorators import cache_readonly
+from scikits.statsmodels.tools.tools import chain_dot as cdot
+
+import statlib.plotting as plotting
+
+from pandas.util.testing import set_trace as st
+
+class BayesLM(object):
+    """
+    Bayesian linear model with independent errors
+
+    Normal-InverseGamma prior
+
+    Parameters
+    ----------
+    Y : ndarray (length n)
+        response variable
+    X : ndarray (n x p)
+        independent variables
+    beta_prior : (mean, cov)
+        Prior for coefficients: Normal(mean, cov)
+    var_prior : (df, df * phi_0)
+        df degrees of freedom
+        phi_0 = 1 / var_0^2 (prior precision)
+        Prior for error variance: InverseGamma(df /2, df * phi_0 / 2)
+
+    Notes
+    -----
+    y = X'b + e
+    e ~ N(0, v * I_n)
+    b | v ~ N(m_0, v * C_0)
+    v ~ IG(n0 / 2, d0 / 2)
+    """
+    def __init__(self, Y, X, beta_prior, var_prior):
+        self.y = Y
+        self.x = X
+
+        self.nobs = len(Y)
+
+        self.m0, self.c0 = beta_prior
+        self.n0, self.d0 = var_prior
+
+    @cache_readonly
+    def beta_post_params(self):
+        X = self.x
+
+        Q = self.Q
+        Qinv = inv(Q)
+
+        A = cdot(self.c0, X.T, Qinv)
+
+        m = self.m0 + np.dot(A, self.prior_resid)
+        C = self.c0 - cdot(A, Q, A.T)
+
+        return m, C
+
+    @property
+    def sigma2(self):
+        """
+        mean posterior estimate of error variance
+        """
+        n, d = self.var_post_params
+        return d / n
+
+    @property
+    def params(self):
+        """
+        Mode of posterior t distribution for params
+        """
+        return self.beta_post_params[0]
+
+    def beta_ci(self, alpha=0.05):
+        df, _ = self.var_post_params
+        mode, scale = self.beta_post_params
+
+        std = np.sqrt(self.sigma2 * np.diag(scale))
+
+        q = stats.t(df).ppf(1 - alpha / 2)
+
+        ci_lower = mode - q * std
+        ci_upper = mode + q * std
+
+        return ci_lower, ci_upper
+
+    def prec_ci(self, alpha=0.05, hpd=True, hpd_draws=100000):
+        """
+        Computes HPD or regular frequentist interval
+        """
+        from statlib.tools import quantile
+
+        if hpd:
+            n, d = self.var_post_params
+            prec_post = stats.gamma(n / 2., scale=2./d)
+            prec_draws = prec_post.rvs(hpd_draws)
+            qs = quantile(prec_draws, [alpha / 2, 0.5, 1 - alpha / 2])
+            hpd_lower, median, hpd_upper = qs
+
+            return hpd_lower, median, hpd_upper
+        else:
+            raise NotImplementedError
+
+    @cache_readonly
+    def Q(self):
+        return cdot(self.x, self.c0, self.x.T) + np.eye(self.nobs)
+
+    @cache_readonly
+    def var_post_params(self):
+        post_n = self.nobs + self.n0
+
+        resid = self.prior_resid
+        post_d = np.dot(resid, LA.solve(self.Q, resid)) + self.d0
+
+        return post_n, post_d
+
+    @property
+    def precision_post_dist(self):
+        post_n, post_d = self.var_post_params
+        return stats.gamma(post_n, scale=2. / post_d)
+
+    @property
+    def var_post_dist(self):
+        post_n, post_d = self.var_post_params
+        return stats.invgamma(post_n, scale=post_d / 2.)
+
+    @cache_readonly
+    def prior_resid(self):
+        return self.y - np.dot(self.x, self.m0)
+
+    def sample(self, samples=1000):
+        """
+        Generate Monte Carlo samples of posterior distribution of parameters
+
+        Samples v, then b | v, with standard normal-inverse-gamma full
+        conditional distributions.
+
+        Parameters
+        ----------
+        samples : int
+        """
+        precision_samples = self.precision_post_dist.rvs(samples)
+        var_samples = 1 / precision_samples
+
+        # sample beta condition on v
+        beta_samples = []
+
+        m, C = self.beta_post_params
+
+        for v in var_samples:
+            rv = np.random.multivariate_normal(m, v * C)
+            beta_samples.append(rv)
+
+        return var_samples, np.array(beta_samples)
+
+    def plot_forecast(self, x, ax=None, label=None, **kwds):
+        """
+        Plot posterior predictive distribution for input predictor values
+
+        y ~ N(Xb, s^2)
+
+        Parameters
+        ----------
+        x : 1d
+        """
+        if ax is None:
+            ax = plt.gca()
+
+        mode, scale = self.beta_post_params
+
+        s2 = self.sigma2
+        # forecast variance
+        rQ = np.sqrt(s2 * (np.dot(x, np.dot(scale, x)) + 1))
+        m = np.dot(x, self.params)
+
+        dist = stats.norm(m, scale=rQ)
+
+        plotting.plot_support(dist.pdf, m - 5 * rQ, m + 5 * rQ, ax=ax)
+
+        if label is not None:
+            y = dist.pdf(m)
+            x = m
+            ax.text(x, y, label, **kwds)
+
+    def posterior_pred_dist(self, x):
+        mode, scale = self.beta_post_params
+
+        s2 = self.sigma2
+        # forecast variance
+        rQ = np.sqrt(s2 * (np.dot(x, np.dot(scale, x)) + 1))
+        m = np.dot(x, self.params)
+
+        return stats.norm(m, scale=rQ)
+
diff --git a/statlib/multi.py b/statlib/multi.py
@@ -112,13 +112,13 @@ def mix_pdf(x):
         return tot
 
     # plot mixture
-    mix = plotting.plot_support(mix_pdf, hi, lo,
+    mix = plotting.plot_support(mix_pdf, lo, hi,
                                 thresh=support_thresh,
                                 style='k',
                                 ax=ax)
 
     for _, dist in dists.iteritems():
-        comp = plotting.plot_support(dist.pdf, hi, lo,
+        comp = plotting.plot_support(dist.pdf, lo, hi,
                                      thresh=support_thresh,
                                      style='k--',
                                      ax=ax)

diff --git a/statlib/plotting.py b/statlib/plotting.py
@@ -17,10 +17,10 @@ def density_plot(y, thresh=1e-10, style='k', ax=None):
     Make kernel density plot of input data
     """
     kde = stats.kde.gaussian_kde(y)
-    plot_support(kde.evaluate, y.max(), y.min(),
+    plot_support(kde.evaluate, y.min(), y.max(),
                  thresh=thresh, style=style, ax=ax)
 
-def plot_support(f, hi, lo, thresh=1e-10, style='k', N=5000, ax=None):
+def plot_support(f, lo, hi, thresh=1e-10, style='k', N=5000, ax=None):
     """
     Attempt to plot input function (e.g. a probability density) over its support
     (places where its value exceeds a certain threshold)