In [1]:
import numpy as np
from abc import ABC, abstractmethod
from scipy.special import expit, logsumexp, ndtr, ndtri
from numpy.polynomial.hermite import hermgauss
from scipy.stats import t as student_t, norm
from scipy.linalg import cho_factor, cho_solve
from sklearn.metrics import accuracy_score, mean_squared_error

In [2]:
# Numerical stability constants
JITTER = 1e-8
LOG_CLIP = 20.0
MIN_DAMPING = 1e-2
PROBIT_SCALE = np.sqrt(1.0 + np.pi / 8.0)

# Utilities

In [3]:
def robust_invert(
    M,
    jitter=JITTER,
    max_tries=5
):
    """
    Robust matrix inversion using Cholesky decomposition with fallback.

    Args:
        M: Square matrix to invert
        jitter: Initial jitter value for numerical stability
        max_tries: Maximum number of attempts with increasing jitter

    Returns:
        Inverted matrix
    """
    n = M.shape[0]
    I = np.eye(n)
    for _ in range(max_tries):
        try:
            c, lower = cho_factor(M, lower=True, check_finite=False)
            return cho_solve((c, lower), I, check_finite=False)
        except np.linalg.LinAlgError:
            M = M + jitter * I
            jitter *= 10.0
    return np.linalg.pinv(M)

In [4]:
def _get_gh_quadrature(gh_points):
    """
    Get Gauss-Hermite quadrature nodes and weights.

    Args:
        gh_points: Number of quadrature points

    Returns:
        Tuple of (nodes, normalized_weights)
    """
    nodes, weights = hermgauss(gh_points)
    return nodes, weights / np.sqrt(np.pi)

In [5]:
def _compute_tilted_moments_standard(
    m_c,
    v_c,
    gh_z,
    gh_w,
    like
):
    """
    Compute tilted moments using standard likelihood function.

    Args:
        m_c: Cavity mean
        v_c: Cavity variance
        gh_z: Gauss-Hermite nodes
        gh_w: Gauss-Hermite weights
        like: Likelihood function

    Returns:
        Tuple of (first_moment, second_moment, normalization_constant)
    """
    v_c = max(v_c, JITTER)
    a = m_c + np.sqrt(2.0 * v_c) * gh_z
    phi = like(a)
    Z = gh_w.dot(phi)
    if not np.isfinite(Z) or Z <= 0:
        return None, None, Z
    E1 = gh_w.dot(a * phi) / Z
    E2 = gh_w.dot(a * a * phi) / Z
    return E1, E2, Z

In [6]:
def _compute_tilted_moments_loglike(
    m_c,
    v_c,
    gh_z,
    gh_w,
    log_phi
):
    """
    Compute tilted moments using log-likelihood function.

    Args:
        m_c: Cavity mean
        v_c: Cavity variance
        gh_z: Gauss-Hermite nodes
        gh_w: Gauss-Hermite weights
        log_phi: Log-likelihood function

    Returns:
        Tuple of (first_moment, second_moment)
    """
    v_c = max(v_c, JITTER)
    a = m_c + np.sqrt(2.0 * v_c) * gh_z
    a_clip = np.clip(a, -LOG_CLIP, LOG_CLIP)
    lwphi = np.log(gh_w) + log_phi(a_clip)
    logZ = logsumexp(lwphi)
    if not np.isfinite(logZ):
        return None, None
    w = np.exp(lwphi - logZ)
    E1 = np.sum(a * w)
    E2 = np.sum(a * a * w)
    return E1, E2

# Likelihoods

In [7]:
class Likelihood(ABC):
    """Abstract base class for likelihood functions."""

    @abstractmethod
    def tilted_moments(
        self,
        m_c,
        v_c,
        y,
        gh_z,
        gh_w
    ):
        """
        Compute tilted moments for the likelihood.

        Args:
            m_c: Cavity mean
            v_c: Cavity variance
            y: Observed data point
            gh_z: Gauss-Hermite nodes
            gh_w: Gauss-Hermite weights

        Returns:
            Tuple of (first_moment, second_moment)
        """
        pass

    @abstractmethod
    def predict(
        self,
        m,
        v
    ):
        """
        Compute predictive mean given latent moments.

        Args:
            m: Latent mean
            v: Latent variance

        Returns:
            Predicted mean
        """
        pass

    @abstractmethod
    def sample(
        self,
        A,
        rng
    ):
        """
        Sample from the likelihood given latent values.

        Args:
            A: Latent function values
            rng: Random number generator

        Returns:
            Sampled observations
        """
        pass

In [8]:
class GaussianLikelihood(Likelihood):
    """Gaussian likelihood for regression problems."""

    def __init__(self, obs_var):
        """
        Initialize Gaussian likelihood.

        Args:
            obs_var: Observation noise variance
        """
        self.obs_var = obs_var

    def tilted_moments(
        self,
        m_c,
        v_c,
        y,
        gh_z,
        gh_w
    ):
        """Compute tilted moments for Gaussian likelihood."""
        like = lambda a: np.exp(-0.5 * (y - a)**2 / self.obs_var)
        E1, E2, _ = _compute_tilted_moments_standard(
            m_c, v_c, gh_z, gh_w, like
        )
        return (E1, E2) if E1 is not None else (None, None)

    def predict(
        self,
        m,
        v
    ):
        """Predict mean for Gaussian likelihood."""
        return m

    def sample(
        self,
        A,
        rng
    ):
        """Sample from Gaussian likelihood."""
        return A + rng.randn(*A.shape) * np.sqrt(self.obs_var)

In [9]:
class PoissonLikelihood(Likelihood):
    """Poisson likelihood for count data."""

    def tilted_moments(
        self,
        m_c,
        v_c,
        y,
        gh_z,
        gh_w
    ):
        """Compute tilted moments for Poisson likelihood."""
        log_phi = lambda a: y * a - np.exp(a)
        return _compute_tilted_moments_loglike(
            m_c, v_c, gh_z, gh_w, log_phi
        )

    def predict(
        self,
        m,
        v
    ):
        """Predict mean for Poisson likelihood."""
        return np.exp(np.clip(m + 0.5 * v, -LOG_CLIP, LOG_CLIP))

    def sample(
        self,
        A,
        rng
    ):
        """Sample from Poisson likelihood."""
        lam = np.exp(np.clip(A, -LOG_CLIP, LOG_CLIP))
        return rng.poisson(lam)

In [10]:
class LogisticLikelihood(Likelihood):
    """Logistic likelihood for binary classification using probit approximation."""

    def tilted_moments(
        self,
        m_c,
        v_c,
        y,
        gh_z,
        gh_w
    ):
        """Compute tilted moments for logistic likelihood."""
        # Convert y to {-1, +1} for symmetric treatment
        y_sign = 2 * y - 1  # Maps {0,1} to {-1,+1}

        # Adjust cavity mean by the label
        m_c_adj = y_sign * m_c

        # Compute moments using probit approximation
        s2 = v_c + PROBIT_SCALE**2
        s = np.sqrt(s2)
        z = m_c_adj / s

        # Cumulative and density functions
        Phi = norm.cdf(z)
        phi = norm.pdf(z)

        # Handle extreme cases
        if Phi < 1e-10:
            # Use asymptotic expansion for very negative z
            if z < -10:
                b = -z / v_c
                var_t = v_c / (1 + z**2)
            else:
                return None, None
        else:
            # Standard case
            b = phi / (Phi * s)
            var_t = v_c - v_c**2 * b * (b + z / s2)

        # First moment (adjusted back by y_sign)
        E1 = m_c + y_sign * v_c * b

        # Second moment
        var_t = max(var_t, JITTER)
        E2 = E1**2 + var_t

        return E1, E2

    def predict(
        self,
        m,
        v
    ):
        """Predict probability for logistic likelihood."""
        # Predictive probability using probit approximation
        s2 = v + PROBIT_SCALE**2
        s = np.sqrt(s2)
        z = m / s
        return norm.cdf(z)

    def sample(
        self,
        A,
        rng
    ):
        """Sample from logistic likelihood."""
        P = expit(A)
        return (rng.rand(*P.shape) < P).astype(int)

In [11]:
class StudentTLikelihood(Likelihood):
    """Student-t likelihood for robust regression."""

    def __init__(
        self,
        df,
        scale
    ):
        """
        Initialize Student-t likelihood.

        Args:
            df: Degrees of freedom
            scale: Scale parameter
        """
        self.df = df
        self.scale = scale

    def tilted_moments(
        self,
        m_c,
        v_c,
        y,
        gh_z,
        gh_w
    ):
        """Compute tilted moments for Student-t likelihood."""
        like = lambda a: (
            1.0
            + ((y - a)**2) / (self.df * self.scale**2)
        )**(-(self.df + 1.0)/2.0)
        E1, E2, _ = _compute_tilted_moments_standard(
            m_c, v_c, gh_z, gh_w, like
        )
        return (E1, E2) if E1 is not None else (None, None)

    def predict(
        self,
        m,
        v
    ):
        """Predict mean for Student-t likelihood."""
        return m

    def sample(
        self,
        A,
        rng
    ):
        """Sample from Student-t likelihood."""
        eps = student_t(self.df).rvs(
            size=A.shape, random_state=rng
        ) * self.scale
        return A + eps

In [12]:
class GammaLikelihood(Likelihood):
    """Gamma likelihood with stable Laplace approximation."""

    def __init__(self, shape):
        """
        Initialize Gamma likelihood.

        Args:
            shape: Shape parameter of Gamma distribution
        """
        self.shape = shape

    def tilted_moments(
        self,
        m_c,
        v_c,
        y,
        gh_z,
        gh_w
    ):
        """
        Compute tilted moments using robust Newton-Raphson method.

        Uses a robust Newton-Raphson method with safeguards for finding
        the mode of the tilted distribution.
        """
        v_c = max(v_c, JITTER)

        # Better initialization: use moment matching
        # For Gamma(shape, scale), mean = shape * scale
        # If Y ~ Gamma(shape, exp(a)/shape), then E[Y] = exp(a)
        # So reasonable initial guess is a = log(y + 1)
        a = np.log(max(y, 0.1))

        # Bounds for a to prevent overflow
        a_min = -LOG_CLIP
        a_max = LOG_CLIP

        converged = False
        for iter_num in range(20):
            # Ensure a stays in bounds
            a = np.clip(a, a_min, a_max)

            # Compute gradient and Hessian with overflow protection
            exp_neg_a = np.exp(-np.clip(a, -LOG_CLIP, LOG_CLIP))

            # Gradient: d/da log[N(a|m_c,v_c) * p(y|a)]
            g = -(a - m_c) / v_c + self.shape - self.shape * y * exp_neg_a

            # Hessian: d²/da²
            h = -1.0 / v_c + self.shape * y * exp_neg_a

            # Check for numerical issues
            if not np.isfinite(g) or not np.isfinite(h):
                # Fall back to quadrature if Newton fails
                log_phi = lambda a_val: (
                    self.shape * a_val -
                    self.shape * y * np.exp(-np.clip(a_val, -LOG_CLIP, LOG_CLIP))
                )
                return _compute_tilted_moments_loglike(
                    m_c, v_c, gh_z, gh_w, log_phi
                )

            # Ensure negative definite Hessian
            h = min(h, -JITTER)

            # Newton step with step size control
            delta = -g / h

            # Line search to ensure improvement
            step_size = 1.0
            for _ in range(10):
                a_new = a + step_size * delta
                a_new = np.clip(a_new, a_min, a_max)

                # Check if objective improved
                exp_neg_a_new = np.exp(-np.clip(a_new, -LOG_CLIP, LOG_CLIP))
                obj_old = (-0.5 * (a - m_c)**2 / v_c + self.shape * a -
                          self.shape * y * exp_neg_a)
                obj_new = (-0.5 * (a_new - m_c)**2 / v_c + self.shape * a_new -
                          self.shape * y * exp_neg_a_new)

                if obj_new > obj_old:
                    a = a_new
                    break
                step_size *= 0.5

            # Check convergence
            if abs(delta * step_size) < 1e-8:
                converged = True
                break

        if not converged:
            # Fall back to quadrature
            log_phi = lambda a_val: (
                self.shape * a_val -
                self.shape * y * np.exp(-np.clip(a_val, -LOG_CLIP, LOG_CLIP))
            )
            return _compute_tilted_moments_loglike(
                m_c, v_c, gh_z, gh_w, log_phi
            )

        # Compute approximate variance at mode
        var_lap = -1.0 / h
        var_lap = max(var_lap, JITTER)

        # Return moments
        E1 = a
        E2 = a * a + var_lap

        return E1, E2

    def predict(
        self,
        m,
        v
    ):
        """Predict mean for Gamma likelihood."""
        # Mean of Gamma distribution
        return self.shape * np.exp(np.clip(m + 0.5 * v, -LOG_CLIP, LOG_CLIP))

    def sample(
        self,
        A,
        rng
    ):
        """Sample from Gamma likelihood."""
        # Y ~ Gamma(shape, exp(A))
        scale = np.exp(np.clip(A, -LOG_CLIP, LOG_CLIP))
        return rng.gamma(self.shape, scale, size=A.shape)

# DLPEP Class

In [13]:
class PowerExpectationPropagation:
    """
    Power Expectation Propagation with double-loop convergence.

    Implements Power EP algorithm with adaptive damping, robust numerics,
    and double-loop optimization for enhanced convergence properties.
    """

    def __init__(
        self,
        likelihood: Likelihood,
        prior_var=1.0,
        max_iter=100,
        tol=1e-6,
        damping=0.8,
        gh_points=20,
        power_fraction=1.0,
        outer_max_iter=10,
        verbose=False
    ):
        """
        Initialize Power Expectation Propagation.

        Args:
            likelihood: Likelihood object
            prior_var: Prior variance for weights
            max_iter: Maximum inner iterations
            tol: Convergence tolerance
            damping: Global damping parameter
            gh_points: Number of Gauss-Hermite quadrature points
            power_fraction: Power parameter (alpha)
            outer_max_iter: Maximum outer iterations
            verbose: Print convergence information
        """
        self.like = likelihood
        self.prior_var = prior_var
        self.max_iter = max_iter
        self.tol = tol
        self.global_damp = damping
        self.alpha = power_fraction
        self.outer_max = outer_max_iter
        self.verbose = verbose
        self.gh_z, self.gh_w = _get_gh_quadrature(gh_points)

    def fit(
        self,
        X,
        y
    ):
        """
        Fit the Power EP model to training data.

        Args:
            X: Feature matrix of shape (n_samples, n_features)
            y: Target vector of shape (n_samples,)

        Returns:
            Self for method chaining
        """
        X, y = np.asarray(X), np.asarray(y)
        n, d = X.shape

        # Initialize sites and posterior
        self.Lambda0 = np.eye(d) / self.prior_var
        self.eta0 = np.zeros(d)
        self.tau = np.ones(n) * 0.1  # Better initialization
        self.nu = np.zeros(n)
        self.damping = np.full(n, self.global_damp)

        self.Lambda = (self.Lambda0 +
                      np.sum(self.tau[:, None, None] *
                            X[:, :, None] * X[:, None, :], axis=0))
        self.eta = self.eta0 + np.sum(self.nu[:, None] * X, axis=0)
        self._update_posterior()

        prev_bound = -np.inf
        for outer_iter in range(self.outer_max):
            # Inner EP until convergence
            for inner_iter in range(self.max_iter):
                delta = self._pep_sweep(X, y)
                self._update_posterior()
                if delta < self.tol:
                    break

            # Check bound improvement
            new_bound = self._compute_bound(X, y)
            if self.verbose and new_bound > prev_bound + 1e-8:
                print(f"[EP] Outer iter {outer_iter+1}: "
                     f"Bound increased from {prev_bound:.3f} to {new_bound:.3f}")

            # Early stopping if no improvement
            if new_bound < prev_bound + 1e-8:
                break

            prev_bound = new_bound

        return self

    def _pep_sweep(
        self,
        X,
        y
    ):
        """
        Perform one sweep of Power EP updates.

        Args:
            X: Feature matrix
            y: Target vector

        Returns:
            Maximum parameter change in this sweep
        """
        n, d = X.shape
        max_delta = 0.0

        for i in range(n):
            xi, yi = X[i], y[i]
            ti, ni = self.tau[i], self.nu[i]

            # Remove alpha-powered site
            ti_remove = self.alpha * ti
            ni_remove = self.alpha * ni

            # More stable cavity computation
            Lam_cav = self.Lambda - ti_remove * np.outer(xi, xi)
            eta_cav = self.eta - ni_remove * xi

            # Ensure positive definite cavity
            eigvals = np.linalg.eigvalsh(Lam_cav)
            if np.min(eigvals) < JITTER:
                Lam_cav = Lam_cav + (JITTER - np.min(eigvals)) * np.eye(d)

            Sig_cav = robust_invert(Lam_cav)
            mu_cav = Sig_cav.dot(eta_cav)

            m_c = xi.dot(mu_cav)
            v_c = xi.dot(Sig_cav.dot(xi))
            v_c = max(v_c, JITTER)

            # Compute tilted moments
            E1, E2 = self.like.tilted_moments(
                m_c, v_c, yi, self.gh_z, self.gh_w
            )
            if E1 is None:
                continue

            v_t = max(E2 - E1**2, JITTER)

            # Update natural parameters
            ti_hat = (1.0 / self.alpha) * (1.0 / v_t - 1.0 / v_c)
            ni_hat = (1.0 / self.alpha) * (E1 / v_t - m_c / v_c)

            # Ensure ti_hat is positive
            ti_hat = max(ti_hat, JITTER)

            dti = ti_hat - ti
            dni = ni_hat - ni

            # Adaptive damping based on change magnitude
            change_mag = abs(dti) + abs(dni)
            if change_mag > 10:
                # Large change - use more damping
                step = min(self.damping[i], 0.3)
            else:
                # Small change - can use less damping
                step = self.damping[i]

            # Apply update
            self.tau[i] += step * dti
            self.nu[i] += step * dni
            self.Lambda += step * dti * np.outer(xi, xi)
            self.eta += step * dni * xi

            # Update site-specific damping
            if change_mag < 0.1:
                self.damping[i] = min(self.damping[i] * 1.1, 0.95)
            elif change_mag > 1.0:
                self.damping[i] = max(self.damping[i] * 0.9, 0.1)

            max_delta = max(max_delta, abs(step * dti), abs(step * dni))

        return max_delta

    def _compute_bound(
        self,
        X,
        y
    ):
        """
        Compute variational lower bound (ELBO).

        Args:
            X: Feature matrix
            y: Target vector

        Returns:
            Lower bound value
        """
        n, d = X.shape
        m, v = self.predict_latent(X)

        # Prior term: -0.5 * (tr(K^{-1} * Sigma) + mu^T * K^{-1} * mu)
        prior_term = (-0.5 * (np.trace(self.Lambda0.dot(self.Sigma)) +
                             self.mu.dot(self.Lambda0.dot(self.mu))))

        # Expected log likelihood
        like_term = 0.0
        for i in range(n):
            if isinstance(self.like, GaussianLikelihood):
                # Closed form for Gaussian
                like_term += -0.5 * np.log(2 * np.pi * self.like.obs_var)
                like_term += (-0.5 * ((y[i] - m[i])**2 + v[i]) /
                             self.like.obs_var)
            elif isinstance(self.like, LogisticLikelihood):
                # Use probit approximation
                y_sign = 2 * y[i] - 1
                m_adj = y_sign * m[i]
                s = np.sqrt(v[i] + PROBIT_SCALE**2)
                like_term += np.log(norm.cdf(m_adj / s))
            else:
                # Use quadrature for other likelihoods
                a = m[i] + np.sqrt(2 * v[i]) * self.gh_z

                if isinstance(self.like, PoissonLikelihood):
                    log_phi = y[i] * a - np.exp(np.clip(a, -LOG_CLIP, LOG_CLIP))
                elif isinstance(self.like, StudentTLikelihood):
                    df, sc = self.like.df, self.like.scale
                    log_phi = (-(df + 1) / 2 *
                              np.log(1 + ((y[i] - a)**2) / (df * sc**2)))
                elif isinstance(self.like, GammaLikelihood):
                    log_phi = (self.like.shape * a -
                              self.like.shape * y[i] *
                              np.exp(-np.clip(a, -LOG_CLIP, LOG_CLIP)))
                else:
                    continue

                like_term += self.gh_w.dot(log_phi)

        # Entropy term: 0.5 * log(abs(2*pi*e*Sigma))
        sign, logdet = np.linalg.slogdet(2 * np.pi * np.e * self.Sigma)
        entropy = 0.5 * logdet

        return prior_term + like_term + entropy

    def _update_posterior(self):
        """Update posterior mean and covariance."""
        # Add jitter for numerical stability
        Lam = self.Lambda + JITTER * np.eye(self.Lambda.shape[0])
        self.Sigma = robust_invert(Lam)
        self.mu = self.Sigma.dot(self.eta)

    def predict_latent(self, X):
        """
        Predict latent function values.

        Args:
            X: Feature matrix for prediction

        Returns:
            Tuple of (mean, variance) for latent function
        """
        m = X.dot(self.mu)
        v = np.sum(X.dot(self.Sigma) * X, axis=1)
        return m, np.maximum(v, JITTER)

    def predict(self, X):
        """
        Predict expected outputs.

        Args:
            X: Feature matrix for prediction

        Returns:
            Expected output values
        """
        m, v = self.predict_latent(X)
        return self.like.predict(m, v)

    def sample_predictive(
        self,
        X,
        n_samples=1000,
        seed=None
    ):
        """
        Sample from predictive distribution.

        Args:
            X: Feature matrix for prediction
            n_samples: Number of samples to draw
            seed: Random seed for reproducibility

        Returns:
            Tuple of (mean, (lower_quantile, upper_quantile))
        """
        rng = np.random.RandomState(seed)

        # Sample weights from posterior
        L = np.linalg.cholesky(
            self.Sigma + JITTER * np.eye(self.Sigma.shape[0])
        )
        z = rng.randn(self.Sigma.shape[0], n_samples)
        w_samples = self.mu[:, None] + L.dot(z)

        # Compute latent values
        A = X.dot(w_samples)

        # Sample observations
        Y = self.like.sample(A, rng)

        # Compute statistics
        mean = Y.mean(axis=1)
        lower = np.percentile(Y, 2.5, axis=1)
        upper = np.percentile(Y, 97.5, axis=1)

        return mean, (lower, upper)

# Factory

In [14]:
def make_power_ep(
    family,
    **kwargs
):
    """
    Create Power EP model with specified likelihood family.

    Args:
        family: Likelihood family name
        **kwargs: Additional parameters for likelihood and EP

    Returns:
        Configured PowerExpectationPropagation instance
    """
    fam = family.lower()
    if fam == "gaussian":
        like = GaussianLikelihood(kwargs.pop("obs_var", 1.0))
    elif fam == "poisson":
        like = PoissonLikelihood()
    elif fam == "logistic":
        like = LogisticLikelihood()
    elif fam == "studentt":
        like = StudentTLikelihood(
            kwargs.pop("df", 4.0),
            kwargs.pop("scale", 1.0)
        )
    elif fam == "gamma":
        like = GammaLikelihood(kwargs.pop("shape", 2.0))
    else:
        raise ValueError(f"Unknown family: {family}")

    return PowerExpectationPropagation(likelihood=like, **kwargs)

# Data Generators

In [15]:
def train_test_split(
    n,
    frac=0.7,
    seed=None
):
    """
    Split data indices into train and test sets.

    Args:
        n: Total number of samples
        frac: Fraction for training set
        seed: Random seed

    Returns:
        Tuple of (train_indices, test_indices)
    """
    rng = np.random.RandomState(seed)
    idx = rng.permutation(n)
    cut = int(frac * n)
    return idx[:cut], idx[cut:]

In [16]:
def simulate_logistic(
    n,
    d,
    w_scale=1.0,
    seed=None
):
    """
    Simulate logistic regression data.

    Args:
        n: Number of samples
        d: Number of features
        w_scale: Scale of true weights
        seed: Random seed

    Returns:
        Tuple of (X, y) where X is features and y is binary labels
    """
    rng = np.random.RandomState(seed)
    X = rng.randn(n, d)
    w = rng.randn(d) * w_scale
    y = (rng.rand(n) < expit(X.dot(w))).astype(int)
    return X, y

In [17]:
def simulate_poisson(
    n,
    d,
    w_scale=0.5,
    seed=None
):
    """
    Simulate Poisson regression data.

    Args:
        n: Number of samples
        d: Number of features
        w_scale: Scale of true weights
        seed: Random seed

    Returns:
        Tuple of (X, y) where X is features and y is count data
    """
    rng = np.random.RandomState(seed)
    X = rng.randn(n, d)
    w = rng.randn(d) * w_scale
    lam = np.exp(X.dot(w))
    y = rng.poisson(lam)
    return X, y

In [18]:
def simulate_student_t(
    n,
    d,
    df=4,
    w_scale=1.0,
    seed=None
):
    """
    Simulate Student-t regression data.

    Args:
        n: Number of samples
        d: Number of features
        df: Degrees of freedom for t-distribution
        w_scale: Scale of true weights
        seed: Random seed

    Returns:
        Tuple of (X, y) where X is features and y is continuous data
    """
    rng = np.random.RandomState(seed)
    X = rng.randn(n, d)
    w = rng.randn(d) * w_scale
    loc = X.dot(w)
    y = loc + rng.standard_t(df, size=n)
    return X, y

In [19]:
def simulate_gaussian(
    n,
    d,
    noise_sd=0.5,
    w_scale=1.0,
    seed=None
):
    """
    Simulate Gaussian regression data.

    Args:
        n: Number of samples
        d: Number of features
        noise_sd: Standard deviation of observation noise
        w_scale: Scale of true weights
        seed: Random seed

    Returns:
        Tuple of (X, y) where X is features and y is continuous data
    """
    rng = np.random.RandomState(seed)
    X = rng.randn(n, d)
    w = rng.randn(d) * w_scale
    y = X.dot(w) + rng.randn(n) * noise_sd
    return X, y

In [20]:
def simulate_gamma(
    n,
    d,
    shape=2.0,
    w_scale=0.5,
    seed=None
):
    """
    Generate Gamma distributed data with log link.

    Args:
        n: Number of samples
        d: Number of features
        shape: Shape parameter of Gamma distribution
        w_scale: Scale of true weights
        seed: Random seed

    Returns:
        Tuple of (X, y) where X is features and y is positive continuous data
    """
    rng = np.random.RandomState(seed)
    X = rng.randn(n, d)
    w = rng.randn(d) * w_scale
    # Log link: E[Y|X] = exp(X'w)
    scale = np.exp(X.dot(w))
    y = rng.gamma(shape, scale, size=n)
    return X, y

# Demonstration

In [21]:
def main():
    """Run demonstration of Power Expectation Propagation on various datasets."""
    np.random.seed(42)
    n, d = 500, 10

    print("=== Running Double-Loop Power EP Demo ===\n")

    # 1. Logistic Regression
    print("1. Logistic Regression")
    X, y = simulate_logistic(n, d, seed=1)
    i_tr, i_te = train_test_split(n, 0.7, seed=2)

    m_log = make_power_ep(
        "logistic",
        prior_var=10.0,
        power_fraction=0.7,
        damping=0.5,
        max_iter=100,
        outer_max_iter=5,
        tol=1e-6,
        gh_points=20,
        verbose=True
    )

    m_log.fit(X[i_tr], y[i_tr])
    p_te = m_log.predict(X[i_te])
    acc_log = accuracy_score(y[i_te], (p_te >= 0.5).astype(int))
    print(f"Test Accuracy: {acc_log:.3f}\n")

    # 2. Poisson Regression
    print("2. Poisson Regression")
    X, y = simulate_poisson(n, d, seed=3)
    i_tr, i_te = train_test_split(n, 0.7, seed=4)

    m_poi = make_power_ep(
        "poisson",
        prior_var=1.0,
        power_fraction=0.6,
        damping=0.7,
        max_iter=100,
        outer_max_iter=5,
        tol=1e-6,
        gh_points=20
    )

    m_poi.fit(X[i_tr], y[i_tr])
    mean_poi, (l_poi, h_poi) = m_poi.sample_predictive(
        X[i_te], n_samples=2000, seed=5
    )
    rmse_poi = np.sqrt(mean_squared_error(y[i_te], mean_poi))
    cov_poi = np.mean((y[i_te] >= l_poi) & (y[i_te] <= h_poi))
    print(f"Test RMSE: {rmse_poi:.3f}, Coverage: {cov_poi:.3f}\n")

    # 3. Student-t Regression
    print("3. Student-t Regression")
    X, y = simulate_student_t(n, d, seed=6)
    i_tr, i_te = train_test_split(n, 0.7, seed=7)

    m_stu = make_power_ep(
        "studentt",
        df=4,
        scale=1.0,
        prior_var=1.0,
        power_fraction=0.8,
        damping=0.7,
        max_iter=100,
        outer_max_iter=5,
        tol=1e-6,
        gh_points=20
    )

    m_stu.fit(X[i_tr], y[i_tr])
    mean_stu, (l_stu, h_stu) = m_stu.sample_predictive(
        X[i_te], n_samples=2000, seed=8
    )
    rmse_stu = np.sqrt(mean_squared_error(y[i_te], mean_stu))
    cov_stu = np.mean((y[i_te] >= l_stu) & (y[i_te] <= h_stu))
    print(f"Test RMSE: {rmse_stu:.3f}, Coverage: {cov_stu:.3f}\n")

    # 4. Gaussian Regression
    print("4. Gaussian Regression")
    X, y = simulate_gaussian(n, d, seed=9)
    i_tr, i_te = train_test_split(n, 0.7, seed=10)

    m_gau = make_power_ep(
        "gaussian",
        obs_var=0.25,
        prior_var=1.0,
        power_fraction=0.9,
        damping=0.7,
        max_iter=100,
        outer_max_iter=5,
        tol=1e-6,
        gh_points=20
    )

    m_gau.fit(X[i_tr], y[i_tr])
    mean_gau, (l_gau, h_gau) = m_gau.sample_predictive(
        X[i_te], n_samples=2000, seed=11
    )
    rmse_gau = np.sqrt(mean_squared_error(y[i_te], mean_gau))
    cov_gau = np.mean((y[i_te] >= l_gau) & (y[i_te] <= h_gau))
    print(f"Test RMSE: {rmse_gau:.3f}, Coverage: {cov_gau:.3f}\n")

    # 5. Gamma Regression
    print("5. Gamma Regression")
    X, y = simulate_gamma(n, d, shape=2.0, seed=12)
    i_tr, i_te = train_test_split(n, 0.7, seed=13)

    m_gam = make_power_ep(
        "gamma",
        shape=2.0,
        prior_var=1.0,
        power_fraction=0.5,
        damping=0.5,
        max_iter=200,
        outer_max_iter=10,
        tol=1e-6,
        gh_points=20,
        verbose=True
    )

    m_gam.fit(X[i_tr], y[i_tr])
    mean_gam, (l_gam, h_gam) = m_gam.sample_predictive(
        X[i_te], n_samples=2000, seed=14
    )
    rmse_gam = np.sqrt(mean_squared_error(y[i_te], mean_gam))
    cov_gam = np.mean((y[i_te] >= l_gam) & (y[i_te] <= h_gam))
    print(f"Test RMSE: {rmse_gam:.3f}, Coverage: {cov_gam:.3f}\n")

    # Summary Report
    print("\n=== EP Demo Summary Results ===")
    print(f"Logistic   Accuracy: {acc_log:.3f}")
    print(f"Poisson    RMSE: {rmse_poi:.3f}, Coverage: {cov_poi:.3f}")
    print(f"Student-t  RMSE: {rmse_stu:.3f}, Coverage: {cov_stu:.3f}")
    print(f"Gaussian   RMSE: {rmse_gau:.3f}, Coverage: {cov_gau:.3f}")
    print(f"Gamma      RMSE: {rmse_gam:.3f}, Coverage: {cov_gam:.3f}")

In [22]:
if __name__ == "__main__":
    main()

=== Running Double-Loop Power EP Demo ===

1. Logistic Regression
[EP] Outer iter 1: Bound increased from -inf to -136.016
Test Accuracy: 0.820

2. Poisson Regression
Test RMSE: 5.930, Coverage: 0.967

3. Student-t Regression
Test RMSE: 1.414, Coverage: 0.947

4. Gaussian Regression
Test RMSE: 0.495, Coverage: 0.947

5. Gamma Regression
[EP] Outer iter 1: Bound increased from -inf to -1279.743
[EP] Outer iter 2: Bound increased from -1279.743 to -1278.861
Test RMSE: 4.786, Coverage: 0.900


=== EP Demo Summary Results ===
Logistic   Accuracy: 0.820
Poisson    RMSE: 5.930, Coverage: 0.967
Student-t  RMSE: 1.414, Coverage: 0.947
Gaussian   RMSE: 0.495, Coverage: 0.947
Gamma      RMSE: 4.786, Coverage: 0.900
