In [None]:
import numpy as np
from scipy.stats import norm
from scipy.optimize import minimize


class EMAlgorithmPointMassTruncated:
    def __init__(self, x, sigma_0, tau_squared, pi, max_iter=100, tol=1e-6):
        """
        EM algorithm for a mixture of a point mass at 0 and a Gaussian component.

        Parameters:
        - x: Observed data points (1D array)
        - sigma_0: Fixed noise standard deviation (scalar)
        - tau_squared: Initial value for the variance of the Gaussian component
        - pi: Initial value for the mixing proportion \(\pi\)
        - max_iter: Maximum number of iterations
        - tol: Tolerance for convergence
        """
        self.x = x
        self.sigma_0 = sigma_0
        self.tau_squared = tau_squared
        self.pi = pi
        self.max_iter = max_iter
        self.tol = tol
        self.n = len(x)
        self.mu = np.mean(x)  # Initialize \(\mu\)

    def e_step(self):
        """
        E-step: Compute posterior probabilities \(q_i\) for each data point.
        """
        sigma_2_total = self.sigma_0**2 + 1/self.tau_squared
        phi_0 = (self.x == 0).astype(float)  # Point mass at 0
        phi_2 = norm.pdf(self.x, self.mu, np.sqrt(sigma_2_total))
        numerator = self.pi * phi_2
        denominator = (1 - self.pi) * phi_0 + self.pi * phi_2
        q = numerator / (denominator + 1e-8)  # Avoid division by zero
        return q

    def m_step(self, q):
        """
        M-step: Update parameters \(\pi\), \(\mu\), and \(\tau^2\).
        """
        # Update \(\pi\)
        self.pi = np.mean(q)

        # Update \(\mu\) (Equation 23 in the document)
        def objective(mu):
            sigma_2_total = self.sigma_0**2 + 1/self.tau_squared
            return -np.sum(q * norm.logpdf(self.x, mu, np.sqrt(sigma_2_total)))

        res = minimize(objective, self.mu, method="L-BFGS-B")
        self.mu = res.x[0]

        # Update \(\tau^2\) (Equation 25 in the document)
        self.tau_squared = np.sum(q * (self.x - self.mu) ** 2) / np.sum(q)
        self.tau_squared = max(self.tau_squared, 1e-8)  # Ensure non-negative variance

    def run(self):
        """
        Run the EM algorithm until convergence.

        Returns:
        - pi: Estimated mixing proportion
        - mu: Estimated mean of the Gaussian component
        - tau_squared: Estimated variance of the Gaussian component
        """
        for iteration in range(self.max_iter):
            # E-step
            q = self.e_step()

            # Save old parameters to check for convergence
            old_params = np.array([self.pi, self.mu, self.tau_squared])

            # M-step
            self.m_step(q)

            # Check convergence
            new_params = np.array([self.pi, self.mu, self.tau_squared])
            if np.linalg.norm(new_params - old_params) < self.tol:
                print(f"Converged in {iteration + 1} iterations.")
                break
        else:
            print("Maximum iterations reached without convergence.")

        return self.pi, self.mu, self.tau_squared


# Example Usage
if __name__ == "__main__":
    # Generate synthetic data
 
    point_mass = np.zeros(100)
    gaussian_component = np.random.normal(5, 2, 150)
    x = np.concatenate([point_mass, gaussian_component])

    # EM algorithm parameters
    sigma_0 = 1.0  # Fixed standard deviation for the point mass
    tau_squared = 2.0  # Initial guess for variance of the Gaussian component
    pi = 0.5  # Initial guess for mixing proportion

    # Run the EM algorithm
    em = EMAlgorithmPointMassTruncated(x, sigma_0, tau_squared, pi)
    pi, mu, tau_squared = em.run()

    # Print results
    print(f"Estimated \u03c0: {pi}")
    print(f"Estimated \u03bc: {mu}")
    print(f"Estimated \u03c4^2: {tau_squared}")


Converged in 11 iterations.
Estimated π: 0.6153223121848359
Estimated μ: 4.603705295377346
Estimated τ^2: 4.520206597144137


In [2]:
np.random.seed(42)
point_mass = np.zeros(100)
gaussian_component = np.random.normal(5, 2, 150)
x = np.concatenate([point_mass, gaussian_component])

    # EM algorithm parameters
sigma_0 = 1.0  # Fixed standard deviation for the point mass
tau_squared = 2.0  # Initial guess for variance of the Gaussian component
pi = 0.5  # Initial guess for mixing proportion

    # Run the EM algorithm
em = EMAlgorithmPointMassTruncated(x, sigma_0, tau_squared, pi)
pi, mu, tau_squared = em.run()

    # Print results
print(f"Estimated \u03c0: {pi}")
print(f"Estimated \u03bc: {mu}")
print(f"Estimated \u03c4^2: {tau_squared}")

Converged in 11 iterations.
Estimated π: 0.6109283479236458
Estimated μ: 4.748850083508237
Estimated τ^2: 3.875994541791946


In [3]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from scipy.stats import norm
from scipy.optimize import minimize


class EMAlgorithmWithCovariates:
    def __init__(self, x, z, sigma_0, tau_squared, max_iter=100, tol=1e-6, hidden_dim=16, lr=0.01):
        """
        EM algorithm for a mixture of a point mass at 0 and a Gaussian component with covariate-dependent mixing proportion.

        Parameters:
        - x: Observed data points (1D array)
        - z: Covariate (1D array)
        - sigma_0: Fixed noise standard deviation for the point mass
        - tau_squared: Initial value for the variance of the Gaussian component
        - max_iter: Maximum number of iterations
        - tol: Tolerance for convergence
        - hidden_dim: Hidden dimension size for the neural network
        - lr: Learning rate for the neural network optimizer
        """
        self.x = x
        self.z = torch.tensor(z, dtype=torch.float32).unsqueeze(1)  # Covariate
        self.sigma_0 = sigma_0
        self.tau_squared = tau_squared
        self.max_iter = max_iter
        self.tol = tol
        self.n = len(x)
        self.mu = np.mean(x)  # Initialize \(\mu\)

        # Neural network to model \(\pi(z)\)
        self.pi_net = nn.Sequential(
            nn.Linear(1, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid(),  # Ensure output is in (0, 1)
        )
        self.optimizer = optim.Adam(self.pi_net.parameters(), lr=lr)

    def e_step(self):
        """
        E-step: Compute posterior probabilities \(q_i\) for each data point.
        """
        # Predict \(\pi(z)\) using the neural network
        with torch.no_grad():
            pi_z = self.pi_net(self.z).squeeze().numpy()

        sigma_2_total = self.sigma_0**2 + self.tau_squared
        phi_0 = (self.x == 0).astype(float)  # Point mass at 0
        phi_2 = norm.pdf(self.x, self.mu, np.sqrt(sigma_2_total))
        numerator = pi_z * phi_2
        denominator = (1 - pi_z) * phi_0 + pi_z * phi_2
        q = numerator / (denominator + 1e-8)  # Avoid division by zero
        return q, pi_z

    def m_step(self, q):
        """
        M-step: Update parameters \(\mu\), and \(\tau^2\).
        """
        # Update \(\mu\) (optimize Equation 23)
        def objective(mu):
            sigma_2_total = self.sigma_0**2 + self.tau_squared
            return -np.sum(q * norm.logpdf(self.x, mu, np.sqrt(sigma_2_total)))

        res = minimize(objective, self.mu, method="L-BFGS-B")
        self.mu = res.x[0]

        # Update \(\tau^2\) (Equation 25)
        self.tau_squared = np.sum(q * (self.x - self.mu) ** 2) / np.sum(q)
        self.tau_squared = max(self.tau_squared, 1e-8)  # Ensure non-negative variance

    def update_pi_net(self, q):
        """
        Update the neural network \(\pi(z)\) to predict mixture proportions using \(q_i\).
        """
        self.pi_net.train()
        q_tensor = torch.tensor(q, dtype=torch.float32)
        for _ in range(100):  # Fixed number of optimization steps
            self.optimizer.zero_grad()
            pi_z = self.pi_net(self.z).squeeze()
            loss = nn.BCELoss()(pi_z, q_tensor)  # Binary cross-entropy loss
            loss.backward()
            self.optimizer.step()

    def run(self):
        """
        Run the EM algorithm until convergence.

        Returns:
        - mu: Estimated mean of the Gaussian component
        - tau_squared: Estimated variance of the Gaussian component
        - pi_net: Trained neural network for \(\pi(z)\)
        """
        for iteration in range(self.max_iter):
            # E-step
            q, pi_z = self.e_step()

            # Save old parameters to check for convergence
            old_params = np.array([self.mu, self.tau_squared])

            # M-step
            self.m_step(q)
            self.update_pi_net(q)  # Update the neural network for \(\pi(z)\)

            # Check convergence
            new_params = np.array([self.mu, self.tau_squared])
            if np.linalg.norm(new_params - old_params) < self.tol:
                print(f"Converged in {iteration + 1} iterations.")
                break
        else:
            print("Maximum iterations reached without convergence.")

        return self.mu, self.tau_squared, self.pi_net


# Example Usage
if __name__ == "__main__":
    # Generate synthetic data
    np.random.seed(42)
    point_mass = np.zeros(100)
    gaussian_component = np.random.normal(5, 2, 150)
    x = np.concatenate([point_mass, gaussian_component])
    z = np.random.uniform(-1, 1, len(x))  # Covariate

    # EM algorithm parameters
    sigma_0 = 1.0  # Fixed standard deviation for the point mass
    tau_squared = 2.0  # Initial guess for variance of the Gaussian component

    # Run the EM algorithm
    em = EMAlgorithmWithCovariates(x, z, sigma_0, tau_squared)
    mu, tau_squared, pi_net = em.run()

    # Print results
    print(f"Estimated \u03bc: {mu}")
    print(f"Estimated \u03c4^2: {tau_squared}")


Maximum iterations reached without convergence.
Estimated μ: 4.741789097525298
Estimated τ^2: 3.9037142696758833
