In [1]:
import numpy as np
from scipy.special import expit
from scipy.optimize import linear_sum_assignment
from sklearn.metrics import mean_squared_error
from models import GibbsSamplerLLFM


In [10]:
def generate_synthetic(T=150, S=3, K_true=2, seed=0):
    """Simulate binary observations from a small latent-feature logistic model.

    The ground truth is a fixed toy layout, not a fully general generator:

    * feature 0 is active on rows ``0..74`` and feature 1 on rows ``50..T-1``
      (so rows ``50..74`` have both features on). These row indices are
      absolute; they do not scale with ``T``.
    * feature 0 drives series 0 (weight 3.0), feature 1 drives series 1
      (weight 3.0), and every feature row loads on series 2 with weight 2.5.
    * the bias vector is the fixed 3-vector ``[-2.5, -2.5, -3.0]``.

    Because of that layout only ``S == 3`` and ``K_true >= 2`` are meaningful.
    Any feature beyond the first two is never switched on in ``Z_true``.

    Parameters
    ----------
    T : int
        Number of rows (observations) to simulate.
    S : int
        Number of observed binary series. Must be 3.
    K_true : int
        Number of latent feature columns to allocate. Must be at least 2.
    seed : int or None
        Passed to ``np.random.seed``. Note that this reseeds NumPy's *global*
        legacy RNG, so code that runs afterwards and draws from ``np.random``
        is affected as well.

    Returns
    -------
    Y : ndarray, shape (T, S)
        Bernoulli draws (0/1) with success probabilities ``P_true``.
    Z_true : ndarray, shape (T, K_true)
        Binary feature-activation matrix (stored as floats).
    W_true : ndarray, shape (K_true, S)
        Feature-to-series weights.
    b_true : ndarray, shape (3,)
        Per-series bias on the logit scale.
    P_true : ndarray, shape (T, S)
        ``expit(Z_true @ W_true + b_true)``.

    Raises
    ------
    ValueError
        If ``S != 3`` or ``K_true < 2``.
    """
    # Validate before seeding: a rejected call must not reset the global RNG,
    # and the caller gets a clear message instead of an IndexError or a
    # broadcasting error from deep inside the body.
    if S != 3:
        raise ValueError(
            f"generate_synthetic only supports S=3 (the true bias has 3 entries); got S={S}"
        )
    if K_true < 2:
        raise ValueError(
            f"generate_synthetic needs K_true >= 2 (two features are hard-wired); got K_true={K_true}"
        )

    np.random.seed(seed)

    # ----- True latent features -----
    Z_true = np.zeros((T, K_true))
    Z_true[:75, 0] = 1
    Z_true[50:, 1] = 1

    # ----- True weights -----
    W_true = np.zeros((K_true, S))
    W_true[0, 0] = 3.0
    W_true[1, 1] = 3.0
    W_true[:, 2] = 2.5

    # ----- True bias -----
    b_true = np.array([-2.5, -2.5, -3.0])

    # ----- Generate observations -----
    logits = Z_true @ W_true + b_true
    P_true = expit(logits)
    Y = np.random.binomial(1, P_true)

    return Y, Z_true, W_true, b_true, P_true

# Draw the default dataset and keep the ground-truth pieces for later comparison.
(
    Y,
    Z_true,
    W_true,
    b_true,
    P_true,
) = generate_synthetic()
print("Generated synthetic data Y with shape:", Y.shape)


Generated synthetic data Y with shape: (150, 3)


In [None]:
# ---- Data ----
# Y is the binary matrix returned by generate_synthetic() in the cell above;
# nothing is regenerated here.

# ---- Configure the sampler ----
# NOTE(review): argument meanings below are inferred from their names;
# confirm against models.GibbsSamplerLLFM.
sampler = GibbsSamplerLLFM(
    Data=Y,
    K=10,  # presumably the feature budget; the data were simulated with 2 features
    alpha=1.0,
    # presumably prior scale for the weights and prior scale / mean for the bias
    sigma_w=1.0,
    sigma_b=0.3,
    mu_b=-5.0,
    # presumably total iterations and how many of them are discarded as burn-in
    n_iter=2000,
    burn=400,
)

# ---- Run the chain ----
sampler.run()






Average number of active features across iterations: 3.86


In [14]:
# ---- Posterior samples (raw draws; nothing is averaged in this cell) ----
W_post, b_post, Z_post = sampler.get_posterior_samples()

# One row per element of Z_post: the column sums of that draw's Z.
# Presumably Z is binary, so each entry counts the rows using that feature — confirm.
feature_counts = np.array([Z_draw.sum(axis=0) for Z_draw in Z_post])

# Show only the last five draws to keep the output short.
print(feature_counts[-5:])
print(b_post[-5:])

[[ 0  0  0  0  0 16  0 10  0 10]
 [ 0  1  0  0  0 13  0 11  0  6]
 [ 0  0  0  0  0 11  0 22  0 20]
 [ 0  0  0  0  0  4  0 16  0 32]
 [ 0  0  0  0  0  9  0 11  0 42]]
[array([-0.7686777 , -0.55181537, -0.64589489]), array([-0.87675101, -1.04754566, -0.43625941]), array([-0.85226744, -1.0358215 , -0.51523909]), array([-1.1912159 , -0.43000262, -0.61593307]), array([-1.18494609, -0.8046824 , -0.81010076])]


In [13]:
# Grand mean over every (draw, feature) entry of the column-sum table, including
# columns that are all zero. This is a per-feature usage count, not a count of
# active features per draw.
feature_counts.mean()

np.float64(4.90925)