In [6]:
import numpy as np
SEED = 45
np.random.seed(SEED)
from scipy.special import expit
from scipy.optimize import linear_sum_assignment
from sklearn.metrics import mean_squared_error
from models import GibbsSamplerLLFM
from evals import latent_features_to_file, latent_features


In [7]:
def generate_synthetic(T=150, S=4, K_true=2):
    """Simulate binary observations from a logistic latent-feature model.

    The ground truth is fixed by construction:

    * rows ``0..19`` have only feature 0 active,
    * rows ``110..`` have only feature 1 active,
    * rows ``70..99`` have *every* feature active (all ``K_true`` columns),
    * feature 0 pushes columns 1 and 2 (weight 6), feature 1 pushes
      columns 0 and 3 (weight 6); all other weights are 0,
    * every column has bias -3.

    Parameters
    ----------
    T : int
        Number of rows (time points). The row ranges above are plain slices,
        so a smaller ``T`` simply truncates them.
    S : int
        Number of observed binary columns. Must be at least 4 because the
        weight pattern addresses columns 0..3; extra columns get zero weight
        and are driven by the bias alone.
    K_true : int
        Number of latent features. Must be at least 2 because rows 0 and 1 of
        the weight matrix are written; extra features carry zero weight.

    Returns
    -------
    Y : ndarray, shape (T, S)
        Bernoulli draws with success probability ``P_true``.
    Z_true : ndarray, shape (T, K_true)
        Binary feature-activation matrix (stored as floats).
    W_true : ndarray, shape (K_true, S)
        Feature-to-column weights.
    b_true : ndarray, shape (S,)
        Per-column bias.
    P_true : ndarray, shape (T, S)
        ``expit(Z_true @ W_true + b_true)``.

    Notes
    -----
    Sampling uses NumPy's global random state (``np.random.binomial``), so
    reproducibility depends on ``np.random.seed`` having been called first.
    ``S < 4`` or ``K_true < 2`` raises ``IndexError`` from the hard-coded
    weight indices.
    """
    # ----- True latent features -----
    Z_true = np.zeros((T, K_true))
    Z_true[:20, 0] = 1
    Z_true[110:, 1] = 1
    Z_true[70:100, :] = 1  # overlap segment: all features switched on

    # ----- True weights -----
    W_true = np.zeros((K_true, S))
    W_true[0, 1] = 6
    W_true[0, 2] = 6
    W_true[1, 0] = 6
    W_true[1, 3] = 6

    # ----- True bias -----
    # Sized from S rather than hard-coded to four entries, so that S > 4 no
    # longer fails when the bias is broadcast against the (T, S) logits.
    b_true = np.full(S, -3)

    # ----- Generate observations -----
    logits = Z_true @ W_true + b_true
    P_true = expit(logits)
    Y = np.random.binomial(1, P_true)

    return Y, Z_true, W_true, b_true, P_true

# Draw one synthetic dataset using generate_synthetic's default arguments.
# The draw comes from NumPy's global RNG, which the import cell seeded with SEED.
Y, Z_true, W_true, b_true, P_true = generate_synthetic()
# NOTE(review): both prints emit the complete arrays, which makes the cell output
# very long; showing shapes or a short slice would keep the notebook readable.
print("Generated synthetic data Y:", Y)
print("Probabilities P_true:", P_true)


Generated synthetic data Y: [[1 1 1 0]
 [0 1 1 0]
 [0 1 1 0]
 [1 1 1 0]
 [1 1 1 0]
 [0 1 1 0]
 [0 1 1 0]
 [0 1 1 0]
 [0 1 1 0]
 [0 1 1 0]
 [0 1 1 0]
 [0 1 1 0]
 [0 1 1 0]
 [0 1 1 0]
 [1 1 1 0]
 [0 1 1 0]
 [0 1 1 0]
 [0 1 1 1]
 [0 1 1 0]
 [0 1 1 0]
 [0 0 0 0]
 [1 0 0 0]
 [0 0 0 0]
 [0 0 0 1]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 1 0]
 [0 0 1 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 1 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 1 0 0]
 [0 0 0 0]
 [0 0 1 0]
 [0 0 0 0]
 [1 0 0 0]
 [0 1 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 1 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 1 0 0]
 [0 0 1 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 1 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 1 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 0 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 0 0]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 

In [8]:
# ---- Instantiate your sampler ----
# Y is the binary matrix returned by generate_synthetic() in the previous cell.
# NOTE(review): the meaning of every keyword below is defined by GibbsSamplerLLFM
# in the project's `models` module, which is not visible in this notebook —
# the per-argument remarks are assumptions to confirm against that class.
sampler = GibbsSamplerLLFM(
    Data=Y,
    K=10,              # larger than the K_true=2 used to simulate Y; presumably an upper bound on features — TODO confirm
    alpha=0.5,
    sigma_w=2.0,       # presumably the prior scale for the weights — TODO confirm
    sigma_b=0.5,       # presumably the prior scale for the bias — TODO confirm
    mu_b=-5.0,         # presumably the prior mean for the bias; the simulated bias is -3 — TODO confirm
    n_iter=5000,
    burn=1000,         # presumably the number of initial iterations discarded — TODO confirm
    n_subsample=1500
)

# ---- Run MCMC ----
sampler.run()
# Bare last expression: its return value is echoed as the cell output (the large
# tuple of arrays shown below). NOTE(review): assign it to a name or end the line
# with ';' if the dump is not wanted.
sampler.get_posterior_samples()


(array([[[ 3.44285032e+00,  7.17864721e-01, -7.95871551e-01,
           1.12092720e+00],
         [-2.25891693e+00, -1.36314085e+00, -9.01307807e-01,
          -2.25387685e+00],
         [ 1.69705156e+00,  1.22564241e+00,  7.27661961e-02,
           1.51149965e+00],
         ...,
         [-9.97707849e-01,  3.44078064e+00,  4.44974377e+00,
          -2.09974277e+00],
         [ 2.96978588e+00, -7.38251494e+00, -7.22226058e+00,
           1.75785488e+00],
         [-6.25644685e+00, -5.11634415e+00, -5.74166796e+00,
          -6.52472916e+00]],
 
        [[ 3.23043444e-01, -5.92218058e+00, -8.09523910e+00,
           4.14595113e-01],
         [-4.87047037e-01,  5.68651232e-01,  1.12210517e+00,
          -1.32714144e+00],
         [-4.70535889e-01,  6.24639573e-01,  1.27622821e+00,
           1.05824620e+00],
         ...,
         [-5.61764723e+00, -4.51120193e+00, -4.92769384e+00,
          -5.17373927e+00],
         [-1.66139971e+00,  4.87801619e-01,  5.43910158e-01,
          -2.06626

In [9]:
# Print a summary of the sampler's stored posterior draws. Judging by the output
# below, the report groups samples by their number of active features and shows
# the average weights and bias for each group.
# NOTE(review): `good_samples_*` are presumably the draws kept after burn-in — confirm in models.py.
latent_features(Z_post=sampler.good_samples_Z, W_post=sampler.good_samples_W, b_post=sampler.good_samples_b)
# NOTE(review): the conditioning vector has 3 entries while Y has 4 columns, so this
# presumably predicts the remaining column given the first three — confirm against
# GibbsSamplerLLFM.posterior_predictive.
p1given0 = sampler.posterior_predictive([1, 0, 0])
print("P(pred=1 | conds=[1,0,0]):", p1given0)

Posterior grouping by number of active features

Number of samples with zero active features: 0

Group with 2 active features:
  Number of posterior samples: 46
  Average weights:
[[ 0.81165531 -0.1576162  -0.21963441  0.49695991]
 [-5.5326862  -4.1828     -3.71201382 -5.33787681]]
  Average bias:
[-0.60064366 -0.51090646 -0.59948747 -0.63428669]
----------------------------------------
Group with 3 active features:
  Number of posterior samples: 480
  Average weights:
[[ 0.6118647   0.46079972  0.35827239  0.33424525]
 [-0.24820565 -4.80342427 -4.88409835 -0.25230026]
 [-4.87600046 -5.22379279 -5.18406241 -4.76449024]]
  Average bias:
[-0.63151901 -0.66412107 -0.68431691 -0.65296972]
----------------------------------------
Group with 4 active features:
  Number of posterior samples: 637
  Average weights:
[[ 0.77426986  0.63981866  0.46322969  0.47803874]
 [-0.14744471 -2.57404301 -2.518303   -0.1609573 ]
 [-1.86913193 -4.77707572 -4.71864336 -1.73790577]
 [-3.74432881 -3.44813138 -3