In [1]:
############
# Packages #
############
import time as t

import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px
import scipy as sp

###################
# hyperparametres #
###################

# for reproductibility
seed = 0

# data dimensions
T=200
k=100
l =0

#Z : (k,1)
#Y : (T,1)
#U : (T,l)
#X : (T,k)

# data generations

# for X:
rho=0.75

# for Beta : number of non null
s=5 #in [5,10,100]
lst_s = [5,10,100]

# for sigma2 :ratio between explained and total variance 
Ry=0.02 #in [0.02, 0.25, 0.5]
lst_Ry = [0.02, 0.25, 0.5]

# for q prior
a=1
b=1

# for R2 prior
A=1
B=1

### Initialize parameters

Firstly, let's comput the initialization of the parameters

In [2]:
def compute_X(T, k, rho):
    """Compute matrix of xt observations

    Args:
        T (int): number of observations
        k (int): number of predictors
        rho (float): Toeplitz correlation parameter

    Returns:
        np.array: dimensions T*k
    """
    cov_matrix=np.zeros((k, k))
    for i in range(k):
        for j in range(k):
            cov_matrix[i,j]=rho**np.abs(i-j)
    return np.random.multivariate_normal([0]*k, cov_matrix, T)

def compute_U(T, l):
    """Compute matrix of xt observations

    Args:
        T (int): number of observations
        l (int): number of predictors

    Returns:
        np.array: dimensions T*l
    """
    if l==0:
        return 0
    else:
        pass
        
def compute_vx(X):
    return np.mean(np.var(X,axis=0))

def sample_beta(k, s):
    """Sample of beta vector of dimensions 1*k

    Args:
        k (int): number of predictors
        s (int): number of non-zero elements of beta

    Returns:
        np.array: dimensions 1*k
    """
    beta=np.zeros(k)
    index_normal_distribution=np.random.choice(len(beta), size=s, replace=False)
    beta[index_normal_distribution] = np.random.normal(loc=0, scale=1, size=s)
    return beta

def sample_phi(l):
    if l==0:
        return 0
    else:
        return np.random.uniform(0,1, size=l)

def comput_Z(beta):
    """Compute z_1,...,z_k

    Args:
        beta (np.array): random vector beta

    Returns:
        np.array: dimensions1*k
    """
    Z=beta
    Z[Z!=0]=1
    return Z

def compute_sigma2(Ry, beta, X):
    """ Compute sigma2
    Args:
        Ry (float): pourcentage of explained variance
        beta (np.array): beta previously sampled
        X (np.array): matrix of (xt) samples

    Returns:
        float: dimensions 1*1
    """
    return (1/Ry-1)*np.mean(np.square(X @ beta))

def compute_R2(q, k, gamma2, v_x):
    return (q*k*gamma2*v_x)/(q*k*gamma2*v_x+1)

def sample_epsilon(T, sigma2):
    """Sample epsilon_1,...,epsilon_T

    Args:
        T (int): number of observations
        sigma2 (float): sigma2 previously sampled

    Returns:
        np.array: dimensions 1*T
    """
    return np.random.normal(loc=0, scale=sigma2, size=T)

def compute_Y(X, beta, epsilon):
    return X@beta + epsilon


### Final function
def init_parameters(seed, T, k, l, rho, s, Ry, a, b, A, B):
    """
    Initialize parameters for a given simulation.

    Args:
        seed (int): Seed for reproducibility.
        T (int): Number of observations.
        k (int): Number of covariates.
        l (int): Number of latent variables.
        rho (float): Correlation parameter.
        s (float): Scaling parameter.
        Ry (float): Response variance.
        a (float): Shape parameter for gamma2.
        b (float): Shape parameter for gamma2.
        A (float): Shape parameter for q.
        B (float): Shape parameter for q.

    Returns:
        dict: Dictionary containing initialized parameters.
    """
    np.random.seed(seed=seed)
    dct = {
        "X" : compute_X(T=T, k=k, rho=rho),
        "U": compute_U(T=T, l=l),
        "beta": sample_beta(k=k, s=s),
        "phi": sample_phi(l=l),
        "q": np.random.beta(A,B),
        "gamma2": np.random.beta(a,b),
    }
    dct["vx"] = compute_vx(X=dct["X"])
    dct["R2"] = compute_R2(q=dct["q"], k=k, gamma2=dct["gamma2"], v_x=dct["vx"])
    dct["Z"]=comput_Z(beta=dct["beta"])
    dct["sigma2"] = compute_sigma2(Ry=Ry, beta=dct["beta"], X=dct["X"])
    dct["epsilon"] = sample_epsilon(T=T, sigma2=dct["sigma2"])
    dct["Y"]=compute_Y(X=dct["X"], beta=dct["beta"], epsilon=dct["epsilon"])
    return dct

In [3]:
dct = init_parameters(seed,T,k,l,rho,s,Ry,a,b,A,B)

# for scalar in ["q", "gamma2", "vx", "R2", "sigma2"]:
#     value = dct[scalar]
#     print(f"{scalar}={value}\n")

# for mat in ["beta", "phi", "Y", "epsilon", "X", "U"]:
#     value = dct[mat]
#     fig = px.histogram(value, histnorm='probability density', title = f"<b>Histogram of {mat}</b> shape : {value.shape} ", template="plotly_dark")
#     fig.show()


## Samples simple discrete variables

In [4]:
def sample_discrete(seed, values, weights, n_points):
    probs = weights/weights.sum()
    return sp.stats.rv_discrete(seed = seed, values = (values, probs)).rvs(size=n_points)

#print(sample_discrete(seed, np.arange(3), np.arange(1,4) , 100))

def sample_discrete_ndim(seed, values, weights, n_points):
    nval = np.multiply(values.shape)
    return sample_discrete(seed, values.reshape(nval), weights.reshape(nval), n_points)

In [5]:
test_multy_sample = sample_discrete(seed, np.arange(27).reshape((3,3,3)), np.arange(1,28).reshape((3,3,3)) , 10000)
print(test_multy_sample)
# fig = px.histogram(test_multy_sample, histnorm='probability density', title = f"<b>Histogram from a linear 3D density</b>", template="plotly_dark")
# fig.show()

[19 22 20 ... 23  3 24]


## 1. Draw from the conditional posterior of (R2, q)

In [6]:
def density_unormalized_R2_q_by_Y_U_X_theta_z(R2, q, dct, k, a, b, A, B):
    vx = dct["vx"]
    s_z = np.sum(dct["Z"])
    sigma2 = dct["sigma2"]
    Beta = dct["beta"]
    exponent = - np.prod([
        1/(1e-6 + 2*sigma2),
        (k*vx*q*(1-R2))/(1e-6 + R2),
         np.dot(Beta, np.dot(np.diag(dct["Z"]), Beta))
        ])       
    return np.prod([
        np.exp(exponent),
        q**(s_z+s_z/2+a-1),
        (1-q)**(k-s_z+b-1),
        R2**(A-1-s_z/2),
        (1-R2)**(s_z/2+B-1)
    ])

def sample_R2_q_by_Y_U_X_theta_z(seed, n_points, dct, k, a, b, A, B):
    
    arr0 = np.arange(0.001,0.101,0.001) # commence pas à 0 car division par 0 sinon
    arr1 = np.arange(0.11,0.91,0.01)
    arr2 = np.arange(0.901,1.001,0.001)
    discretization = np.concatenate((arr0, arr1, arr2), axis=0)
    
    values = np.dstack(np.meshgrid(discretization, discretization)).reshape(-1, 2)
    def density(R2_q):
        R2 = R2_q[0]
        q = R2_q[1]
        return density_unormalized_R2_q_by_Y_U_X_theta_z(R2, q, dct, k, a, b, A, B)
    
    weights = np.apply_along_axis(density, 1, values) 
    index = np.arange(len(weights))
    sample_mask = sample_discrete(seed, index, weights, n_points)
    return values[sample_mask]

In [19]:
posterior_R2_q=sample_R2_q_by_Y_U_X_theta_z(seed=seed, n_points=2, dct=dct, k=k, a=a, b=b, A=A, B=B)
print(posterior_R2_q)

[[0.028 0.063]
 [0.018 0.073]]


In [8]:
Z=dct['Z']
X=dct['X']
U=dct['U']
Y=dct['Y']
q=dct["q"]
gamma2=dct["gamma2"]
beta=dct["beta"]
phi=dct["phi"]
sigma2=dct["sigma2"]

## 2. Sample from the conditional posterior of $\phi$

In [9]:
def sample_phi_posterior(U, Y, X, beta, sigma2, l, n_variables, seed):
    """Sample 1*l random vectors ϕ|Y, U, X, z, β, R^2, q, sigma^2

    Args:
        U (np.array): T*l matrix of predictors
        Y (np.array): T*1 vector of target
        X (np.array): T*k matrix of predictors
        beta (np.array): 1*k vector beta
        sigma2 (float): sigma^2
        n_variables (int): number of variables
        seed (int): seed

    Returns:
        np.array: n_variables of 1*l random vectors
    """
    np.random.seed(seed)
    if l==0:
        return sample_phi(l=l)
    else:
        return np.random.multivariate_normal(np.linalg.inv(U.T@U)@U.T@(Y-X@beta), sigma2*np.linalg.inv(U.T@U), n_variables)

In [10]:
phi_posterior=sample_phi_posterior(U=U, Y=Y, X=X, beta=beta, sigma2=sigma2, l=l, n_variables=2, seed=seed)
print(phi_posterior)

0


## 3. Sample from the conditional posterior of $z$

### Preliminary step: compute some random vectors

In [11]:
def compute_X_tilde(X,beta):
    """Compute T*s(z) \Tilde{X} matrix

    Args:
        X (np.array): T*p matrix of x predictors
        beta (np.array): 1*p vector of beta prior

    Returns:
        np.array: \Tilde{X} matrix
    """
    non_zero_beta=list(np.nonzero(beta)[0])
    X_tilde=X[:, non_zero_beta]
    return X_tilde

def compute_W_tilde(X_tilde, gamma2):
    """Compute s(z)*s(z) \Tilde{W} matrix

    Args:
        X_tilde (np.array): T*s(z) \Tilde{X} matrix
        gamma2 (float): gamma^2 

    Returns:
        np.array: \Tilde{W} matrix
    """
    I_s_z=np.identity(X_tilde.shape[1])
    return X_tilde.T@X_tilde+(1/gamma2)*I_s_z

def compute_Y_tilde(Y, U, phi):
    """Compute 1*T \Tilde{Y} matrix

    Args:
        Y (np.array): 1*T vector of target variables
        U (np.array): T*l matrix of u predictors
        phi (np.array): 1*l phi vector

    Returns:
        np.array: \Tilde{Y}
    """
    if U==0:
        return Y
    else:
        return Y-U@phi

def compute_estimator_beta_tilde(W_tilde, X_tilde, Y_tilde):
    """Compute 1*s(z) \hat{\Tilde{\beta}} vector

    Args:
        W_tilde (np.array): s(z)*s(z) \Tilde{W} matrix
        X_tilde (np.array): T*s(z) \Tilde{X} matrix
        Y_tilde (np.array): 1*T \Tilde{Y} matrix

    Returns:
        np.array: 1*s(z) \hat{\Tilde{\beta}} vector
    """
    return np.linalg.inv(W_tilde)@X_tilde.T@Y_tilde

### Gibbs sampler

In [12]:
def compute_z_posterior(q, k, Z, gamma2, W_tilde, Y_tilde, estimator_beta_tilde, T):
    """Compute \pi(z|Y, U, X, \phi, R^2, q) mass function

    Args:
        q (float): q variable
        k (int): number of x predictors
        Z (np.array): 1*k array of z1,...,zk
        gamma2 (float): gamma^2 variable
        W_tilde (np.array): s(z)*s(z) \Tilde{W} matrix
        Y_tilde (np.array): 1*T \Tilde{Y} matrix
        estimator_beta_tilde (np.array): 1*s(z) \hat{\Tilde{\beta}} vector
        T (int): number of samples

    Returns:
        float: value of \pi(z|Y, U, X, \phi, R^2, q)
    """
    s_z=np.sum(Z)
    return q**s_z*(1-q)**(k-s_z)*(1/gamma2)**(s_z/2)*np.linalg.det(W_tilde)**(-0.5)*0.5**(-T/2)*(Y_tilde.T@Y_tilde-estimator_beta_tilde.T@W_tilde@estimator_beta_tilde)**(-T/2)*sp.special.gamma(T/2)


def compute_zi_posterior_conditional_z_excluded_i(z_i, z_minus_i, i, q, k, gamma2, W_tilde, Y_tilde, estimator_beta_tilde, T):
    """Compute \pi(z_i|Y, X, U, \phi, R^2, q, z_{-i}) for a specific zi

    Args:
        z_i (int): z_i variable
        z_minus_i (np.array): z random vector with z_i excluded
        i (int): index of z_i variable in z random vector
        q (float): q variable
        k (int): number of x predictors
        gamma2 (float): gamma^2 variable
        W_tilde (np.array): s(z)*s(z) \Tilde{W} matrix
        Y_tilde (np.array): 1*T \Tilde{Y} matrix
        estimator_beta_tilde (np.array): 1*s(z) \hat{\Tilde{\beta}} vector
        T (int): number of samples

    Returns:
        float: value of \pi(z_i|Y, X, U, \phi, R^2, q, z_{-i})
    """

    z=z_minus_i
    z=np.insert(z, i, z_i)
    z_posterior=compute_z_posterior(q=q, k=k, Z=z, gamma2=gamma2, W_tilde=W_tilde, Y_tilde=Y_tilde, estimator_beta_tilde=estimator_beta_tilde, T=T)
    z_0=np.copy(z)
    z_0[i]=0
    z_posterior_zi_equal_0=compute_z_posterior(q=q, k=k, Z=z_0, gamma2=gamma2, W_tilde=W_tilde, Y_tilde=Y_tilde, estimator_beta_tilde=estimator_beta_tilde, T=T)
    z_1=np.copy(Z)
    z_1[i]=1
    z_posterior_zi_equal_1=compute_z_posterior(q=q, k=k, Z=z_1, gamma2=gamma2, W_tilde=W_tilde, Y_tilde=Y_tilde, estimator_beta_tilde=estimator_beta_tilde, T=T)

    return z_posterior/(z_posterior_zi_equal_0+z_posterior_zi_equal_1+1e-6)

def simulated_zi_posterior_conditional_z_excluded_i(z_minus_i, i, q, k, gamma2, W_tilde, Y_tilde, estimator_beta_tilde, T):
    """Sample z_i|Y, X, U, \phi, R^2, q z_{-i}, by inverse CDF method

    Args:
        z_minus_i (np.array): z random vector with z_i excluded
        i (int): index of z_i variable in z random vector
        q (float): q variable
        k (int): number of x predictors
        gamma2 (float): gamma^2 variable
        W_tilde (np.array): s(z)*s(z) \Tilde{W} matrix
        Y_tilde (np.array): 1*T \Tilde{Y} matrix
        estimator_beta_tilde (np.array): 1*s(z) \hat{\Tilde{\beta}} vector
        T (int): number of samples

    Returns:
        int: value of z_i|Y, X, U, \phi, R^2, q z_{-i} in {0,1} 
    """
    proba_success=compute_zi_posterior_conditional_z_excluded_i(z_i=1, z_minus_i=z_minus_i, i=i, q=q, k=k, gamma2=gamma2, W_tilde=W_tilde, Y_tilde=Y_tilde, estimator_beta_tilde=estimator_beta_tilde, T=T)
    u=np.random.uniform(0,1)
    if u<=1-proba_success:
        return 0
    else:
        return 1
    
def step_gibbs_sampler_z_posterior(q, k, gamma2, W_tilde, Y_tilde, estimator_beta_tilde, T, z_t_minus_1):
    """One iteration of Gibbs sampler

    Args:
        q (float): q variable
        k (int): number of x predictors
        gamma2 (float): gamma^2 variable
        W_tilde (np.array): s(z)*s(z) \Tilde{W} matrix
        Y_tilde (np.array): 1*T \Tilde{Y} matrix
        estimator_beta_tilde (np.array): 1*s(z) \hat{\Tilde{\beta}} vector
        T (int): number of samples
        z_t_minus_1 (_type_): z_{t-1} variable sampled during the previous iteration (t-1 step)

    Returns:
        np.array: array of variables Z^(t)=(Z^(t)_1,...,Z^(t)_k) sampled at step t
    """
    z_t=np.copy(z_t_minus_1)
    for i in range(len(z_t)):
        z_t_minus_i=np.delete(z_t, i)
        sampled_z_t_i=simulated_zi_posterior_conditional_z_excluded_i(z_minus_i=z_t_minus_i, i=i, q=q, k=k, gamma2=gamma2, W_tilde=W_tilde, Y_tilde=Y_tilde, estimator_beta_tilde=estimator_beta_tilde, T=T)
        z_t=np.insert(z_t_minus_i, i, sampled_z_t_i)
    return z_t

def gibbs_sampler_z_posterior(q, k, gamma2, W_tilde, Y_tilde, estimator_beta_tilde, T, n_iter, n_variables):
    """Gibbs sampler to simulate 1*k vectors z|Y, U, X, \phi, R^2

    Args:
        q (float): q variable
        k (int): number of x predictors
        gamma2 (float): gamma^2 variable
        W_tilde (np.array): s(z)*s(z) \Tilde{W} matrix
        Y_tilde (np.array): 1*T \Tilde{Y} matrix
        estimator_beta_tilde (np.array): 1*s(z) \hat{\Tilde{\beta}} vector
        T (int): number of samples
        n_iter (int): number of iterations
        n_variables (int): number of variables desired

    Returns:
        np.array: array of n_variables z=(z_1,...,z_k)
    """
    array_z=[]
    for n in range(n_variables):
        z_0=np.random.binomial(n=1, p=q, size=k) #Yanis; je ne sais pas trop comment initialiser le premier Z, j'ai repris la loi a priori
        z_t=z_0
        for t in range(n_iter):
            z_t=step_gibbs_sampler_z_posterior(q=q, k=k, gamma2=gamma2, W_tilde=W_tilde, Y_tilde=Y_tilde, estimator_beta_tilde=estimator_beta_tilde, T=T, z_t_minus_1=z_t)
        array_z.append(z_t)
    return np.array(array_z)

In [13]:
X_tilde=compute_X_tilde(X=X,beta=beta)
W_tilde=compute_W_tilde(X_tilde=X_tilde, gamma2=gamma2)
Y_tilde=compute_Y_tilde(Y=Y, U=U, phi=phi)
estimator_beta_tilde=compute_estimator_beta_tilde(W_tilde=W_tilde, X_tilde=X_tilde, Y_tilde=Y_tilde)

In [14]:
sample_posterior_z=gibbs_sampler_z_posterior(q=q,
                                   k=k, 
                                   gamma2=gamma2,
                                   W_tilde=W_tilde,
                                   Y_tilde=Y_tilde,
                                   estimator_beta_tilde=estimator_beta_tilde, 
                                   T=T,
                                   n_variables=1,
                                   n_iter=1000)

print(sample_posterior_z)

[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]


### 4. Draw from the conditional posterior of $\sigma^2$

In [15]:
def sample_sigma2_posterior(Y_tilde, estimator_beta_tilde, X_tilde, gamma2, T, n_variables, seed):
    """Sample random variables sigma^2|Y, U, X, ϕ, R2, q, z

    Args:
        Y_tilde (np.array): 1*T \Tilde{Y} matrix
        estimator_beta_tilde (np.array): 1*s(z) \hat{\Tilde{\beta}} vector
        X_tilde (np.array): T*s(z) \Tilde{X} matrix
        gamma2 (float): gamma^2 variable
        T (int): number of samples
        n_variables (int): number of variables desired
        seed (int): seed

    Returns:
        _type_: n_variables sigma^2|Y, U, X, ϕ, R2, q, z
    """
    np.random.seed(seed)
    I_s_z=np.identity(X_tilde.shape[1])
    inverse_gamma_dist = sp.stats.invgamma(T/2, scale=0.5*(Y_tilde.T@Y_tilde-estimator_beta_tilde.T@(X_tilde.T@X_tilde+(1/gamma2)*I_s_z)@estimator_beta_tilde))
    return inverse_gamma_dist.rvs(size=n_variables)

In [16]:
array_sigma2_posterior=sample_sigma2_posterior(Y_tilde=Y_tilde,
                                               estimator_beta_tilde=estimator_beta_tilde,
                                               X_tilde=X_tilde,
                                               gamma2=gamma2,
                                               T=T,
                                               n_variables=2,
                                               seed=seed)
print(array_sigma2_posterior)

[81758.34021657 85536.60018476]


### 5. Draw from the conditional posterior of $\tilde{\beta}$

In [17]:
def sample_beta_tilde_posterior(X_tilde, Y, U, phi, sigma2, gamma2, n_variables, seed):
    """Sample 1*s(z) random vectors \tilde{β}|Y, U, X, ϕ, R2, q, sigma^2, z

    Args:
        X_tilde (np.array): T*s(z) \Tilde{X} matrix
        Y (np.array): T*1 vector of target
        U (np.array): T*l matrix of predictors
        phi (np.array): 1*l phi vector
        sigma2 (float): sigma^2
        gamma2 (float): gamma^2
        n_variables (int): number of variables
        seed (int): seed

    Returns:
        _type_: n_variables of 1*l random vectors
    """
    np.random.seed(seed)
    I_s_z=np.identity(X_tilde.shape[1])
    if U==0:
        return np.random.multivariate_normal(np.linalg.inv((1/gamma2)*I_s_z+X_tilde.T@X_tilde)@X_tilde.T@Y,sigma2*np.linalg.inv((1/gamma2)*I_s_z+X_tilde.T@X_tilde), n_variables)
    else:
        return np.random.multivariate_normal(np.linalg.inv((1/gamma2)*I_s_z+X_tilde.T@X_tilde)@X_tilde.T@(Y-U@phi),sigma2*np.linalg.inv((1/gamma2)*I_s_z+X_tilde.T@X_tilde), n_variables)

In [18]:
array_beta_tilde_posterior=sample_beta_tilde_posterior(X_tilde=X_tilde,
                                                       Y=Y,
                                                       U=U,
                                                       phi=phi,
                                                       sigma2=sigma2,
                                                       gamma2=gamma2,
                                                       n_variables=2,
                                                       seed=seed)

print(array_beta_tilde_posterior)

[[ -2.95008323  -9.25414079  -5.69520608  -1.42845618  -5.62275484]
 [ -3.15195418 -11.60255837  -2.3262441   -7.12269525  -8.02888534]]


* $l=0$ dans l'énoncé donc, $U=0$: bizarre comme certaines variances dépendent de U (celle du posterior de $\phi$) et valent donc 0
* Doute sur la génération de $\beta$ et $Z$: en théorie $\beta$, suit un mélange de lois (gaussien + dirac). Or, dans l'énoncé on demande explicitement de mettre $k-s$ élements de $\beta$ à 0 et les autres composantes suivent une normale centrée réduite. De plus, $Z_j$ suit une loi de bernouilli de param_tre $q$. Or, dans ce cas, il n'y a pas de simulation aléatoire de $Z$. On regarde juste les composantes de $\beta$ non-nulles. Je trouve ça étrange.