In [1]:
from src.densite_function import *
from src.gaussian_simulation import *
from src.estimators import *
from src.vraisemblance import *
import numpy as np
from numpy.linalg import norm 
import matplotlib.pylab as plt

In [2]:
np.random.seed(8878)

theta=simulate_gaussian_vector(mu=np.array([0]*20), sigma=np.identity(20))

A_optimal=np.matrix(np.identity(20))*0.5
b_optimal=theta/2

A=A_optimal+simulate_gaussian_vector(mu=np.array([0]*20), sigma=0.01*np.identity(20))
b=b_optimal+simulate_gaussian_vector(mu=np.array([0]*20), sigma=0.01*np.identity(20))

On généré un n-échantillon de taille 100, de loi $\mathcal{N}(\theta, 2I_{20})$

In [7]:
np.random.seed(8878)
n=100
i=1
echantillon_x=np.array([])

while i<=n:
    if i==1:
        echantillon_x=np.append(echantillon_x, simulate_gaussian_vector(mu=theta, sigma=2*np.identity(20)))
    else:
        echantillon_x=np.vstack((echantillon_x, simulate_gaussian_vector(mu=theta, sigma=2*np.identity(20))))
    i+=1

In [59]:
np.random.shuffle(test)
test

array([[1, 2],
       [3, 4]])

In [68]:
np.random.shuffle(echantillon_x)
echantillon_x

array([[-0.06241532, -0.32519154, -0.05114187, ..., -0.48582026,
         0.45269093,  2.0288521 ],
       [ 0.02008314,  0.02573104, -1.49981415, ...,  1.11443018,
        -1.81498786,  3.03911805],
       [ 1.49160643, -1.13517359, -0.39191734, ..., -2.50582719,
        -0.44250758,  1.76074482],
       ...,
       [-0.82810839,  0.34612553,  0.91338373, ...,  0.17315423,
        -0.19413196,  1.38785861],
       [-0.03768587,  0.28401316,  1.52972288, ..., -0.87033858,
         0.45655378,  0.03955608],
       [ 0.72420659, -1.3383938 , -1.75785089, ..., -0.68401601,
        -0.06917863,  2.62969781]])

Soit $l_n(\theta)=\frac{1}{n}\sum_{i=1}^n \lVert X_i - \theta \rVert^2 = \frac{1}{n}\sum_{i=1}^n l_i(\theta) $. 
On cherche $\hat{\theta} \in argmin \; l_n(\theta)$

# SDG

On estime le gradient de la log vraisemblance par $\nabla_{\theta} l_i(\theta)= -2(X_i-\theta)$

In [88]:
def SDG(theta_init, learn_rate, echantillon, n_iter):
    #Step 1: mélanger l'échantillon
    np.random.shuffle(echantillon)
    #Step 2: tant que le nombre d'itérations n'est pas atteint, actualiser theta

    k=np.random.randint(low=0, high=len(echantillon)) #U[0,99]
    theta=theta_init-learn_rate*(-1)*gradient_log_vraisemblance(echantillon[k], theta_init)
            
    for i in range(n_iter-1):
        k=np.random.randint(low=0, high=len(echantillon))
        theta=theta-learn_rate*(-1)*gradient_log_vraisemblance(echantillon[k], theta)
    return theta

In [89]:
def SDG_IAWE(theta_init, learn_rate, n_iter, A, b, echantillon, k=6):
    #Step 1: mélanger l'échantillon
    np.random.shuffle(echantillon)
    #Step 2: tant que le nombre d'itérations n'est pas atteint, actualiser theta

    k=np.random.randint(low=0, high=len(echantillon)) #U[0,99]
    theta=theta_init-learn_rate*importance_sampling_gradientlogvraisemblance(k=k, theta=theta_init, A=A, b=b, x=echantillon[k])
            
    for i in range(n_iter-1):
        k=np.random.randint(low=0, high=len(echantillon))
        theta=theta-learn_rate*importance_sampling_gradientlogvraisemblance(k=k, theta=theta, A=A, b=b, x=echantillon[k])
    return theta

In [90]:
def SDG_SUMO(theta_init, learn_rate, n_iter, A, b, echantillon, l=6):
    #Step 1: mélanger l'échantillon
    np.random.shuffle(echantillon)
    #Step 2: tant que le nombre d'itérations n'est pas atteint, actualiser theta

    k=np.random.randint(low=0, high=len(echantillon)) #U[0,99]
    theta=theta_init-learn_rate*estimateur_SUMO_gradientlogvraisemblance(k=k, theta=theta_init, A=A, b=b, x=echantillon[k], r=0.6, l=l)
            
    for i in range(n_iter-1):
        k=np.random.randint(low=0, high=len(echantillon))
        theta=theta-learn_rate*estimateur_SUMO_gradientlogvraisemblance(k=k, theta=theta, A=A, b=b, x=echantillon[k], r=0.6, l=l)
    return theta

In [91]:
def SDG_RR(theta_init, learn_rate, n_iter, A, b, echantillon, l=6):
    #Step 1: mélanger l'échantillon
    np.random.shuffle(echantillon)
    #Step 2: tant que le nombre d'itérations n'est pas atteint, actualiser theta

    k=np.random.randint(low=0, high=len(echantillon)) #U[0,99]
    theta=theta_init-learn_rate*estimateur_ML_RR_gradientlogvraisemblance(x=echantillon[k], theta=theta_init, A=A, b=b, r=0.6, l=l)
            
    for i in range(n_iter-1):
        k=np.random.randint(low=0, high=len(echantillon))
        theta=theta-learn_rate*estimateur_ML_RR_gradientlogvraisemblance(x=echantillon[k], theta=theta, A=A, b=b, r=0.6, l=l)
    return theta

In [93]:
def SDG_SS(theta_init, learn_rate, n_iter, A, b, echantillon, l=6):
    #Step 1: mélanger l'échantillon
    np.random.shuffle(echantillon)
    #Step 2: tant que le nombre d'itérations n'est pas atteint, actualiser theta

    k=np.random.randint(low=0, high=len(echantillon)) #U[0,99]
    theta=theta_init-learn_rate*estimateur_ML_SS_gradientlogvraisemblance(x=echantillon[k], theta=theta_init, A=A, b=b, r=0.6, l=l)
            
    for i in range(n_iter-1):
        k=np.random.randint(low=0, high=len(echantillon))
        theta=theta-learn_rate*estimateur_ML_RR_gradientlogvraisemblance(x=echantillon[k], theta=theta, A=A, b=b, r=0.6, l=l)
    return theta

In [96]:
np.random.seed(878)
theta_sdg=SDG(theta_init=np.array([0]*20), learn_rate=0.01, n_iter=100, echantillon=echantillon_x)
theta_iawe=SDG_IAWE(theta_init=np.array([0]*20), learn_rate=0.01, n_iter=100, A=A, b=b, echantillon=echantillon_x, k=6)
theta_RR=SDG_RR(theta_init=np.array([0]*20), learn_rate=0.01, n_iter=100, A=A, b=b, echantillon=echantillon_x, l=6)
theta_SS=SDG_RR(theta_init=np.array([0]*20), learn_rate=0.01, n_iter=100, A=A, b=b, echantillon=echantillon_x, l=6)

print("Vraie valeur de theta: {}".format(np.around(theta, 2)))
print("Estimation de theta par descente de gradient stochastique useuelle: {}".format(np.around(theta_sdg, 2)))
print("Estimation de theta par SDG IWAE: {}".format(np.around(theta_iawe, 2)))
print("Estimation de theta par SDG RR: {}".format(np.around(theta_RR, 2)))
print("Estimation de theta par SDG SS: {}".format(np.around(theta_SS, 2)))

Vraie valeur de theta: [-0.3  -1.01 -0.57  0.53 -0.45 -1.65 -1.66  1.26 -0.5  -2.16  0.18 -0.7
 -0.77  0.53  0.12 -0.5   0.69 -0.35 -0.43  1.35]
Estimation de theta par descente de gradient stochastique useuelle: [-0.17 -1.07 -0.88  0.14 -0.37 -1.86 -1.3   1.15 -0.69 -1.68  0.5  -0.78
 -0.57  0.7   0.03 -0.27  0.44 -0.33 -0.28  1.27]
Estimation de theta par SDG IWAE: [ 0.02  1.49  0.68 -0.28  0.49  2.32  1.9  -1.07  0.45  2.58 -0.42  0.93
  0.94 -0.71 -0.09  0.46 -0.47  0.86  0.75 -1.47]
Estimation de theta par SDG RR: [-6.97 -0.1  -0.88 -3.06 -1.35  1.22  0.98 -4.38 -1.    0.99 -3.1  -1.07
 -0.6  -3.18 -2.66 -1.7  -3.26 -1.21 -1.62 -4.85]
Estimation de theta par SDG SS: [-5.59 -0.11 -1.18 -3.5  -1.68  0.51  0.57 -4.28 -1.64  0.9  -2.91 -0.97
 -1.3  -3.51 -2.86 -1.32 -3.35 -1.72 -1.58 -4.81]


In [None]:
def procedure_MC_theta(M, L, x, theta, A, b):

    biais_IWAE_M={}
    biais_SUMO_M={}
    biais_SS_M={}
    biais_RR_M={}

    var_IWAE_M={}
    var_SUMO_M={}
    var_SS_M={}
    var_RR_M={}

    l=1
    while l<=L:
        m=1
        estimations_IWAE_M_l=np.array([])
        estimations_SUMO_M_l=np.array([])
        estimations_SS_M_l=np.array([])
        estimations_RR_M_l=np.array([])


        while m<=M:
            gradient_log_vraisemblance_estim_IWAE=importance_sampling_gradientlogvraisemblance(k=l, theta=theta, A=A, b=b, x=x)
            gradient_log_vraisemblance_estim_SUMO=estimateur_SUMO_gradientlogvraisemblance(theta=theta, A=A, b=b, x=x, r=0.6, l=l)
            gradient_log_vraisemblance_estim_SS=estimateur_ML_SS_gradientlogvraisemblance(theta=theta, A=A, b=b, x=x, r=0.6, l=l)
            gradient_log_vraisemblance_estim_RR=estimateur_ML_RR_gradientlogvraisemblance(theta=theta, A=A, b=b, x=x, r=0.6, l=l)

            if m==1:
                estimations_IWAE_M_l= np.append(estimations_IWAE_M_l, gradient_log_vraisemblance_estim_IWAE)
                estimations_SUMO_M_l=np.append(estimations_SUMO_M_l, gradient_log_vraisemblance_estim_SUMO)
                estimations_SS_M_l=np.append(estimations_SS_M_l, gradient_log_vraisemblance_estim_SS)
                estimations_RR_M_l=np.append(estimations_RR_M_l, gradient_log_vraisemblance_estim_RR)
            
            else:
                estimations_IWAE_M_l= np.vstack((estimations_IWAE_M_l, gradient_log_vraisemblance_estim_IWAE))
                estimations_SUMO_M_l=np.vstack((estimations_SUMO_M_l, gradient_log_vraisemblance_estim_SUMO))
                estimations_SS_M_l=np.vstack((estimations_SS_M_l, gradient_log_vraisemblance_estim_SS))
                estimations_RR_M_l=np.vstack((estimations_RR_M_l, gradient_log_vraisemblance_estim_RR))
            m+=1
        
        biais_IWAE_M_l=np.mean(estimations_IWAE_M_l, axis=0)-gradient_log_vraisemblance(x=x, theta=theta)
        biais_SUMO_M_l=np.mean(estimations_SUMO_M_l, axis=0)-gradient_log_vraisemblance(x=x, theta=theta)
        biais_SS_M_l=np.mean(estimations_SS_M_l, axis=0)-gradient_log_vraisemblance(x=x, theta=theta)
        biais_RR_M_l=np.mean(estimations_RR_M_l, axis=0)-gradient_log_vraisemblance(x=x, theta=theta)

        squared_biais_IWAE_M_l=norm(biais_IWAE_M_l)**2
        squared_biais_SUMO_M_l=norm(biais_SUMO_M_l)**2
        squared_biais_SS_M_l=norm(biais_SS_M_l)**2
        squared_biais_RR_M_l=norm(biais_RR_M_l)**2

        var_IWAE_M_l=np.mean(norm(estimations_IWAE_M_l-np.mean(estimations_IWAE_M_l, axis=0)))
        var_SUMO_M_l=np.mean(norm(estimations_SUMO_M_l-np.mean(estimations_SUMO_M_l, axis=0)))
        var_SS_M_l=np.mean(norm(estimations_SS_M_l-np.mean(estimations_SS_M_l, axis=0)))
        var_RR_M_l=np.mean(norm(estimations_RR_M_l-np.mean(estimations_RR_M_l, axis=0)))

        biais_IWAE_M[l]=squared_biais_IWAE_M_l
        biais_SUMO_M[l]=squared_biais_SUMO_M_l
        biais_SS_M[l]=squared_biais_SS_M_l
        biais_RR_M[l]=squared_biais_RR_M_l

        var_IWAE_M[l]=var_IWAE_M_l
        var_SUMO_M[l]=var_SUMO_M_l
        var_SS_M[l]=var_SS_M_l
        var_RR_M[l]=var_RR_M_l

        l+=1

    return biais_IWAE_M, biais_SUMO_M, biais_SS_M, biais_RR_M, var_IWAE_M, var_SUMO_M, var_SS_M, var_RR_M