In [1]:
from src.gaussian_simulation import *
from src.estimators2 import *
from src.sgd import *
import numpy as np
from numpy.linalg import norm 
import matplotlib.pylab as plt

On reprend le cadre précédent. On génére un n-échantillon de taille 1000, de loi $p_{\theta}(\mathbf{x}|z)=\mathcal{N}(z, I_{20})$

In [2]:
np.random.seed(589)

theta=simulate_gaussian_vector(mu=np.array([0]*20), sigma=np.identity(20))

A_optimal=np.identity(20)*0.5
b_optimal=theta/2

A=A_optimal+simulate_gaussian_vector(mu=np.array([0]*20), sigma=0.01*np.identity(20))
b=b_optimal+simulate_gaussian_vector(mu=np.array([0]*20), sigma=0.01*np.identity(20))

echantillon_x=generer_nech_gaussien_x_sachant_z(n=1000, mu_z=theta, sigma_z=np.identity(20),sigma_x=np.identity(20))

La log-vraisemblance de l'échantillon (avec le facteur $-\frac{1}{n}$) est $l_n(\theta)=\frac{1}{n}\sum_{i=1}^n \lVert X_i - \theta \rVert^2 = \frac{1}{n}\sum_{i=1}^n l_i(\theta) $. 
On cherche $\hat{\theta} \in argmin \; l_n(\theta)$

# Une première estimation de $\theta$

On utilise les estimateurs du gradients de la log-vraisemblance pour estimer $\theta$ à l'aide de la descente de gradient stochastique. On fixe les paramètres de coût computationnel au mimimum pour faciliter la comparaison avec l'estimateur de $\theta$ par SGD usuelle ($k=6$ pour l'estimateur IAWE et à $l=0$ pour les autres). Dans la descente de gradient stochastique usuelle, on estime le gradient de la log vraisemblance par $\nabla_{\theta} l_i(\theta)= -2(X_i-\theta)$. Les hyper-paramètres sont fixés de la façon suivante:
* Nombre d'itérations: 100
* Learning rate: 0,001
* Comme $\theta$ est proche de $0_{\mathbb{R}^{20}}$, on fixe $\theta_0$ à $0_{\mathbb{R}^{20}}$.

In [6]:
np.random.seed(589)
theta_SGD=SGD(theta_init=np.array([0]*20), learn_rate=0.001, n_iter=100, echantillon=echantillon_x)
theta_iawe=SGD_IAWE(theta_init=np.array([0]*20), learn_rate=0.001, n_iter=100, A=A, b=b, echantillon=echantillon_x, k=6)
theta_SUMO=SGD_SUMO(theta_init=np.array([0]*20), learn_rate=0.001, n_iter=100, A=A, b=b, echantillon=echantillon_x, l=0)
theta_RR=SGD_RR(theta_init=np.array([0]*20), learn_rate=0.001, n_iter=100, A=A, b=b, echantillon=echantillon_x, l=0)
theta_SS=SGD_SS(theta_init=np.array([0]*20), learn_rate=0.001, n_iter=100, A=A, b=b, echantillon=echantillon_x, l=0)

print("Vraie valeur de theta: {}".format(np.around(theta, 2)))
print("Estimation de theta par descente de gradient stochastique useuelle: {}".format(np.around(theta_SGD, 2)))
print("Estimation de theta par SGD IWAE: {}".format(np.around(theta_iawe, 2)))
print("Estimation de theta par SGD SUMO: {}".format(np.around(theta_SUMO, 2)))
print("Estimation de theta par SGD RR: {}".format(np.around(theta_RR, 2)))
print("Estimation de theta par SGD SS: {}".format(np.around(theta_SS, 2)))

Vraie valeur de theta: [ 1.11  0.59  0.82  0.74  0.97 -1.09 -0.63  0.33 -1.86 -0.06 -1.72 -1.12
  1.71  0.29  0.13  0.74  0.72 -0.1   1.7  -1.24]
Estimation de theta par descente de gradient stochastique useuelle: [ 0.2   0.11  0.2   0.14  0.21 -0.21 -0.09  0.04 -0.36  0.01 -0.29 -0.22
  0.32  0.06  0.01  0.11  0.15 -0.02  0.33 -0.24]
Estimation de theta par SGD IWAE: [-0.11 -0.06 -0.09 -0.06 -0.12  0.11  0.04 -0.04  0.14 -0.01  0.14  0.08
 -0.17 -0.02 -0.02 -0.07 -0.06  0.02 -0.15  0.08]
Estimation de theta par SGD SUMO: [-0.19 -0.11 -0.21 -0.16 -0.24  0.16  0.14 -0.1   0.34 -0.05  0.34  0.28
 -0.35 -0.13 -0.09 -0.12 -0.16  0.04 -0.44  0.27]
Estimation de theta par SGD RR: [-0.93  0.07 -0.15 -0.07 -0.21  0.31  0.42 -0.02  0.48  0.14  0.39  0.32
 -0.27  0.11 -0.07  0.03 -0.    0.08 -0.1   0.35]
Estimation de theta par SGD SS: [-0.54 -0.07 -0.15 -0.05 -0.17  0.04  0.07 -0.1   0.09  0.01  0.14 -0.02
 -0.13 -0.03 -0.09 -0.08 -0.04  0.03 -0.17  0.02]


In [10]:
def procedure_MC_theta(M, L, theta, A, b, n):
    biais_SGD_M={}
    biais_IWAE_M={}
    biais_SUMO_M={}
    biais_SS_M={}
    biais_RR_M={}

    var_SGD_M={}
    var_IWAE_M={}
    var_SUMO_M={}
    var_SS_M={}
    var_RR_M={}

    l=2
    while l<=L:
        m=1
        estimations_SGD_M_l=np.array([])
        estimations_IWAE_M_l=np.array([])
        estimations_SUMO_M_l=np.array([])
        estimations_SS_M_l=np.array([])
        estimations_RR_M_l=np.array([])


        while m<=M:
            echantillon_x=generer_nech_gaussien(n)
            theta_SGD=SGD(theta_init=np.array([0]*20), learn_rate=0.01, echantillon=echantillon_x, n_iter=100)
            theta_IAWE=SGD_IAWE(theta_init=np.array([0]*20), learn_rate=0.01, n_iter=100, A=A, b=b, echantillon=echantillon_x, k=l)
            theta_SUMO=SGD_SUMO(theta_init=np.array([0]*20), learn_rate=0.01, n_iter=100, A=A, b=b, echantillon=echantillon_x, l=l)
            theta_RR=SGD_RR(theta_init=np.array([0]*20), learn_rate=0.01, n_iter=100, A=A, b=b, echantillon=echantillon_x, l=l)
            theta_SS=SGD_SS(theta_init=np.array([0]*20), learn_rate=0.01, n_iter=100, A=A, b=b, echantillon=echantillon_x, l=l)

            if m==1:
                estimations_SGD_M_l=np.append(estimations_SGD_M_l, theta_SGD)
                estimations_IWAE_M_l= np.append(estimations_IWAE_M_l, theta_IAWE)
                estimations_SUMO_M_l=np.append(estimations_SUMO_M_l, theta_SUMO)
                estimations_RR_M_l=np.append(estimations_RR_M_l, theta_RR)
                estimations_SS_M_l=np.append(estimations_SS_M_l, theta_SS)
            
            else:
                estimations_SGD_M_l=np.vstack((estimations_SGD_M_l, theta_SGD))
                estimations_IWAE_M_l= np.vstack((estimations_IWAE_M_l, theta_IAWE))
                estimations_SUMO_M_l=np.vstack((estimations_SUMO_M_l, theta_SUMO))
                estimations_RR_M_l=np.vstack((estimations_RR_M_l, theta_RR))
                estimations_SS_M_l=np.vstack((estimations_SS_M_l, theta_SS))
            m+=1

        biais_SGD_M_l=np.mean(estimations_SGD_M_l, axis=0)-theta
        biais_IWAE_M_l=np.mean(estimations_IWAE_M_l, axis=0)-theta
        biais_IWAE_M_l=np.mean(estimations_IWAE_M_l, axis=0)-theta
        biais_SUMO_M_l=np.mean(estimations_SUMO_M_l, axis=0)-theta
        biais_SS_M_l=np.mean(estimations_SS_M_l, axis=0)-theta
        biais_RR_M_l=np.mean(estimations_RR_M_l, axis=0)-theta

        squared_biais_SGD_M_l=norm(biais_SGD_M_l)**2
        squared_biais_IWAE_M_l=norm(biais_IWAE_M_l)**2
        squared_biais_SUMO_M_l=norm(biais_SUMO_M_l)**2
        squared_biais_SS_M_l=norm(biais_SS_M_l)**2
        squared_biais_RR_M_l=norm(biais_RR_M_l)**2

        var_SGD_M_l=np.mean(norm(estimations_SGD_M_l-np.mean(estimations_IWAE_M_l, axis=0), axis=1)**2)
        var_IWAE_M_l=np.mean(norm(estimations_IWAE_M_l-np.mean(estimations_IWAE_M_l, axis=0), axis=1)**2)
        var_SUMO_M_l=np.mean(norm(estimations_SUMO_M_l-np.mean(estimations_SUMO_M_l, axis=0), axis=1)**2)
        var_SS_M_l=np.mean(norm(estimations_SS_M_l-np.mean(estimations_SS_M_l, axis=0), acis=1)**2)
        var_RR_M_l=np.mean(norm(estimations_RR_M_l-np.mean(estimations_RR_M_l, axis=0), axis=1)**2)
        
        biais_SGD_M[l]=squared_biais_SGD_M_l
        biais_IWAE_M[l]=squared_biais_IWAE_M_l
        biais_SUMO_M[l]=squared_biais_SUMO_M_l
        biais_SS_M[l]=squared_biais_SS_M_l
        biais_RR_M[l]=squared_biais_RR_M_l
        
        var_SGD_M[l]=var_SGD_M_l
        var_IWAE_M[l]=var_IWAE_M_l
        var_SUMO_M[l]=var_SUMO_M_l
        var_SS_M[l]=var_SS_M_l
        var_RR_M[l]=var_RR_M_l

        l+=1

    return biais_SGD_M, biais_IWAE_M, biais_SUMO_M, biais_SS_M, biais_RR_M, var_SGD_M,var_IWAE_M, var_SUMO_M, var_SS_M, var_RR_M

In [11]:
# np.random.seed(8554)

# biais_SGD_M_theta, biais_IWAE_M_theta, biais_SUMO_M_theta, biais_SS_M_theta, biais_RR_M_theta, var_SGD_M_theta, var_IWAE_M_theta, var_SUMO_M_theta, var_SS_M_theta, var_RR_M_theta = procedure_MC_theta(M=1000, 
#                                                                                                                                                                                                         L=8,
#                                                                                                                                                                                                         theta=theta,
#                                                                                                                                                                                                         A=A, 
#                                                                                                                                                                                                         b=b,
#                                                                                                                                                                                                         n=100) 
                                                                                                                            

In [12]:
sorted_biais_SGD_M_theta = sorted(biais_SGD_M_theta.items())
sorted_biais_IWAE_M_theta = sorted(biais_IWAE_M_theta.items())
sorted_biais_SS_M_gradient = sorted(biais_SS_M_theta.items())
sorted_biais_RR_M_gradient = sorted(biais_RR_M_theta.items())

sorted_var_IWAE_M_gradient = sorted(var_IWAE_M_gradient.items())
sorted_var_SUMO_M_gradient = sorted(var_SUMO_M_gradient.items())
sorted_var_SS_M_gradient = sorted(var_SS_M_gradient.items())
sorted_var_RR_M_gradient = sorted(var_RR_M_gradient.items())

l, biais_IAWE_gradient = zip(*sorted_biais_IWAE_M_gradient)
l, biais_SUMO_gradient = zip(*sorted_biais_SUMO_M_gradient)
l, biais_SS_gradient = zip(*sorted_biais_SS_M_gradient)
l, biais_RR_gradient = zip(*sorted_biais_RR_M_gradient)

l, var_IAWE_gradient = zip(*sorted_var_IWAE_M_gradient) 
l, var_SUMO_gradient = zip(*sorted_var_SUMO_M_gradient) 
l, var_SS_gradient = zip(*sorted_var_SS_M_gradient) 
l, var_RR_gradient = zip(*sorted_var_RR_M_gradient) 

fig, axs = plt.subplots(2,1, figsize=(10, 10))

axs[0].plot(l, biais_IAWE_gradient, 'b', label="Biais au carré de l'estimateur IWAE", color="blue")
axs[0].plot(l, biais_SUMO_gradient, 'b', label="Biais au carré de l'estimateur SUMO", color="red")
axs[0].plot(l, biais_SS_gradient, 'b', label="Biais au carré de l'estimateur ML-SS", color="orange")
axs[0].plot(l, biais_RR_gradient, 'b', label="Biais au carré de l'estimateur ML-RR", color="green")

axs[0].legend()

fig.suptitle('Biais au carré et variance des estimateurs du gradient de la log-vraisemblance, en fonction du coût computationel', fontsize=16)

axs[1].plot(l, var_IAWE_gradient, 'b', label="Variance de l'estimateur IAWE", color="blue")
axs[1].plot(l, var_SUMO_gradient, 'b', label="Variance de l'estimateur SUMO", color="red")
axs[1].plot(l, var_SS_gradient, 'b', label="Variance de l'estimateur ML-SS", color="orange")
axs[1].plot(l, var_RR_gradient, 'b', label="Variance de l'estimateur ML-RR", color="green")


axs[1].legend()

plt.show()

NameError: name 'biais_SDG_M_theta' is not defined

In [None]:
estimations_SGD_M_l=np.array([])
estimations_IWAE_M_l=np.array([])
estimations_SUMO_M_l=np.array([])
estimations_SS_M_l=np.array([])
estimations_RR_M_l=np.array([])

m=1
M=1000
n=100

while m<=M:
    echantillon_x=generer_nech_gaussien(n)
    theta_SGD=SGD(theta_init=np.array([0]*20), learn_rate=0.01, echantillon=echantillon_x, n_iter=100)
    theta_IAWE=SGD_IAWE(theta_init=np.array([0]*20), learn_rate=0.01, n_iter=100, A=A, b=b, echantillon=echantillon_x, k=3)
    theta_SUMO=SGD_SUMO(theta_init=np.array([0]*20), learn_rate=0.01, n_iter=100, A=A, b=b, echantillon=echantillon_x, l=3)
    theta_RR=SGD_RR(theta_init=np.array([0]*20), learn_rate=0.01, n_iter=100, A=A, b=b, echantillon=echantillon_x, l=3)
    theta_SS=SGD_SS(theta_init=np.array([0]*20), learn_rate=0.01, n_iter=100, A=A, b=b, echantillon=echantillon_x, l=3)

    if m==1:
        estimations_SGD_M_l=np.append(estimations_SGD_M_l, theta_SGD)
        estimations_IWAE_M_l= np.append(estimations_IWAE_M_l, theta_IAWE)
        estimations_SUMO_M_l=np.append(estimations_SUMO_M_l, theta_SUMO)
        estimations_RR_M_l=np.append(estimations_RR_M_l, theta_RR)
        estimations_SS_M_l=np.append(estimations_SS_M_l, theta_SS)
            
    else:
        estimations_SGD_M_l=np.vstack((estimations_SGD_M_l, theta_SGD))
        estimations_IWAE_M_l= np.vstack((estimations_IWAE_M_l, theta_IAWE))
        estimations_SUMO_M_l=np.vstack((estimations_SUMO_M_l, theta_SUMO))
        estimations_RR_M_l=np.vstack((estimations_RR_M_l, theta_RR))
        estimations_SS_M_l=np.vstack((estimations_SS_M_l, theta_SS))
    m+=1

biais_SGD_M_l=np.mean(estimations_SGD_M_l, axis=0)-theta
biais_IWAE_M_l=np.mean(estimations_IWAE_M_l, axis=0)-theta
biais_IWAE_M_l=np.mean(estimations_IWAE_M_l, axis=0)-theta
biais_SUMO_M_l=np.mean(estimations_SUMO_M_l, axis=0)-theta
biais_SS_M_l=np.mean(estimations_SS_M_l, axis=0)-theta
biais_RR_M_l=np.mean(estimations_RR_M_l, axis=0)-theta

squared_biais_SGD_M_l=norm(biais_SGD_M_l)**2
squared_biais_IWAE_M_l=norm(biais_IWAE_M_l)**2
squared_biais_SUMO_M_l=norm(biais_SUMO_M_l)**2
squared_biais_SS_M_l=norm(biais_SS_M_l)**2
squared_biais_RR_M_l=norm(biais_RR_M_l)**2

var_SGD_M_l=np.mean(norm(estimations_SGD_M_l-np.mean(estimations_IWAE_M_l, axis=0), axis=1)**2)
var_IWAE_M_l=np.mean(norm(estimations_IWAE_M_l-np.mean(estimations_IWAE_M_l, axis=0), axis=1)**2)
var_SUMO_M_l=np.mean(norm(estimations_SUMO_M_l-np.mean(estimations_SUMO_M_l, axis=0), axis=1)**2)
var_SS_M_l=np.mean(norm(estimations_SS_M_l-np.mean(estimations_SS_M_l, axis=0), axis=1)**2)
var_RR_M_l=np.mean(norm(estimations_RR_M_l-np.mean(estimations_RR_M_l, axis=0), axis=1)**2)

TypeError: _norm_dispatcher() got an unexpected keyword argument 'acis'

In [None]:
var_SGD_M_l

96.71430856920539

In [None]:
var_IWAE_M_l

0.7846978280276523

In [None]:
var_SUMO_M_l

7.006007094800772

In [None]:
var_SS_M_l

11.955258074079461

In [None]:
squared_biais_SGD_M_l

0.31892409802688737

In [None]:
squared_biais_IWAE_M_l

107.62803455070205

In [None]:
squared_biais_SUMO_M_l

285.88914190502953

In [None]:
squared_biais_SS_M_l

92.91956607763443

In [None]:
squared_biais_RR_M_l

214.1572221985394