- Found out that directly minimizing the variance with \Sigma replaced by X^TX/n does not work because in that case the variance reduces to 1.
- Hence I tried replacing \Sigma by np.eye(d). The algorithm converges nicely (even without the logdet or 1/det barriers). However for some reason it is ~10x larger than it should be and even when I divide it by 10 it doesn't do well.

In [92]:
import sys, json
from datetime import datetime as dt
import numpy as np
import scipy
from preconditioners.utils import generate_c, generate_centered_gaussian_data
from preconditioners.cov_approx.variance_cov_approx import *
from sklearn.covariance import  GraphicalLasso

In [93]:
# parameters of the run
n_epochs = 100
iter_per_epoch = 500
tol = 0.00001
n = 50
d = 150
simga_2 = 1
ro = 0.5
regime = 'autoregressive'
regul_lambda = 0 # for log barrier use 0.001
lr_start = 0.5
lr_decay = 0.98

params = {
    'n_epochs' : n_epochs,
    'iter_per_epoch' : iter_per_epoch,
    'tol' : tol,
    'n' : n,
    'd' : d,
    'sigma_2' : simga_2,
    'ro' : ro,
    'regime' : regime,
    'regul_lambda' : regul_lambda,
    'lr_start' : lr_start,
    'lr_decay' : lr_decay
}

In [94]:
# generate data and initialization
c = generate_c(ro=ro,
                regime=regime,
                n=n,
                d=d
                )
w_star = np.random.multivariate_normal(mean=np.zeros(d), cov=np.eye(d))
X, y, xi = generate_centered_gaussian_data(w_star,
                                            c,
                                            n=n,
                                            d=d,
                                            sigma2=simga_2,
                                            fix_norm_of_x=False)

# initialize C (cholesky), cov_inv, regul_lambda and learning rate
# is this a good way to initialize?
cov_empir = X.T.dot(X) / n
cov_inv = np.linalg.inv(cov_empir + 0.1 * np.eye(d))
C = scipy.linalg.cholesky(cov_inv) + 0.5 * generate_c(ro=0.2,
                                                        regime='autoregressive',
                                                        n=n,
                                                        d=d,
                                                        )



In [95]:
# run optimization
for epoch in range(n_epochs):
    if epoch == 0:
        lr = lr_start
    else:
        lr = lr * lr_decay

    for i in range(iter_per_epoch):
        # compute loss and gradient
        cov_inv = C.dot(C.T)
        loss_val, grad_loss_val = fAndG_invbarrier(
            B = np.eye(d),
            C = C,
            X = X,
            a = regul_lambda
        )
        error = np.linalg.norm(grad_loss_val)

        # update C (- because we are maximizing)
        C = C - lr * grad_loss_val
        # update regul_lambda
        # regul_lambda = regul_lambda - lr*np.trace(grad_loss_val.dot(grad_loss_val.T))
        # check if we are done
        if i % 50 == 0:
            print(f"iteration {i}/{iter_per_epoch} of epoch {epoch}/{n_epochs}, loss {loss_val} and error {error}")
        if error < tol:
            break
    if error < tol:
        break

#dtstamp = str(dt.now()).replace(' ', '_')
#with open(f'results_{dtstamp}.json', 'w') as f:
#    json.dump({'C' : C.tolist(), 'loss' : float(loss_val), 'error' : float(error), 'parans' : params}, f)

iteration 0/500 of epoch 0/100, loss 1.2349280146739245 and error 0.20005515237831284
iteration 50/500 of epoch 0/100, loss 0.8635421383220943 and error 0.07550986857825727
iteration 100/500 of epoch 0/100, loss 0.7738988007177717 and error 0.047326381199693894
iteration 150/500 of epoch 0/100, loss 0.7327571682451082 and error 0.03471254336849047
iteration 200/500 of epoch 0/100, loss 0.7088318774507757 and error 0.027515784540685383
iteration 250/500 of epoch 0/100, loss 0.6931148546322397 and error 0.02280325362177303
iteration 300/500 of epoch 0/100, loss 0.6820047504036633 and error 0.01945361642417678
iteration 350/500 of epoch 0/100, loss 0.6737515664849731 and error 0.01694263235543614
iteration 400/500 of epoch 0/100, loss 0.6673940137176942 and error 0.014987828392616212
iteration 450/500 of epoch 0/100, loss 0.6623583333209532 and error 0.013421646153579317
iteration 0/500 of epoch 1/100, loss 0.6582806267970733 and error 0.012137869222656335
iteration 50/500 of epoch 1/100,

In [104]:
c

array([[1.00000000e+00, 5.00000000e-01, 2.50000000e-01, ...,
        5.60519386e-45, 2.80259693e-45, 1.40129846e-45],
       [5.00000000e-01, 1.00000000e+00, 5.00000000e-01, ...,
        1.12103877e-44, 5.60519386e-45, 2.80259693e-45],
       [2.50000000e-01, 5.00000000e-01, 1.00000000e+00, ...,
        2.24207754e-44, 1.12103877e-44, 5.60519386e-45],
       ...,
       [5.60519386e-45, 1.12103877e-44, 2.24207754e-44, ...,
        1.00000000e+00, 5.00000000e-01, 2.50000000e-01],
       [2.80259693e-45, 5.60519386e-45, 1.12103877e-44, ...,
        5.00000000e-01, 1.00000000e+00, 5.00000000e-01],
       [1.40129846e-45, 2.80259693e-45, 5.60519386e-45, ...,
        2.50000000e-01, 5.00000000e-01, 1.00000000e+00]])

In [105]:
np.linalg.inv(c)

array([[ 1.33333333e+000, -6.66666667e-001, -5.55111512e-017, ...,
         4.14867685e-061,  2.07433843e-061, -1.03716921e-061],
       [-6.66666667e-001,  1.66666667e+000, -6.66666667e-001, ...,
        -5.76344236e-224, -2.88172118e-224,  5.76344236e-224],
       [ 0.00000000e+000, -6.66666667e-001,  1.66666667e+000, ...,
        -2.07649895e-207, -1.03824947e-207,  2.07649895e-207],
       ...,
       [ 0.00000000e+000,  0.00000000e+000,  0.00000000e+000, ...,
         1.66666667e+000, -6.66666667e-001,  1.85037171e-017],
       [ 0.00000000e+000,  0.00000000e+000,  0.00000000e+000, ...,
        -6.66666667e-001,  1.66666667e+000, -6.66666667e-001],
       [ 0.00000000e+000,  0.00000000e+000,  0.00000000e+000, ...,
         0.00000000e+000, -6.66666667e-001,  1.33333333e+000]])

In [106]:
C.dot(C.T)

array([[ 9.89520193e+00, -3.92320164e-01,  2.29962207e-01, ...,
         6.17563556e-01,  2.78341444e-01, -4.95977851e-01],
       [-3.92320164e-01,  1.03365965e+01, -6.09590015e-01, ...,
        -6.93417536e-02,  2.73705724e-02, -3.28900260e-01],
       [ 2.29962207e-01, -6.09590015e-01,  1.04415231e+01, ...,
         1.24231820e-01, -2.04036753e-01,  7.22706262e-03],
       ...,
       [ 6.17563556e-01, -6.93417536e-02,  1.24231820e-01, ...,
         5.66291624e+00,  3.57854622e-01,  9.58126172e-02],
       [ 2.78341444e-01,  2.73705724e-02, -2.04036753e-01, ...,
         3.57854622e-01,  5.10210840e+00,  5.18076710e-01],
       [-4.95977851e-01, -3.28900260e-01,  7.22706262e-03, ...,
         9.58126172e-02,  5.18076710e-01,  5.51519680e+00]])

In [107]:
np.linalg.inv(C.dot(C.T))

array([[ 1.22583942e-01,  9.36113205e-03,  6.84443450e-04, ...,
        -2.32679769e-02, -3.90986181e-03,  1.60182080e-02],
       [ 9.36113205e-03,  1.19075609e-01,  1.28861926e-02, ...,
         5.41165173e-03, -5.90506178e-05,  6.88490694e-03],
       [ 6.84443450e-04,  1.28861926e-02,  1.19912803e-01, ...,
        -6.13828992e-03,  1.25014925e-02, -2.11518247e-03],
       ...,
       [-2.32679769e-02,  5.41165173e-03, -6.13828992e-03, ...,
         2.48439364e-01, -3.82721064e-02,  4.80840389e-04],
       [-3.90986181e-03, -5.90506178e-05,  1.25014925e-02, ...,
        -3.82721064e-02,  3.39630761e-01, -5.35809042e-02],
       [ 1.60182080e-02,  6.88490694e-03, -2.11518247e-03, ...,
         4.80840389e-04, -5.35809042e-02,  2.71021784e-01]])

In [108]:
cov_empir

array([[ 0.9030155 ,  0.43696357,  0.19571924, ...,  0.06978713,
         0.06980953,  0.02296126],
       [ 0.43696357,  0.99124956,  0.61667035, ...,  0.07869875,
         0.02209223,  0.07085664],
       [ 0.19571924,  0.61667035,  0.96804366, ...,  0.10616198,
        -0.09106277,  0.02197274],
       ...,
       [ 0.06978713,  0.07869875,  0.10616198, ...,  1.01270856,
         0.5186546 ,  0.38425201],
       [ 0.06980953,  0.02209223, -0.09106277, ...,  0.5186546 ,
         0.98808604,  0.51923817],
       [ 0.02296126,  0.07085664,  0.02197274, ...,  0.38425201,
         0.51923817,  1.05212325]])

In [109]:
gl = GraphicalLasso(assume_centered=True, alpha=0.25, tol=1e-4).fit(X)

In [110]:
gl.precision_

array([[ 1.2852561 , -0.18191025, -0.        , ..., -0.        ,
        -0.        , -0.        ],
       [-0.18191025,  1.30695299, -0.42002534, ..., -0.        ,
        -0.        , -0.        ],
       [-0.        , -0.42002534,  1.40382108, ..., -0.        ,
         0.        , -0.        ],
       ...,
       [-0.        , -0.        , -0.        , ...,  1.11683749,
        -0.26577359, -0.06358468],
       [-0.        , -0.        ,  0.        , ..., -0.26577359,
         1.23859462, -0.25257571],
       [-0.        , -0.        , -0.        , ..., -0.06358468,
        -0.25257571,  1.08902042]])

In [111]:
print(np.linalg.norm(C.dot(C.T) - np.linalg.inv(c)))
print(np.linalg.norm(C.dot(C.T)/10 - np.linalg.inv(c)))
print(np.linalg.norm(gl.precision_ - np.linalg.inv(c)))
print(np.linalg.norm(np.eye(d) - np.linalg.inv(c)))

101.58320285676734
15.486163684815262
9.677548073635403
14.087031072743628


In [112]:
print(np.linalg.norm(np.diag(C.dot(C.T)/10) - np.diag(np.linalg.inv(c))))
print(np.linalg.norm(np.diag(gl.precision_) - np.diag(np.linalg.inv(c))))
print(np.linalg.norm(np.diag(np.eye(d)) - np.diag(np.linalg.inv(c))))

9.687289015586297
5.619379139424423
8.124038404635959


In [113]:
print(np.diag(gl.precision_)[:10])
print(np.diag(C.dot(C.T)-9)[:10])
print(np.diag(np.linalg.inv(c))[:10])

[1.2852561  1.30695299 1.40382108 1.21949981 1.23904836 1.31463549
 1.2039896  1.02636606 1.33756259 1.47205837]
[0.89520193 1.33659652 1.44152307 1.33496846 1.65222431 1.52922659
 1.75320315 1.05496604 1.41765909 1.74532465]
[1.33333333 1.66666667 1.66666667 1.66666667 1.66666667 1.66666667
 1.66666667 1.66666667 1.66666667 1.66666667]
