Try the same as last time but with stronger regul_lambda to make it further away from the empirical covariance. Even with large regul lambda it does not make a difference. It seems that it is not difficult to make a good approximation of cov_empir such that the resulting preconditioned GD algorithm will have a different limit than GD

In [29]:
import sys, json
from datetime import datetime as dt
import numpy as np
import scipy
from preconditioners.utils import generate_c, generate_centered_gaussian_data
from preconditioners.impl_cov_approx import *
from sklearn.covariance import  GraphicalLasso

In [30]:
# parameters of the run
n_epochs = 100
iter_per_epoch = 500
tol = 0.1
n = 50
d = 150
ro = 0.5
regime = 'autoregressive'
regul_lambda = 1000
lr_start = 0.05
lr_decay = 0.98

params = {
    'n_epochs' : n_epochs,
    'iter_per_epoch' : iter_per_epoch,
    'tol' : tol,
    'n' : n,
    'd' : d,
    'ro' : ro,
    'regime' : regime,
    'regul_lambda' : regul_lambda,
    'lr_start' : lr_start,
    'lr_decay' : lr_decay
}

In [31]:
# generate data and initialization
c = generate_c(ro=ro,
                regime=regime,
                n=n,
                d=d
                )
w_star = np.random.multivariate_normal(mean=np.zeros(d), cov=np.eye(d))
X, y, xi = generate_centered_gaussian_data(w_star,
                                            c,
                                            n=n,
                                            d=d,
                                            sigma2=1,
                                            fix_norm_of_x=False)

# initialize C (cholesky), cov_inv, regul_lambda and learning rate
# is this a good way to initialize?
cov_empir = X.T.dot(X) / n
cov_inv = np.linalg.inv(cov_empir + 0.1 * np.eye(d))
C = scipy.linalg.cholesky(cov_inv) + 0.1 * generate_c(ro=0.1,
                                                        regime='autoregressive',
                                                        n=n,
                                                        d=d,
                                                        )




In [32]:
# run optimization
for epoch in range(n_epochs):
    if epoch == 0:
        lr = lr_start
    else:
        lr = lr * lr_decay

    for i in range(iter_per_epoch):
        # compute loss and gradient
        cov_inv = C.dot(C.T)
        loss_val = loss(cov_inv, X, cov_empir, regul_lambda)
        grad_loss_val = grad_loss(C, cov_empir, regul_lambda, X)
        error = np.linalg.norm(grad_loss_val)

        # update C (+ because we are maximizing)
        C = C + lr * grad_loss_val
        # update regul_lambda
        # regul_lambda = regul_lambda - lr*np.trace(grad_loss_val.dot(grad_loss_val.T))
        # check if we are done
        if i % 50 == 0:
            print(f"iteration {i}/{iter_per_epoch} of epoch {epoch}/{n_epochs}, loss {loss_val} and error {error}")
        if error < tol:
            break

dtstamp = str(dt.now()).replace(' ', '_')
with open(f'results_{dtstamp}.json', 'w') as f:
    json.dump({'C' : C.tolist(), 'loss' : float(loss_val), 'error' : float(error), 'parans' : params}, f)

iteration 0/500 of epoch 0/100, loss -541.9643882944864 and error 119.21085320304746
iteration 50/500 of epoch 0/100, loss 198.59074469696267 and error 4.708374384920027
iteration 100/500 of epoch 0/100, loss 242.6208558322092 and error 3.765483375457212
iteration 150/500 of epoch 0/100, loss 272.9735299889714 and error 3.232254086579383
iteration 200/500 of epoch 0/100, loss 296.19403634446275 and error 2.8768439093229317
iteration 250/500 of epoch 0/100, loss 315.01258731203365 and error 2.6179992152419898
iteration 300/500 of epoch 0/100, loss 330.8380248672936 and error 2.418573562144262
iteration 350/500 of epoch 0/100, loss 344.49437869853693 and error 2.258802587511555
iteration 400/500 of epoch 0/100, loss 356.50586625464894 and error 2.127060681282179
iteration 450/500 of epoch 0/100, loss 367.2268484720718 and error 2.016000572552507
iteration 0/500 of epoch 1/100, loss 376.9081708758089 and error 1.920719931538273
iteration 50/500 of epoch 1/100, loss 385.5647930182532 and e

  r = _umath_linalg.det(a, signature=signature)


iteration 250/500 of epoch 22/100, loss inf and error 0.4669183300660834
iteration 300/500 of epoch 22/100, loss inf and error 0.4661046551619229
iteration 350/500 of epoch 22/100, loss inf and error 0.46529521927709305
iteration 400/500 of epoch 22/100, loss inf and error 0.4644899857321367
iteration 450/500 of epoch 22/100, loss inf and error 0.46368891829026054
iteration 0/500 of epoch 23/100, loss inf and error 0.4628919811506994
iteration 50/500 of epoch 23/100, loss inf and error 0.462114956148296
iteration 100/500 of epoch 23/100, loss inf and error 0.461341831015109
iteration 150/500 of epoch 23/100, loss inf and error 0.46057257323745776
iteration 200/500 of epoch 23/100, loss inf and error 0.4598071506802759
iteration 250/500 of epoch 23/100, loss inf and error 0.4590455315807646
iteration 300/500 of epoch 23/100, loss inf and error 0.45828768454341495
iteration 350/500 of epoch 23/100, loss inf and error 0.45753357853417476
iteration 400/500 of epoch 23/100, loss inf and err

KeyboardInterrupt: 

In [33]:
c

array([[1.00000000e+00, 5.00000000e-01, 2.50000000e-01, ...,
        5.60519386e-45, 2.80259693e-45, 1.40129846e-45],
       [5.00000000e-01, 1.00000000e+00, 5.00000000e-01, ...,
        1.12103877e-44, 5.60519386e-45, 2.80259693e-45],
       [2.50000000e-01, 5.00000000e-01, 1.00000000e+00, ...,
        2.24207754e-44, 1.12103877e-44, 5.60519386e-45],
       ...,
       [5.60519386e-45, 1.12103877e-44, 2.24207754e-44, ...,
        1.00000000e+00, 5.00000000e-01, 2.50000000e-01],
       [2.80259693e-45, 5.60519386e-45, 1.12103877e-44, ...,
        5.00000000e-01, 1.00000000e+00, 5.00000000e-01],
       [1.40129846e-45, 2.80259693e-45, 5.60519386e-45, ...,
        2.50000000e-01, 5.00000000e-01, 1.00000000e+00]])

In [34]:
np.linalg.inv(c)

array([[ 1.33333333e+000, -6.66666667e-001, -5.55111512e-017, ...,
         4.14867685e-061,  2.07433843e-061, -1.03716921e-061],
       [-6.66666667e-001,  1.66666667e+000, -6.66666667e-001, ...,
        -5.76344236e-224, -2.88172118e-224,  5.76344236e-224],
       [ 0.00000000e+000, -6.66666667e-001,  1.66666667e+000, ...,
        -2.07649895e-207, -1.03824947e-207,  2.07649895e-207],
       ...,
       [ 0.00000000e+000,  0.00000000e+000,  0.00000000e+000, ...,
         1.66666667e+000, -6.66666667e-001,  1.85037171e-017],
       [ 0.00000000e+000,  0.00000000e+000,  0.00000000e+000, ...,
        -6.66666667e-001,  1.66666667e+000, -6.66666667e-001],
       [ 0.00000000e+000,  0.00000000e+000,  0.00000000e+000, ...,
         0.00000000e+000, -6.66666667e-001,  1.33333333e+000]])

In [35]:
C.dot(C.T)

array([[ 1.81260209e+03, -2.99210942e+02, -4.21064928e+01, ...,
         5.59333106e+01,  1.37193968e+02, -3.22057543e+01],
       [-2.99210942e+02,  1.71527560e+03, -5.21258523e+02, ...,
        -5.27331087e-01,  3.92474530e+00, -6.79178212e+01],
       [-4.21064928e+01, -5.21258523e+02,  1.95339977e+03, ...,
        -4.29586940e+01, -9.73990995e+01, -2.31519117e+02],
       ...,
       [ 5.59333106e+01, -5.27331087e-01, -4.29586940e+01, ...,
         1.99074961e+03, -3.16946294e+02, -4.52294362e+01],
       [ 1.37193968e+02,  3.92474530e+00, -9.73990995e+01, ...,
        -3.16946294e+02,  1.78572124e+03, -4.52797408e+02],
       [-3.22057543e+01, -6.79178212e+01, -2.31519117e+02, ...,
        -4.52294362e+01, -4.52797408e+02,  1.68545442e+03]])

In [36]:
np.linalg.inv(C.dot(C.T))

array([[ 1.00663573,  0.530494  ,  0.17644185, ..., -0.16680783,
        -0.10345324,  0.09956663],
       [ 0.530494  ,  1.53609714,  0.91136137, ..., -0.01643978,
        -0.13304121,  0.0951468 ],
       [ 0.17644185,  0.91136137,  1.27367671, ...,  0.03067205,
         0.07251723,  0.30137341],
       ...,
       [-0.16680783, -0.01643978,  0.03067205, ...,  0.96090307,
         0.45943137,  0.18018207],
       [-0.10345324, -0.13304121,  0.07251723, ...,  0.45943137,
         1.01381038,  0.56018859],
       [ 0.09956663,  0.0951468 ,  0.30137341, ...,  0.18018207,
         0.56018859,  1.27247214]])

In [37]:
cov_empir

array([[ 1.00643195,  0.53051948,  0.17643347, ..., -0.16682012,
        -0.10348244,  0.09957169],
       [ 0.53051948,  1.5358976 ,  0.91143317, ..., -0.01643559,
        -0.1330344 ,  0.09515347],
       [ 0.17643347,  0.91143317,  1.27344689, ...,  0.03068474,
         0.07252151,  0.30139127],
       ...,
       [-0.16682012, -0.01643559,  0.03068474, ...,  0.96066331,
         0.45948199,  0.1801907 ],
       [-0.10348244, -0.1330344 ,  0.07252151, ...,  0.45948199,
         1.01354976,  0.56021577],
       [ 0.09957169,  0.09515347,  0.30139127, ...,  0.1801907 ,
         0.56021577,  1.27225556]])

In [38]:
gl = GraphicalLasso(assume_centered=True, alpha=0.25, tol=1e-4).fit(X)

In [39]:
gl.precision_

array([[ 1.08597124, -0.18948688, -0.        , ...,  0.        ,
         0.        , -0.        ],
       [-0.18948688,  0.91331423, -0.41805074, ...,  0.        ,
         0.        , -0.        ],
       [-0.        , -0.41805074,  1.22116199, ..., -0.        ,
        -0.        , -0.03802007],
       ...,
       [ 0.        ,  0.        , -0.        , ...,  1.24520066,
        -0.21546217, -0.        ],
       [ 0.        ,  0.        , -0.        , ..., -0.21546217,
         1.14718273, -0.26013489],
       [-0.        , -0.        , -0.03802007, ..., -0.        ,
        -0.26013489,  0.8875395 ]])