# Translating functions VoC for fitting ridge regressions

In [1]:
import numpy as np
from scipy.linalg import svd
from sklearn.linear_model import Ridge

In [2]:
def ridgesvd(Y, X, lambd):
    """
    Computes ridge regression coefficients using singular value decomposition.
    
    Parameters:
        Y : array_like
            Target vector of shape (T,).
        X : array_like
            Design matrix of shape (T, P).
        lambd : array_like
            Array of ridge regularization parameters of shape (L,).
    
    Returns:
        B : ndarray
            Ridge regression coefficients of shape (P, L).
    """
    if np.isnan(X).sum() + np.isnan(Y).sum() > 0:
        raise ValueError("Missing data")

    L = len(lambd)
    # MATLAB uses 'gesvd', default is 'gesdd'
    U, d, Vt = svd(X, check_finite=False, lapack_driver='gesvd') 
    T, P = X.shape

    if T >= P:
        compl = np.zeros((P, T - P))
    else:
        compl = np.zeros((P - T, T))
    
    B = np.zeros((P, L))

    for l in range(L):
        if T >= P:
            B[:, l] = Vt.T @ np.hstack((np.diag(d / (d**2 + lambd[l])), compl)) @ U.T @ Y
        else:
            B[:, l] = Vt.T @ np.vstack((np.diag(d / (d**2 + lambd[l])), compl)) @ U.T @ Y
    
    return B



In [4]:
def get_beta(Y, X, lambda_list):
    """
    Computes beta coefficients using ridge regression with SVD.
    
    Parameters:
        Y : array_like
            Target vector of shape (T,).
        X : array_like
            Features matrix of shape (T, P).
        lambda_list : array_like
            Array of ridge regularization parameters of shape (L,).
    
    Returns:
        B : ndarray
            Ridge regression coefficients of shape (P, L).
    """
    if np.isnan(X).sum() + np.isnan(Y).sum() > 0:
        raise ValueError("Missing data")
    
    L_ = len(lambda_list)
    T_ = X.shape[0]
    P_ = X.shape[1]

    if P_ > T_:
        a_matrix = X @ X.T / T_  # T_ x T_
    else:
        a_matrix = X.T @ X / T_  # P_ x P_

    U_a, d_a, _ = svd(a_matrix, check_finite=False, lapack_driver='gesvd')
    scale_eigval = ((d_a * T_)**(-1/2))

    # originally only the X.T version was implemented, but that
    # causes error in multiplication of matrices even in the original
    # MATLAB code. The second brach (P<=T) is not used because in that
    # regime 'ridgesvd' is used, not 'get_beta'.
    if P_ > T_:
        W = X.T @ U_a @ np.diag(scale_eigval)
    else:
        W = X @ U_a @ np.diag(scale_eigval)

    a_matrix_eigval = d_a #.reshape(-1, 1)  # P_ x 1
    
    # FIXME the following code does not run for P<=T
    # Fix it so that it runs. 
    signal_times_return = X.T @ Y / T_  # (SR): M x 1
    signal_times_return_times_v = W.T @ signal_times_return  # V' * (SR): T_ x 1

    B = np.zeros((P_, L_))
    for l in range(L_):
        B[:, l] = W @ np.diag(1 / (a_matrix_eigval + lambda_list[l])) @ signal_times_return_times_v

    return B


In [16]:
# Example usage:
T = 100
P = 1000
X = np.random.randn(T, P)
Y = np.random.randn(T)
lambd = np.array([1])

B_ridgesvd = ridgesvd(Y, X, lambd).flatten()
B_get_beta = get_beta(Y, X, lambd).flatten()
B_sklearn = np.linalg.inv(X.T @ X + lambd[0] * np.eye(P)) @ X.T @ Y
B_sklearn_svd = Ridge(alpha=lambd[0], fit_intercept=False, solver="svd").fit(X, Y).coef_
B_sklearn_auto = Ridge(alpha=lambd[0], fit_intercept=False, solver="auto").fit(X, Y).coef_
B_sklearn_cholesky = Ridge(alpha=lambd[0], fit_intercept=False, solver="cholesky").fit(X, Y).coef_


ref = B_ridgesvd
print(np.sum(abs(ref - B_get_beta)))
print(np.sum(abs(ref - B_ridgesvd)))
print(np.sum(abs(ref - B_sklearn)))
print(np.sum(abs(ref - B_sklearn_svd)))
print(np.sum(abs(ref - B_sklearn_auto)))
print(np.sum(abs(ref - B_sklearn_cholesky)))

0.951307904983197
0.0
1.6281699838187336e-11
2.7122509967114627e-14
2.2494674309023188e-14
2.2494674309023188e-14


In [17]:
%timeit ridgesvd(Y, X, lambd).flatten()
%timeit get_beta(Y, X, lambd).flatten()
%timeit np.linalg.inv(X.T @ X + lambd[0] * np.eye(P)) @ X.T @ Y
%timeit Ridge(alpha=lambd[0], fit_intercept=False, solver="svd").fit(X, Y).coef_
%timeit Ridge(alpha=lambd[0], fit_intercept=False, solver="auto").fit(X, Y).coef_
%timeit Ridge(alpha=lambd[0], fit_intercept=False, solver="cholesky").fit(X, Y).coef_


75.6 ms ± 4.27 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
18.5 ms ± 3.53 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)
119 ms ± 4.71 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
30.7 ms ± 3.7 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
1.87 ms ± 118 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
1.85 ms ± 134 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
