In [1]:
import warnings

import numpy as np
import pandas as pd

In [2]:
def sym(A: np.ndarray) -> np.ndarray:
    """Return the symmetric part of ``A``: the average of ``A`` and its transpose.

    A new array is returned; ``A`` itself is not modified.
    """
    transposed = A.T
    return (A + transposed) / 2.0

def proj_psd(A: np.ndarray, eps: float = 1e-12) -> np.ndarray:
    """Rebuild the symmetric part of ``A`` with its eigenvalues floored at ``eps``.

    Parameters
    ----------
    A : np.ndarray
        Square matrix; only its symmetric part (via ``sym``) is used.
    eps : float
        Lower bound applied to every eigenvalue before reconstruction.

    Returns
    -------
    np.ndarray
        ``V @ diag(max(w, eps)) @ V.T`` from the eigendecomposition of the
        symmetrized input, symmetrized once more to remove round-off asymmetry.
    """
    eigvals, eigvecs = np.linalg.eigh(sym(A))
    floored = np.maximum(eigvals, eps)
    rebuilt = eigvecs @ np.diag(floored) @ eigvecs.T
    return sym(rebuilt)

def cov_to_corr(S: np.ndarray):
    """Split a covariance matrix into a correlation matrix and standard deviations.

    Parameters
    ----------
    S : np.ndarray
        Square covariance-like matrix. It is symmetrized first and not modified.

    Returns
    -------
    C : np.ndarray
        ``S`` scaled on both sides by the inverse standard deviations, with the
        diagonal forced to exactly 1 and the result re-symmetrized.
    d : np.ndarray
        Square roots of the diagonal of ``S``; negative diagonal entries are
        clipped to 0 before the square root.

    Notes
    -----
    A variable whose standard deviation is 0 gets an inverse scale of 0, so all
    of its off-diagonal correlations come out as 0 (its diagonal is still 1).
    """
    S = sym(S)
    d = np.sqrt(np.clip(np.diag(S), 0.0, None))
    # Divide only where d > 0. ``np.where(d > 0, 1.0 / d, 0.0)`` would evaluate
    # ``1.0 / d`` for every entry first and emit a divide-by-zero RuntimeWarning
    # for zero-variance variables even though that value is then discarded.
    invd = np.divide(1.0, d, out=np.zeros_like(d), where=d > 0)
    C = (S * invd).T * invd
    np.fill_diagonal(C, 1.0)
    return sym(C), d

def corr_to_cov(C: np.ndarray, d: np.ndarray) -> np.ndarray:
    """Scale a correlation matrix back to a covariance matrix.

    Parameters
    ----------
    C : np.ndarray
        Correlation matrix; it is symmetrized before scaling.
    d : np.ndarray
        Scale factors applied to both sides of ``C`` (expected to be the
        per-variable standard deviations, one entry per row/column).

    Returns
    -------
    np.ndarray
        Symmetrized matrix with entries ``C[i, j] * d[i] * d[j]``.
    """
    corr = sym(C)
    scaled_once = corr * d
    scaled_twice = scaled_once.T * d
    return sym(scaled_twice)

def ensure_psd_numerical(S: np.ndarray, eps: float = 1e-12, neg_tol: float = 1e-10) -> np.ndarray:
    """Shift the diagonal of ``S`` if its smallest eigenvalue is meaningfully negative.

    Parameters
    ----------
    S : np.ndarray
        Square matrix; it is symmetrized first and not modified.
    eps : float
        Margin added on top of the shift, so the smallest eigenvalue ends up
        at roughly ``eps`` rather than exactly 0.
    neg_tol : float
        Negativity tolerance. The shift is applied only when the smallest
        eigenvalue is below ``-neg_tol``; smaller negative values are treated
        as round-off and left alone. The default reproduces the previously
        hard-coded threshold of ``1e-10``.

    Returns
    -------
    np.ndarray
        The symmetrized input, plus ``(-min_eig + eps) * I`` when the shift is
        triggered. Note that the shift raises every diagonal entry by the same
        amount.
    """
    S = sym(S)
    mineig = np.min(np.linalg.eigvalsh(S))
    if mineig < -neg_tol:  # only if meaningfully negative
        S = S + (-mineig + eps) * np.eye(S.shape[0])
    return sym(S)

In [3]:
def near_psd_correlation(C: np.ndarray, eps: float = 1e-12) -> np.ndarray:
    """Repair a correlation matrix by flooring eigenvalues and rescaling to unit diagonal.

    Parameters
    ----------
    C : np.ndarray
        Square correlation-like matrix (possibly not positive semi-definite).
    eps : float
        Eigenvalue floor forwarded to ``proj_psd``.

    Returns
    -------
    np.ndarray
        Symmetric matrix with a diagonal of exactly 1, obtained by rescaling
        the eigenvalue-floored matrix with its own diagonal.
    """
    B = proj_psd(C, eps=eps)
    # Rescaling a matrix to unit diagonal is exactly what cov_to_corr does, so
    # reuse it instead of duplicating the sqrt / inverse / fill_diagonal steps.
    C_fixed, _ = cov_to_corr(B)
    return C_fixed

def near_psd_covariance(S: np.ndarray, eps: float = 1e-12) -> np.ndarray:
    """Repair a covariance matrix via ``near_psd_correlation`` on its correlation form.

    The matrix is converted to a correlation matrix plus standard deviations,
    the correlation part is repaired, the original standard deviations are
    re-applied, and the result goes through ``ensure_psd_numerical``.

    Parameters
    ----------
    S : np.ndarray
        Square covariance-like matrix.
    eps : float
        Forwarded to both ``near_psd_correlation`` and ``ensure_psd_numerical``.
    """
    corr, std = cov_to_corr(S)
    repaired_cov = corr_to_cov(near_psd_correlation(corr, eps=eps), std)
    return ensure_psd_numerical(repaired_cov, eps=eps)

In [4]:
def higham_nearest_correlation(C: np.ndarray, tol: float = 1e-10, max_iter: int = 200, eps: float = 1e-12) -> np.ndarray:
    """Iteratively repair a correlation matrix by alternating projections.

    Each iteration subtracts the carried correction ``delta`` from the current
    iterate, applies the eigenvalue-floor projection ``proj_psd``, updates the
    correction, and then resets the diagonal to 1.

    Parameters
    ----------
    C : np.ndarray
        Square correlation-like matrix; it is symmetrized first and not modified.
    tol : float
        Stop once the Frobenius-norm change between consecutive unit-diagonal
        iterates, divided by ``max(1, ||Y||_F)``, drops below this value.
    max_iter : int
        Maximum number of iterations.
    eps : float
        Eigenvalue floor forwarded to ``proj_psd``.

    Returns
    -------
    np.ndarray
        The last unit-diagonal iterate, symmetrized.

    Warns
    -----
    RuntimeWarning
        If ``max_iter`` iterations were run without meeting ``tol``. The last
        iterate is still returned, so callers get the same value as before but
        are told it may not have converged.
    """
    Y = sym(C)
    delta = np.zeros_like(Y)

    for _ in range(max_iter):
        # Remove the correction accumulated from the previous PSD projection.
        R = Y - delta
        X = proj_psd(R, eps=eps)
        delta = X - R

        # Project onto the unit-diagonal set.
        Y_new = X.copy()
        np.fill_diagonal(Y_new, 1.0)
        Y_new = sym(Y_new)

        rel = np.linalg.norm(Y_new - Y, ord="fro") / max(1.0, np.linalg.norm(Y, ord="fro"))
        Y = Y_new
        if rel < tol:
            break
    else:
        # Reached only when the loop ran out of iterations without `break`.
        # Skip the warning when no iteration was requested at all.
        if max_iter > 0:
            warnings.warn(
                f"higham_nearest_correlation did not converge within {max_iter} "
                f"iterations (last relative change {rel:.3e}, tol {tol:.3e})",
                RuntimeWarning,
                stacklevel=2,
            )

    return sym(Y)

def higham_covariance(S: np.ndarray, tol: float = 1e-10, max_iter: int = 200, eps: float = 1e-12) -> np.ndarray:
    """Repair a covariance matrix via ``higham_nearest_correlation`` on its correlation form.

    The matrix is converted to a correlation matrix plus standard deviations,
    the correlation part is repaired, the original standard deviations are
    re-applied, and the result goes through ``ensure_psd_numerical``.

    Parameters
    ----------
    S : np.ndarray
        Square covariance-like matrix.
    tol, max_iter, eps
        Forwarded to ``higham_nearest_correlation``; ``eps`` is also forwarded
        to ``ensure_psd_numerical``.
    """
    corr, std = cov_to_corr(S)
    repaired_corr = higham_nearest_correlation(corr, tol=tol, max_iter=max_iter, eps=eps)
    repaired_cov = corr_to_cov(repaired_corr, std)
    return ensure_psd_numerical(repaired_cov, eps=eps)

In [5]:
def read_matrix_csv(path: str) -> pd.DataFrame:
    """Read a CSV with ``pd.read_csv`` and label its rows with its column names.

    The file is expected to hold a square matrix with a header row and no
    index column; assigning the column labels as the index raises a
    ``ValueError`` if the number of rows differs from the number of columns.
    """
    matrix_df = pd.read_csv(path)
    labels = matrix_df.columns
    matrix_df.index = labels
    return matrix_df

In [6]:
# Input files: one used as the covariance matrix, one as the correlation matrix.
cov_in, corr_in = "testout_1.3.csv", "testout_1.4.csv"

# Labelled frames; their column names are reused to label the results below.
S_df, C_df = read_matrix_csv(cov_in), read_matrix_csv(corr_in)

# Plain float arrays for the numerical routines.
S, C = S_df.to_numpy(dtype=float), C_df.to_numpy(dtype=float)

In [7]:
# Task 3.1: eigenvalue-floor repair of the covariance input, labelled like the input.
S_31 = near_psd_covariance(S)
out_31 = pd.DataFrame(data=S_31, index=S_df.columns, columns=S_df.columns)
print("Task 3.1 - out_31:\n", out_31)

Task 3.1 - out_31:
           x1        x2        x3        x4        x5
x1  1.173986 -0.617989 -0.284559 -0.065152 -0.688287
x2 -0.617989  1.318197  0.017092  0.445696  0.139176
x3 -0.284559  0.017092  0.918102  0.354147  0.246056
x4 -0.065152  0.445696  0.354147  0.894764 -0.218717
x5 -0.688287  0.139176  0.246056 -0.218717  0.522607


In [8]:
# Task 3.2: eigenvalue-floor repair of the correlation input, labelled like the input.
C_32 = near_psd_correlation(C)
out_32 = pd.DataFrame(data=C_32, index=C_df.columns, columns=C_df.columns)
print("Task 3.2 - out_32:\n", out_32)

Task 3.2 - out_32:
           x1        x2        x3        x4        x5
x1  1.000000 -0.483199 -0.241787 -0.067767 -0.714761
x2 -0.483199  1.000000  0.015446  0.405660  0.178286
x3 -0.241787  0.015446  1.000000  0.488250  0.336248
x4 -0.067767  0.405660  0.488250  1.000000 -0.322136
x5 -0.714761  0.178286  0.336248 -0.322136  1.000000


In [9]:
# Task 3.3: Higham-style repair of the covariance input, labelled like the input.
S_33 = higham_covariance(S)
out_33 = pd.DataFrame(data=S_33, index=S_df.columns, columns=S_df.columns)
print("Task 3.3 - out_33:\n", out_33)

Task 3.3 - out_33:
           x1        x2        x3        x4        x5
x1  1.173986 -0.623870 -0.294335 -0.057677 -0.693888
x2 -0.623870  1.318197  0.016449  0.448579  0.143703
x3 -0.294335  0.016449  0.918102  0.354067  0.246866
x4 -0.057677  0.448579  0.354067  0.894764 -0.217062
x5 -0.693888  0.143703  0.246866 -0.217062  0.522607


In [10]:
# Task 3.4: Higham-style repair of the correlation input, labelled like the input.
C_34 = higham_nearest_correlation(C)
out_34 = pd.DataFrame(data=C_34, index=C_df.columns, columns=C_df.columns)
print("Task 3.4 - out_34:\n", out_34)

Task 3.4 - out_34:
           x1        x2        x3        x4        x5
x1  1.000000 -0.483199 -0.241787 -0.067767 -0.714761
x2 -0.483199  1.000000  0.015446  0.405660  0.178286
x3 -0.241787  0.015446  1.000000  0.488250  0.336248
x4 -0.067767  0.405660  0.488250  1.000000 -0.322136
x5 -0.714761  0.178286  0.336248 -0.322136  1.000000
