# Low-Rank Autoregressive Matrix Completion (LAMC)


In [1]:
import numpy as np

def ten2mat(tensor, mode):
    """Unfold `tensor` along axis `mode` into a matrix.

    Axis `mode` is moved to the front and the remaining axes are flattened
    in Fortran (column-major) order, so the result has
    ``tensor.shape[mode]`` rows.
    """
    n_rows = tensor.shape[mode]
    fronted = np.moveaxis(tensor, mode, 0)
    return fronted.reshape((n_rows, -1), order = 'F')

def mat2ten(mat, dim, mode):
    """Fold the matrix `mat` into a tensor of shape `dim` along axis `mode`.

    `dim` must be a NumPy array of the target tensor sizes (it is indexed
    with a list of axes). The matrix is reshaped in Fortran order to the
    sizes reordered with `mode` first, then that leading axis is moved back
    to position `mode`.
    """
    axis_order = [mode] + [axis for axis in range(dim.shape[0]) if axis != mode]
    folded = np.reshape(mat, list(dim[axis_order]), order = 'F')
    return np.moveaxis(folded, 0, mode)

def svt_tnn(mat, tau, theta):
    """Shrink the singular values of `mat`, leaving the leading `theta` intact.

    Singular values not exceeding `tau` are discarded. Of the retained ones,
    the first `theta` are kept unchanged and the rest are reduced by `tau`.
    Strongly rectangular inputs take a cheaper route through the small Gram
    matrix instead of decomposing `mat` directly.
    """
    n_rows, n_cols = mat.shape

    # Tall case: process the transpose, which falls into the wide branch.
    if n_rows > 2 * n_cols:
        return svt_tnn(mat.T, tau, theta).T

    # Wide case: decompose mat @ mat.T, whose singular values are the squared
    # singular values of mat, and rescale the projection onto the kept
    # left singular vectors.
    if 2 * n_rows < n_cols:
        left, gram_vals, _ = np.linalg.svd(mat @ mat.T, full_matrices = False)
        sing = np.sqrt(gram_vals)
        keep = np.sum(sing > tau)
        scale = np.zeros(keep)
        scale[: theta] = 1
        scale[theta : keep] = (sing[theta : keep] - tau) / sing[theta : keep]
        basis = left[:, : keep]
        return (basis @ np.diag(scale)) @ (basis.T @ mat)

    # General case: thin SVD of mat itself.
    left, sing, right_t = np.linalg.svd(mat, full_matrices = False)
    keep = np.sum(sing > tau)
    shrunk = sing[: keep].copy()
    shrunk[theta : keep] = sing[theta : keep] - tau
    return left[:, : keep] @ np.diag(shrunk) @ right_t[: keep, :]

def compute_mape(var, var_hat):
    """Return the sum of |var - var_hat| / var divided by ``var.shape[0]``.

    For 1-D inputs this is the mean absolute percentage error as a fraction.
    Zero entries in `var` are not guarded against.
    """
    relative_error = np.abs(var - var_hat) / var
    return np.sum(relative_error) / var.shape[0]

def compute_rmse(var, var_hat):
    """Return sqrt(sum((var - var_hat)^2) / var.shape[0]).

    For 1-D inputs this is the root mean squared error.
    """
    squared_error = (var - var_hat) ** 2
    return np.sqrt(np.sum(squared_error) / var.shape[0])

from scipy import sparse
from scipy.sparse.linalg import spsolve as spsolve

def generate_Psi(dim_time, time_lags):
    """Build the sparse column-selection matrices for each time lag.

    Returns a list of ``len(time_lags) + 1`` COO matrices, each of shape
    ``(dim_time - max_lag, dim_time)`` with a single 1 per row. The first
    matrix selects columns ``max_lag, ..., dim_time - 1``; the matrix for
    lag ``h`` selects the same columns shifted back by ``h``.
    """
    max_lag = np.max(time_lags)
    n_rows = dim_time - max_lag
    Psis = []
    # A shift of 0 yields the unlagged selector, followed by one per lag.
    for shift in [0, *time_lags]:
        row_idx = np.arange(0, n_rows)
        col_idx = np.arange(0, n_rows) + max_lag - shift
        ones = np.ones(n_rows)
        selector = sparse.coo_matrix((ones, (row_idx, col_idx)), shape = (n_rows, dim_time))
        Psis.append(selector)
    return Psis

def imputer(dense_mat, sparse_mat, time_lags, rho0, lambda0, theta, 
            epsilon = 1e-4, maxiter = 100, K = 3):
    """Low-Rank Autoregressive Matrix Completion, LAMC-imputer.

    Fills the zero entries of `sparse_mat` by alternating a singular-value
    shrinkage step (`svt_tnn`), a per-row sparse linear solve that couples
    each row to its own lagged values, and a least-squares refit of the
    per-row autoregressive coefficients.

    Parameters
    ----------
    dense_mat : 2-D array
        Reference data. Only used to pick the evaluation entries (non-zero
        here, zero in `sparse_mat`) for the printed MAPE/RMSE.
    sparse_mat : 2-D array
        Partially observed data; entries equal to 0 are treated as missing.
        The array itself is not modified (working copies are made).
    time_lags : sequence of int
        Lags used by the autoregressive term.
    rho0 : float
        Initial value of `rho`; `rho` grows by 5% per inner iteration and is
        capped at 1e5.
    lambda0 : float
        Enters the row-wise linear systems through the ratio `rho / lambda0`.
    theta : int
        Forwarded to `svt_tnn` as its third argument.
    epsilon : float
        Stop once the relative change of `X` between outer iterations drops
        below this value.
    maxiter : int
        Maximum number of outer iterations.
    K : int
        Number of inner iterations per outer iteration. `X` is only assigned
        inside the inner loop, so `K` must be at least 1.

    Returns
    -------
    2-D array
        `X`, the last output of `svt_tnn`.

    Notes
    -----
    The coefficient matrix is initialised with `np.random.rand`, so results
    depend on the state of NumPy's global random generator. Progress and
    final error metrics are printed to stdout.
    """
    
    dim = np.array(sparse_mat.shape)
    d = len(time_lags)
    max_lag = np.max(time_lags)
    # Zeros in sparse_mat mark the entries to impute.
    pos_missing = np.where(sparse_mat == 0)
    # Evaluation set: known in dense_mat but hidden in sparse_mat.
    pos_test = np.where((dense_mat != 0) & (sparse_mat == 0))
    dense_test = dense_mat[pos_test]
    del dense_mat
    
    # T: running term updated as T + rho * (X - Z) below
    # (presumably the multiplier of an ADMM-style scheme -- confirm).
    T = np.zeros(dim)
    # Z: working copy whose missing entries start at the mean of the observed ones.
    Z = sparse_mat.copy()
    Z[pos_missing] = np.mean(sparse_mat[sparse_mat != 0])
    # A[m, i]: coefficient of lag time_lags[i] for row m, small random start.
    A = 0.001 * np.random.rand(dim[0], d)
    Psis = generate_Psi(dim[1], time_lags)
    # Sparse identity of size dim[1] x dim[1].
    iden = sparse.coo_matrix((np.ones(dim[1]), (np.arange(0, dim[1]), np.arange(0, dim[1]))), shape = (dim[1], dim[1]))
    it = 0
    # ind[i, :] holds the column indices shifted back by time_lags[i], aligned
    # with the target columns max_lag, ..., dim[1] - 1.
    ind = np.zeros((d, dim[1] - max_lag), dtype = np.int_)
    for i in range(d):
        ind[i, :] = np.arange(max_lag - time_lags[i], dim[1] - time_lags[i])
    last_mat = sparse_mat.copy()
    snorm = np.linalg.norm(sparse_mat, 'fro')
    rho = rho0
    while True:
        # Per-row operator B[m] = Psi_0 - sum_i A[m, i] * Psi_{i+1}, rebuilt
        # from the current coefficients.
        B = []
        for m in range(dim[0]):
            Psis0 = Psis.copy()
            for i in range(d):
                Psis0[i + 1] = A[m, i] * Psis[i + 1]
            B.append(Psis0[0] - sum(Psis0[1 :]))
        for k in range(K):
            rho = min(rho * 1.05, 1e5)
            X = svt_tnn(Z - T / rho, 1 / rho, theta)
            temp0 = rho / lambda0 * (X + T / rho)
            mat = np.zeros(dim)
            # Row-wise solve of (B_m^T B_m + (rho / lambda0) I) z = temp0[m].
            for m in range(dim[0]):
                mat[m, :] = spsolve(B[m].T @ B[m] + rho * iden / lambda0, temp0[m, :])
            # Only the missing entries of Z are overwritten; observed ones stay fixed.
            Z[pos_missing] = mat[pos_missing]
            T = T + rho * (X - Z)
        # Refit each row's coefficients by least squares (pseudo-inverse) of
        # Z[m, max_lag:] on its lagged values.
        for m in range(dim[0]):
            Vm = Z[m, ind].T
            A[m, :] = np.linalg.pinv(Vm) @ Z[m, max_lag :]
        # Relative change of X, scaled by the Frobenius norm of sparse_mat.
        tol = np.linalg.norm((X - last_mat), 'fro') / snorm
        last_mat = X.copy()
        it += 1
        # NOTE: progress is reported every 200 iterations, so nothing is
        # printed here unless maxiter is at least 200 (the default is 100).
        if it % 200 == 0:
            print('Iter: {}'.format(it))
            print('Tolerance: {:.6}'.format(tol))
            print('MAPE: {:.6}'.format(compute_mape(dense_test, X[pos_test])))
            print('RMSE: {:.6}'.format(compute_rmse(dense_test, X[pos_test])))
            print()
        if (tol < epsilon) or (it >= maxiter):
            break

    print('Total iteration: {}'.format(it))
    print('Tolerance: {:.6}'.format(tol))
    print('Imputation MAPE: {:.6}'.format(compute_mape(dense_test, X[pos_test])))
    print('Imputation RMSE: {:.6}'.format(compute_rmse(dense_test, X[pos_test])))
    print()
    
    return X

## On the Seattle Freeway Traffic Speed Dataset

### Random Missing Data

In [None]:
import numpy as np
import time

# Grid search over (c, theta) for each random-missing rate.
for r in (0.3, 0.7, 0.9):
    missing_rate = r
    print('Missing rate = {}'.format(r))

    ## Random Missing (RM)
    # Load the stored tensor with its last two axes swapped.
    dense_tensor = np.transpose(np.load('tensor.npz')['arr_0'], (0, 2, 1))
    dim1, dim2, dim3 = dense_tensor.shape
    np.random.seed(1000)
    # Independent 0/1 mask per entry, drawn from a fixed seed.
    sparse_tensor = dense_tensor * np.round(
        np.random.rand(dim1, dim2, dim3) + 0.5 - missing_rate)
    # Swap the axes back and flatten the last two into a single column axis.
    dense_mat = np.reshape(np.transpose(dense_tensor, (0, 2, 1)), [dim1, dim2 * dim3])
    sparse_mat = np.reshape(np.transpose(sparse_tensor, (0, 2, 1)), [dim1, dim2 * dim3])
    del dense_tensor, sparse_tensor

    for c in (1/10, 1/5, 1, 5, 10):
        for theta in (5, 10, 15, 20, 25):
            print('c = {}'.format(c))
            print('theta = {}'.format(theta))
            start = time.time()
            lmbda = 1e-5
            gamma = c * lmbda
            time_lags = np.arange(1, 7)
            alpha = np.ones(3) / 3
            # Only the first 14 * dim2 columns are imputed; lmbda is passed
            # as rho0 and gamma as lambda0.
            mat_hat = imputer(dense_mat[:, : 14 * dim2], sparse_mat[:, : 14 * dim2],
                              time_lags, rho0 = lmbda, lambda0 = gamma, theta = theta)
            end = time.time()
            print('Running time: %d seconds' % (end - start))
            print()

### Non-Random Missing Data

In [None]:
import numpy as np
import time

# Grid search over (c, theta) for each non-random-missing rate.
for r in (0.3, 0.7):
    missing_rate = r
    print('Missing rate = {}'.format(r))

    ## Non-random Missing (NM)
    # Load the stored tensor with its last two axes swapped.
    dense_tensor = np.transpose(np.load('tensor.npz')['arr_0'], (0, 2, 1))
    dim1, dim2, dim3 = dense_tensor.shape
    np.random.seed(1000)
    # One 0/1 draw per (axis-0, axis-2) pair, broadcast along axis 1, so a
    # whole axis-1 fibre is kept or dropped together.
    sparse_tensor = dense_tensor * np.round(
        np.random.rand(dim1, dim3) + 0.5 - missing_rate)[:, None, :]
    # Swap the axes back and flatten the last two into a single column axis.
    dense_mat = np.reshape(np.transpose(dense_tensor, (0, 2, 1)), [dim1, dim2 * dim3])
    sparse_mat = np.reshape(np.transpose(sparse_tensor, (0, 2, 1)), [dim1, dim2 * dim3])
    del dense_tensor, sparse_tensor

    for c in (1/10, 1/5, 1, 5, 10):
        for theta in (5, 10, 15, 20, 25):
            print('c = {}'.format(c))
            print('theta = {}'.format(theta))
            start = time.time()
            lmbda = 1e-5
            gamma = c * lmbda
            time_lags = np.arange(1, 7)
            alpha = np.ones(3) / 3
            # Only the first 14 * dim2 columns are imputed; lmbda is passed
            # as rho0 and gamma as lambda0.
            mat_hat = imputer(dense_mat[:, : 14 * dim2], sparse_mat[:, : 14 * dim2],
                              time_lags, rho0 = lmbda, lambda0 = gamma, theta = theta)
            end = time.time()
            print('Running time: %d seconds' % (end - start))
            print()

### Block-Out Missing

In [None]:
import numpy as np
import time

# Grid search over (c, theta) for the block-out missing pattern.
for r in (0.3,):
    missing_rate = r
    print('Missing rate = {}'.format(r))

    ## Block-out Missing (BM)
    # Load the stored tensor with its last two axes swapped.
    dense_tensor = np.transpose(np.load('tensor.npz')['arr_0'], (0, 2, 1))
    dim1, dim2, dim3 = dense_tensor.shape
    block_window = 12
    np.random.seed(1000)
    # One uniform draw per window, repeated block_window times in a row so
    # consecutive columns share the same keep/drop decision.
    vec = np.random.rand(int(dim2 * dim3 / block_window))
    temp = np.tile(vec, (block_window, 1))
    vec = np.reshape(temp, [dim2 * dim3], order = 'F')
    # The same column mask is applied to every row, then folded to tensor shape.
    sparse_tensor = dense_tensor * mat2ten(
        np.ones((dim1, dim2 * dim3)) * np.round(vec + 0.5 - missing_rate)[None, :],
        np.array([dim1, dim2, dim3]), 0)
    # Swap the axes back and flatten the last two into a single column axis.
    dense_mat = np.reshape(np.transpose(dense_tensor, (0, 2, 1)), [dim1, dim2 * dim3])
    sparse_mat = np.reshape(np.transpose(sparse_tensor, (0, 2, 1)), [dim1, dim2 * dim3])
    del dense_tensor, sparse_tensor

    for c in (1/10, 1/5, 1, 5, 10):
        for theta in (5, 10, 15, 20, 25):
            print('c = {}'.format(c))
            print('theta = {}'.format(theta))
            start = time.time()
            lmbda = 1e-5
            gamma = c * lmbda
            time_lags = np.arange(1, 7)
            alpha = np.ones(3) / 3
            # Only the first 14 * dim2 columns are imputed; lmbda is passed
            # as rho0 and gamma as lambda0.
            mat_hat = imputer(dense_mat[:, : 14 * dim2], sparse_mat[:, : 14 * dim2],
                              time_lags, rho0 = lmbda, lambda0 = gamma, theta = theta)
            end = time.time()
            print('Running time: %d seconds' % (end - start))
            print()

## On the Portland Traffic Volume Dataset

### Random Missing

In [None]:
import numpy as np
import time

# Grid search over (c, theta) for each random-missing rate.
for r in (0.3, 0.7, 0.9):
    missing_rate = r
    print('Missing rate = {}'.format(r))

    # Random Missing (RM)
    dense_mat = np.load('volume.npy')
    dim1, dim2 = dense_mat.shape
    # Fold the columns into a (dim1, 96, 31) tensor.
    dim = np.array([dim1, 96, 31])
    dense_tensor = mat2ten(dense_mat, dim, 0)
    np.random.seed(1000)
    # Independent 0/1 mask per matrix entry, drawn from a fixed seed.
    sparse_tensor = mat2ten(
        dense_mat * np.round(np.random.rand(dim1, dim2) + 0.5 - missing_rate),
        dim, 0)
    # Swap the last two axes and flatten back to dim1 x dim2 matrices.
    dense_mat = np.reshape(np.transpose(dense_tensor, (0, 2, 1)), [dim1, dim2])
    sparse_mat = np.reshape(np.transpose(sparse_tensor, (0, 2, 1)), [dim1, dim2])
    del dense_tensor, sparse_tensor

    for c in (1/10, 1/5, 1, 5, 10):
        for theta in (5, 10, 15, 20, 25):
            print('c = {}'.format(c))
            print('theta = {}'.format(theta))
            start = time.time()
            lmbda = 1e-5
            gamma = c * lmbda
            time_lags = np.arange(1, 5)
            alpha = np.ones(3) / 3
            # Only the first 14 * 96 columns are imputed; lmbda is passed
            # as rho0 and gamma as lambda0.
            mat_hat = imputer(dense_mat[:, : 14 * 96], sparse_mat[:, : 14 * 96],
                              time_lags, rho0 = lmbda, lambda0 = gamma, theta = theta)
            end = time.time()
            print('Running time: %d seconds' % (end - start))
            print()

### Non-Random Missing

In [None]:
import numpy as np
import time

# Grid search over (c, theta) for each non-random-missing rate.
for r in (0.3, 0.7):
    missing_rate = r
    print('Missing rate = {}'.format(r))

    # Non-random Missing (NM)
    dense_mat = np.load('volume.npy')
    dim1, dim2 = dense_mat.shape
    # Fold the columns into a (dim1, 96, 31) tensor.
    dim = np.array([dim1, 96, 31])
    dense_tensor = mat2ten(dense_mat, dim, 0)
    np.random.seed(1000)
    # One 0/1 draw per (axis-0, axis-2) pair, broadcast along axis 1, so a
    # whole axis-1 fibre is kept or dropped together.
    sparse_tensor = dense_tensor * np.round(
        np.random.rand(dim1, dim[2]) + 0.5 - missing_rate)[:, None, :]
    # Swap the last two axes and flatten back to dim1 x dim2 matrices.
    dense_mat = np.reshape(np.transpose(dense_tensor, (0, 2, 1)), [dim1, dim2])
    sparse_mat = np.reshape(np.transpose(sparse_tensor, (0, 2, 1)), [dim1, dim2])
    del dense_tensor, sparse_tensor

    for c in (1/10, 1/5, 1, 5, 10):
        for theta in (5, 10, 15, 20, 25):
            print('c = {}'.format(c))
            print('theta = {}'.format(theta))
            start = time.time()
            lmbda = 1e-5
            gamma = c * lmbda
            time_lags = np.arange(1, 5)
            alpha = np.ones(3) / 3
            # Only the first 14 * 96 columns are imputed; lmbda is passed
            # as rho0 and gamma as lambda0.
            mat_hat = imputer(dense_mat[:, : 14 * 96], sparse_mat[:, : 14 * 96],
                              time_lags, rho0 = lmbda, lambda0 = gamma, theta = theta)
            end = time.time()
            print('Running time: %d seconds' % (end - start))
            print()

### Block-Out Missing

In [None]:
import numpy as np
import time

# Grid search over (c, theta) for the block-out missing pattern.
for r in (0.3,):
    missing_rate = r
    print('Missing rate = {}'.format(r))

    ## Block-out Missing (BM)
    dense_mat = np.load('volume.npy')
    dim1, dim2 = dense_mat.shape
    # Fold the columns into a (dim1, 96, 31) tensor.
    dim = np.array([dim1, 96, 31])
    dense_tensor = mat2ten(dense_mat, dim, 0)
    block_window = 4
    np.random.seed(1000)
    # One uniform draw per window, repeated block_window times in a row so
    # consecutive columns share the same keep/drop decision.
    vec = np.random.rand(int(dim2 / block_window))
    temp = np.tile(vec, (block_window, 1))
    vec = np.reshape(temp, [dim2], order = 'F')
    # The same column mask is applied to every row before folding.
    sparse_tensor = mat2ten(
        dense_mat * np.round(vec + 0.5 - missing_rate)[None, :],
        dim, 0)
    # Swap the last two axes and flatten back to dim1 x dim2 matrices.
    dense_mat = np.reshape(np.transpose(dense_tensor, (0, 2, 1)), [dim1, dim2])
    sparse_mat = np.reshape(np.transpose(sparse_tensor, (0, 2, 1)), [dim1, dim2])
    del dense_tensor, sparse_tensor

    for c in (1/10, 1/5, 1, 5, 10):
        for theta in (5, 10, 15, 20, 25):
            print('c = {}'.format(c))
            print('theta = {}'.format(theta))
            start = time.time()
            lmbda = 1e-5
            gamma = c * lmbda
            time_lags = np.arange(1, 5)
            alpha = np.ones(3) / 3
            # Only the first 14 * 96 columns are imputed; lmbda is passed
            # as rho0 and gamma as lambda0.
            mat_hat = imputer(dense_mat[:, : 14 * 96], sparse_mat[:, : 14 * 96],
                              time_lags, rho0 = lmbda, lambda0 = gamma, theta = theta)
            end = time.time()
            print('Running time: %d seconds' % (end - start))
            print()

### License

<div class="alert alert-block alert-danger">
<b>This work is released under the MIT license.</b>
</div>