# Low-Rank Matrix and Tensor Factorization for Speed Field Reconstruction

- **Content**
  - Matrix factorization with gradient descent (GD), steepest gradient descent (SGD), and alternating least squares (ALS)
  - Hankel tensor factorization
  - Applications: NGSIM speed field reconstruction and Seattle freeway traffic speed imputation

## Matrix Factorization (MF)

### Matrix Factorization with Gradient Descent (GD)

In [None]:
import numpy as np
np.random.seed(1)

def compute_mape(var, var_hat):
    """Return the mean absolute percentage error of ``var_hat`` against ``var``.

    The absolute errors are divided element-wise by ``var`` and the total is
    averaged over the first axis length of ``var``.
    """
    relative_error = np.abs(var - var_hat) / var
    count = var.shape[0]
    return np.sum(relative_error) / count

def compute_rmse(var, var_hat):
    """Return the root mean squared error between ``var`` and ``var_hat``.

    The squared errors are summed and averaged over the first axis length
    of ``var`` before taking the square root.
    """
    squared_error = np.square(var - var_hat)
    count = var.shape[0]
    return np.sqrt(np.sum(squared_error) / count)

def MF_gd(dense_mat, sparse_mat, R, rho, alpha, maxiter = 100):
    """Impute a partially observed matrix by rank-R factorization with gradient descent.

    Minimizes ``0.5 * ||P(Y - W^T X)||_F^2 + 0.5 * rho * (||W||_F^2 + ||X||_F^2)``,
    where ``P`` keeps only the observed entries, by alternating fixed-step
    gradient updates on ``W`` and ``X``.

    Parameters
    ----------
    dense_mat : ndarray, shape (N, T)
        Ground truth; only used to report MAPE/RMSE on the held-out entries.
    sparse_mat : ndarray, shape (N, T)
        Observations. Missing entries are NaN, or 0 when the array has no NaN.
        The caller's array is never modified.
    R : int
        Rank of the factorization.
    rho : float
        Weight of the Frobenius-norm regularizers.
    alpha : float
        Step size of both gradient updates.
    maxiter : int
        Number of iterations.

    Returns
    -------
    mat_hat : ndarray, shape (N, T)
        Reconstruction ``W.T @ X``.
    obj : ndarray, shape (maxiter,)
        Objective value after each iteration.
    """
    N, T = sparse_mat.shape
    nan_mask = np.isnan(sparse_mat)
    if nan_mask.any():
        ind = ~nan_mask
        pos_test = np.where((dense_mat > 0) & nan_mask)
        # Zero-fill on a fresh array so the caller keeps its NaN markers.
        sparse_mat = np.where(nan_mask, 0, sparse_mat)
    else:
        ind = sparse_mat != 0
        pos_test = np.where((dense_mat != 0) & (sparse_mat == 0))
    W = 0.01 * np.random.randn(R, N)
    X = 0.01 * np.random.randn(R, T)
    obj = np.zeros(maxiter)
    show_iter = 10
    # Defined up front so that maxiter = 0 still returns a valid estimate.
    mat_hat = W.T @ X
    for it in range(maxiter):
        res_old = sparse_mat - W.T @ X
        grad_w = - X @ (res_old * ind).T + rho * W
        W = W - alpha * grad_w
        # The residual is refreshed so the X step sees the updated W.
        res_new = sparse_mat - W.T @ X
        grad_x = - W @ (res_new * ind) + rho * X
        X = X - alpha * grad_x
        mat_hat = W.T @ X
        obj[it] = (np.linalg.norm((sparse_mat - mat_hat) * ind, 'fro') ** 2 / 2
                   + rho * np.linalg.norm(W, 'fro') ** 2 / 2
                   + rho * np.linalg.norm(X, 'fro') ** 2 / 2)
        if (it + 1) % show_iter == 0:
            print('Iter: {}'.format(it + 1))
            print('Loss function: {:.6}'.format(obj[it]))
            print('MAPE: {:.6}'.format(compute_mape(dense_mat[pos_test],
                                                    mat_hat[pos_test])))
            print('RMSE: {:.6}'.format(compute_rmse(dense_mat[pos_test],
                                                    mat_hat[pos_test])))
            print()
    return mat_hat, obj

In [None]:
import numpy as np
np.random.seed(1)

import matplotlib.pyplot as plt
import seaborn as sns
import imageio as io
plt.rcParams['font.size'] = 12

# Fully observed NGSIM speed field and its counterpart with 80% of the entries missing.
dense_mat = np.load('../datasets/NGSIM-data-set/NGSIM_full.npy')
sparse_mat = np.load('../datasets/NGSIM-data-set/NGSIM_80missing.npy')

def plot_speed_field(data, filename):
    """Show a speed matrix as a heatmap and save the figure to ``filename``."""
    # Tick positions are matrix indices; the labels are the values printed on the axes.
    time_ticks = [0, 100, 200, 300, 400, 500]
    time_labels = [0, 500, 1000, 1500, 2000, 2500]
    loc_ticks = [0, 100, 200]
    loc_labels = [0, 300, 600]
    height = 2.5
    figure = plt.figure(figsize = (2.5 * height, height))
    # fignum = 1 targets figure number 1 - presumably the figure created just above.
    plt.matshow(data, fignum = 1, origin = 'lower', cmap = 'jet_r',
                vmin = 0, vmax = 25)
    plt.gca().xaxis.set_ticks_position('bottom')
    plt.xticks(time_ticks, time_labels)
    plt.yticks(loc_ticks, loc_labels)
    plt.xlabel('Time (s)')
    plt.ylabel('Location (m)')
    colorbar = plt.colorbar(fraction = 0.015)
    colorbar.ax.set_ylabel('Speed (mph)')
    plt.show()
    # Saved through the figure handle rather than pyplot's current figure.
    figure.savefig(filename, dpi = 300, bbox_inches = 'tight')

# Plot the ground truth and the partially observed input before reconstruction.
plot_speed_field(dense_mat, 'speed_field_fully_data.png')
plot_speed_field(sparse_mat, 'speed_field_80_missing_data.png')

# Run gradient-descent matrix factorization and report the wall-clock time.
import time
start = time.time()
R = 10         # rank of the factorization
rho = 1e+1     # regularization weight
alpha = 1e-4   # gradient step size
maxiter = 1000
# obj_gd is kept as a global; the convergence-plot cell reads it.
mat_hat, obj_gd = MF_gd(dense_mat, sparse_mat, R, rho, alpha, maxiter)
end = time.time()
print('Running time: %d seconds.'%(end - start))
plot_speed_field(mat_hat, 'speed_field_MF_gd_rec.png')

### Matrix Factorization with Steepest Gradient Descent (SGD)

In [None]:
import numpy as np
np.random.seed(1)

def compute_mape(var, var_hat):
    """Return the mean absolute percentage error of ``var_hat`` against ``var``.

    The absolute errors are divided element-wise by ``var`` and the total is
    averaged over the first axis length of ``var``.
    """
    relative_error = np.abs(var - var_hat) / var
    count = var.shape[0]
    return np.sum(relative_error) / count

def compute_rmse(var, var_hat):
    """Return the root mean squared error between ``var`` and ``var_hat``.

    The squared errors are summed and averaged over the first axis length
    of ``var`` before taking the square root.
    """
    squared_error = np.square(var - var_hat)
    count = var.shape[0]
    return np.sqrt(np.sum(squared_error) / count)

def MF_sgd(dense_mat, sparse_mat, R, rho, maxiter = 100):
    """Impute a partially observed matrix by rank-R factorization with steepest descent.

    Same objective as fixed-step gradient descent,
    ``0.5 * ||P(Y - W^T X)||_F^2 + 0.5 * rho * (||W||_F^2 + ||X||_F^2)``,
    but the step size of each W and X update is the ratio ``a2 / a1``
    (resp. ``b2 / b1``) computed from the current gradient, i.e. the exact
    minimizer of the objective along the negative gradient.

    Parameters
    ----------
    dense_mat : ndarray, shape (N, T)
        Ground truth; only used to report MAPE/RMSE on the held-out entries.
    sparse_mat : ndarray, shape (N, T)
        Observations. Missing entries are NaN, or 0 when the array has no NaN.
        The caller's array is never modified.
    R : int
        Rank of the factorization.
    rho : float
        Weight of the Frobenius-norm regularizers.
    maxiter : int
        Number of iterations.

    Returns
    -------
    mat_hat : ndarray, shape (N, T)
        Reconstruction ``W.T @ X``.
    obj : ndarray, shape (maxiter,)
        Objective value after each iteration.
    """
    N, T = sparse_mat.shape
    nan_mask = np.isnan(sparse_mat)
    if nan_mask.any():
        ind = ~nan_mask
        pos_test = np.where((dense_mat > 0) & nan_mask)
        # Zero-fill on a fresh array so the caller keeps its NaN markers.
        sparse_mat = np.where(nan_mask, 0, sparse_mat)
    else:
        ind = sparse_mat != 0
        pos_test = np.where((dense_mat != 0) & (sparse_mat == 0))
    W = 0.01 * np.random.randn(R, N)
    X = 0.01 * np.random.randn(R, T)
    obj = np.zeros(maxiter)
    show_iter = 10
    # Defined up front so that maxiter = 0 still returns a valid estimate.
    mat_hat = W.T @ X
    for it in range(maxiter):
        res_old = sparse_mat - W.T @ X
        grad_w = - X @ (res_old * ind).T + rho * W
        a1 = (np.linalg.norm((grad_w.T @ X) * ind, 'fro') ** 2
              + rho * np.linalg.norm(grad_w, 'fro') ** 2)
        a2 = - np.sum(res_old * (grad_w.T @ X) * ind) + rho * np.sum(W * grad_w)
        # A zero denominator means the gradient vanished: take no step instead of 0 / 0.
        alpha = a2 / a1 if a1 != 0 else 0.0
        W = W - alpha * grad_w
        res_new = sparse_mat - W.T @ X
        grad_x = - W @ (res_new * ind) + rho * X
        b1 = (np.linalg.norm((W.T @ grad_x) * ind, 'fro') ** 2
              + rho * np.linalg.norm(grad_x, 'fro') ** 2)
        b2 = - np.sum(res_new * (W.T @ grad_x) * ind) + rho * np.sum(X * grad_x)
        beta = b2 / b1 if b1 != 0 else 0.0
        X = X - beta * grad_x
        mat_hat = W.T @ X
        obj[it] = (np.linalg.norm((sparse_mat - mat_hat) * ind, 'fro') ** 2 / 2
                   + rho * np.linalg.norm(W, 'fro') ** 2 / 2
                   + rho * np.linalg.norm(X, 'fro') ** 2 / 2)
        if (it + 1) % show_iter == 0:
            print('Iter: {}'.format(it + 1))
            print('Loss function: {:.6}'.format(obj[it]))
            print('MAPE: {:.6}'.format(compute_mape(dense_mat[pos_test],
                                                    mat_hat[pos_test])))
            print('RMSE: {:.6}'.format(compute_rmse(dense_mat[pos_test],
                                                    mat_hat[pos_test])))
            print()
    return mat_hat, obj

In [None]:
import numpy as np
np.random.seed(1)

import matplotlib.pyplot as plt
import seaborn as sns
import imageio as io
plt.rcParams['font.size'] = 12

# Fully observed NGSIM speed field and its counterpart with 80% of the entries missing.
dense_mat = np.load('../datasets/NGSIM-data-set/NGSIM_full.npy')
sparse_mat = np.load('../datasets/NGSIM-data-set/NGSIM_80missing.npy')

def plot_speed_field(data, filename):
    """Show a speed matrix as a heatmap and save the figure to ``filename``."""
    # Tick positions are matrix indices; the labels are the values printed on the axes.
    time_ticks = [0, 100, 200, 300, 400, 500]
    time_labels = [0, 500, 1000, 1500, 2000, 2500]
    loc_ticks = [0, 100, 200]
    loc_labels = [0, 300, 600]
    height = 2.5
    figure = plt.figure(figsize = (2.5 * height, height))
    # fignum = 1 targets figure number 1 - presumably the figure created just above.
    plt.matshow(data, fignum = 1, origin = 'lower', cmap = 'jet_r',
                vmin = 0, vmax = 25)
    plt.gca().xaxis.set_ticks_position('bottom')
    plt.xticks(time_ticks, time_labels)
    plt.yticks(loc_ticks, loc_labels)
    plt.xlabel('Time (s)')
    plt.ylabel('Location (m)')
    colorbar = plt.colorbar(fraction = 0.015)
    colorbar.ax.set_ylabel('Speed (mph)')
    plt.show()
    # Saved through the figure handle rather than pyplot's current figure.
    figure.savefig(filename, dpi = 300, bbox_inches = 'tight')

# Plot the ground truth and the partially observed input before reconstruction.
plot_speed_field(dense_mat, 'speed_field_fully_data.png')
plot_speed_field(sparse_mat, 'speed_field_80_missing_data.png')

# Run steepest-descent matrix factorization and report the wall-clock time.
import time
start = time.time()
R = 10         # rank of the factorization
rho = 1e+1     # regularization weight
maxiter = 1000
# obj_sgd is kept as a global; the convergence-plot cell reads it.
mat_hat, obj_sgd = MF_sgd(dense_mat, sparse_mat, R, rho, maxiter)
end = time.time()
print('Running time: %d seconds.'%(end - start))
plot_speed_field(mat_hat, 'speed_field_MF_sgd_rec.png')

### Matrix Factorization with Alternating Least Squares (ALS)

In [None]:
import numpy as np
# Seed the global NumPy generator (the original line drew one sample and discarded it).
np.random.seed(1)

def compute_mape(var, var_hat):
    """Return the mean absolute percentage error of ``var_hat`` against ``var``.

    The absolute errors are divided element-wise by ``var`` and the total is
    averaged over the first axis length of ``var``.
    """
    relative_error = np.abs(var - var_hat) / var
    count = var.shape[0]
    return np.sum(relative_error) / count

def compute_rmse(var, var_hat):
    """Return the root mean squared error between ``var`` and ``var_hat``.

    The squared errors are summed and averaged over the first axis length
    of ``var`` before taking the square root.
    """
    squared_error = np.square(var - var_hat)
    count = var.shape[0]
    return np.sqrt(np.sum(squared_error) / count)

def MF_als(dense_mat, sparse_mat, R, rho, maxiter = 100):
    """Impute a partially observed matrix by rank-R factorization with alternating least squares.

    Each iteration solves one ridge-regularized least-squares problem per row
    (for the columns of ``W``) and one per column (for the columns of ``X``),
    restricted to the observed entries given by the mask ``ind``. This is the
    same mask the objective uses, so observed zeros in NaN-coded data are
    fitted rather than silently skipped.

    Parameters
    ----------
    dense_mat : ndarray, shape (N, T)
        Ground truth; only used to report MAPE/RMSE on the held-out entries.
    sparse_mat : ndarray, shape (N, T)
        Observations. Missing entries are NaN, or 0 when the array has no NaN.
        The caller's array is never modified.
    R : int
        Rank of the factorization.
    rho : float
        Ridge weight added to every R x R normal-equation matrix.
    maxiter : int
        Number of iterations.

    Returns
    -------
    mat_hat : ndarray, shape (N, T)
        Reconstruction ``W.T @ X``.
    obj : ndarray, shape (maxiter,)
        Objective value after each iteration.
    """
    N, T = sparse_mat.shape
    nan_mask = np.isnan(sparse_mat)
    if nan_mask.any():
        ind = ~nan_mask
        pos_test = np.where((dense_mat > 0) & nan_mask)
        # Zero-fill on a fresh array so the caller keeps its NaN markers.
        sparse_mat = np.where(nan_mask, 0, sparse_mat)
    else:
        ind = sparse_mat != 0
        pos_test = np.where((dense_mat != 0) & (sparse_mat == 0))
    W = 0.01 * np.random.randn(R, N)
    X = 0.01 * np.random.randn(R, T)
    obj = np.zeros(maxiter)
    show_iter = 10
    ridge = rho * np.eye(R)  # loop-invariant regularization block
    # Defined up front so that maxiter = 0 still returns a valid estimate.
    mat_hat = W.T @ X
    for it in range(maxiter):
        for i in range(N):
            observed = ind[i, :]
            Xt = X[:, observed]
            W[:, i] = np.linalg.solve(Xt @ Xt.T + ridge,
                                      Xt @ sparse_mat[i, observed])
        for t in range(T):
            observed = ind[:, t]
            Wi = W[:, observed]
            X[:, t] = np.linalg.solve(Wi @ Wi.T + ridge,
                                      Wi @ sparse_mat[observed, t])
        mat_hat = W.T @ X
        obj[it] = (np.linalg.norm((sparse_mat - mat_hat) * ind, 'fro') ** 2 / 2
                   + rho * np.linalg.norm(W, 'fro') ** 2 / 2
                   + rho * np.linalg.norm(X, 'fro') ** 2 / 2)
        if (it + 1) % show_iter == 0:
            print('Iter: {}'.format(it + 1))
            print('Loss function: {:.6}'.format(obj[it]))
            print('MAPE: {:.6}'.format(compute_mape(dense_mat[pos_test],
                                                    mat_hat[pos_test])))
            print('RMSE: {:.6}'.format(compute_rmse(dense_mat[pos_test],
                                                    mat_hat[pos_test])))
            print()
    return mat_hat, obj

In [None]:
import numpy as np
np.random.seed(1)

import matplotlib.pyplot as plt
import seaborn as sns
import imageio as io
plt.rcParams['font.size'] = 12

# Fully observed NGSIM speed field and its counterpart with 80% of the entries missing.
dense_mat = np.load('../datasets/NGSIM-data-set/NGSIM_full.npy')
sparse_mat = np.load('../datasets/NGSIM-data-set/NGSIM_80missing.npy')

def plot_speed_field(data, filename):
    """Show a speed matrix as a heatmap and save the figure to ``filename``."""
    # Tick positions are matrix indices; the labels are the values printed on the axes.
    time_ticks = [0, 100, 200, 300, 400, 500]
    time_labels = [0, 500, 1000, 1500, 2000, 2500]
    loc_ticks = [0, 100, 200]
    loc_labels = [0, 300, 600]
    height = 2.5
    figure = plt.figure(figsize = (2.5 * height, height))
    # fignum = 1 targets figure number 1 - presumably the figure created just above.
    plt.matshow(data, fignum = 1, origin = 'lower', cmap = 'jet_r',
                vmin = 0, vmax = 25)
    plt.gca().xaxis.set_ticks_position('bottom')
    plt.xticks(time_ticks, time_labels)
    plt.yticks(loc_ticks, loc_labels)
    plt.xlabel('Time (s)')
    plt.ylabel('Location (m)')
    colorbar = plt.colorbar(fraction = 0.015)
    colorbar.ax.set_ylabel('Speed (mph)')
    plt.show()
    # Saved through the figure handle rather than pyplot's current figure.
    figure.savefig(filename, dpi = 300, bbox_inches = 'tight')

# Plot the ground truth and the partially observed input before reconstruction.
plot_speed_field(dense_mat, 'speed_field_fully_data.png')
plot_speed_field(sparse_mat, 'speed_field_80_missing_data.png')

# Run alternating-least-squares matrix factorization and report the wall-clock time.
import time
start = time.time()
R = 10         # rank of the factorization
rho = 1e+1     # regularization weight
maxiter = 200
# obj_als is kept as a global; the convergence-plot cell reads it.
mat_hat, obj_als = MF_als(dense_mat, sparse_mat, R, rho, maxiter)
end = time.time()
print('Running time: %d seconds.'%(end - start))
plot_speed_field(mat_hat, 'speed_field_MF_als_rec.png')

### Objective Function $f$ vs. Iteration

In [None]:
import matplotlib.pyplot as plt
# Compare the objective curves of GD, SGD and ALS on a log scale.
# Relies on obj_gd, obj_sgd and obj_als produced by the earlier cells.
plt.rcParams['mathtext.fontset'] = 'cm'

show_it = 200  # only the first show_it iterations are shown on the x-axis
fig = plt.figure(figsize = (4, 3.5))
plt.yscale("log")
plt.plot(obj_gd, 'blue', linewidth = 2.5)
plt.plot(obj_sgd, 'green', linewidth = 2.5)
plt.plot(obj_als, 'red', linewidth = 2.5)
plt.xlim([0, show_it])
plt.xlabel('Iteration')
plt.ylabel(r'Objective function $f$')
# Legend entries follow the order of the plot calls above.
plt.legend(['GD', 'SGD', 'ALS'])
plt.savefig("MF_convergence_over_gd_and_als_within_{}iter.pdf".format(show_it), 
            format = "pdf", bbox_inches = "tight")
plt.show()

### Seattle Freeway Traffic Speed Imputation

#### Matrix Factorization with GD

In [None]:
import numpy as np
np.random.seed(1000)

# Load the Seattle speed tensor and simulate random missing entries.
dense_tensor = np.load('../datasets/Seattle-data-set/tensor.npz')['arr_0']
dim = dense_tensor.shape
missing_rate = 0.6 # Random missing (RM)
# round(u + 0.5 - missing_rate) is 1 with probability 1 - missing_rate, so the
# product zeroes out roughly a missing_rate fraction of the entries.
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1], dim[2]) + 0.5 - missing_rate)
# Flatten the last two tensor axes so each row is one long series.
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
del dense_tensor, sparse_tensor

# Run gradient-descent matrix factorization (MF_gd from the earlier cell) and time it.
import time
start = time.time()
R = 10         # rank of the factorization
rho = 1e+2     # regularization weight
alpha = 2e-5   # gradient step size
maxiter = 1000
mat_hat, obj_gd = MF_gd(dense_mat, sparse_mat, R, rho, alpha, maxiter)
end = time.time()
print('Running time: %d seconds.'%(end - start))

#### Matrix Factorization with SGD

In [None]:
import numpy as np
np.random.seed(1000)

# Load the Seattle speed tensor and simulate random missing entries.
dense_tensor = np.load('../datasets/Seattle-data-set/tensor.npz')['arr_0']
dim = dense_tensor.shape
missing_rate = 0.6 # Random missing (RM)
# round(u + 0.5 - missing_rate) is 1 with probability 1 - missing_rate, so the
# product zeroes out roughly a missing_rate fraction of the entries.
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1], dim[2]) + 0.5 - missing_rate)
# Flatten the last two tensor axes so each row is one long series.
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
del dense_tensor, sparse_tensor

# Run steepest-descent matrix factorization (MF_sgd from the earlier cell) and time it.
import time
start = time.time()
R = 10         # rank of the factorization
rho = 1e+2     # regularization weight
maxiter = 1000
mat_hat, obj_sgd = MF_sgd(dense_mat, sparse_mat, R, rho, maxiter)
end = time.time()
print('Running time: %d seconds.'%(end - start))

#### Matrix Factorization with ALS

In [None]:
import numpy as np
np.random.seed(1000)

# Load the Seattle speed tensor and simulate random missing entries.
dense_tensor = np.load('../datasets/Seattle-data-set/tensor.npz')['arr_0']
dim = dense_tensor.shape
missing_rate = 0.6 # Random missing (RM)
# round(u + 0.5 - missing_rate) is 1 with probability 1 - missing_rate, so the
# product zeroes out roughly a missing_rate fraction of the entries.
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1], dim[2]) + 0.5 - missing_rate)
# Flatten the last two tensor axes so each row is one long series.
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
del dense_tensor, sparse_tensor

# Run alternating-least-squares matrix factorization (MF_als from the earlier cell) and time it.
import time
start = time.time()
R = 10         # rank of the factorization
rho = 1e+2     # regularization weight
maxiter = 200
mat_hat, obj_als = MF_als(dense_mat, sparse_mat, R, rho, maxiter)
end = time.time()
print('Running time: %d seconds.'%(end - start))

#### Objective Function vs. Iteration

In [None]:
import matplotlib.pyplot as plt
# Compare the objective curves of GD, SGD and ALS on a log scale.
# Relies on obj_gd, obj_sgd and obj_als produced by the Seattle cells above.
plt.rcParams['mathtext.fontset'] = 'cm'

show_it = 200  # only the first show_it iterations are shown on the x-axis
fig = plt.figure(figsize = (4, 3.5))
plt.yscale("log")
plt.plot(obj_gd, 'blue', linewidth = 2.5)
plt.plot(obj_sgd, 'green', linewidth = 2.5)
plt.plot(obj_als, 'red', linewidth = 2.5)
plt.xlim([0, show_it])
plt.xlabel('Iteration')
plt.ylabel(r'Objective function $f$')
# Legend entries follow the order of the plot calls above.
plt.legend(['GD', 'SGD', 'ALS'])
plt.savefig("MF_convergence_over_gd_and_als_within_{}iter_Seattle.pdf".format(show_it), 
            format = "pdf", bbox_inches = "tight")
plt.show()

## Smoothing Matrix Factorization

In [None]:
import numpy as np
# Seed the global NumPy generator (the original line drew one sample and discarded it).
np.random.seed(1)

def compute_mape(var, var_hat):
    """Return the mean absolute percentage error of ``var_hat`` against ``var``.

    The absolute errors are divided element-wise by ``var`` and the total is
    averaged over the first axis length of ``var``.
    """
    relative_error = np.abs(var - var_hat) / var
    count = var.shape[0]
    return np.sum(relative_error) / count

def compute_rmse(var, var_hat):
    """Return the root mean squared error between ``var`` and ``var_hat``.

    The squared errors are summed and averaged over the first axis length
    of ``var`` before taking the square root.
    """
    squared_error = np.square(var - var_hat)
    count = var.shape[0]
    return np.sqrt(np.sum(squared_error) / count)

def generate_Psi(n):
    """Return the (n - 1) x n first-order difference matrix.

    Row ``i`` has -1 in column ``i`` and +1 in column ``i + 1``, so
    ``Psi @ v`` yields the consecutive differences ``v[i + 1] - v[i]``.
    """
    shifted_identity = np.eye(n - 1, n, k = 1)  # ones on the first superdiagonal
    identity = np.eye(n - 1, n)                 # ones on the main diagonal
    return shifted_identity - identity

def update_cg(var, r, q, Aq, rold):
    """Perform one conjugate-gradient update.

    ``var`` is the current iterate, ``r`` the residual, ``q`` the search
    direction, ``Aq`` the operator applied to ``q`` and ``rold`` the squared
    norm of ``r``. Returns the new iterate, residual, direction and squared
    residual norm, in that order.
    """
    step = rold / np.inner(q, Aq)
    next_var = var + step * q
    residual = r - step * Aq
    rnew = np.inner(residual, residual)
    direction = residual + (rnew / rold) * q
    return next_var, residual, direction, rnew

def ell_w(ind, W, X, Psi1, rho, lmbda):
    """Apply the linear operator of the W-subproblem to ``W``.

    Sums three terms: the masked data term ``X @ ((W.T @ X) * ind).T``, the
    ridge term ``rho * W`` and the term ``lmbda * W @ Psi1.T @ Psi1``.
    """
    masked_fit = (W.T @ X) * ind
    data_term = X @ masked_fit.T
    ridge_term = rho * W
    smooth_term = ((lmbda * W) @ Psi1.T) @ Psi1
    return data_term + ridge_term + smooth_term

def conj_grad_w(sparse_mat, ind, W, X, Psi1, rho, lmbda, maxiter = 5):
    """Update ``W`` with ``maxiter`` conjugate-gradient steps.

    The linear system has operator ``ell_w`` and right-hand side
    ``X @ sparse_mat.T``; matrices are vectorized in column-major order so
    that ``update_cg`` can work on flat vectors. The current ``W`` is the
    starting point.
    """
    shape = W.shape

    def vec(mat):
        # Column-major flattening of a rank x dim1 matrix.
        return np.reshape(mat, -1, order = 'F')

    def unvec(flat):
        # Inverse of vec().
        return np.reshape(flat, shape, order = 'F')

    w = vec(W)
    # Initial residual: right-hand side minus the operator applied to the start point.
    r = vec(X @ sparse_mat.T - ell_w(ind, W, X, Psi1, rho, lmbda))
    q = r.copy()
    rold = np.inner(r, r)
    for _ in range(maxiter):
        Aq = vec(ell_w(ind, unvec(q), X, Psi1, rho, lmbda))
        w, r, q, rold = update_cg(w, r, q, Aq, rold)
    return unvec(w)

def ell_x(ind, W, X, Psi2, rho, lmbda):
    """Apply the linear operator of the X-subproblem to ``X``.

    Sums three terms: the masked data term ``W @ ((W.T @ X) * ind)``, the
    ridge term ``rho * X`` and the term ``lmbda * X @ Psi2.T @ Psi2``.
    """
    masked_fit = (W.T @ X) * ind
    data_term = W @ masked_fit
    ridge_term = rho * X
    smooth_term = ((lmbda * X) @ Psi2.T) @ Psi2
    return data_term + ridge_term + smooth_term

def conj_grad_x(sparse_mat, ind, W, X, Psi2, rho, lmbda, maxiter = 5):
    """Update ``X`` with ``maxiter`` conjugate-gradient steps.

    The linear system has operator ``ell_x`` and right-hand side
    ``W @ sparse_mat``; matrices are vectorized in column-major order so
    that ``update_cg`` can work on flat vectors. The current ``X`` is the
    starting point.
    """
    shape = X.shape

    def vec(mat):
        # Column-major flattening of a rank x dim2 matrix.
        return np.reshape(mat, -1, order = 'F')

    def unvec(flat):
        # Inverse of vec().
        return np.reshape(flat, shape, order = 'F')

    x = vec(X)
    # Initial residual: right-hand side minus the operator applied to the start point.
    r = vec(W @ sparse_mat - ell_x(ind, W, X, Psi2, rho, lmbda))
    q = r.copy()
    rold = np.inner(r, r)
    for _ in range(maxiter):
        Aq = vec(ell_x(ind, W, unvec(q), Psi2, rho, lmbda))
        x, r, q, rold = update_cg(x, r, q, Aq, rold)
    return unvec(x)

def SMF(dense_mat, sparse_mat, rank, rho, lmbda, maxiter = 50):
    """Impute a partially observed matrix with smoothing matrix factorization.

    Alternates conjugate-gradient updates of the factors ``W`` (rank x dim1)
    and ``X`` (rank x dim2). ``rho`` and ``lmbda`` are forwarded to
    ``conj_grad_w`` / ``conj_grad_x`` together with the matrices returned by
    ``generate_Psi(dim1)`` and ``generate_Psi(dim2)``.

    Parameters
    ----------
    dense_mat : ndarray, shape (dim1, dim2)
        Ground truth; only used to report MAPE/RMSE on the held-out entries.
    sparse_mat : ndarray, shape (dim1, dim2)
        Observations. Missing entries are NaN, or 0 when the array has no NaN.
        The caller's array is never modified.
    rank : int
        Rank of the factorization.
    rho : float
        Ridge weight.
    lmbda : float
        Weight of the Psi-based penalty.
    maxiter : int
        Number of outer iterations.

    Returns
    -------
    mat_hat : ndarray, shape (dim1, dim2)
        Reconstruction ``W.T @ X``.
    """
    dim1, dim2 = sparse_mat.shape
    W = 0.01 * np.random.randn(rank, dim1)
    X = 0.01 * np.random.randn(rank, dim2)
    nan_mask = np.isnan(sparse_mat)
    if nan_mask.any():
        ind = ~nan_mask
        pos_test = np.where((dense_mat > 0) & nan_mask)
        # Zero-fill on a fresh array so the caller keeps its NaN markers.
        sparse_mat = np.where(nan_mask, 0, sparse_mat)
    else:
        ind = sparse_mat != 0
        pos_test = np.where((dense_mat != 0) & (sparse_mat == 0))
    Psi1 = generate_Psi(dim1)
    Psi2 = generate_Psi(dim2)
    show_iter = 10
    # Defined up front so that maxiter = 0 still returns a valid estimate.
    mat_hat = W.T @ X
    for it in range(maxiter):
        W = conj_grad_w(sparse_mat, ind, W, X, Psi1, rho, lmbda)
        X = conj_grad_x(sparse_mat, ind, W, X, Psi2, rho, lmbda)
        mat_hat = W.T @ X
        if (it + 1) % show_iter == 0:
            print('Iter: {}'.format(it + 1))
            print('MAPE: {:.6}'.format(compute_mape(dense_mat[pos_test],
                                                    mat_hat[pos_test])))
            print('RMSE: {:.6}'.format(compute_rmse(dense_mat[pos_test],
                                                    mat_hat[pos_test])))
            print()
    return mat_hat

### Speed Field Reconstruction

In [None]:
import numpy as np
np.random.seed(1)

import matplotlib.pyplot as plt
import seaborn as sns
import imageio as io
plt.rcParams['font.size'] = 12

# Fully observed NGSIM speed field and its counterpart with 80% of the entries missing.
dense_mat = np.load('../datasets/NGSIM-data-set/NGSIM_full.npy')
sparse_mat = np.load('../datasets/NGSIM-data-set/NGSIM_80missing.npy')

def plot_speed_field(data, filename):
    """Show a speed matrix as a heatmap and save the figure to ``filename``."""
    # Tick positions are matrix indices; the labels are the values printed on the axes.
    time_ticks = [0, 100, 200, 300, 400, 500]
    time_labels = [0, 500, 1000, 1500, 2000, 2500]
    loc_ticks = [0, 100, 200]
    loc_labels = [0, 300, 600]
    height = 2.5
    figure = plt.figure(figsize = (2.5 * height, height))
    # fignum = 1 targets figure number 1 - presumably the figure created just above.
    plt.matshow(data, fignum = 1, origin = 'lower', cmap = 'jet_r',
                vmin = 0, vmax = 25)
    plt.gca().xaxis.set_ticks_position('bottom')
    plt.xticks(time_ticks, time_labels)
    plt.yticks(loc_ticks, loc_labels)
    plt.xlabel('Time (s)')
    plt.ylabel('Location (m)')
    colorbar = plt.colorbar(fraction = 0.015)
    colorbar.ax.set_ylabel('Speed (mph)')
    plt.show()
    # Saved through the figure handle rather than pyplot's current figure.
    figure.savefig(filename, dpi = 300, bbox_inches = 'tight')

# Plot the ground truth and the partially observed input before reconstruction.
plot_speed_field(dense_mat, 'speed_field_fully_data.png')
plot_speed_field(sparse_mat, 'speed_field_80_missing_data.png')

# Run smoothing matrix factorization and report the wall-clock time.
import time
start = time.time()
R = 10         # rank of the factorization
rho = 1e+1     # ridge weight
lmbda = 1e+2   # weight of the Psi-based penalty
maxiter = 200
mat_hat = SMF(dense_mat, sparse_mat, R, rho, lmbda, maxiter)
end = time.time()
print('Running time: %d seconds.'%(end - start))
# The value of lmbda is embedded in the output file name.
plot_speed_field(mat_hat, 'speed_field_SMF_rec_lambda_{}.png'.format(int(lmbda)))

### Seattle Freeway Traffic Speed Imputation

In [None]:
import numpy as np
np.random.seed(1000)

# Load the Seattle speed tensor and simulate random missing entries.
dense_tensor = np.load('../datasets/Seattle-data-set/tensor.npz')['arr_0']
dim = dense_tensor.shape
missing_rate = 0.6 # Random missing (RM)
# round(u + 0.5 - missing_rate) is 1 with probability 1 - missing_rate, so the
# product zeroes out roughly a missing_rate fraction of the entries.
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1], dim[2]) + 0.5 - missing_rate)
# Flatten the last two tensor axes so each row is one long series.
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
del dense_tensor, sparse_tensor

# Run smoothing matrix factorization (SMF from the earlier cell) and time it.
import time
start = time.time()
R = 10         # rank of the factorization
rho = 1e+2     # ridge weight
lmbda = 5e+2   # weight of the Psi-based penalty
maxiter = 200
mat_hat = SMF(dense_mat, sparse_mat, R, rho, lmbda, maxiter)
end = time.time()
print('Running time: %d seconds.'%(end - start))

## Hankel Tensor Factorization

In [None]:
import numpy as np
# Seed the global NumPy generator (the original line drew one sample and discarded it).
np.random.seed(1)

def compute_mape(var, var_hat):
    """Return the mean absolute percentage error of ``var_hat`` against ``var``.

    The absolute errors are divided element-wise by ``var`` and the total is
    averaged over the first axis length of ``var``.
    """
    relative_error = np.abs(var - var_hat) / var
    count = var.shape[0]
    return np.sum(relative_error) / count

def compute_rmse(var, var_hat):
    """Return the root mean squared error between ``var`` and ``var_hat``.

    The squared errors are summed and averaged over the first axis length
    of ``var`` before taking the square root.
    """
    squared_error = np.square(var - var_hat)
    count = var.shape[0]
    return np.sqrt(np.sum(squared_error) / count)

def hankel(matrix, tau):
    """Build the Hankel tensor of an N x T matrix with window length ``tau``.

    Frontal slice ``k`` of the (N, T - tau + 1, tau) result holds the columns
    ``k .. k + T - tau`` of ``matrix``, i.e. the matrix shifted by ``k`` steps.
    """
    N, T = matrix.shape
    width = T - tau + 1
    tensor = np.zeros((N, width, tau))
    for shift in range(tau):
        tensor[..., shift] = matrix[:, shift : shift + width]
    return tensor

def inv_hankel(tensor):
    """Map an (N, dim, tau) Hankel-structured tensor back to an N x T matrix.

    Each frontal slice ``k`` is placed at columns ``k .. k + dim - 1`` of a
    T = dim + tau - 1 wide matrix; overlapping contributions are averaged.
    """
    N, dim, tau = tensor.shape
    T = dim + tau - 1
    total = np.zeros((N, T))
    count = np.zeros((N, T))
    for shift in range(tau):
        window = slice(shift, shift + dim)
        total[:, window] += tensor[:, :, shift]
        count[:, window] += 1
    # Dividing by the hit count turns the accumulated sums into averages.
    return total / count

def ten2mat(tensor, mode):
    """Unfold ``tensor`` along ``mode`` into a matrix.

    The chosen axis becomes the rows; the remaining axes are merged into the
    columns in column-major (Fortran) order.
    """
    rows = tensor.shape[mode]
    mode_first = np.moveaxis(tensor, mode, 0)
    return np.reshape(mode_first, (rows, -1), order = 'F')

def kr_prod(a, b):
    """Return the Khatri-Rao (column-wise Kronecker) product of ``a`` and ``b``.

    For ``a`` of shape (I, R) and ``b`` of shape (J, R) the result has shape
    (I * J, R); row ``i * J + j`` is the element-wise product of ``a[i]`` and
    ``b[j]``.
    """
    n_rows = a.shape[0] * b.shape[0]
    # Broadcasting forms every pairing of a row of `a` with a row of `b`.
    pairwise = a[:, np.newaxis, :] * b[np.newaxis, :, :]
    return pairwise.reshape(n_rows, -1)

def update_cg(var, r, q, Aq, rold):
    """Perform one conjugate-gradient update.

    ``var`` is the current iterate, ``r`` the residual, ``q`` the search
    direction, ``Aq`` the operator applied to ``q`` and ``rold`` the squared
    norm of ``r``. Returns the new iterate, residual, direction and squared
    residual norm, in that order.
    """
    step = rold / np.inner(q, Aq)
    next_var = var + step * q
    residual = r - step * Aq
    rnew = np.inner(residual, residual)
    direction = residual + (rnew / rold) * q
    return next_var, residual, direction, rnew

def ell(ind_mode, f_mat, mat):
    """Apply the masked normal-equation operator to the factor ``f_mat``.

    Computes ``((f_mat @ mat.T) * ind_mode) @ mat``: the fitted unfolding is
    masked by ``ind_mode`` and projected back onto ``mat``.
    """
    fitted = f_mat @ mat.T
    masked = fitted * ind_mode
    return masked @ mat

def conj_grad(sparse_tensor, ind, fact_mat, mode, maxiter = 5):
    """Update one factor matrix of a third-order CP model by conjugate gradient.

    Runs ``maxiter`` conjugate-gradient steps on the linear system whose
    operator is ``ell`` and whose right-hand side is the mode-``mode``
    unfolding of ``sparse_tensor`` times the Khatri-Rao product of the two
    other factors, starting from the current ``fact_mat[mode]``.

    Parameters
    ----------
    sparse_tensor : ndarray
        Third-order data tensor.
    ind : ndarray
        Mask of the same shape as ``sparse_tensor``; it is unfolded along ``mode``.
    fact_mat : list of ndarray
        The three factor matrices; ``fact_mat[mode]`` has shape (dim, rank).
    mode : int
        Index (0, 1 or 2) of the factor to update.
    maxiter : int
        Number of conjugate-gradient steps.

    Returns
    -------
    ndarray, shape (dim, rank)
        The updated factor matrix.
    """
    dim, rank = fact_mat[mode].shape
    ind_mode = ten2mat(ind, mode)
    f = np.reshape(fact_mat[mode], -1, order = 'F')
    others = [fact_mat[k] for k in range(3) if k != mode]
    # Later mode first, presumably to match ten2mat's column-major column order.
    mat = kr_prod(others[-1], others[0])
    r = np.reshape(ten2mat(sparse_tensor, mode) @ mat
                   - ell(ind_mode, fact_mat[mode], mat), -1, order = 'F')
    q = r.copy()
    rold = np.inner(r, r)
    for it in range(maxiter):
        Q = np.reshape(q, (dim, rank), order = 'F')
        Aq = np.reshape(ell(ind_mode, Q, mat), -1, order = 'F')
        # The step size is computed inside update_cg; no local copy is needed.
        f, r, q, rold = update_cg(f, r, q, Aq, rold)
    return np.reshape(f, (dim, rank), order = 'F')

def HTF(dense_mat, sparse_mat, tau, rank, rho, maxiter = 50):
    """Impute a partially observed matrix with Hankel tensor factorization.

    The matrix is lifted to a third-order tensor with ``hankel``, a rank-``rank``
    CP model is fitted by cycling ``conj_grad`` over the three factors, and the
    fitted tensor is mapped back to a matrix with ``inv_hankel``.

    Parameters
    ----------
    dense_mat : ndarray, shape (N, T)
        Ground truth; only used to report MAPE/RMSE on the held-out entries.
    sparse_mat : ndarray, shape (N, T)
        Observations. Missing entries are NaN, or 0 when the array has no NaN.
        The caller's array is never modified.
    tau : int
        Window length of the Hankel lifting.
    rank : int
        Rank of the CP model.
    rho : float
        Accepted for interface compatibility but not used: it is never passed
        to ``conj_grad``. NOTE(review): confirm whether a ridge term was intended.
    maxiter : int
        Number of outer iterations.

    Returns
    -------
    mat_hat : ndarray, shape (N, T)
        Reconstructed matrix.
    """
    nan_mask = np.isnan(sparse_mat)
    if nan_mask.any():
        pos_test = np.where((dense_mat > 0) & nan_mask)
        # Zero-fill on a fresh array so the caller keeps its NaN markers.
        sparse_mat = np.where(nan_mask, 0, sparse_mat)
    else:
        pos_test = np.where((dense_mat != 0) & (sparse_mat == 0))
    sparse_tensor = hankel(sparse_mat, tau)
    dim = sparse_tensor.shape
    fact_mat = []
    for k in range(3):
        fact_mat.append(0.01 * np.random.randn(dim[k], rank))
    # After zero-filling, the nonzero tensor entries are the ones treated as observed.
    ind = sparse_tensor != 0
    show_iter = 10
    # Defined up front so that maxiter = 0 still returns a valid estimate.
    mat_hat = inv_hankel(np.einsum('ur, vr, xr -> uvx',
                                   fact_mat[0], fact_mat[1], fact_mat[2]))
    for it in range(maxiter):
        for k in range(3):
            fact_mat[k] = conj_grad(sparse_tensor, ind, fact_mat, k)
        tensor_hat = np.einsum('ur, vr, xr -> uvx',
                               fact_mat[0], fact_mat[1], fact_mat[2])
        mat_hat = inv_hankel(tensor_hat)
        if (it + 1) % show_iter == 0:
            print('Iter: {}'.format(it + 1))
            print('MAPE: {:.6}'.format(compute_mape(dense_mat[pos_test],
                                                    mat_hat[pos_test])))
            print('RMSE: {:.6}'.format(compute_rmse(dense_mat[pos_test],
                                                    mat_hat[pos_test])))
            print()
    return mat_hat

### Speed Field Reconstruction

In [None]:
import numpy as np
np.random.seed(1)

import matplotlib.pyplot as plt
import seaborn as sns
import imageio as io
plt.rcParams['font.size'] = 12

# Fully observed NGSIM speed field and its counterpart with 80% of the entries missing.
# NOTE(review): these paths are relative to the working directory, whereas the other
# cells load the same files from '../datasets/NGSIM-data-set/' - confirm which is intended.
dense_mat = np.load('NGSIM_full.npy')
sparse_mat = np.load('NGSIM_80missing.npy')

def plot_speed_field(data, filename):
    """Show a speed matrix as a heatmap and save the figure to ``filename``."""
    # Tick positions are matrix indices; the labels are the values printed on the axes.
    time_ticks = [0, 100, 200, 300, 400, 500]
    time_labels = [0, 500, 1000, 1500, 2000, 2500]
    loc_ticks = [0, 100, 200]
    loc_labels = [0, 300, 600]
    height = 2.5
    figure = plt.figure(figsize = (2.5 * height, height))
    # fignum = 1 targets figure number 1 - presumably the figure created just above.
    plt.matshow(data, fignum = 1, origin = 'lower', cmap = 'jet_r',
                vmin = 0, vmax = 25)
    plt.gca().xaxis.set_ticks_position('bottom')
    plt.xticks(time_ticks, time_labels)
    plt.yticks(loc_ticks, loc_labels)
    plt.xlabel('Time (s)')
    plt.ylabel('Location (m)')
    colorbar = plt.colorbar(fraction = 0.015)
    colorbar.ax.set_ylabel('Speed (mph)')
    plt.show()
    # Saved through the figure handle rather than pyplot's current figure.
    figure.savefig(filename, dpi = 300, bbox_inches = 'tight')

# Plot the ground truth and the partially observed input before reconstruction.
plot_speed_field(dense_mat, 'speed_field_fully_data.png')
plot_speed_field(sparse_mat, 'speed_field_80_missing_data.png')

# Run Hankel tensor factorization and report the wall-clock time.
import time
start = time.time()
tau = 10       # window length of the Hankel lifting
R = 10         # rank of the factorization
rho = 1e+1     # passed to HTF, which does not use it
maxiter = 200
mat_hat = HTF(dense_mat, sparse_mat, tau, R, rho, maxiter)
end = time.time()
print('Running time: %d seconds.'%(end - start))
# The value of tau is embedded in the output file name.
plot_speed_field(mat_hat, 'speed_field_HTF_rec_tau_{}.png'.format(int(tau)))

### Seattle Freeway Traffic Speed Imputation

In [None]:
import numpy as np
np.random.seed(1000)

# Load the Seattle speed tensor and simulate random missing entries.
dense_tensor = np.load('../datasets/Seattle-data-set/tensor.npz')['arr_0']
dim = dense_tensor.shape
missing_rate = 0.6 # Random missing (RM)
# round(u + 0.5 - missing_rate) is 1 with probability 1 - missing_rate, so the
# product zeroes out roughly a missing_rate fraction of the entries.
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1], dim[2]) + 0.5 - missing_rate)
# Flatten the last two tensor axes so each row is one long series.
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
del dense_tensor, sparse_tensor

# Run Hankel tensor factorization (HTF from the earlier cell) and time it.
import time
start = time.time()
tau = 6        # window length of the Hankel lifting
R = 10         # rank of the factorization
rho = 1e+2     # passed to HTF, which does not use it
maxiter = 200
mat_hat = HTF(dense_mat, sparse_mat, tau, R, rho, maxiter)
end = time.time()
print('Running time: %d seconds.'%(end - start))

## Dynamic System (Fluid Flow)

The dataset is available at [https://github.com/xinychen/vars/tree/main/datasets/fluid-flow](https://github.com/xinychen/vars/tree/main/datasets/fluid-flow).

In [None]:
import numpy as np
import seaborn as sns
import scipy.io
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
# Build a custom colormap from the color table stored in the MATLAB file.
color = scipy.io.loadmat('CCcool.mat')
cc = color['CC']
newcmp = LinearSegmentedColormap.from_list('', cc)

# Keep only the first 150 slices along the last axis.
tensor = np.load('tensor.npz')['arr_0']
tensor = tensor[:, :, : 150]
M, N, T = tensor.shape

plt.rcParams['font.size'] = 13
plt.rcParams['mathtext.fontset'] = 'cm'
fig = plt.figure(figsize = (12, 4))
# 1-based snapshot indices to display (named so the builtin `id` is not shadowed).
time_steps = np.array([5, 10, 15, 20, 25, 30, 35, 40])
# Contour coordinates are the same for every panel, so build them once.
grid_x = np.linspace(0, N, N)
grid_y = np.linspace(0, M, M)
for t, step in enumerate(time_steps):
    snapshot = tensor[:, :, step - 1]
    ax = fig.add_subplot(2, 4, t + 1)
    ax = sns.heatmap(snapshot, cmap = newcmp, vmin = -5, vmax = 5, cbar = False)
    # Solid contour lines for positive levels, dashed lines for negative levels.
    ax.contour(grid_x, grid_y, snapshot,
               levels = np.linspace(0.15, 15, 30), colors = 'k', linewidths = 0.7)
    ax.contour(grid_x, grid_y, snapshot,
               levels = np.linspace(-15, -0.15, 30), colors = 'k', linestyles = 'dashed', linewidths = 0.7)
    plt.xticks([])
    plt.yticks([])
    plt.title(r'$t = {}$'.format(step))
    # Make the spines visible so every panel gets a frame.
    for _, spine in ax.spines.items():
        spine.set_visible(True)
plt.show()
fig.savefig('fluid_flow_heatmap_2_times_4.png', bbox_inches = 'tight')

### License

<div class="alert alert-block alert-danger">
<b>This work is released under the MIT license.</b>
</div>