# Low-Rank Matrix and Tensor Factorization for Speed Field Reconstruction

- **Content**
  - Matrix factorization with gradient descent (GD), steepest gradient descent (SGD; here the step size comes from an exact line search, so this is not stochastic gradient descent), and alternating least squares (ALS)
  - Hankel tensor factorization
  - Applications: NGSIM speed field reconstruction \& Seattle freeway traffic speed imputation

## Matrix Factorization (MF)

### Gradient Descent (GD)

In [None]:
import numpy as np
np.random.seed(1)

def compute_mape(var, var_hat):
    """Return the mean absolute percentage error of `var_hat` relative to `var`.

    The relative errors ``|var - var_hat| / var`` are summed over every entry
    and the total is divided by the length of the first axis of `var`.
    """
    relative_err = np.abs(var - var_hat) / var
    total_err = np.sum(relative_err)
    return total_err / var.shape[0]

def compute_rmse(var, var_hat):
    """Return the root mean squared error between `var` and `var_hat`.

    The squared differences are summed over every entry, divided by the length
    of the first axis of `var`, and the square root of that ratio is returned.
    """
    squared_err = (var - var_hat) ** 2
    mean_squared_err = np.sum(squared_err) / var.shape[0]
    return np.sqrt(mean_squared_err)

def MF_gd(dense_mat, sparse_mat, R, rho, alpha, maxiter = 100, show_iter = 10):
    """Factorize a partially observed matrix with alternating gradient steps.

    The observed entries of `sparse_mat` are approximated by ``W.T @ X`` while
    decreasing the objective

        0.5 * ||P(Y - W.T X)||_F^2 + 0.5 * rho * (||W||_F^2 + ||X||_F^2),

    where ``P`` keeps the observed entries only. Every iteration takes one
    fixed-size gradient step on ``W`` followed by one on ``X``.

    Parameters
    ----------
    dense_mat : ndarray
        Reference values with the same shape as `sparse_mat`; only used to
        report MAPE/RMSE on the held-out entries.
    sparse_mat : ndarray, shape (N, T)
        Partially observed matrix. If it contains any NaN, the NaN entries are
        the missing ones; otherwise the zero entries are. The caller's array
        is never modified.
    R : int
        Number of rows of the factors ``W`` (R x N) and ``X`` (R x T).
    rho : float
        Weight of the Frobenius-norm penalty on both factors.
    alpha : float
        Gradient step size.
    maxiter : int, optional
        Number of iterations.
    show_iter : int, optional
        Progress is printed every `show_iter` iterations; a falsy value
        (``0`` or ``None``) disables printing.

    Returns
    -------
    mat_hat : ndarray, shape (N, T)
        The reconstruction ``W.T @ X`` after the last iteration (the random
        initial product when ``maxiter`` is 0).
    obj : ndarray, shape (maxiter,)
        Objective value recorded after each iteration.
    """
    N, T = sparse_mat.shape
    nan_mask = np.isnan(sparse_mat)
    if nan_mask.any():
        ind = ~nan_mask
        pos_test = np.where((dense_mat > 0) & nan_mask)
        # Zero-fill the missing entries on a copy so that the caller's array
        # keeps its NaN markers.
        sparse_mat = np.where(nan_mask, 0, sparse_mat)
    else:
        ind = sparse_mat != 0
        pos_test = np.where((dense_mat != 0) & (sparse_mat == 0))
    W = 0.01 * np.random.randn(R, N)
    X = 0.01 * np.random.randn(R, T)
    obj = np.zeros(maxiter)
    # Defined up front so that the function still returns a matrix when
    # maxiter == 0; inside the loop it always equals the current W.T @ X.
    mat_hat = W.T @ X
    for it in range(maxiter):
        res_old = sparse_mat - mat_hat
        grad_w = - X @ (res_old * ind).T + rho * W
        W = W - alpha * grad_w
        # The X step uses the residual of the already updated W.
        res_new = sparse_mat - W.T @ X
        grad_x = - W @ (res_new * ind) + rho * X
        X = X - alpha * grad_x
        mat_hat = W.T @ X
        obj[it] = (np.linalg.norm((sparse_mat - mat_hat) * ind, 'fro') ** 2 / 2
                   + rho * np.linalg.norm(W, 'fro') ** 2 / 2
                   + rho * np.linalg.norm(X, 'fro') ** 2 / 2)
        if show_iter and (it + 1) % show_iter == 0:
            print('Iter: {}'.format(it + 1))
            print('Loss function: {:.6}'.format(obj[it]))
            print('MAPE: {:.6}'.format(compute_mape(dense_mat[pos_test],
                                                    mat_hat[pos_test])))
            print('RMSE: {:.6}'.format(compute_rmse(dense_mat[pos_test],
                                                    mat_hat[pos_test])))
            print()
    return mat_hat, obj

In [None]:
import numpy as np
np.random.seed(1)

import matplotlib.pyplot as plt
import seaborn as sns
import imageio as io
# Default font size for every figure drawn below.
plt.rcParams['font.size'] = 12

# Load the reference matrix and the partially observed matrix from .npy files
# (the second file name suggests 80% of the entries are missing).
dense_mat = np.load('../datasets/NGSIM-data-set/NGSIM_full.npy')
sparse_mat = np.load('../datasets/NGSIM-data-set/NGSIM_80missing.npy')

def plot_speed_field(data, filename):
    """Draw a matrix as a speed-field heat map, save it, and display it.

    Parameters
    ----------
    data : 2-D array-like
        Matrix handed to ``plt.matshow``; the colour scale is fixed to the
        range [0, 25] with the reversed 'jet' colour map.
    filename : str
        Path passed to ``Figure.savefig`` (300 dpi, tight bounding box).
    """
    fig = plt.figure(figsize = (2.5 * 2.5, 2.5))
    # Draw into the figure created above. A hard-coded figure number only
    # refers to `fig` when no other figure is open; otherwise the image lands
    # in a different figure and the saved file is empty.
    plt.matshow(data, cmap = 'jet_r', origin = 'lower',
                vmin = 0, vmax = 25, fignum = fig.number)
    plt.gca().xaxis.set_ticks_position('bottom')
    # Tick positions are array indices; the labels are fixed strings for them.
    plt.xticks([0, 100, 200, 300, 400, 500], [0, 500, 1000, 1500, 2000, 2500])
    plt.yticks([0, 100, 200], [0, 300, 600])
    plt.xlabel('Time (s)')
    plt.ylabel('Location (m)')
    cbar = plt.colorbar(fraction = 0.015)
    cbar.ax.set_ylabel('Speed (mph)')
    # Save before showing so the file does not depend on what the backend
    # does with the figure once it has been displayed.
    fig.savefig(filename, bbox_inches = 'tight', dpi = 300)
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

# Plot the reference matrix and the partially observed matrix.
plot_speed_field(dense_mat, 'speed_field_fully_data.png')
plot_speed_field(sparse_mat, 'speed_field_80_missing_data.png')

# Time one gradient-descent factorization run.
import time
start = time.time()
# R: number of rows of the factors; rho: penalty weight; alpha: step size.
R = 10
rho = 1e+1
alpha = 1e-4
maxiter = 1000
# obj_gd keeps the per-iteration objective values for the comparison plot.
mat_hat, obj_gd = MF_gd(dense_mat, sparse_mat, R, rho, alpha, maxiter)
end = time.time()
# '%d' truncates the elapsed time to whole seconds.
print('Running time: %d seconds.'%(end - start))
plot_speed_field(mat_hat, 'speed_field_MF_gd_rec.png')

### Matrix Factorization with Steepest Gradient Descent (SGD)

In [None]:
import numpy as np
np.random.seed(1)

def compute_mape(var, var_hat):
    """Return the mean absolute percentage error of `var_hat` relative to `var`.

    The relative errors ``|var - var_hat| / var`` are summed over every entry
    and the total is divided by the length of the first axis of `var`.
    """
    relative_err = np.abs(var - var_hat) / var
    total_err = np.sum(relative_err)
    return total_err / var.shape[0]

def compute_rmse(var, var_hat):
    """Return the root mean squared error between `var` and `var_hat`.

    The squared differences are summed over every entry, divided by the length
    of the first axis of `var`, and the square root of that ratio is returned.
    """
    squared_err = (var - var_hat) ** 2
    mean_squared_err = np.sum(squared_err) / var.shape[0]
    return np.sqrt(mean_squared_err)

def MF_sgd(dense_mat, sparse_mat, R, rho, maxiter = 100, show_iter = 10):
    """Factorize a partially observed matrix with steepest gradient descent.

    The objective is the same as for plain gradient descent,

        0.5 * ||P(Y - W.T X)||_F^2 + 0.5 * rho * (||W||_F^2 + ||X||_F^2),

    where ``P`` keeps the observed entries only. Instead of a fixed step size,
    each update of ``W`` (and then of ``X``) uses the step that minimizes the
    objective exactly along the negative gradient; the objective is quadratic
    in each factor, so that step has a closed form.

    Parameters
    ----------
    dense_mat : ndarray
        Reference values with the same shape as `sparse_mat`; only used to
        report MAPE/RMSE on the held-out entries.
    sparse_mat : ndarray, shape (N, T)
        Partially observed matrix. If it contains any NaN, the NaN entries are
        the missing ones; otherwise the zero entries are. The caller's array
        is never modified.
    R : int
        Number of rows of the factors ``W`` (R x N) and ``X`` (R x T).
    rho : float
        Weight of the Frobenius-norm penalty on both factors.
    maxiter : int, optional
        Number of iterations.
    show_iter : int, optional
        Progress is printed every `show_iter` iterations; a falsy value
        (``0`` or ``None``) disables printing.

    Returns
    -------
    mat_hat : ndarray, shape (N, T)
        The reconstruction ``W.T @ X`` after the last iteration (the random
        initial product when ``maxiter`` is 0).
    obj : ndarray, shape (maxiter,)
        Objective value recorded after each iteration.
    """
    N, T = sparse_mat.shape
    nan_mask = np.isnan(sparse_mat)
    if nan_mask.any():
        ind = ~nan_mask
        pos_test = np.where((dense_mat > 0) & nan_mask)
        # Zero-fill the missing entries on a copy so that the caller's array
        # keeps its NaN markers.
        sparse_mat = np.where(nan_mask, 0, sparse_mat)
    else:
        ind = sparse_mat != 0
        pos_test = np.where((dense_mat != 0) & (sparse_mat == 0))
    W = 0.01 * np.random.randn(R, N)
    X = 0.01 * np.random.randn(R, T)
    obj = np.zeros(maxiter)
    # Defined up front so that the function still returns a matrix when
    # maxiter == 0; inside the loop it always equals the current W.T @ X.
    mat_hat = W.T @ X
    for it in range(maxiter):
        res_old = sparse_mat - mat_hat
        grad_w = - X @ (res_old * ind).T + rho * W
        # Exact line search along -grad_w: alpha = a2 / a1.
        a1 = (np.linalg.norm((grad_w.T @ X) * ind, 'fro') ** 2
              + rho * np.linalg.norm(grad_w, 'fro') ** 2)
        a2 = - np.sum(res_old * (grad_w.T @ X) * ind) + rho * np.sum(W * grad_w)
        # A vanishing denominator means there is no descent direction left;
        # take no step instead of dividing 0 by 0 and filling W with NaN.
        alpha = a2 / a1 if a1 > 0 else 0.0
        W = W - alpha * grad_w
        # The X step uses the residual of the already updated W.
        res_new = sparse_mat - W.T @ X
        grad_x = - W @ (res_new * ind) + rho * X
        b1 = (np.linalg.norm((W.T @ grad_x) * ind, 'fro') ** 2
              + rho * np.linalg.norm(grad_x, 'fro') ** 2)
        b2 = - np.sum(res_new * (W.T @ grad_x) * ind) + rho * np.sum(X * grad_x)
        beta = b2 / b1 if b1 > 0 else 0.0
        X = X - beta * grad_x
        mat_hat = W.T @ X
        obj[it] = (np.linalg.norm((sparse_mat - mat_hat) * ind, 'fro') ** 2 / 2
                   + rho * np.linalg.norm(W, 'fro') ** 2 / 2
                   + rho * np.linalg.norm(X, 'fro') ** 2 / 2)
        if show_iter and (it + 1) % show_iter == 0:
            print('Iter: {}'.format(it + 1))
            print('Loss function: {:.6}'.format(obj[it]))
            print('MAPE: {:.6}'.format(compute_mape(dense_mat[pos_test],
                                                    mat_hat[pos_test])))
            print('RMSE: {:.6}'.format(compute_rmse(dense_mat[pos_test],
                                                    mat_hat[pos_test])))
            print()
    return mat_hat, obj

In [None]:
import numpy as np
np.random.seed(1)

import matplotlib.pyplot as plt
import seaborn as sns
import imageio as io
# Default font size for every figure drawn below.
plt.rcParams['font.size'] = 12

# Load the reference matrix and the partially observed matrix from .npy files
# (the second file name suggests 80% of the entries are missing).
dense_mat = np.load('../datasets/NGSIM-data-set/NGSIM_full.npy')
sparse_mat = np.load('../datasets/NGSIM-data-set/NGSIM_80missing.npy')

def plot_speed_field(data, filename):
    """Draw a matrix as a speed-field heat map, save it, and display it.

    Parameters
    ----------
    data : 2-D array-like
        Matrix handed to ``plt.matshow``; the colour scale is fixed to the
        range [0, 25] with the reversed 'jet' colour map.
    filename : str
        Path passed to ``Figure.savefig`` (300 dpi, tight bounding box).
    """
    fig = plt.figure(figsize = (2.5 * 2.5, 2.5))
    # Draw into the figure created above. A hard-coded figure number only
    # refers to `fig` when no other figure is open; otherwise the image lands
    # in a different figure and the saved file is empty.
    plt.matshow(data, cmap = 'jet_r', origin = 'lower',
                vmin = 0, vmax = 25, fignum = fig.number)
    plt.gca().xaxis.set_ticks_position('bottom')
    # Tick positions are array indices; the labels are fixed strings for them.
    plt.xticks([0, 100, 200, 300, 400, 500], [0, 500, 1000, 1500, 2000, 2500])
    plt.yticks([0, 100, 200], [0, 300, 600])
    plt.xlabel('Time (s)')
    plt.ylabel('Location (m)')
    cbar = plt.colorbar(fraction = 0.015)
    cbar.ax.set_ylabel('Speed (mph)')
    # Save before showing so the file does not depend on what the backend
    # does with the figure once it has been displayed.
    fig.savefig(filename, bbox_inches = 'tight', dpi = 300)
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

# Plot the reference matrix and the partially observed matrix.
plot_speed_field(dense_mat, 'speed_field_fully_data.png')
plot_speed_field(sparse_mat, 'speed_field_80_missing_data.png')

# Time one steepest-gradient-descent factorization run.
import time
start = time.time()
# R: number of rows of the factors; rho: penalty weight. No step size is
# needed because MF_sgd computes it at every update.
R = 10
rho = 1e+1
maxiter = 1000
# obj_sgd keeps the per-iteration objective values for the comparison plot.
mat_hat, obj_sgd = MF_sgd(dense_mat, sparse_mat, R, rho, maxiter)
end = time.time()
# '%d' truncates the elapsed time to whole seconds.
print('Running time: %d seconds.'%(end - start))
plot_speed_field(mat_hat, 'speed_field_MF_sgd_rec.png')

### Matrix Factorization with Alternating Least Squares (ALS)

In [None]:
import numpy as np
# Seed NumPy's global generator so the random initial factors are
# reproducible, as in the other cells.
np.random.seed(1)

def compute_mape(var, var_hat):
    """Return the mean absolute percentage error of `var_hat` relative to `var`.

    The relative errors ``|var - var_hat| / var`` are summed over every entry
    and the total is divided by the length of the first axis of `var`.
    """
    relative_err = np.abs(var - var_hat) / var
    total_err = np.sum(relative_err)
    return total_err / var.shape[0]

def compute_rmse(var, var_hat):
    """Return the root mean squared error between `var` and `var_hat`.

    The squared differences are summed over every entry, divided by the length
    of the first axis of `var`, and the square root of that ratio is returned.
    """
    squared_err = (var - var_hat) ** 2
    mean_squared_err = np.sum(squared_err) / var.shape[0]
    return np.sqrt(mean_squared_err)

def MF_als(dense_mat, sparse_mat, R, rho, maxiter = 100, show_iter = 10):
    """Factorize a partially observed matrix with alternating least squares.

    The objective is

        0.5 * ||P(Y - W.T X)||_F^2 + 0.5 * rho * (||W||_F^2 + ||X||_F^2),

    where ``P`` keeps the observed entries only. With ``X`` fixed, every
    column of ``W`` is the solution of a small ridge-regression system over
    the observed entries of the matching row of `sparse_mat`; the columns of
    ``X`` are then updated the same way with ``W`` fixed.

    Parameters
    ----------
    dense_mat : ndarray
        Reference values with the same shape as `sparse_mat`; only used to
        report MAPE/RMSE on the held-out entries.
    sparse_mat : ndarray, shape (N, T)
        Partially observed matrix. If it contains any NaN, the NaN entries are
        the missing ones; otherwise the zero entries are. The caller's array
        is never modified.
    R : int
        Number of rows of the factors ``W`` (R x N) and ``X`` (R x T).
    rho : float
        Weight of the Frobenius-norm penalty on both factors.
    maxiter : int, optional
        Number of iterations.
    show_iter : int, optional
        Progress is printed every `show_iter` iterations; a falsy value
        (``0`` or ``None``) disables printing.

    Returns
    -------
    mat_hat : ndarray, shape (N, T)
        The reconstruction ``W.T @ X`` after the last iteration (the random
        initial product when ``maxiter`` is 0).
    obj : ndarray, shape (maxiter,)
        Objective value recorded after each iteration.
    """
    N, T = sparse_mat.shape
    nan_mask = np.isnan(sparse_mat)
    if nan_mask.any():
        ind = ~nan_mask
        pos_test = np.where((dense_mat > 0) & nan_mask)
        # Zero-fill the missing entries on a copy so that the caller's array
        # keeps its NaN markers.
        sparse_mat = np.where(nan_mask, 0, sparse_mat)
    else:
        ind = sparse_mat != 0
        pos_test = np.where((dense_mat != 0) & (sparse_mat == 0))
    W = 0.01 * np.random.randn(R, N)
    X = 0.01 * np.random.randn(R, T)
    obj = np.zeros(maxiter)
    # Defined up front so that the function still returns a matrix when
    # maxiter == 0.
    mat_hat = W.T @ X
    # The ridge term is the same in every linear system.
    ridge = rho * np.eye(R)
    for it in range(maxiter):
        for i in range(N):
            # Select observations through `ind`, the same mask the objective
            # uses, so observed zeros count when missing data is NaN-marked.
            observed = ind[i, :]
            Xt = X[:, observed]
            W[:, i] = np.linalg.solve(Xt @ Xt.T + ridge,
                                      Xt @ sparse_mat[i, observed])
        for t in range(T):
            observed = ind[:, t]
            Wi = W[:, observed]
            X[:, t] = np.linalg.solve(Wi @ Wi.T + ridge,
                                      Wi @ sparse_mat[observed, t])
        mat_hat = W.T @ X
        obj[it] = (np.linalg.norm((sparse_mat - mat_hat) * ind, 'fro') ** 2 / 2
                   + rho * np.linalg.norm(W, 'fro') ** 2 / 2
                   + rho * np.linalg.norm(X, 'fro') ** 2 / 2)
        if show_iter and (it + 1) % show_iter == 0:
            print('Iter: {}'.format(it + 1))
            print('Loss function: {:.6}'.format(obj[it]))
            print('MAPE: {:.6}'.format(compute_mape(dense_mat[pos_test],
                                                    mat_hat[pos_test])))
            print('RMSE: {:.6}'.format(compute_rmse(dense_mat[pos_test],
                                                    mat_hat[pos_test])))
            print()
    return mat_hat, obj

In [None]:
import numpy as np
np.random.seed(1)

import matplotlib.pyplot as plt
import seaborn as sns
import imageio as io
# Default font size for every figure drawn below.
plt.rcParams['font.size'] = 12

# Load the reference matrix and the partially observed matrix from .npy files
# (the second file name suggests 80% of the entries are missing).
dense_mat = np.load('../datasets/NGSIM-data-set/NGSIM_full.npy')
sparse_mat = np.load('../datasets/NGSIM-data-set/NGSIM_80missing.npy')

def plot_speed_field(data, filename):
    """Draw a matrix as a speed-field heat map, save it, and display it.

    Parameters
    ----------
    data : 2-D array-like
        Matrix handed to ``plt.matshow``; the colour scale is fixed to the
        range [0, 25] with the reversed 'jet' colour map.
    filename : str
        Path passed to ``Figure.savefig`` (300 dpi, tight bounding box).
    """
    fig = plt.figure(figsize = (2.5 * 2.5, 2.5))
    # Draw into the figure created above. A hard-coded figure number only
    # refers to `fig` when no other figure is open; otherwise the image lands
    # in a different figure and the saved file is empty.
    plt.matshow(data, cmap = 'jet_r', origin = 'lower',
                vmin = 0, vmax = 25, fignum = fig.number)
    plt.gca().xaxis.set_ticks_position('bottom')
    # Tick positions are array indices; the labels are fixed strings for them.
    plt.xticks([0, 100, 200, 300, 400, 500], [0, 500, 1000, 1500, 2000, 2500])
    plt.yticks([0, 100, 200], [0, 300, 600])
    plt.xlabel('Time (s)')
    plt.ylabel('Location (m)')
    cbar = plt.colorbar(fraction = 0.015)
    cbar.ax.set_ylabel('Speed (mph)')
    # Save before showing so the file does not depend on what the backend
    # does with the figure once it has been displayed.
    fig.savefig(filename, bbox_inches = 'tight', dpi = 300)
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

# Plot the reference matrix and the partially observed matrix.
plot_speed_field(dense_mat, 'speed_field_fully_data.png')
plot_speed_field(sparse_mat, 'speed_field_80_missing_data.png')

# Time one alternating-least-squares factorization run.
import time
start = time.time()
# R: number of rows of the factors; rho: penalty weight.
R = 10
rho = 1e+1
# Fewer iterations than the two gradient-based runs (200 instead of 1000).
maxiter = 200
# obj_als keeps the per-iteration objective values for the comparison plot.
mat_hat, obj_als = MF_als(dense_mat, sparse_mat, R, rho, maxiter)
end = time.time()
# '%d' truncates the elapsed time to whole seconds.
print('Running time: %d seconds.'%(end - start))
plot_speed_field(mat_hat, 'speed_field_MF_als_rec.png')

### Objective Function $f$ vs. Iteration

In [None]:
import matplotlib.pyplot as plt
# Render math text with the Computer Modern font set.
plt.rcParams['mathtext.fontset'] = 'cm'

# Only the first `show_it` iterations are shown on the x-axis.
show_it = 200
fig = plt.figure(figsize = (4, 3.5))
# Objective values are drawn on a logarithmic y-axis.
plt.yscale("log")
# obj_gd, obj_sgd and obj_als are the per-iteration objective values returned
# by the MF_gd, MF_sgd and MF_als runs earlier in the file.
plt.plot(obj_gd, 'blue', linewidth = 2.5)
plt.plot(obj_sgd, 'green', linewidth = 2.5)
plt.plot(obj_als, 'red', linewidth = 2.5)
plt.xlim([0, show_it])
plt.xlabel('Iteration')
plt.ylabel(r'Objective function $f$')
# Legend entries follow the order of the plot calls above.
plt.legend(['GD', 'SGD', 'ALS'])
# Save before plt.show() so the figure is written while it is still current.
plt.savefig("MF_convergence_over_gd_and_als_within_{}iter.pdf".format(show_it), 
            format = "pdf", bbox_inches = "tight")
plt.show()

### Seattle Freeway Traffic Speed Imputation

#### Matrix Factorization with GD

In [None]:
import numpy as np
# Fix the seed so the random missing pattern is reproducible.
np.random.seed(1000)

# Load the three-axis array stored under the key 'arr_0'.
dense_tensor = np.load('../datasets/Seattle-data-set/tensor.npz')['arr_0']
dim = dense_tensor.shape
missing_rate = 0.6 # Random missing (RM)
# Binary mask: with u drawn uniformly from [0, 1), rounding
# u + 0.5 - missing_rate gives 1 when u exceeds missing_rate and 0 otherwise,
# so masked entries of the tensor become 0.
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1], dim[2]) + 0.5 - missing_rate)
# Merge the last two axes so both arrays become matrices with dim[0] rows.
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
# The tensors are no longer needed once the matrices exist.
del dense_tensor, sparse_tensor

# Time one gradient-descent factorization run.
import time
start = time.time()
# R: number of rows of the factors; rho: penalty weight; alpha: step size.
R = 10
rho = 1e+2
alpha = 2e-5
maxiter = 1000
# obj_gd keeps the per-iteration objective values for the comparison plot.
mat_hat, obj_gd = MF_gd(dense_mat, sparse_mat, R, rho, alpha, maxiter)
end = time.time()
# '%d' truncates the elapsed time to whole seconds.
print('Running time: %d seconds.'%(end - start))

#### Matrix Factorization with SGD

In [None]:
import numpy as np
# Fix the seed so the random missing pattern is reproducible.
np.random.seed(1000)

# Load the three-axis array stored under the key 'arr_0'.
dense_tensor = np.load('../datasets/Seattle-data-set/tensor.npz')['arr_0']
dim = dense_tensor.shape
missing_rate = 0.6 # Random missing (RM)
# Binary mask: with u drawn uniformly from [0, 1), rounding
# u + 0.5 - missing_rate gives 1 when u exceeds missing_rate and 0 otherwise,
# so masked entries of the tensor become 0.
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1], dim[2]) + 0.5 - missing_rate)
# Merge the last two axes so both arrays become matrices with dim[0] rows.
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
# The tensors are no longer needed once the matrices exist.
del dense_tensor, sparse_tensor

# Time one steepest-gradient-descent factorization run.
import time
start = time.time()
# R: number of rows of the factors; rho: penalty weight. No step size is
# needed because MF_sgd computes it at every update.
R = 10
rho = 1e+2
maxiter = 1000
# obj_sgd keeps the per-iteration objective values for the comparison plot.
mat_hat, obj_sgd = MF_sgd(dense_mat, sparse_mat, R, rho, maxiter)
end = time.time()
# '%d' truncates the elapsed time to whole seconds.
print('Running time: %d seconds.'%(end - start))

#### Matrix Factorization with ALS

In [None]:
import numpy as np
# Fix the seed so the random missing pattern is reproducible.
np.random.seed(1000)

# Load the three-axis array stored under the key 'arr_0'.
dense_tensor = np.load('../datasets/Seattle-data-set/tensor.npz')['arr_0']
dim = dense_tensor.shape
missing_rate = 0.6 # Random missing (RM)
# Binary mask: with u drawn uniformly from [0, 1), rounding
# u + 0.5 - missing_rate gives 1 when u exceeds missing_rate and 0 otherwise,
# so masked entries of the tensor become 0.
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1], dim[2]) + 0.5 - missing_rate)
# Merge the last two axes so both arrays become matrices with dim[0] rows.
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
# The tensors are no longer needed once the matrices exist.
del dense_tensor, sparse_tensor

# Time one alternating-least-squares factorization run.
import time
start = time.time()
# R: number of rows of the factors; rho: penalty weight.
R = 10
rho = 1e+2
# Fewer iterations than the two gradient-based runs (200 instead of 1000).
maxiter = 200
# obj_als keeps the per-iteration objective values for the comparison plot.
mat_hat, obj_als = MF_als(dense_mat, sparse_mat, R, rho, maxiter)
end = time.time()
# '%d' truncates the elapsed time to whole seconds.
print('Running time: %d seconds.'%(end - start))

#### Objective Function vs. Iteration

In [None]:
import matplotlib.pyplot as plt
# Render math text with the Computer Modern font set.
plt.rcParams['mathtext.fontset'] = 'cm'

# Only the first `show_it` iterations are shown on the x-axis.
show_it = 200
fig = plt.figure(figsize = (4, 3.5))
# Objective values are drawn on a logarithmic y-axis.
plt.yscale("log")
# obj_gd, obj_sgd and obj_als are the per-iteration objective values returned
# by the most recent MF_gd, MF_sgd and MF_als runs.
plt.plot(obj_gd, 'blue', linewidth = 2.5)
plt.plot(obj_sgd, 'green', linewidth = 2.5)
plt.plot(obj_als, 'red', linewidth = 2.5)
plt.xlim([0, show_it])
plt.xlabel('Iteration')
plt.ylabel(r'Objective function $f$')
# Legend entries follow the order of the plot calls above.
plt.legend(['GD', 'SGD', 'ALS'])
# Save before plt.show() so the figure is written while it is still current.
plt.savefig("MF_convergence_over_gd_and_als_within_{}iter_Seattle.pdf".format(show_it), 
            format = "pdf", bbox_inches = "tight")
plt.show()

### License

<div class="alert alert-block alert-danger">
<b>This work is released under the MIT license.</b>
</div>