# About This Notebook

This notebook shows how to implement **Low-Rank Tensor Completion with Truncated Nuclear Norm minimization (LRTC-TNN)** on some real-world data sets. For an in-depth discussion of LRTC-TNN, please see our article [1].

<div class="alert alert-block alert-info">
<font color="black">
<b>[1]</b> Xinyu Chen, Jinming Yang, Lijun Sun (2020). <b>A Nonconvex Low-Rank Tensor Completion Model for Spatiotemporal Traffic Data Imputation</b>. arXiv:2003.10271. <a href="https://arxiv.org/abs/2003.10271" title="PDF"><b>[PDF]</b></a> 
</font>
</div>


## Quick Run

This notebook is publicly available as part of our data imputation project. Please check out [**transdim - GitHub**](https://github.com/xinychen/transdim).


## Low-Rank Tensor Completion

We start by importing the necessary dependencies.

In [1]:
import numpy as np
from numpy.linalg import inv as inv

### Tensor Unfolding (`ten2mat`) and Matrix Folding (`mat2ten`)

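`ten2mat` unfolds a tensor into a matrix along a given mode, and `mat2ten` folds such a matrix back into a tensor of a given size. Both rely on NumPy's `reshape` with Fortran (column-major) ordering, as discussed in *Using numpy reshape to perform 3rd rank tensor unfold operation*. [[**link**](https://stackoverflow.com/questions/49970141/using-numpy-reshape-to-perform-3rd-rank-tensor-unfold-operation)]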

In [2]:
def ten2mat(tensor, mode):
    """Unfold `tensor` along axis `mode` into a matrix.

    The `mode` axis becomes the rows of the result; all remaining axes are
    flattened into the columns using Fortran (column-major) ordering.
    """
    n_rows = tensor.shape[mode]
    # Bring the unfolding axis to the front so it maps onto the matrix rows.
    mode_first = np.moveaxis(tensor, mode, 0)
    return mode_first.reshape((n_rows, -1), order = 'F')

In [3]:
def mat2ten(mat, tensor_size, mode):
    """Fold the mode-`mode` unfolding `mat` back into a tensor.

    Parameters
    ----------
    mat : array
        Matrix whose rows correspond to axis `mode` of the target tensor and
        whose columns hold the remaining axes flattened in Fortran order.
    tensor_size : sequence of int or 1-D integer array
        Shape of the tensor to rebuild (list, tuple and ndarray all work).
    mode : int
        Non-negative index of the axis that `mat`'s rows map to.

    Returns
    -------
    ndarray of shape `tensor_size`.
    """
    # Accept plain Python sequences as well as arrays for the target shape.
    tensor_size = np.asarray(tensor_size)
    # Axis order of the intermediate tensor: `mode` first, the rest in order.
    index = [mode] + [i for i in range(tensor_size.shape[0]) if i != mode]
    folded = np.reshape(mat, list(tensor_size[index]), order = 'F')
    # Move the leading axis back to its original position.
    return np.moveaxis(folded, 0, mode)

### Singular Value Thresholding (SVT) for TNN

In [4]:
def svt_tnn(mat, tau, theta):
    """Singular value thresholding for the truncated nuclear norm.

    Singular values not exceeding `tau` are discarded. Of the remaining
    ones, the first `theta` are kept unchanged and the rest are reduced by
    `tau`. Relies on `np.linalg.svd` returning singular values in
    descending order.
    """
    n_rows, n_cols = mat.shape

    # Very tall matrix: solve the transposed (wide) problem and transpose back.
    if n_rows > 2 * n_cols:
        return svt_tnn(mat.T, tau, theta).T

    if 2 * n_rows < n_cols:
        # Very wide matrix: decompose the small Gram matrix mat @ mat.T, whose
        # singular values are the squares of those of `mat`.
        left, sing, _ = np.linalg.svd(mat @ mat.T, full_matrices = False)
        sing = np.sqrt(sing)
        rank = np.sum(sing > tau)
        # Per-component scaling: 1 for the leading `theta` components,
        # (s - tau) / s for the remaining retained ones.
        scale = np.zeros(rank)
        scale[:theta] = 1
        scale[theta:rank] = (sing[theta:rank] - tau) / sing[theta:rank]
        basis = left[:, :rank]
        return (basis @ np.diag(scale)) @ (basis.T @ mat)

    # General case: threshold the singular values of `mat` directly.
    left, sing, right = np.linalg.svd(mat, full_matrices = False)
    rank = np.sum(sing > tau)
    kept = sing[:rank].copy()
    kept[theta:rank] = sing[theta:rank] - tau
    return left[:, :rank] @ np.diag(kept) @ right[:rank, :]

<div class="alert alert-block alert-warning">
<ul>
<li><b><code>compute_mape</code>:</b> <font color="black">Compute the value of Mean Absolute Percentage Error (MAPE).</font></li>
<li><b><code>compute_rmse</code>:</b> <font color="black">Compute the value of Root Mean Square Error (RMSE).</font></li>
</ul>
</div>

> Note that $$\mathrm{MAPE}=\frac{1}{n} \sum_{i=1}^{n} \frac{\left|y_{i}-\hat{y}_{i}\right|}{y_{i}} \times 100, \quad\mathrm{RMSE}=\sqrt{\frac{1}{n} \sum_{i=1}^{n}\left(y_{i}-\hat{y}_{i}\right)^{2}},$$ where $n$ is the total number of estimated values, and $y_i$ and $\hat{y}_i$ are the actual value and its estimation, respectively. In this notebook, `compute_mape` returns the error as a fraction (i.e., without the $\times 100$ factor), so a printed MAPE of 0.07 corresponds to 7%.

In [5]:
def compute_rmse(var, var_hat):
    """Return the Root Mean Square Error between `var` and `var_hat`.

    The squared errors are averaged over every element of `var`, so the
    result is correct for inputs of any dimensionality (not only 1-D).
    """
    var = np.asarray(var)
    var_hat = np.asarray(var_hat)
    # Use the total element count; shape[0] is only right for 1-D input.
    return np.sqrt(np.sum((var - var_hat) ** 2) / var.size)

def compute_mape(var, var_hat):
    """Return the Mean Absolute Percentage Error as a fraction (not x100).

    Each absolute error is divided by the corresponding actual value in
    `var`, and the ratios are averaged over every element of `var`, so the
    result is correct for inputs of any dimensionality. Entries of `var`
    are expected to be non-zero.
    """
    var = np.asarray(var)
    var_hat = np.asarray(var_hat)
    # Use the total element count; shape[0] is only right for 1-D input.
    return np.sum(np.abs(var - var_hat) / var) / var.size

### Define LRTC-TNN Function with `Numpy`

In [6]:
def LRTC(dense_tensor, sparse_tensor, alpha, rho, theta, epsilon, maxiter):
    """Low-Rank Tensor Completion with Truncated Nuclear Norm, LRTC-TNN.

    Parameters
    ----------
    dense_tensor : ndarray
        Reference tensor, used only to score the imputation.
    sparse_tensor : ndarray
        Partially observed tensor of the same shape; entries equal to 0 are
        treated as missing.
    alpha : sequence of float
        One weight per tensor mode. Scales the threshold of each mode
        (`alpha[k] / rho`) and weights the per-mode estimates when they are
        combined into the returned tensor.
    rho : float
        Initial penalty value; multiplied by 1.05 at every iteration and
        capped at 1e5.
    theta : int
        Truncation parameter forwarded to `svt_tnn`.
    epsilon : float
        Stop once the relative change of the estimate falls below this value.
    maxiter : int
        Maximum number of iterations (at least one is always performed).

    Returns
    -------
    ndarray
        The completed tensor, with the same shape as `sparse_tensor`.

    MAPE/RMSE are printed on the entries that are non-zero in `dense_tensor`
    but zero in `sparse_tensor`.
    """

    dim = np.array(sparse_tensor.shape)
    n_modes = len(dim)
    pos_missing = np.where(sparse_tensor == 0)
    pos_test = np.where((dense_tensor != 0) & (sparse_tensor == 0))
    dense_test = dense_tensor[pos_test]
    del dense_tensor

    X = np.zeros(np.insert(dim, 0, n_modes)) # \boldsymbol{\mathcal{X}}
    T = np.zeros(np.insert(dim, 0, n_modes)) # \boldsymbol{\mathcal{T}}
    Z = sparse_tensor.copy()
    last_tensor = sparse_tensor.copy()
    snorm = np.sqrt(np.sum(sparse_tensor ** 2))
    it = 0
    while True:
        rho = min(rho * 1.05, 1e5)
        # Per-mode update: threshold the mode-k unfolding and fold it back.
        for k in range(n_modes):
            X[k] = mat2ten(svt_tnn(ten2mat(Z - T[k] / rho, k), alpha[k] / rho, theta), dim, k)
        # Only the missing entries of Z are refreshed; observed ones stay fixed.
        Z[pos_missing] = np.mean(X + T / rho, axis = 0)[pos_missing]
        # Z broadcasts against the leading (mode) axis of X.
        T = T + rho * (X - Z)
        # Weighted sum over the mode axis; the ellipsis keeps this valid for
        # tensors of any order, not just third-order ones.
        tensor_hat = np.einsum('k,k...->...', alpha, X)
        tol = np.sqrt(np.sum((tensor_hat - last_tensor) ** 2)) / snorm
        last_tensor = tensor_hat.copy()
        it += 1
        # `it` already counts completed iterations, so report it directly.
        if it % 200 == 0:
            print('Iter: {}'.format(it))
            print('Tolerance: {:.6}'.format(tol))
            print('MAPE: {:.6}'.format(compute_mape(dense_test, tensor_hat[pos_test])))
            print('RMSE: {:.6}'.format(compute_rmse(dense_test, tensor_hat[pos_test])))
            print()
        if (tol < epsilon) or (it >= maxiter):
            break

    print('Total iteration: {}'.format(it))
    print('Tolerance: {:.6}'.format(tol))
    print('Imputation MAPE: {:.6}'.format(compute_mape(dense_test, tensor_hat[pos_test])))
    print('Imputation RMSE: {:.6}'.format(compute_rmse(dense_test, tensor_hat[pos_test])))
    print()

    return tensor_hat

### Guangzhou urban traffic speed data set

In [7]:
import numpy as np
import time
import scipy.io

##### 30%, 70% and 90% RM #####

## Random Missing (RM)
# The data set does not change between runs, so it is loaded only once.
dense_tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/tensor.mat')['tensor'].transpose(0, 2, 1)
dim1, dim2, dim3 = dense_tensor.shape

# Hyper-parameters shared by every run in this cell.
alpha = np.ones(3) / 3
rho = 1e-4
epsilon = 1e-4
maxiter = 100

# (missing rate, truncation theta) for each experiment.
for r, theta in [(0.3, 30), (0.7, 25), (0.9, 15)]:
    print('Missing rate = {}'.format(r))
    missing_rate = r

    # Re-seed before every run so each mask is reproducible on its own.
    np.random.seed(1000)
    # Element-wise 0/1 mask: entries whose random draw is below the missing rate are zeroed.
    sparse_tensor = dense_tensor * np.round(np.random.rand(dim1, dim2, dim3) + 0.5 - missing_rate)

    start = time.time()
    LRTC(dense_tensor, sparse_tensor, alpha, rho, theta, epsilon, maxiter)
    end = time.time()
    print('Running time: %d seconds'%(end - start))
    print()

Missing rate = 0.3
Total iteration: 100
Tolerance: 0.000897737
Imputation MAPE: 0.0699423
Imputation RMSE: 3.00251

Running time: 44 seconds

Missing rate = 0.7
Total iteration: 100
Tolerance: 0.000195376
Imputation MAPE: 0.0838086
Imputation RMSE: 3.5926

Running time: 43 seconds

Missing rate = 0.9
Total iteration: 100
Tolerance: 0.000287083
Imputation MAPE: 0.0954888
Imputation RMSE: 4.053

Running time: 45 seconds



In [8]:
import numpy as np
import time
import scipy.io

##### 30% and 70% NM #####

## Non-random Missing (NM)
# The data set does not change between runs, so it is loaded only once.
dense_tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/tensor.mat')['tensor'].transpose(0, 2, 1)
dim1, dim2, dim3 = dense_tensor.shape

# Hyper-parameters shared by every run in this cell.
alpha = np.ones(3) / 3
rho = 1e-5
epsilon = 1e-4
maxiter = 100

# (missing rate, truncation theta) for each experiment.
for r, theta in [(0.3, 10), (0.7, 5)]:
    print('Missing rate = {}'.format(r))
    missing_rate = r

    # Re-seed before every run so each mask is reproducible on its own.
    np.random.seed(1000)
    # One 0/1 draw per (first axis, third axis) pair, shared along the second axis.
    sparse_tensor = dense_tensor * np.round(np.random.rand(dim1, dim3) + 0.5 - missing_rate)[:, None, :]

    start = time.time()
    LRTC(dense_tensor, sparse_tensor, alpha, rho, theta, epsilon, maxiter)
    end = time.time()
    print('Running time: %d seconds'%(end - start))
    print()

Missing rate = 0.3
Total iteration: 100
Tolerance: 0.014356
Imputation MAPE: 0.0960738
Imputation RMSE: 4.07352

Running time: 36 seconds

Missing rate = 0.7
Total iteration: 100
Tolerance: 0.00731673
Imputation MAPE: 0.103587
Imputation RMSE: 4.33705

Running time: 38 seconds



In [9]:
import numpy as np
import time
import scipy.io
np.random.seed(1000)

missing_rate = 0.3

## Block-out Missing (BM)
dense_tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/tensor.mat')['tensor'].transpose(0, 2, 1)
dim1, dim2, dim3 = dense_tensor.shape

dim_time = dim2 * dim3
block_window = 6
# Fail early with a clear message instead of a shape error further down.
assert dim_time % block_window == 0, 'block_window must divide dim2 * dim3'
# One random draw per block, repeated so that `block_window` consecutive
# columns of the mode-0 unfolding share the same draw.
vec = np.repeat(np.random.rand(dim_time // block_window), block_window)

# The same 0/1 column mask is applied to every row of the unfolding.
sparse_tensor = mat2ten(ten2mat(dense_tensor, 0) * np.round(vec + 0.5 - missing_rate)[None, :], np.array([dim1, dim2, dim3]), 0)

start = time.time()
alpha = np.ones(3) / 3
rho = 1e-5
theta = 15
epsilon = 1e-4
maxiter = 100
LRTC(dense_tensor, sparse_tensor, alpha, rho, theta, epsilon, maxiter)
end = time.time()
print('Running time: %d seconds'%(end - start))
print()

Total iteration: 100
Tolerance: 0.0216147
Imputation MAPE: 0.0944973
Imputation RMSE: 3.96892

Running time: 31 seconds



### Hangzhou metro passenger flow data set

In [13]:
import numpy as np
import time
import scipy.io

##### 30%, 70% and 90% RM #####

## Random Missing (RM)
# The data set does not change between runs, so it is loaded only once.
dense_tensor = scipy.io.loadmat('../datasets/Hangzhou-data-set/tensor.mat')['tensor'].transpose(0, 2, 1)
dim1, dim2, dim3 = dense_tensor.shape

# Hyper-parameters shared by every run in this cell.
alpha = np.ones(3) / 3
rho = 1e-5
theta = 10
epsilon = 1e-4
maxiter = 100

for r in [0.3, 0.7, 0.9]:
    print('Missing rate = {}'.format(r))
    missing_rate = r

    # Re-seed before every run so each mask is reproducible on its own.
    np.random.seed(1000)
    # Element-wise 0/1 mask: entries whose random draw is below the missing rate are zeroed.
    sparse_tensor = dense_tensor * np.round(np.random.rand(dim1, dim2, dim3) + 0.5 - missing_rate)

    start = time.time()
    LRTC(dense_tensor, sparse_tensor, alpha, rho, theta, epsilon, maxiter)
    end = time.time()
    print('Running time: %d seconds'%(end - start))
    print()

Missing rate = 0.3
Total iteration: 100
Tolerance: 0.00380941
Imputation MAPE: 0.188696
Imputation RMSE: 24.8978

Running time: 2 seconds

Missing rate = 0.7
Total iteration: 100
Tolerance: 0.002729
Imputation MAPE: 0.200735
Imputation RMSE: 28.1308

Running time: 3 seconds

Missing rate = 0.9
Total iteration: 100
Tolerance: 0.00318586
Imputation MAPE: 0.234631
Imputation RMSE: 35.8425

Running time: 4 seconds



In [14]:
import numpy as np
import time
import scipy.io

##### 30% and 70% NM #####

## Non-random Missing (NM)
# The data set does not change between runs, so it is loaded only once.
dense_tensor = scipy.io.loadmat('../datasets/Hangzhou-data-set/tensor.mat')['tensor'].transpose(0, 2, 1)
dim1, dim2, dim3 = dense_tensor.shape

# Hyper-parameters shared by every run in this cell.
alpha = np.ones(3) / 3
rho = 1e-5
theta = 5
epsilon = 1e-4
maxiter = 100

for r in [0.3, 0.7]:
    print('Missing rate = {}'.format(r))
    missing_rate = r

    # Re-seed before every run so each mask is reproducible on its own.
    np.random.seed(1000)
    # One 0/1 draw per (first axis, third axis) pair, shared along the second axis.
    sparse_tensor = dense_tensor * np.round(np.random.rand(dim1, dim3) + 0.5 - missing_rate)[:, None, :]

    start = time.time()
    LRTC(dense_tensor, sparse_tensor, alpha, rho, theta, epsilon, maxiter)
    end = time.time()
    print('Running time: %d seconds'%(end - start))
    print()

Missing rate = 0.3
Total iteration: 100
Tolerance: 0.00222106
Imputation MAPE: 0.199415
Imputation RMSE: 50.1233

Running time: 3 seconds

Missing rate = 0.7
Total iteration: 100
Tolerance: 0.000486843
Imputation MAPE: 0.238844
Imputation RMSE: 45.0579

Running time: 4 seconds



In [15]:
import numpy as np
import time
import scipy.io
np.random.seed(1000)

missing_rate = 0.3

## Block-out Missing (BM)
dense_tensor = scipy.io.loadmat('../datasets/Hangzhou-data-set/tensor.mat')['tensor'].transpose(0, 2, 1)
dim1, dim2, dim3 = dense_tensor.shape

dim_time = dim2 * dim3
block_window = 6
# Fail early with a clear message instead of a shape error further down.
assert dim_time % block_window == 0, 'block_window must divide dim2 * dim3'
# One random draw per block, repeated so that `block_window` consecutive
# columns of the mode-0 unfolding share the same draw.
vec = np.repeat(np.random.rand(dim_time // block_window), block_window)

# The same 0/1 column mask is applied to every row of the unfolding.
sparse_tensor = mat2ten(ten2mat(dense_tensor, 0) * np.round(vec + 0.5 - missing_rate)[None, :], np.array([dim1, dim2, dim3]), 0)

start = time.time()
alpha = np.ones(3) / 3
rho = 1e-5
theta = 10
epsilon = 1e-4
maxiter = 100
LRTC(dense_tensor, sparse_tensor, alpha, rho, theta, epsilon, maxiter)
end = time.time()
print('Running time: %d seconds'%(end - start))
print()

Total iteration: 100
Tolerance: 0.00383013
Imputation MAPE: 0.214031
Imputation RMSE: 27.8261

Running time: 3 seconds



### Seattle freeway traffic speed data set

In [17]:
import numpy as np
import pandas as pd
import time
import scipy.io

##### 30%, 70% and 90%, RM #####

## Random missing (RM)
# The data set does not change between runs, so it is read only once.
dense_mat = pd.read_csv('../datasets/Seattle-data-set/mat.csv', index_col = 0).values
dense_tensor = dense_mat.reshape([dense_mat.shape[0], 28, 288]).transpose(0, 2, 1)
dim1, dim2, dim3 = dense_tensor.shape

# Hyper-parameters shared by every run in this cell.
alpha = np.ones(3) / 3
theta = 25
epsilon = 1e-4
maxiter = 100

# (missing rate, initial rho) for each experiment.
for r, rho in [(0.3, 1e-5), (0.7, 1e-5), (0.9, 2e-5)]:
    print('Missing rate = {}'.format(r))
    missing_rate = r

    # Re-seed before every run so each mask is reproducible on its own.
    np.random.seed(1000)
    # Element-wise 0/1 mask: entries whose random draw is below the missing rate are zeroed.
    sparse_tensor = dense_tensor * np.round(np.random.rand(dim1, dim2, dim3) + 0.5 - missing_rate)

    start = time.time()
    LRTC(dense_tensor, sparse_tensor, alpha, rho, theta, epsilon, maxiter)
    end = time.time()
    print('Running time: %d seconds'%(end - start))
    print()

Missing rate = 0.3
Total iteration: 100
Tolerance: 0.00940301
Imputation MAPE: 0.0499265
Imputation RMSE: 3.2002

Running time: 57 seconds

Missing rate = 0.7
Total iteration: 100
Tolerance: 0.00735321
Imputation MAPE: 0.0610483
Imputation RMSE: 3.77317

Running time: 58 seconds

Missing rate = 0.9
Total iteration: 100
Tolerance: 0.000656863
Imputation MAPE: 0.0807783
Imputation RMSE: 4.80422

Running time: 68 seconds



In [18]:
import numpy as np
import pandas as pd
import time
import scipy.io

##### 30% and 70%, NM #####

## Non-random Missing (NM)
# The data set does not change between runs, so it is read only once.
dense_mat = pd.read_csv('../datasets/Seattle-data-set/mat.csv', index_col = 0).values
dense_tensor = dense_mat.reshape([dense_mat.shape[0], 28, 288]).transpose(0, 2, 1)
dim1, dim2, dim3 = dense_tensor.shape

# Hyper-parameters shared by every run in this cell.
alpha = np.ones(3) / 3
rho = 1e-5
epsilon = 1e-4
maxiter = 100

# (missing rate, truncation theta) for each experiment.
for r, theta in [(0.3, 25), (0.7, 10)]:
    print('Missing rate = {}'.format(r))
    missing_rate = r

    # Re-seed before every run so each mask is reproducible on its own.
    np.random.seed(1000)
    # One 0/1 draw per (first axis, third axis) pair, shared along the second axis.
    sparse_tensor = dense_tensor * np.round(np.random.rand(dim1, dim3) + 0.5 - missing_rate)[:, None, :]

    start = time.time()
    LRTC(dense_tensor, sparse_tensor, alpha, rho, theta, epsilon, maxiter)
    end = time.time()
    print('Running time: %d seconds'%(end - start))
    print()

Missing rate = 0.3
Total iteration: 100
Tolerance: 0.010881
Imputation MAPE: 0.0684811
Imputation RMSE: 4.20606

Running time: 57 seconds

Missing rate = 0.7
Total iteration: 100
Tolerance: 0.00694155
Imputation MAPE: 0.0922661
Imputation RMSE: 5.35165

Running time: 60 seconds



In [19]:
import numpy as np
import pandas as pd
import time
import scipy.io
np.random.seed(1000)

missing_rate = 0.3

## Block-out Missing (BM)
dense_mat = pd.read_csv('../datasets/Seattle-data-set/mat.csv', index_col = 0).values
dense_tensor = dense_mat.reshape([dense_mat.shape[0], 28, 288]).transpose(0, 2, 1)
dim1, dim2, dim3 = dense_tensor.shape
block_window = 12
# Fail early with a clear message instead of a shape error further down.
assert (dim2 * dim3) % block_window == 0, 'block_window must divide dim2 * dim3'
# One random draw per block, repeated so that `block_window` consecutive
# columns of `dense_mat` share the same draw.
vec = np.repeat(np.random.rand(dim2 * dim3 // block_window), block_window)
# The same 0/1 column mask is applied to every row before folding into a tensor.
sparse_tensor = mat2ten(dense_mat * np.round(vec + 0.5 - missing_rate)[None, :], np.array([dim1, dim2, dim3]), 0)

start = time.time()
alpha = np.ones(3) / 3
rho = 1e-5
theta = 10
epsilon = 1e-4
maxiter = 100
LRTC(dense_tensor, sparse_tensor, alpha, rho, theta, epsilon, maxiter)
end = time.time()
print('Running time: %d seconds'%(end - start))
print()

Total iteration: 100
Tolerance: 0.00655722
Imputation MAPE: 0.0952117
Imputation RMSE: 5.40729

Running time: 56 seconds



### Portland highway traffic volume data set

In [20]:
import numpy as np
import pandas as pd
import time
import scipy.io

# Random Missing (RM)
# The data set and its tensor form do not change between runs, so build them once.
dense_mat = np.load('../datasets/Portland-data-set/volume.npy')
dim1, dim2 = dense_mat.shape
dim = np.array([dim1, 96, 31])
dense_tensor = mat2ten(dense_mat, dim, 0)

# Hyper-parameters shared by every run in this cell.
alpha = np.ones(3) / 3
rho = 1e-5
theta = 20
epsilon = 1e-4
maxiter = 100

for r in [0.3, 0.7, 0.9]:
    print('Missing rate = {}'.format(r))
    missing_rate = r

    # Re-seed before every run so each mask is reproducible on its own.
    np.random.seed(1000)
    # Element-wise 0/1 mask applied to the matrix before folding it into a tensor.
    sparse_tensor = mat2ten(dense_mat * np.round(np.random.rand(dim1, dim2) + 0.5 - missing_rate), dim, 0)

    start = time.time()
    LRTC(dense_tensor, sparse_tensor, alpha, rho, theta, epsilon, maxiter)
    end = time.time()
    print('Running time: %d seconds'%(end - start))
    print()

Missing rate = 0.3
Total iteration: 100
Tolerance: 0.00096194
Imputation MAPE: 0.172744
Imputation RMSE: 16.0784

Running time: 193 seconds

Missing rate = 0.7
Total iteration: 100
Tolerance: 0.000247791
Imputation MAPE: 0.199891
Imputation RMSE: 18.7313

Running time: 179 seconds

Missing rate = 0.9
Total iteration: 100
Tolerance: 0.000345163
Imputation MAPE: 0.229019
Imputation RMSE: 22.6845

Running time: 180 seconds



In [21]:
import numpy as np
import pandas as pd
import time
import scipy.io

# Non-random Missing (NM)
# The data set and its tensor form do not change between runs, so build them once.
dense_mat = np.load('../datasets/Portland-data-set/volume.npy')
dim1, dim2 = dense_mat.shape
dim = np.array([dim1, 96, 31])
dense_tensor = mat2ten(dense_mat, dim, 0)

# Hyper-parameters shared by every run in this cell.
alpha = np.ones(3) / 3
rho = 1e-5
theta = 10
epsilon = 1e-4
maxiter = 100

for r in [0.3, 0.7]:
    print('Missing rate = {}'.format(r))
    missing_rate = r

    # Re-seed before every run so each mask is reproducible on its own.
    np.random.seed(1000)
    # One 0/1 draw per (first axis, third axis) pair, shared along the second axis.
    sparse_tensor = dense_tensor * np.round(np.random.rand(dim1, dim[2]) + 0.5 - missing_rate)[:, None, :]

    start = time.time()
    LRTC(dense_tensor, sparse_tensor, alpha, rho, theta, epsilon, maxiter)
    end = time.time()
    print('Running time: %d seconds'%(end - start))
    print()

Missing rate = 0.3
Total iteration: 100
Tolerance: 9.49636e-05
Imputation MAPE: 0.195946
Imputation RMSE: 18.9148

Running time: 207 seconds

Missing rate = 0.7
Total iteration: 100
Tolerance: 0.000167081
Imputation MAPE: 0.302636
Imputation RMSE: 60.8453

Running time: 212 seconds



In [22]:
import numpy as np
import time
import scipy.io
np.random.seed(1000)

missing_rate = 0.3

## Block-out Missing (BM)
dense_mat = np.load('../datasets/Portland-data-set/volume.npy')
dim1, dim2 = dense_mat.shape
dim = np.array([dim1, 96, 31])
dense_tensor = mat2ten(dense_mat, dim, 0)
block_window = 4
# Fail early with a clear message instead of a shape error further down.
assert dim2 % block_window == 0, 'block_window must divide dim2'
# One random draw per block, repeated so that `block_window` consecutive
# columns of `dense_mat` share the same draw.
vec = np.repeat(np.random.rand(dim2 // block_window), block_window)
# The same 0/1 column mask is applied to every row before folding into a tensor.
sparse_tensor = mat2ten(dense_mat * np.round(vec + 0.5 - missing_rate)[None, :], dim, 0)

start = time.time()
alpha = np.ones(3) / 3
rho = 1e-5
theta = 5
epsilon = 1e-4
maxiter = 100
LRTC(dense_tensor, sparse_tensor, alpha, rho, theta, epsilon, maxiter)
end = time.time()
print('Running time: %d seconds'%(end - start))
print()

Total iteration: 97
Tolerance: 9.60713e-05
Imputation MAPE: 0.317403
Imputation RMSE: 74.4241

Running time: 161 seconds



### License

<div class="alert alert-block alert-danger">
<b>This work is released under the MIT license.</b>
</div>