# Iterative Singular Value Decomposition (iSVD)


In [1]:
import numpy as np
from scipy.linalg import sqrtm

def compute_mape(var, var_hat):
    """Return the mean absolute percentage error of ``var_hat`` against ``var``.

    The element-wise relative errors ``|var - var_hat| / var`` are summed over
    all entries and divided by ``var.shape[0]`` (the length of the first axis
    of ``var``).
    """
    relative_error = np.abs(var - var_hat) / var
    n_samples = var.shape[0]
    return np.sum(relative_error) / n_samples

def compute_rmse(var, var_hat):
    """Return the root mean squared error of ``var_hat`` against ``var``.

    The squared residuals are summed over all entries, divided by
    ``var.shape[0]`` (the length of the first axis of ``var``), and the square
    root of that quotient is returned.
    """
    residual = var - var_hat
    mean_squared_error = np.sum(residual ** 2) / var.shape[0]
    return np.sqrt(mean_squared_error)

def isvd(dense_mat, sparse_mat, rank, maxiter = 100, show_iter = 10):
    """Impute the missing entries of ``sparse_mat`` by iterative truncated SVD.

    Entries of ``sparse_mat`` equal to 0 are treated as missing. They are first
    filled with a baseline estimate (global mean + row bias + column bias, all
    computed from the observed entries) and are then repeatedly overwritten by
    the matching entries of the rank-``rank`` truncated SVD of the current
    matrix. Observed (nonzero) entries are never modified.

    Parameters
    ----------
    dense_mat : ndarray of shape (N, T)
        Reference matrix, used only for the progress report. The test set is
        the set of positions that are nonzero here but zero in ``sparse_mat``.
    sparse_mat : ndarray of shape (N, T)
        Partially observed matrix. It is copied, not modified in place.
    rank : int
        Number of leading singular values/vectors kept in each reconstruction.
    maxiter : int, optional
        Number of SVD refinement iterations.
    show_iter : int, optional
        Print MAPE and RMSE on the test set every ``show_iter`` iterations.
        Pass 0 or None to disable the report.

    Returns
    -------
    ndarray of shape (N, T)
        Floating-point matrix equal to ``sparse_mat`` at observed positions and
        holding the imputed values at missing positions.

    Raises
    ------
    ValueError
        If ``sparse_mat`` contains no nonzero entry, so no mean can be formed.
    """
    N, T = sparse_mat.shape
    observed = sparse_mat != 0
    pos_miss = np.where(sparse_mat == 0)
    pos_test = np.where((dense_mat != 0) & (sparse_mat == 0))
    dense_test = dense_mat[pos_test]

    if not observed.any():
        raise ValueError('sparse_mat has no observed (nonzero) entries.')

    ## Initialization
    mu = np.mean(sparse_mat[observed])
    temp = sparse_mat - mu
    bias_row = np.zeros(N)
    bias_col = np.zeros(T)
    # A row/column without any observation keeps a bias of 0; taking the mean
    # of an empty selection would yield NaN and poison every later SVD.
    for n in range(N):
        row_obs = observed[n, :]
        if row_obs.any():
            bias_row[n] = np.mean(temp[n, :][row_obs])
    for t in range(T):
        col_obs = observed[:, t]
        if col_obs.any():
            bias_col[t] = np.mean(temp[:, t][col_obs])
    # Work on a float copy so imputed values are not truncated when the input
    # has an integer dtype, and so the caller's array is left untouched.
    mat = sparse_mat.astype(float)
    mat[pos_miss] = (mu + bias_row.reshape([N, 1]) + bias_col.reshape([1, T]))[pos_miss]

    ## Iterative SVD
    for it in range(maxiter):
        u, s, v = np.linalg.svd(mat, full_matrices = False)
        # Scaling the columns of u by s is u @ diag(s) without building diag(s).
        mat_hat = (u[:, : rank] * s[: rank]) @ v[: rank, :]
        # Only the missing positions are refreshed; observations stay fixed.
        mat[pos_miss] = mat_hat[pos_miss]
        if show_iter and (it + 1) % show_iter == 0:
            print('Iter: {}'.format(it + 1))
            print('MAPE: {:.6}'.format(compute_mape(dense_test, mat[pos_test])))
            print('RMSE: {:.6}'.format(compute_rmse(dense_test, mat[pos_test])))
            print()

    return mat

## Evaluation on Guangzhou Speed Data



In [2]:
import scipy.io

# Load the Guangzhou tensor and its companion random tensor, then build the
# partially observed matrix for the random-missing scenario.
missing_rate = 0.4
tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/tensor.mat')['tensor']
random_tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/random_tensor.mat')['random_tensor']

# Unfold the 3-way arrays into matrices by merging the last two axes.
dense_shape = (tensor.shape[0], tensor.shape[1] * tensor.shape[2])
random_shape = (random_tensor.shape[0], random_tensor.shape[1] * random_tensor.shape[2])
dense_mat = tensor.reshape(dense_shape)

## Random missing (RM)
# An entry is kept where its random value + 0.5 - missing_rate rounds to 1
# and zeroed where it rounds to 0.
binary_mat = np.round(random_tensor.reshape(random_shape) + 0.5 - missing_rate)
sparse_mat = dense_mat * binary_mat

In [3]:
import time

# Time one iSVD imputation run on the current dense_mat / sparse_mat.
# perf_counter() is a monotonic high-resolution clock, so the measured
# interval is not affected by system clock adjustments as time.time() can be.
start = time.perf_counter()
rank = 10
mat_hat = isvd(dense_mat, sparse_mat, rank)
end = time.perf_counter()
print('Running time: %d seconds'%(end - start))
print()

Iter: 10
MAPE: 0.104059
RMSE: 4.41633

Iter: 20
MAPE: 0.104076
RMSE: 4.43407

Iter: 30
MAPE: 0.104119
RMSE: 4.44041

Iter: 40
MAPE: 0.104144
RMSE: 4.44323

Iter: 50
MAPE: 0.104157
RMSE: 4.44455

Iter: 60
MAPE: 0.104164
RMSE: 4.44518

Iter: 70
MAPE: 0.104168
RMSE: 4.44549

Iter: 80
MAPE: 0.104171
RMSE: 4.44563

Iter: 90
MAPE: 0.104172
RMSE: 4.4457

Iter: 100
MAPE: 0.104174
RMSE: 4.44573

Running time: 12 seconds



In [4]:
import scipy.io

# Load the Guangzhou tensor and its companion random tensor, then build the
# partially observed matrix for the random-missing scenario.
missing_rate = 0.6
tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/tensor.mat')['tensor']
random_tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/random_tensor.mat')['random_tensor']

# Unfold the 3-way arrays into matrices by merging the last two axes.
dense_shape = (tensor.shape[0], tensor.shape[1] * tensor.shape[2])
random_shape = (random_tensor.shape[0], random_tensor.shape[1] * random_tensor.shape[2])
dense_mat = tensor.reshape(dense_shape)

## Random missing (RM)
# An entry is kept where its random value + 0.5 - missing_rate rounds to 1
# and zeroed where it rounds to 0.
binary_mat = np.round(random_tensor.reshape(random_shape) + 0.5 - missing_rate)
sparse_mat = dense_mat * binary_mat

In [5]:
import time

# Time one iSVD imputation run on the current dense_mat / sparse_mat.
# perf_counter() is a monotonic high-resolution clock, so the measured
# interval is not affected by system clock adjustments as time.time() can be.
start = time.perf_counter()
rank = 5
mat_hat = isvd(dense_mat, sparse_mat, rank)
end = time.perf_counter()
print('Running time: %d seconds'%(end - start))
print()

Iter: 10
MAPE: 0.111272
RMSE: 4.59334

Iter: 20
MAPE: 0.111175
RMSE: 4.60143

Iter: 30
MAPE: 0.111179
RMSE: 4.60323

Iter: 40
MAPE: 0.111181
RMSE: 4.60379

Iter: 50
MAPE: 0.111182
RMSE: 4.60403

Iter: 60
MAPE: 0.111183
RMSE: 4.60415

Iter: 70
MAPE: 0.111184
RMSE: 4.60422

Iter: 80
MAPE: 0.111184
RMSE: 4.60426

Iter: 90
MAPE: 0.111185
RMSE: 4.60428

Iter: 100
MAPE: 0.111185
RMSE: 4.6043

Running time: 15 seconds



In [6]:
import scipy.io

# Load the Guangzhou tensor and a random matrix, then build the partially
# observed matrix for the non-random-missing scenario.
tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/tensor.mat')['tensor']
random_matrix = scipy.io.loadmat('../datasets/Guangzhou-data-set/random_matrix.mat')['random_matrix']
dense_mat = tensor.reshape([tensor.shape[0], tensor.shape[1] * tensor.shape[2]])
missing_rate = 0.4

## Non-random missing (NM)
# One keep/drop flag per (first-axis, second-axis) pair: the whole fibre along
# the last axis is kept (1) or dropped (0) together. The flag matrix is
# broadcast along the last axis instead of being filled by a Python double loop.
block_mask = np.round(random_matrix[: tensor.shape[0], : tensor.shape[1]] + 0.5 - missing_rate)
binary_tensor = np.zeros(tensor.shape)
binary_tensor[:] = block_mask[:, :, np.newaxis]
binary_mat = binary_tensor.reshape([binary_tensor.shape[0], binary_tensor.shape[1] * binary_tensor.shape[2]])
sparse_mat = np.multiply(dense_mat, binary_mat)

In [7]:
import time

# Time one iSVD imputation run on the current dense_mat / sparse_mat.
# perf_counter() is a monotonic high-resolution clock, so the measured
# interval is not affected by system clock adjustments as time.time() can be.
start = time.perf_counter()
rank = 5
mat_hat = isvd(dense_mat, sparse_mat, rank)
end = time.perf_counter()
print('Running time: %d seconds'%(end - start))
print()

Iter: 10
MAPE: 0.109205
RMSE: 4.52182

Iter: 20
MAPE: 0.109042
RMSE: 4.52132

Iter: 30
MAPE: 0.108967
RMSE: 4.52086

Iter: 40
MAPE: 0.108928
RMSE: 4.5207

Iter: 50
MAPE: 0.108905
RMSE: 4.52065

Iter: 60
MAPE: 0.108892
RMSE: 4.52064

Iter: 70
MAPE: 0.108884
RMSE: 4.52064

Iter: 80
MAPE: 0.108879
RMSE: 4.52064

Iter: 90
MAPE: 0.108875
RMSE: 4.52064

Iter: 100
MAPE: 0.108873
RMSE: 4.52064

Running time: 14 seconds



## Evaluation on Seattle Speed Data


In [8]:
import pandas as pd

# Read the Seattle speed matrix and its companion random matrix (first CSV
# column is the index), then build the random-missing observation matrix.
missing_rate = 0.4
dense_mat = pd.read_csv('../datasets/Seattle-data-set/mat.csv', index_col = 0).values
RM_mat = pd.read_csv('../datasets/Seattle-data-set/RM_mat.csv', index_col = 0).values

## Random missing (RM)
# An entry is kept where its random value + 0.5 - missing_rate rounds to 1
# and zeroed where it rounds to 0.
binary_mat = np.round(RM_mat + 0.5 - missing_rate)
sparse_mat = dense_mat * binary_mat

In [9]:
import time

# Time one iSVD imputation run on the current dense_mat / sparse_mat.
# perf_counter() is a monotonic high-resolution clock, so the measured
# interval is not affected by system clock adjustments as time.time() can be.
start = time.perf_counter()
rank = 20
mat_hat = isvd(dense_mat, sparse_mat, rank)
end = time.perf_counter()
print('Running time: %d seconds'%(end - start))
print()

Iter: 10
MAPE: 0.0800988
RMSE: 4.75059

Iter: 20
MAPE: 0.0800882
RMSE: 4.75733

Iter: 30
MAPE: 0.0801242
RMSE: 4.75949

Iter: 40
MAPE: 0.0801455
RMSE: 4.76059

Iter: 50
MAPE: 0.0801579
RMSE: 4.76128

Iter: 60
MAPE: 0.0801661
RMSE: 4.76174

Iter: 70
MAPE: 0.0801717
RMSE: 4.76207

Iter: 80
MAPE: 0.0801758
RMSE: 4.76232

Iter: 90
MAPE: 0.080179
RMSE: 4.76251

Iter: 100
MAPE: 0.0801816
RMSE: 4.76266

Running time: 18 seconds



In [10]:
import pandas as pd

# Read the Seattle speed matrix and its companion random matrix (first CSV
# column is the index), then build the random-missing observation matrix.
missing_rate = 0.6
dense_mat = pd.read_csv('../datasets/Seattle-data-set/mat.csv', index_col = 0).values
RM_mat = pd.read_csv('../datasets/Seattle-data-set/RM_mat.csv', index_col = 0).values

## Random missing (RM)
# An entry is kept where its random value + 0.5 - missing_rate rounds to 1
# and zeroed where it rounds to 0.
binary_mat = np.round(RM_mat + 0.5 - missing_rate)
sparse_mat = dense_mat * binary_mat

In [11]:
import time

# Time one iSVD imputation run on the current dense_mat / sparse_mat.
# perf_counter() is a monotonic high-resolution clock, so the measured
# interval is not affected by system clock adjustments as time.time() can be.
start = time.perf_counter()
rank = 10
mat_hat = isvd(dense_mat, sparse_mat, rank)
end = time.perf_counter()
print('Running time: %d seconds'%(end - start))
print()

Iter: 10
MAPE: 0.0912561
RMSE: 5.25692

Iter: 20
MAPE: 0.0908988
RMSE: 5.26482

Iter: 30
MAPE: 0.0908622
RMSE: 5.26735

Iter: 40
MAPE: 0.0908442
RMSE: 5.26812

Iter: 50
MAPE: 0.0908352
RMSE: 5.26843

Iter: 60
MAPE: 0.0908309
RMSE: 5.2686

Iter: 70
MAPE: 0.0908288
RMSE: 5.26869

Iter: 80
MAPE: 0.0908279
RMSE: 5.26875

Iter: 90
MAPE: 0.0908276
RMSE: 5.26879

Iter: 100
MAPE: 0.0908276
RMSE: 5.26882

Running time: 19 seconds



In [12]:
import pandas as pd

# Read the Seattle speed matrix and the random matrix used for the
# non-random-missing scenario (first CSV column is the index).
dense_mat = pd.read_csv('../datasets/Seattle-data-set/mat.csv', index_col = 0)
NM_mat = pd.read_csv('../datasets/Seattle-data-set/NM_mat.csv', index_col = 0)
dense_mat = dense_mat.values
NM_mat = NM_mat.values
missing_rate = 0.4

## Non-random missing (NM)
# The columns of dense_mat are treated as 28 consecutive blocks of 288 entries
# (presumably days x intra-day intervals; confirm against the data set). Each
# (row, block) pair gets one keep/drop flag, which is broadcast over the 288
# entries of that block instead of being filled by a Python double loop.
block_mask = np.round(NM_mat[: dense_mat.shape[0], : 28] + 0.5 - missing_rate)
binary_tensor = np.zeros((dense_mat.shape[0], 28, 288))
binary_tensor[:] = block_mask[:, :, np.newaxis]
sparse_mat = np.multiply(dense_mat, binary_tensor.reshape([dense_mat.shape[0], dense_mat.shape[1]]))

In [13]:
import time

# Time one iSVD imputation run on the current dense_mat / sparse_mat.
# perf_counter() is a monotonic high-resolution clock, so the measured
# interval is not affected by system clock adjustments as time.time() can be.
start = time.perf_counter()
rank = 10
mat_hat = isvd(dense_mat, sparse_mat, rank)
end = time.perf_counter()
print('Running time: %d seconds'%(end - start))
print()

Iter: 10
MAPE: 0.0928692
RMSE: 5.34215

Iter: 20
MAPE: 0.0924829
RMSE: 5.34129

Iter: 30
MAPE: 0.0924889
RMSE: 5.34832

Iter: 40
MAPE: 0.0924894
RMSE: 5.35267

Iter: 50
MAPE: 0.0924783
RMSE: 5.35418

Iter: 60
MAPE: 0.0924689
RMSE: 5.35461

Iter: 70
MAPE: 0.0924629
RMSE: 5.35472

Iter: 80
MAPE: 0.0924598
RMSE: 5.35475

Iter: 90
MAPE: 0.0924582
RMSE: 5.35475

Iter: 100
MAPE: 0.0924573
RMSE: 5.35475

Running time: 17 seconds



## Evaluation on London Movement Speed Data


In [14]:
import numpy as np
# Fix the seed so the random mask drawn below is reproducible.
np.random.seed(1000)
missing_rate = 0.4

dense_mat = np.load('../datasets/London-data-set/hourly_speed_mat.npy')
# Keep only the rows in which more than 70% of the entries are nonzero.
binary_mat = dense_mat != 0
pos = np.where(np.sum(binary_mat, axis = 1) > 0.7 * dense_mat.shape[1])
dense_mat = dense_mat[pos[0], :]

## Random missing (RM)
# An entry is kept where its random value + 0.5 - missing_rate rounds to 1
# and zeroed where it rounds to 0.
random_mat = np.random.rand(*dense_mat.shape)
binary_mat = np.round(random_mat + 0.5 - missing_rate)
sparse_mat = dense_mat * binary_mat

In [15]:
import time

# Time one iSVD imputation run on the current dense_mat / sparse_mat.
# perf_counter() is a monotonic high-resolution clock, so the measured
# interval is not affected by system clock adjustments as time.time() can be.
start = time.perf_counter()
rank = 30
mat_hat = isvd(dense_mat, sparse_mat, rank)
end = time.perf_counter()
print('Running time: %d seconds'%(end - start))
print()

Iter: 10
MAPE: 0.0914622
RMSE: 2.21197

Iter: 20
MAPE: 0.0912951
RMSE: 2.2108

Iter: 30
MAPE: 0.0912451
RMSE: 2.21012

Iter: 40
MAPE: 0.0912256
RMSE: 2.2098

Iter: 50
MAPE: 0.0912169
RMSE: 2.20964

Iter: 60
MAPE: 0.0912128
RMSE: 2.20956

Iter: 70
MAPE: 0.0912112
RMSE: 2.20952

Iter: 80
MAPE: 0.091211
RMSE: 2.20949

Iter: 90
MAPE: 0.0912112
RMSE: 2.20948

Iter: 100
MAPE: 0.0912117
RMSE: 2.20948

Running time: 285 seconds



In [16]:
import numpy as np
# Fix the seed so the random mask drawn below is reproducible.
np.random.seed(1000)
missing_rate = 0.6

dense_mat = np.load('../datasets/London-data-set/hourly_speed_mat.npy')
# Keep only the rows in which more than 70% of the entries are nonzero.
binary_mat = dense_mat != 0
pos = np.where(np.sum(binary_mat, axis = 1) > 0.7 * dense_mat.shape[1])
dense_mat = dense_mat[pos[0], :]

## Random missing (RM)
# An entry is kept where its random value + 0.5 - missing_rate rounds to 1
# and zeroed where it rounds to 0.
random_mat = np.random.rand(*dense_mat.shape)
binary_mat = np.round(random_mat + 0.5 - missing_rate)
sparse_mat = dense_mat * binary_mat

In [17]:
import time

# Time one iSVD imputation run on the current dense_mat / sparse_mat.
# perf_counter() is a monotonic high-resolution clock, so the measured
# interval is not affected by system clock adjustments as time.time() can be.
start = time.perf_counter()
rank = 30
mat_hat = isvd(dense_mat, sparse_mat, rank)
end = time.perf_counter()
print('Running time: %d seconds'%(end - start))
print()

Iter: 10
MAPE: 0.0952572
RMSE: 2.30035

Iter: 20
MAPE: 0.0950153
RMSE: 2.3036

Iter: 30
MAPE: 0.0949294
RMSE: 2.30445

Iter: 40
MAPE: 0.0948757
RMSE: 2.30451

Iter: 50
MAPE: 0.0948405
RMSE: 2.30443

Iter: 60
MAPE: 0.0948165
RMSE: 2.30435

Iter: 70
MAPE: 0.0948001
RMSE: 2.30432

Iter: 80
MAPE: 0.094789
RMSE: 2.30431

Iter: 90
MAPE: 0.0947818
RMSE: 2.30433

Iter: 100
MAPE: 0.0947775
RMSE: 2.30438

Running time: 299 seconds



In [18]:
import numpy as np
# Fix the seed so the random mask drawn below is reproducible.
np.random.seed(1000)

missing_rate = 0.4

dense_mat = np.load('../datasets/London-data-set/hourly_speed_mat.npy')
# Keep only the rows in which more than 70% of the entries are nonzero.
binary_mat = dense_mat.copy()
binary_mat[binary_mat != 0] = 1
pos = np.where(np.sum(binary_mat, axis = 1) > 0.7 * binary_mat.shape[1])
dense_mat = dense_mat[pos[0], :]

## Non-random missing (NM)
# The first 30 * 24 columns are treated as 30 consecutive blocks of 24 entries
# (presumably days x hours; confirm against the data set). Each (row, block)
# pair gets one keep/drop flag. np.repeat expands every flag over its 24
# columns, replacing the Python double loop; columns beyond the 30 blocks, if
# any, stay 0 exactly as before.
binary_mat = np.zeros(dense_mat.shape)
random_mat = np.random.rand(dense_mat.shape[0], 30)
block_mask = np.round(random_mat + 0.5 - missing_rate)
expanded = np.repeat(block_mask, 24, axis = 1)[:, : dense_mat.shape[1]]
binary_mat[:, : expanded.shape[1]] = expanded
sparse_mat = np.multiply(dense_mat, binary_mat)

In [19]:
import time

# Time one iSVD imputation run on the current dense_mat / sparse_mat.
# perf_counter() is a monotonic high-resolution clock, so the measured
# interval is not affected by system clock adjustments as time.time() can be.
start = time.perf_counter()
rank = 20
mat_hat = isvd(dense_mat, sparse_mat, rank)
end = time.perf_counter()
print('Running time: %d seconds'%(end - start))
print()

Iter: 10
MAPE: 0.0980421
RMSE: 2.37297

Iter: 20
MAPE: 0.0969901
RMSE: 2.36351

Iter: 30
MAPE: 0.096834
RMSE: 2.36511

Iter: 40
MAPE: 0.0968288
RMSE: 2.36762

Iter: 50
MAPE: 0.0968499
RMSE: 2.36969

Iter: 60
MAPE: 0.0968742
RMSE: 2.3713

Iter: 70
MAPE: 0.0968976
RMSE: 2.37258

Iter: 80
MAPE: 0.09692
RMSE: 2.37368

Iter: 90
MAPE: 0.0969417
RMSE: 2.37466

Iter: 100
MAPE: 0.0969625
RMSE: 2.37557

Running time: 309 seconds



In [20]:
import time

# Time one iSVD imputation run on the current dense_mat / sparse_mat, this
# time with a lower rank than the preceding run.
# perf_counter() is a monotonic high-resolution clock, so the measured
# interval is not affected by system clock adjustments as time.time() can be.
start = time.perf_counter()
rank = 10
mat_hat = isvd(dense_mat, sparse_mat, rank)
end = time.perf_counter()
print('Running time: %d seconds'%(end - start))
print()

Iter: 10
MAPE: 0.0972465
RMSE: 2.36189

Iter: 20
MAPE: 0.0969603
RMSE: 2.35948

Iter: 30
MAPE: 0.0968548
RMSE: 2.35781

Iter: 40
MAPE: 0.0967946
RMSE: 2.3568

Iter: 50
MAPE: 0.096761
RMSE: 2.35623

Iter: 60
MAPE: 0.0967422
RMSE: 2.35593

Iter: 70
MAPE: 0.0967309
RMSE: 2.35575

Iter: 80
MAPE: 0.0967241
RMSE: 2.35565

Iter: 90
MAPE: 0.0967197
RMSE: 2.35559

Iter: 100
MAPE: 0.096717
RMSE: 2.35556

Running time: 302 seconds



### License

<div class="alert alert-block alert-danger">
<b>This work is released under the MIT license.</b>
</div>