In [1]:
import numpy as np
from numpy import linalg as LA

def _pmf_rmse(w_I, w_C, inv_idx, com_idx, values, mean_inv):
    """Return the RMSE of ``w_I[inv] . w_C[com] + mean_inv`` against ``values``."""
    predicted = np.sum(np.multiply(w_I[inv_idx, :], w_C[com_idx, :]), axis=1)
    # Same operation order as the training residual: (prediction - value) + mean.
    residual = predicted - values + mean_inv
    return LA.norm(residual) / np.sqrt(values.shape[0])


def pmf(train_vec, val_vec, num_feat, epsilon, _lambda, momentum, maxepoch, num_batches, batch_size):
    """Fit a matrix factorization by mini-batch gradient descent with momentum.

    Each row of ``train_vec`` / ``val_vec`` is a triple ``(id0, id1, value)``.
    ``id0`` selects a row of the factor matrix ``w_I`` and ``id1`` a row of
    ``w_C``; the model predicts ``value`` as ``dot(w_I[id0], w_C[id1])`` plus
    the mean of the training values. The IDs are truncated to ``int32`` and
    used directly as row indices, so each factor matrix has ``max(id) + 1``
    rows.

    Progress is printed every 50th epoch; nothing is returned.

    Parameters
    ----------
    train_vec : ndarray, shape (n_train, 3)
        Training triples. Must not be empty.
    val_vec : ndarray, shape (n_val, 3)
        Held-out triples, used only for the reported test RMSE. May be empty,
        in which case the test RMSE is reported as ``nan``.
    num_feat : int
        Number of latent features (columns of ``w_I`` and ``w_C``).
    epsilon : float
        Learning rate.
    _lambda : float
        Weight of the L2 penalty added to every per-sample gradient.
    momentum : float
        Momentum coefficient applied to the previous update.
    maxepoch : int
        Number of passes over the ``num_batches`` mini-batches.
    num_batches : int
        Mini-batches processed per epoch.
    batch_size : int
        Samples per mini-batch. Batch positions wrap around modulo
        ``n_train``, so ``num_batches * batch_size`` need not equal it.

    Raises
    ------
    ValueError
        If ``train_vec`` contains no rows.
    """
    train_vec = np.asarray(train_vec)
    val_vec = np.asarray(val_vec)

    num_train = train_vec.shape[0]
    if num_train == 0:
        raise ValueError('train_vec must contain at least one (id0, id1, value) row')
    has_val = val_vec.shape[0] > 0

    # Mean of the training values; the factors model the mean-centred values.
    mean_inv = np.mean(train_vec[:, 2])

    # Cast the ID columns once instead of on every batch and every epoch.
    train_inv = train_vec[:, 0].astype('int32')
    train_com = train_vec[:, 1].astype('int32')
    train_val = train_vec[:, 2]

    max_inv = np.amax(train_vec[:, 0])
    max_com = np.amax(train_vec[:, 1])
    if has_val:
        val_inv = val_vec[:, 0].astype('int32')
        val_com = val_vec[:, 1].astype('int32')
        val_val = val_vec[:, 2]
        max_inv = max(max_inv, np.amax(val_vec[:, 0]))
        max_com = max(max_com, np.amax(val_vec[:, 1]))

    # IDs index the factor matrices directly, hence the +1.
    num_inv = int(max_inv) + 1
    num_com = int(max_com) + 1

    # Initialise factors (w_C is drawn before w_I) and momentum buffers.
    w_C = 0.1 * np.random.randn(num_com, num_feat)
    w_I = 0.1 * np.random.randn(num_inv, num_feat)
    w_C_inc = np.zeros((num_com, num_feat))
    w_I_inc = np.zeros((num_inv, num_feat))

    epoch = 0
    while epoch < maxepoch:
        epoch += 1

        # Shuffle the training triples.
        shuffled_order = np.arange(num_train)
        np.random.shuffle(shuffled_order)

        for batch in range(num_batches):
            # Positions wrap around so a batch may run past the end of the data.
            batch_idx = np.mod(np.arange(batch_size * batch,
                                         batch_size * (batch + 1)),
                               num_train)
            sample = shuffled_order[batch_idx]
            batch_invID = train_inv[sample]
            batch_comID = train_com[sample]

            # Residual of the mean-centred prediction.
            pred_out = np.sum(np.multiply(w_I[batch_invID, :],
                                          w_C[batch_comID, :]),
                              axis=1)
            rawErr = pred_out - train_val[sample] + mean_inv

            # Per-sample gradients of squared error plus L2 penalty.
            Ix_C = 2 * np.multiply(rawErr[:, np.newaxis], w_I[batch_invID, :]) \
                    + _lambda * w_C[batch_comID, :]
            Ix_I = 2 * np.multiply(rawErr[:, np.newaxis], w_C[batch_comID, :]) \
                    + _lambda * w_I[batch_invID, :]

            # Sum gradients that belong to the same factor row. np.add.at is
            # unbuffered, so repeated IDs accumulate (plain fancy-index `+=`
            # would keep only one contribution per repeated ID).
            dw_C = np.zeros((num_com, num_feat))
            dw_I = np.zeros((num_inv, num_feat))
            np.add.at(dw_C, batch_comID, Ix_C)
            np.add.at(dw_I, batch_invID, Ix_I)

            # Update with momentum.
            w_C_inc = momentum * w_C_inc + epsilon * dw_C / batch_size
            w_I_inc = momentum * w_I_inc + epsilon * dw_I / batch_size

            w_C = w_C - w_C_inc
            w_I = w_I - w_I_inc

        if epoch % 50 == 0:
            # The errors are only reported, so evaluate them only when printing.
            train_rmse = _pmf_rmse(w_I, w_C, train_inv, train_com, train_val, mean_inv)
            if has_val:
                test_rmse = _pmf_rmse(w_I, w_C, val_inv, val_com, val_val, mean_inv)
            else:
                test_rmse = float('nan')
            print('%f th epoch, train RMSE: %f, test RMSE: %f' %(epoch, train_rmse, test_rmse))

In [2]:
import scipy.io
from tensorly import unfold

# Load the data tensor and the pre-generated random arrays that drive masking.
tensor = scipy.io.loadmat('Guangzhou-data-set/tensor.mat')['tensor']
random_matrix = scipy.io.loadmat('Guangzhou-data-set/random_matrix.mat')['random_matrix']
random_tensor = scipy.io.loadmat('Guangzhou-data-set/random_tensor.mat')['random_tensor']

# Mode-0 unfolding of the tensor into a matrix.
mat = unfold(tensor, 0)
missing_rate = 0.4

# -----------------------------------------------------------------------------
# Random missing (RM) scenario
#   missing rate | 0.2 | 0.4 |
#   rank         |  80 |  80 |
# Every entry is masked independently. To use it, replace the NM mask below by:
# binary_mat = unfold(np.round(random_tensor + 0.5 - missing_rate), 0)
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# Non-random missing (NM) scenario
#   missing rate | 0.2 | 0.4 |
#   rank         |  10 |  10 |
# One keep/drop decision per (i1, i2) pair, shared along the whole third axis.
# -----------------------------------------------------------------------------
binary_tensor = np.zeros(tensor.shape)
binary_tensor[:, :, :] = np.round(
    random_matrix[:tensor.shape[0], :tensor.shape[1]] + 0.5 - missing_rate
)[:, :, np.newaxis]
binary_mat = unfold(binary_tensor, 0)

# Zero out the masked entries of the unfolded data.
sparse_mat = mat * binary_mat

Using numpy backend.


In [3]:
# Training triples: one row (row ID, column ID, value) per positive entry of
# the masked matrix. IDs are 1-based; np.nonzero returns positions in row-major
# order, the same order a nested row/column loop would visit them.
train_rows, train_cols = np.nonzero(sparse_mat > 0)
train_data = np.zeros((train_rows.shape[0], 3))
train_data[:, 0] = train_rows + 1
train_data[:, 1] = train_cols + 1
train_data[:, 2] = sparse_mat[train_rows, train_cols]

# Held-out triples: positive entries of the full matrix that the mask removed.
# Work on a copy: assigning through a bare alias would zero entries of `mat`
# itself and leave the full matrix corrupted for any later cell or re-run.
validation_mat = mat.copy()
validation_mat[sparse_mat > 0] = 0
test_rows, test_cols = np.nonzero(validation_mat > 0)
test_data = np.zeros((test_rows.shape[0], 3))
test_data[:, 0] = test_rows + 1
test_data[:, 1] = test_cols + 1
test_data[:, 2] = validation_mat[test_rows, test_cols]

In [4]:
# Train on the observed entries and report RMSE on the held-out ones.
pmf(
    train_data,
    test_data,
    num_feat=10,       # number of features (rank)
    epsilon=0.05,
    _lambda=0.1,
    momentum=0.8,
    maxepoch=1000,
    num_batches=50,
    batch_size=500,
)

50.000000 th epoch, train RMSE: 8.015995, test RMSE: 8.061721
100.000000 th epoch, train RMSE: 5.160675, test RMSE: 5.180034
150.000000 th epoch, train RMSE: 4.839092, test RMSE: 4.894862
200.000000 th epoch, train RMSE: 4.651670, test RMSE: 4.754909
250.000000 th epoch, train RMSE: 4.497721, test RMSE: 4.653358
300.000000 th epoch, train RMSE: 4.366399, test RMSE: 4.578945
350.000000 th epoch, train RMSE: 4.251039, test RMSE: 4.523455
400.000000 th epoch, train RMSE: 4.154031, test RMSE: 4.486616
450.000000 th epoch, train RMSE: 4.090019, test RMSE: 4.473039
500.000000 th epoch, train RMSE: 4.052358, test RMSE: 4.469863
550.000000 th epoch, train RMSE: 4.030907, test RMSE: 4.471329
600.000000 th epoch, train RMSE: 4.016695, test RMSE: 4.470867
650.000000 th epoch, train RMSE: 4.007287, test RMSE: 4.472719
700.000000 th epoch, train RMSE: 4.001105, test RMSE: 4.475769
750.000000 th epoch, train RMSE: 3.996005, test RMSE: 4.474324
800.000000 th epoch, train RMSE: 3.990647, test RMSE: 4.

**Experiment results** of missing data imputation using probabilistic matrix factorization (PMF):

|  scenario |`rank`|`epsilon`|`_lambda`|`momentum`|`num_batches`|`batch_size`| train_rmse | test_rmse |
|:----------|-----:|--------:|--------:|---------:|------------:|-----------:|-----------:|----------:|
|**0.4, NM**|   10 |     1.0 |     0.1 |      0.8 |          30 |       1000 |     4.5358 |    4.9376 |
|**0.4, NM**|   10 |     0.5 |     0.1 |      0.8 |          30 |       1000 |     4.1773 |    4.6813 |
|**0.4, NM**|   10 |     0.1 |     0.1 |      0.8 |          30 |       1000 |     3.9645 |    4.4900 |
|**0.4, NM**|   10 |    0.05 |     0.1 |      0.8 |          30 |       1000 |     4.0068 |    4.4679 |
|**0.2, NM**|   10 |     0.1 |     0.1 |      0.8 |          30 |       1000 |     4.0253 |    4.3754 |
|**0.2, NM**|   10 |    0.05 |     0.1 |      0.8 |          30 |       1000 |     4.0530 |    4.3651 |
|**0.2, NM**|   10 |    0.05 |     0.1 |      0.8 |          20 |       2000 |     4.2094 |    4.4133 |
|**0.2, NM**|   10 | **0.05**|     0.1 |      0.8 |       **50**|     **500**|     4.0291 | **4.3575**|
|**0.4, NM**|   10 | **0.05**|     0.1 |      0.8 |       **50**|     **500**|     3.9758 | **4.4866**|
|**0.2, RM**|   80 | **0.05**|     0.1 |      0.8 |       **50**|     **500**|     2.8055 | **4.0909**|
|**0.4, RM**|   80 | **0.05**|     0.1 |      0.8 |       **50**|     **500**|     2.4889 | **4.2280**|

   > The experiment relies on the *Urban traffic speed data set in Guangzhou, China*.