In [6]:
### Step 1: Load data and create masks
import numpy as np

def load_rating(fname, N=943, M=1682, sep='\t'):
    ''' Load a ratings file into a dense (users x items) matrix.

    Every non-blank line must contain at least three `sep`-separated fields:
    UserID, MovieID, Rating. Any further fields (e.g. a Timestamp) are ignored.
    IDs in the file are treated as 1-based and are shifted down by one to
    obtain the row / column index.

    The default separator is a tab, which is what this loader has always
    split on. Pass sep='::' for files laid out as
    UserID::MovieID::Rating::Timestamp.

    Params:
        - fname: file name
        - N: number of users (rows of the result)
        - M: number of items, e.g. movies (columns of the result)
        - sep: field separator, tab by default
    Returns:
        - R: float array of shape (N, M); cells without a rating line stay 0.
             If a (user, item) pair occurs more than once, the last line wins.
    '''
    R = np.zeros((N, M))
    with open(fname, 'r') as fin:
        # Stream the file instead of materialising every line in memory.
        for line in fin:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            fields = line.split(sep)
            uid = int(fields[0]) - 1
            mid = int(fields[1]) - 1
            R[uid, mid] = float(fields[2])
    return R

# read the training and testing splits into dense rating matrices
data_train, data_test = [load_rating(split_file)
                         for split_file in ("u1.base", "u1.test")]

# dimensions of each split: rows are users, columns are movies
(n_users_train, n_movies_train), (n_users_test, n_movies_test) = (
    data_train.shape, data_test.shape)

# create mask matrix
def create_mask(X):
    ''' Build the 0/1 indicator of strictly positive entries of X.

    Returns an integer array with the same shape as X holding 1 wherever
    X > 0 and 0 everywhere else.
    '''
    return (X > 0).astype(int)

In [7]:
### Step 2: Implement functions to calculate cost and gradients
# This function computes the cost that we want to minimize
def compute_cost(U, V, R, _lambda, mask):
    ''' 
    Regularized squared-error cost of the factorization R ~ U V^T.

    Inputs: 
        U: A matrix contains users' feature (size: n_users x n_features)
        V: A matrix contains movies' feature (size: n_movies x n_features)
        R: A matrix contains ground truth (size: n_users x n_movies)
        _lambda: Regularization weight
        mask: The binary mask matrix (same size as R, 1 = known rating)
    Output:
        loss: (scalar) half the mean squared error over the known elements
              of R, plus 0.5 * _lambda * (sum(U**2) + sum(V**2)).
              When mask marks no element as known, the error term is taken
              as 0, so the penalty alone is returned instead of NaN.
    '''
    assert U.shape[1] == V.shape[1]
    assert U.shape[0] == R.shape[0]
    assert V.shape[0] == R.shape[1]
    assert R.shape == mask.shape
    n_known = mask.sum()
    if n_known == 0:
        # Nothing observed: avoid the 0/0 division that would yield NaN.
        data_term = 0.0
    else:
        squared_error = (((np.matmul(U, V.T) - R)**2) * mask).sum()
        data_term = 0.5 * squared_error / n_known
    return (data_term + 0.5 * _lambda * (U**2).sum()
            + 0.5 * _lambda * (V**2).sum())
    

# This function computes partial derivatives of the cost function w.r.t. movie and user features
def compute_gradient(U, V, R, _lambda, mask):
    ''' 
    Gradients of the masked squared-error objective w.r.t. U and V.

    Inputs: 
        U: A matrix contains users' feature (size: n_users x n_features)
        V: A matrix contains movies' feature (size: n_movies x n_features)
        R: A matrix contains ground truth (size: n_users x n_movies)
        _lambda: Regularization weight
        mask: The binary mask matrix (same size as R, 1 = known rating)
    Output:
        grad_U: gradients of the cost function w.r.t. U, must have the same shape as U
        grad_V: gradients of the cost function w.r.t. V, must have the same shape as V

    Note: the error term here is the plain masked sum; it is not divided by
    the number of known ratings.
    '''
    assert U.shape[1] == V.shape[1]
    assert U.shape[0] == R.shape[0]
    assert V.shape[0] == R.shape[1]
    assert R.shape == mask.shape
    # Prediction error restricted to the known ratings; computed once and
    # shared by both gradients.
    masked_error = (np.matmul(U, V.T) - R) * mask
    gradient_U = np.matmul(masked_error, V) + _lambda * U
    gradient_V = np.matmul(masked_error.T, U) + _lambda * V

    return gradient_U, gradient_V

In [8]:
### Step 3: Training
n_features = 10
# fixed seed so that Restart & Run All reproduces the same initialization
random_seed = 0
rng = np.random.default_rng(random_seed)
# randomly initialize U and V (Gaussian entries scaled by 0.25)
U = 0.25 * rng.standard_normal((n_users_train, n_features))
V = 0.25 * rng.standard_normal((n_movies_train, n_features))
# regularization weight
_lambda = 0.0001
# learning rate
alpha = 0.001
# number of training iteration
training_iterations = 150

mask = create_mask(data_train)
for counter in range(1, training_iterations + 1):
    # perform one step of gradient descent on both factor matrices
    grad_U, grad_V = compute_gradient(U, V, data_train, _lambda, mask)

    U = U - alpha * grad_U
    V = V - alpha * grad_V

    # the cost is only needed for progress reporting, so evaluate it
    # on reporting iterations instead of on every step
    if counter % 10 == 0:
        cost = compute_cost(U, V, data_train, _lambda, mask)
        print("Iteration:",counter,"cost: ",cost)

# cost of the final parameters, whatever training_iterations is
cost = compute_cost(U, V, data_train, _lambda, mask)

Iteration: 10 cost:  4.089088848512629
Iteration: 20 cost:  1.0763312094434376
Iteration: 30 cost:  0.8962543752829503
Iteration: 40 cost:  0.855920503274689
Iteration: 50 cost:  0.8400049077738642
Iteration: 60 cost:  0.8310800493222239
Iteration: 70 cost:  0.824948176456816
Iteration: 80 cost:  0.8203329414750543
Iteration: 90 cost:  0.8167665892950822
Iteration: 100 cost:  0.8140178603327046
Iteration: 110 cost:  0.8119336970273399
Iteration: 120 cost:  0.8103986434413946
Iteration: 130 cost:  0.8093216503985644
Iteration: 140 cost:  0.8086292188177431
Iteration: 150 cost:  0.8082611070249703


In [9]:
# Step 4: Evaluation function 
def RMSE(A, B, mask):
    ''' Root mean square error between A and B over the masked entries.

    Squared differences are weighted by mask, summed, divided by the sum
    of mask, and the square root of that ratio is returned.
    '''
    residual = A - B
    weighted_sq_total = np.sum(mask * np.square(residual))
    n_selected = np.sum(mask)
    return np.sqrt(weighted_sq_total / n_selected)

def MAE(A, B, mask):
    ''' Mean absolute error between A and B over the masked entries.

    Absolute differences are weighted by mask, summed, and divided by the
    sum of mask.
    '''
    abs_residual = np.abs(A - B)
    weighted_abs_total = np.sum(mask * abs_residual)
    n_selected = np.sum(mask)
    return weighted_abs_total / n_selected

In [10]:
### Step 5: Evaluate the model
# predicted rating for every (user, movie) pair
prediction = np.dot(U, V.T)

# errors on the training set, restricted to the known training ratings
rmse_train = RMSE(data_train, prediction, mask)
print("RMSE_train: ", rmse_train)
mae_train = MAE(data_train, prediction, mask)
print("MAE_train: ", mae_train)

# errors on the testing set, restricted to the known testing ratings
mask_test = create_mask(data_test)
rmse_test = RMSE(data_test, prediction, mask_test)
print("RMSE_test: ",rmse_test)
mae_test = MAE(data_test, prediction, mask_test)
print("MAE_test: ",mae_test)

RMSE_train:  0.7857441822078833
MAE_train:  0.6191299429025515
RMSE_test:  0.9905016659985496
MAE_test:  0.7706869450652213
