In [None]:
#!/usr/bin/python
#
# Created by Albert Au Yeung (2010)
#
# An implementation of matrix factorization
#
# numpy is the only third-party dependency; fail with a readable message
# instead of a traceback when it is missing.
try:
    import numpy
except ImportError:
    print ("This implementation requires the numpy module.")
    # Non-zero status so shells/CI see the failure; SystemExit works even
    # where the interactive `exit` helper is not installed.
    raise SystemExit(1)

###############################################################################

def matrix_factorization(R, P, Q, K, steps=5000, alpha=0.0002, beta=0.02):
    """Factorize R into P and Q by stochastic gradient descent.

    Only entries of R that are greater than zero are treated as observed;
    every other entry is ignored by both the updates and the error term.
    The optimisation stops early once the regularized squared error over
    the observed entries drops below 0.001.

    @INPUT:
        R     : a matrix to be factorized, dimension N x M
        P     : an initial matrix of dimension N x K
        Q     : an initial matrix of dimension M x K
        K     : the number of latent features
        steps : the maximum number of steps to perform the optimisation
        alpha : the learning rate
        beta  : the regularization parameter
    @OUTPUT:
        the final matrices P (N x K) and Q (M x K) as new float arrays;
        the P and Q passed in are left unmodified
    @RAISES:
        ValueError if R is not 2-D, or P / Q are not N x K / M x K
    """
    R = numpy.asarray(R)
    # Float copies: the caller's arrays stay untouched, and integer-typed
    # initial matrices no longer truncate every update to a whole number.
    P = numpy.array(P, dtype=float)
    Qt = numpy.array(Q, dtype=float).T  # K x M, so column j is item j

    if R.ndim != 2:
        raise ValueError("R must be a 2-D matrix")
    if P.shape != (R.shape[0], K) or Qt.shape != (K, R.shape[1]):
        raise ValueError("P must be N x K and Q must be M x K")

    # The observed cells never change, so locate them once. nonzero()
    # returns them in row-major order, the same order as a nested i/j loop.
    observed = R > 0
    rows, cols = numpy.nonzero(observed)
    cells = list(zip(rows.tolist(), cols.tolist()))

    for _ in range(steps):
        for i, j in cells:
            eij = R[i, j] - numpy.dot(P[i, :], Qt[:, j])
            # Evaluate both gradients from the current values before
            # applying either, so Q's step does not see the updated P.
            p_step = alpha * (2 * eij * Qt[:, j] - beta * P[i, :])
            q_step = alpha * (2 * eij * P[i, :] - beta * Qt[:, j])
            P[i, :] += p_step
            Qt[:, j] += q_step

        # Squared error on observed cells plus the regularization penalty,
        # counted once per observed (i, j) pair.
        residual = (R - numpy.dot(P, Qt))[observed]
        e = numpy.sum(residual ** 2)
        e += (beta / 2.0) * (numpy.sum(P[rows, :] ** 2)
                             + numpy.sum(Qt[:, cols] ** 2))
        if e < 0.001:
            break
    return P, Qt.T

###############################################################################

if __name__ == "__main__":
    # Demo run on a 16 x 10 integer matrix. matrix_factorization only uses
    # the entries that are greater than zero, so the 0 cells act as blanks.
    R = numpy.array([
        [1, 0, 0, 0, 2, 0, 0, 1, 0, 0],
        [3, 0, 3, 0, 4, 0, 0, 2, 5, 4],
        [0, 0, 4, 0, 0, 3, 0, 0, 0, 0],
        [2, 0, 0, 0, 0, 0, 3, 5, 0, 4],
        [0, 0, 0, 3, 0, 0, 3, 0, 5, 0],
        [3, 0, 0, 0, 4, 2, 2, 3, 0, 0],
        [0, 5, 3, 0, 0, 0, 3, 3, 0, 0],
        [0, 0, 0, 0, 4, 4, 0, 0, 0, 0],
        [5, 0, 0, 3, 0, 0, 3, 0, 3, 0],
        [5, 5, 0, 0, 5, 0, 0, 3, 4, 0],
        [0, 4, 1, 0, 0, 0, 0, 0, 0, 5],
        [0, 5, 0, 0, 0, 2, 4, 0, 0, 0],
        [4, 0, 0, 3, 0, 0, 0, 5, 0, 3],
        [0, 0, 3, 0, 0, 0, 2, 0, 5, 0],
        [0, 0, 3, 3, 0, 4, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 4, 0, 0, 4, 2],
    ])

    # Rows x columns of the input, and the number of latent features.
    N, M = R.shape
    K = 2

    # Random starting factors, drawn in the order P then Q.
    P = numpy.random.rand(N, K)
    Q = numpy.random.rand(M, K)

    nP, nQ = matrix_factorization(R, P, Q, K)

    # Show the input next to the product of the learned factors.
    print("the Original Matrix")
    print(R)
    print("The Approximation matrix by MF")
    print(nP.dot(nQ.T))



the Original Matrix
[[1 0 0 0 2 0 0 1 0 0]
 [3 0 3 0 4 0 0 2 5 4]
 [0 0 4 0 0 3 0 0 0 0]
 [2 0 0 0 0 0 3 5 0 4]
 [0 0 0 3 0 0 3 0 5 0]
 [3 0 0 0 4 2 2 3 0 0]
 [0 5 3 0 0 0 3 3 0 0]
 [0 0 0 0 4 4 0 0 0 0]
 [5 0 0 3 0 0 3 0 3 0]
 [5 5 0 0 5 0 0 3 4 0]
 [0 4 1 0 0 0 0 0 0 5]
 [0 5 0 0 0 2 4 0 0 0]
 [4 0 0 3 0 0 0 5 0 3]
 [0 0 3 0 0 0 2 0 5 0]
 [0 0 3 3 0 4 0 0 0 0]
 [0 0 0 0 1 4 0 0 4 2]]
The Approximation matrix by MF
[[ 1.4506013   1.54753091  1.13349612  0.98754853  1.60907609  0.84591776
   1.01727648  0.97155158  1.49057691  1.35641475]
 [ 3.74319714  4.03797666  2.86177833  2.62068588  4.15991321  2.42560182
   2.6704176   2.77128313  4.06382163  3.59639223]
 [ 5.13546098  5.47048683  4.02432781  3.48297743  5.69508208  2.95056384
   3.59313117  3.39142544  5.23740701  4.78450483]
 [ 2.56565351  3.31876227  1.18231352  2.68929454  2.94728722  4.65808122
   2.39046339  5.16019678  5.46890473  3.65257376]
 [ 4.16243123  4.57693371  3.05968958  3.05471563  4.64092582  3.16860483
   3.0