In [1]:
# TODO: Fix algorithm, because it outputs values outside the desirable range

#### Libraries

In [2]:
import numpy as np
from scipy.io import loadmat
from scipy.optimize import minimize

#### Collaborative Filtering

##### Loading Files

In [3]:
# load movies rating
movies = loadmat('movies-rating.mat')
y = movies['Y']
r = movies['R']

# m: number of movies
# n: number of users
# f: number of features
m, n, f = *y.shape, 100

# load movies name
movies = []
with open('movies-name.txt', 'r', encoding='utf-8') as file:
    movies = file.readlines()

##### Algorithm

In [4]:
def collab_filter(xt):
    '''Collaborative filtering cost function.'''

    # convert vector to matrix
    x = xt[:m * f].reshape((m, -1))
    t = xt[m * f:].reshape((n,  -1))

    # calculate cost
    diff = ((x.dot(t.T) * r) - y)
    cost = (diff ** 2).sum() / 2

    # calculate gradient
    x_grad = diff.dot(t).ravel()
    t_grad = diff.T.dot(x).ravel()

    # convert matrix to vector
    xt = np.hstack((x_grad, t_grad))

    return cost, xt

##### Execution

In [5]:
# collaborative filtering matrices
# initialization with uniform distribution
x = np.random.uniform(size=(m, f))
t = np.random.uniform(size=(n, f))
xt = np.hstack((x.ravel(), t.ravel()))

# minimization by conjugate gradient
res = minimize(collab_filter,
               xt,
               method='CG',
               jac=True,
               options={
                   'maxiter': 10000,
                   'disp': True})
xt = res['x']

# convert vector to matrix
x = xt[:m * f].reshape((m, -1))
t = xt[m * f:].reshape((n,  -1))

Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 7366
         Function evaluations: 11220
         Gradient evaluations: 11220


In [6]:
# calculate mean of movies
rates = x.dot(t.T).mean(axis=1)

# sort and extract the best scores
best_rates = rates.argsort()[::-1][:10]

In [7]:
# print best scores and movies
for i in best_rates:
    print(f'{rates[i]:.1f} <=> {movies[i][:-1]}')

8.7 <=> 1670 Tainted (1998)
8.5 <=> 1520 Fear, The (1995)
8.1 <=> 1486 Girl in the Cadillac (1995)
8.0 <=> 1320 Homage (1995)
8.0 <=> 1349 Mille bolle blu (1993)
8.0 <=> 1340 Crude Oasis, The (1995)
8.0 <=> 1122 They Made Me a Criminal (1939)
7.9 <=> 1679 B. Monkey (1998)
7.9 <=> 1505 Killer: A Journal of Murder (1995)
7.9 <=> 1624 Hush (1998)
