In [None]:
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import ShuffleSplit, GridSearchCV

In [None]:
class PMF(BaseEstimator):
    """Matrix factorization of a ratings table trained by full-batch gradient descent.

    A rating for ``(user i, item j)`` is modelled as the dot product
    ``U[i] @ V[j]`` of a user factor row and an item factor row. Training
    minimizes half the sum of squared residuals plus L2 penalties on ``U``
    and ``V``.

    Parameters
    ----------
    n_users : int
        Number of rows of the user factor matrix ``U``.
    n_items : int
        Number of rows of the item factor matrix ``V``.
    n_factors : int, default=30
        Number of latent factors (columns of ``U`` and ``V``).
    lambda_U, lambda_V : float, default=0.1
        L2 penalty weights for ``U`` and ``V``.
    learning_rate : float, default=1e-3
        Gradient descent step size.
    max_iter : int, default=1000
        Number of gradient descent iterations performed by ``fit``.
    tol : float, default=1e-4
        Stored but currently not used by ``fit``; training always runs for
        ``max_iter`` iterations.
    """

    def __init__(self, n_users, n_items, n_factors=30, lambda_U=0.1, lambda_V=0.1,
                 learning_rate=1e-3, max_iter=1000, tol=1e-4):
        self.n_users = n_users
        # Must be stored under the exact constructor-argument name: fit() reads
        # self.n_items, and BaseEstimator.get_params()/clone() look attributes
        # up by parameter name.
        self.n_items = n_items
        self.n_factors = n_factors
        self.lambda_U = lambda_U
        self.lambda_V = lambda_V
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.tol = tol

    def fit(self, X, y):
        """Learn the factor matrices ``U`` and ``V``.

        Parameters
        ----------
        X : iterable of (user_index, item_index) pairs
            Indices into the rows of ``U`` and ``V`` respectively.
        y : iterable of float
            Rating for each pair in ``X``, in the same order.

        Returns
        -------
        self
        """
        r = self.n_factors
        n, m = self.n_users, self.n_items
        # Small random initialisation around zero.
        self.U = np.random.uniform(-0.01, 0.01, size=(n, r))
        self.V = np.random.uniform(-0.01, 0.01, size=(m, r))
        for n_iter in range(self.max_iter):
            # Objective value before this iteration's update; only reported.
            E = self.score(X, y)
            print(f'Iteration {n_iter + 1}, Error: {E}')
            # Start from the gradient of the L2 penalty, then accumulate the
            # data term one observed rating at a time.
            grad_U = self.lambda_U * self.U
            grad_V = self.lambda_V * self.V
            for [i, j], rating in zip(X, y):
                residual = rating - self.U[i] @ self.V[j]
                grad_U[i] -= residual * self.V[j]
                grad_V[j] -= residual * self.U[i]
            # Both gradients were computed from the same (old) U and V, so
            # the two updates are simultaneous.
            self.U -= self.learning_rate * grad_U
            self.V -= self.learning_rate * grad_V
        return self

    @staticmethod
    def g(X):
        """Logistic sigmoid ``1 / (1 + exp(-X))``, applied element-wise.

        Not used by the linear model in ``fit``/``score``; kept available for
        a sigmoid-link variant of the predictor.
        """
        return 1 / (1 + np.exp(-X))

    def score(self, X, y):
        """Return the regularized training objective on ``(X, y)``.

        The value is ``0.5 * sum((rating - U[i] @ V[j]) ** 2)`` plus
        ``lambda_U / 2 * sum(U ** 2)`` and ``lambda_V / 2 * sum(V ** 2)``.
        This is an error: lower is better.
        """
        squared_error = 0.0
        for [i, j], rating in zip(X, y):
            residual = rating - self.U[i] @ self.V[j]
            squared_error += residual ** 2
        penalty_U = self.lambda_U / 2 * np.sum(self.U ** 2)
        penalty_V = self.lambda_V / 2 * np.sum(self.V ** 2)
        E = 0.5 * squared_error + penalty_U + penalty_V
        return E

In [None]:
# Read the rating triples stored in the archive.
data = np.load('data.npz')
user_ids = data['user_id']
item_ids = data['item_id']

# The largest id is taken as the number of users / items
# (assumes ids are 1-based and contiguous -- TODO confirm against the data).
n_users = user_ids.max()
n_items = item_ids.max()

# One (user_index, item_index) row per rating; ids are shifted down by one
# so they can index rows of the factor matrices directly.
X = np.concatenate([user_ids - 1, item_ids - 1], axis=1)

# Rescale ratings with MinMaxScaler's default range and flatten to 1-D.
scaler = MinMaxScaler()
y = scaler.fit_transform(data['rating'].astype(np.float64)).flatten()

In [None]:
def neg_pmf_error(estimator, X, y):
    """Scorer for GridSearchCV: the negated value of ``estimator.score``.

    GridSearchCV keeps the candidate with the HIGHEST score, while
    ``PMF.score`` returns an error (lower is better), so the sign is flipped.
    """
    return -estimator.score(X, y)


pmf = PMF(n_users, n_items)
# Candidate hyperparameters: a fixed rank and a log-spaced grid of L2 weights.
params = {
    'n_factors': [5],
    'lambda_U': [0.1, 1, 10, 100],
    'lambda_V': [0.1, 1, 10, 100],
}
# Five random 80/20 train/validation splits, seeded for repeatability.
cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
# NOTE(review): PMF.score includes the L2 penalty terms, so the validation
# score still depends directly on lambda_U / lambda_V -- confirm this is the
# intended selection criterion.
model = GridSearchCV(pmf, params, cv=cv, scoring=neg_pmf_error)