In [None]:
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import ShuffleSplit, GridSearchCV, train_test_split
from collections import deque
import multiprocessing

In [None]:
class PMF(BaseEstimator):
    """Matrix factorisation with a logistic link, trained by full-batch gradient descent.

    The rating of pair (user ``i``, item ``j``) is modelled as
    ``sigmoid(U[i] @ V[j])``, so every prediction lies in (0, 1).  The objective is

        0.5 * sum((rating - prediction) ** 2)
        + lambda_U / 2 * ||U||^2 + lambda_V / 2 * ||V||^2

    Parameters
    ----------
    n_users, n_items : int
        Number of rows of the user factor matrix ``U`` and item factor matrix ``V``.
    n_factors : int
        Number of latent factors (columns of ``U`` and ``V``).
    lambda_U, lambda_V : float
        L2 regularisation weights for ``U`` and ``V``.
    learning_rate : float
        Initial gradient-descent step size.  It is decayed during ``fit`` on a
        local copy only; the attribute itself is never modified.
    max_iter : int
        Upper bound on the number of gradient steps.
    tol : float
        Minimum loss improvement used by both the step-size decay and the
        early-stopping test.
    patience : int
        Number of past losses the early-stopping test looks back over.
        Values below 3 are treated as 3 inside ``fit``.
    verbose : bool
        If true, print the loss every ``log_interval`` iterations and the step
        size each time it is about to be decayed.
    log_interval : int
        Iteration period of the loss print-out.
    random_state : None, int or numpy.random.RandomState
        Seed or generator for the factor initialisation.  ``None`` (default)
        draws from NumPy's global random state.

    Attributes set by ``fit``
    -------------------------
    U : ndarray of shape (n_users, n_factors)
    V : ndarray of shape (n_items, n_factors)
    """

    def __init__(self, n_users, n_items, n_factors=30, lambda_U=0.1, lambda_V=0.1,
                 learning_rate=0.1, max_iter=2000, tol=1e-3, patience=20,
                 verbose=False, log_interval=20, random_state=None):
        # Constructor arguments are stored verbatim (scikit-learn convention, so
        # get_params / set_params / clone round-trip); clamping happens in fit.
        self.n_users = n_users
        self.n_items = n_items
        self.n_factors = n_factors
        self.lambda_U = lambda_U
        self.lambda_V = lambda_V
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.tol = tol
        self.patience = patience
        self.verbose = verbose
        self.log_interval = log_interval
        self.random_state = random_state

    def fit(self, X, y):
        """Learn ``U`` and ``V`` from observed ratings.

        Parameters
        ----------
        X : array-like of shape (n_ratings, 2)
            Rows of (user index, item index); the indices address rows of
            ``U`` and ``V`` directly.
        y : array-like of shape (n_ratings,)
            Rating of each pair in ``X``.

        Returns
        -------
        self
        """
        users, items = self._split_pairs(X)
        ratings = np.asarray(y, dtype=np.float64)

        rng = self._make_rng()
        self.U = rng.uniform(-0.01, 0.01, size=(self.n_users, self.n_factors))
        self.V = rng.uniform(-0.01, 0.01, size=(self.n_items, self.n_factors))

        # The decay test below inspects the last three losses, hence the floor of 3.
        patience = max(3, self.patience)
        # Work on a local step size so the hyperparameter survives fit unchanged.
        step = self.learning_rate
        # Pre-filled with inf so neither test can trigger before real losses arrive.
        history = deque([np.inf] * patience, maxlen=patience)

        for n_iter in range(self.max_iter):
            loss = self.compute_loss(X, ratings)
            if self.verbose and n_iter % self.log_interval == 0:
                print(f'Iteration {n_iter}, Loss: {loss}')

            # Shrink the step when the loss is not at least tol below the largest
            # of the three most recent losses.
            recent_losses = [history[k] for k in range(-3, 0)]
            if max(recent_losses) - loss < self.tol and step > 1e-6:
                if self.verbose:
                    print(f'{step}')
                step /= 5

            # Stop when the loss is not at least tol below the largest of the
            # last `patience` losses.
            if max(history) - loss < self.tol:
                break
            history.append(loss)

            # Gradient of the objective; both gradients use the factors from
            # before this step.
            z = self._pair_scores(users, items)
            a = ((ratings - z) * z * (1 - z))[:, None]
            grad_U = self.lambda_U * self.U
            grad_V = self.lambda_V * self.V
            # ufunc.at accumulates over repeated indices (a plain fancy-indexed
            # `-=` would keep only one contribution per user/item).
            np.subtract.at(grad_U, users, a * self.V[items])
            np.subtract.at(grad_V, items, a * self.U[users])

            self.U -= step * grad_U
            self.V -= step * grad_V
        return self

    @staticmethod
    def sigmoid(X):
        """Element-wise logistic function; input is clipped to [-500, 500] to avoid overflow in exp."""
        X = np.clip(X, -500, 500)
        return 1 / (1 + np.exp(-X))

    def predict(self, X):
        """Return ``sigmoid(U[i] @ V[j])`` for every (i, j) row of ``X`` as a 1-D float array."""
        users, items = self._split_pairs(X)
        return self._pair_scores(users, items)

    def compute_loss(self, X, y):
        """Return the regularised squared-error objective for the pairs ``X`` and ratings ``y``."""
        users, items = self._split_pairs(X)
        residual = np.asarray(y, dtype=np.float64) - self._pair_scores(users, items)
        square_error = np.sum(residual ** 2)
        loss = (0.5 * square_error
                + self.lambda_U / 2 * np.sum(self.U ** 2)
                + self.lambda_V / 2 * np.sum(self.V ** 2))
        return loss

    @staticmethod
    def _split_pairs(X):
        """Return the user-index and item-index columns of ``X`` as two 1-D arrays."""
        X = np.asarray(X)
        if X.size == 0:
            # Keep "no ratings" usable: empty integer index arrays select nothing.
            empty = np.empty(0, dtype=np.intp)
            return empty, empty
        return X[:, 0], X[:, 1]

    def _pair_scores(self, users, items):
        """Return the model output for each (users[k], items[k]) pair."""
        return self.sigmoid(np.sum(self.U[users] * self.V[items], axis=1))

    def _make_rng(self):
        """Return the object whose ``uniform`` method initialises the factors."""
        if self.random_state is None:
            # Module-level functions, i.e. NumPy's global random state.
            return np.random
        if isinstance(self.random_state, np.random.RandomState):
            return self.random_state
        return np.random.RandomState(self.random_state)

In [None]:
# np.load on an .npz archive returns a lazy NpzFile that keeps the file open and
# re-reads a member on every key access.  Read each array once inside a context
# manager and keep them in a plain dict so `data[...]` keeps working afterwards.
with np.load('data.npz') as npz_file:
    data = {key: npz_file[key] for key in npz_file.files}

# Ids are shifted by -1 below to become row indices, so they must be 1-based;
# a 0 id would turn into index -1 and silently address the last row.
assert data['user_id'].min() >= 1 and data['item_id'].min() >= 1, \
    'user_id and item_id are expected to start at 1'

scaler = MinMaxScaler()
n_users = np.max(data['user_id'])
n_items = np.max(data['item_id'])
# X: one (user index, item index) row per rating, 0-based.
X = np.concatenate((data['user_id'] - 1, data['item_id'] - 1), axis=1)
# y: ratings rescaled by MinMaxScaler, flattened to 1-D.
# NOTE(review): the scaler is fitted on every rating, including rows that end up
# in the test split below.
y = scaler.fit_transform(data['rating'].astype(np.float64)).flatten()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=52)

In [None]:
# cross validation
# Grid-search the two regularisation weights (n_factors fixed at 2) using five
# random 80/20 splits of the training data, scored by negative mean squared error.
pmf = PMF(n_users, n_items, learning_rate=0.1, verbose=True)
params = {
    'n_factors': [2],
    'lambda_U': [0.1, 1, 10, 100],
    'lambda_V': [0.1, 1, 10, 100],
}
cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=12)
# Use about half of the cores, but at least one: cpu_count() // 2 is 0 on a
# single-core machine and joblib rejects n_jobs=0.
n_jobs = max(1, multiprocessing.cpu_count() // 2)
model = GridSearchCV(pmf, params, cv=cv, scoring='neg_mean_squared_error', n_jobs=n_jobs)
model.fit(X_train, y_train)

In [None]:
# Build a fresh, unfitted estimator: 2 latent factors, both regularisation
# weights at 0.1, initial step size 0.1, with progress printing enabled.
pmf = PMF(n_users, n_items, n_factors=2, lambda_U=0.1, lambda_V=0.1, learning_rate=0.1, verbose=True)

In [None]:
# Train on the complete rating set (X, y), i.e. the rows of both the train and
# the test split together.
pmf.fit(X, y)