In [5]:
import numpy as np
from surprise import SVD
from surprise import Dataset
from surprise.model_selection import train_test_split
from surprise import accuracy

# Load the built-in MovieLens-100k dataset; the first run needs to download it.
data = Dataset.load_builtin('ml-100k')


# 矩陣分解 SVD (梯度下降)
詳細SVD算法可參考：
1. [官方SVD算法介紹](https://surprise.readthedocs.io/en/stable/matrix_factorization.html#surprise.prediction_algorithms.matrix_factorization.SVD)
2. [SVD_推薦系統_原理](https://medium.com/data-scientists-playground/svd-%E6%8E%A8%E8%96%A6%E7%B3%BB%E7%B5%B1-%E5%8E%9F%E7%90%86-c72c2e35af9c)



In [6]:
# Hold out 25% of the ratings for evaluation; the fixed seed keeps the split repeatable.
trainset, testset = train_test_split(data, test_size=.25, random_state=1)

# SVD here is the matrix-factorization model (trained with stochastic gradient descent).
# Seed the model as well as the split: without random_state its random
# initialisation makes every Restart & Run All produce different predictions
# and therefore different metric values.
algo = SVD(random_state=1)

algo.fit(trainset)
predictions = algo.test(testset)


In [7]:
# Preview only the first few predictions instead of dumping the whole list
# (one Prediction per held-out rating) into the notebook output.
predictions[:10]

[Prediction(uid='345', iid='715', r_ui=4.0, est=3.5772293304245646, details={'was_impossible': False}),
 Prediction(uid='92', iid='998', r_ui=2.0, est=2.779116531521131, details={'was_impossible': False}),
 Prediction(uid='934', iid='195', r_ui=4.0, est=3.853752046963999, details={'was_impossible': False}),
 Prediction(uid='586', iid='423', r_ui=2.0, est=3.930263424948401, details={'was_impossible': False}),
 Prediction(uid='336', iid='383', r_ui=1.0, est=1.9354965040268137, details={'was_impossible': False}),
 Prediction(uid='654', iid='678', r_ui=4.0, est=2.753347237915239, details={'was_impossible': False}),
 Prediction(uid='64', iid='511', r_ui=4.0, est=4.332167774221425, details={'was_impossible': False}),
 Prediction(uid='425', iid='209', r_ui=2.0, est=3.4095324762307695, details={'was_impossible': False}),
 Prediction(uid='821', iid='132', r_ui=5.0, est=4.682807389427265, details={'was_impossible': False}),
 Prediction(uid='262', iid='559', r_ui=3.0, est=3.077680270721653, detai

In [8]:
# RMSE between the estimated and the true ratings of the test predictions;
# the call prints the value and also returns it as the cell result.
accuracy.rmse(predictions)

RMSE: 0.9389


0.938946392530948

In [10]:
def get_mrr(surprise_predictions, k_highest_scores=None):
    """Mean reciprocal rank (MRR) of the first relevant item per user.

    For every user the items are ordered by estimated rating (highest first)
    and the reciprocal of the position of the first relevant item is taken.
    An item is relevant when its true rating is 3 or higher.

    Args:
        surprise_predictions: iterable of objects exposing ``uid``, ``iid``,
            ``r_ui`` and ``est`` attributes. ``uid`` and ``iid`` must be
            convertible to non-negative integers because they are used as
            row / column indices of a dense user x item grid.
        k_highest_scores: accepted for signature parity with ``get_ndcg``;
            currently unused.

    Returns:
        float: the mean of ``1 / rank`` over the users that have at least one
        relevant item. Users without any relevant item are skipped. Returns
        0.0 when no user has a relevant item (including empty input).
    """
    from scipy import sparse

    surprise_predictions = list(surprise_predictions)
    if not surprise_predictions:
        # coo_matrix cannot infer a shape from empty index arrays.
        return 0.0

    # Builtin int / float: the np.int / np.float aliases were removed in NumPy 1.24.
    uids = np.array([p.uid for p in surprise_predictions]).astype(int)
    iids = np.array([p.iid for p in surprise_predictions]).astype(int)

    # Ratings of 3 or more count as relevant, anything lower as not relevant.
    relevance = np.array(
        [1.0 if p.r_ui >= 3 else 0.0 for p in surprise_predictions], dtype=float
    )
    ests = np.array([p.est for p in surprise_predictions]).astype(float)

    # Dense user x item grids. (user, item) pairs that are absent from the
    # predictions stay at 0 in both grids, so they rank after every
    # positively-scored item and are never counted as relevant.
    dense_preds = sparse.coo_matrix((ests, (uids, iids))).toarray()
    dense_vals = sparse.coo_matrix((relevance, (uids, iids))).toarray()

    reciprocal_ranks = []
    for scores, truth in zip(dense_preds, dense_vals):
        # Column indices ordered from highest to lowest estimated rating.
        order = np.argsort(scores)[::-1]
        hits = np.flatnonzero(truth[order] == 1)
        if hits.size:
            # hits[0] is the 0-based position of the first relevant item.
            reciprocal_ranks.append(1.0 / (int(hits[0]) + 1))

    if not reciprocal_ranks:
        # No user has a relevant item: avoid dividing by zero.
        return 0.0
    return sum(reciprocal_ranks) / len(reciprocal_ranks)

In [11]:
# Mean reciprocal rank of the SVD predictions on the held-out test set.
get_mrr(predictions)

0.9690298507462688

In [12]:
def get_map(surprise_predictions, k_highest_scores=None):
    """Mean average precision of the predictions, via scikit-learn's LRAP.

    The predictions are laid out as two dense user x item grids (estimated
    ratings and binary relevance) which are handed to
    ``sklearn.metrics.label_ranking_average_precision_score``. An item is
    relevant when its true rating is 3 or higher.

    Args:
        surprise_predictions: sequence of objects exposing ``uid``, ``iid``,
            ``r_ui`` and ``est`` attributes. ``uid`` and ``iid`` must be
            convertible to non-negative integers because they are used as
            row / column indices.
        k_highest_scores: accepted for signature parity with ``get_ndcg``;
            currently unused.

    Returns:
        float: the value returned by ``label_ranking_average_precision_score``.
    """
    from sklearn.metrics import label_ranking_average_precision_score
    from scipy import sparse

    # Builtin int / float: the np.int / np.float aliases were removed in NumPy 1.24.
    uids = np.array([p.uid for p in surprise_predictions]).astype(int)
    iids = np.array([p.iid for p in surprise_predictions]).astype(int)
    # Ratings of 3 or more count as relevant, anything lower as not relevant.
    r_uis = np.array(
        [1 if p.r_ui >= 3 else 0 for p in surprise_predictions]
    ).astype(float)
    ests = np.array([p.est for p in surprise_predictions]).astype(float)

    # (user, item) pairs that are absent from the predictions stay at 0 in
    # both grids.
    dense_preds = sparse.coo_matrix((ests, (uids, iids))).toarray()
    dense_vals = sparse.coo_matrix((r_uis, (uids, iids))).toarray()

    # NOTE(review): rows without any relevant entry (e.g. row 0 when ids start
    # at 1, or users whose test items are all rated below 3) are still passed
    # to scikit-learn -- confirm how it scores such rows before comparing this
    # number with other MAP implementations.
    return label_ranking_average_precision_score(
        y_true=dense_vals, y_score=dense_preds
    )

In [13]:
# Mean average precision of the SVD predictions on the held-out test set.
get_map(predictions)

0.9276755135860559

In [14]:
def get_ndcg(surprise_predictions, k_highest_scores=None):
    """NDCG of the predictions, via ``sklearn.metrics.ndcg_score``.

    The predictions are laid out as two dense user x item grids (estimated
    ratings and true ratings) which are handed to ``ndcg_score``. The true
    ratings are used as graded relevance, without thresholding.

    Args:
        surprise_predictions: sequence of objects exposing ``uid``, ``iid``,
            ``r_ui`` and ``est`` attributes. ``uid`` and ``iid`` must be
            convertible to non-negative integers because they are used as
            row / column indices.
        k_highest_scores: forwarded to ``ndcg_score`` as ``k``; ``None``
            (the default) is passed through unchanged.

    Returns:
        float: the value returned by ``ndcg_score``.
    """
    from sklearn.metrics import ndcg_score
    from scipy import sparse

    # Builtin int / float: the np.int / np.float aliases were removed in NumPy 1.24.
    uids = np.array([p.uid for p in surprise_predictions]).astype(int)
    iids = np.array([p.iid for p in surprise_predictions]).astype(int)
    r_uis = np.array([p.r_ui for p in surprise_predictions]).astype(float)
    ests = np.array([p.est for p in surprise_predictions]).astype(float)

    # (user, item) pairs that are absent from the predictions stay at 0 in
    # both grids.
    dense_preds = sparse.coo_matrix((ests, (uids, iids))).toarray()
    dense_vals = sparse.coo_matrix((r_uis, (uids, iids))).toarray()

    # NOTE(review): rows whose true ratings are all 0 (e.g. row 0 when ids
    # start at 1) are still part of the average -- confirm how scikit-learn
    # scores such rows before comparing this number with other implementations.
    return ndcg_score(y_true=dense_vals, y_score=dense_preds, k=k_highest_scores)

In [15]:
# NDCG of the SVD predictions on the held-out test set (k left at its default of None).
get_ndcg(predictions)

0.9524153626409766