# Bayesian Personalized Ranking
이 노트북은 암시적 피드백(implicit feedback)을 위한 BPR(Bayesian Personalized Ranking) 모델을 소개하며, 행렬 분해(matrix factorization) 기반 BPR 모델을 학습하는 데 초점을 둔다.   
모델은 Cornac에 구현된 것을 사용한다. 

In [1]:
import sys
import os
import cornac
import papermill as pm
import scrapbook as sb
import pandas as pd
from recommenders.datasets import movielens
from recommenders.datasets.python_splitters import python_random_split
from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k
from recommenders.models.cornac.cornac_utils import predict_ranking
from recommenders.utils.timer import Timer
from recommenders.utils.constants import SEED

# Report the interpreter and Cornac versions, one line each, so the recorded
# results can be tied to the environment that produced them.
version_lines = (
    "System version: {}".format(sys.version),
    "Cornac version: {}".format(cornac.__version__),
)
print(*version_lines, sep="\n")

System version: 3.7.13 (default, Mar 29 2022, 02:18:16) 
[GCC 7.5.0]
Cornac version: 1.14.2


In [2]:
# MovieLens dataset variant this notebook is configured for.
Movielens_data_size = "100k"

# Top-K cut-off intended for the ranking metrics.
TOP_K = 10

# BPR hyper-parameters: Num_factors is passed to the model as `k` and
# Num_epochs as `max_iter` in the training cell.
Num_factors, Num_epochs = 200, 100

## BPR Algorithm
### Personalized Ranking from Implicit Feedback

## Cornac implementation of BPR

## Cornac BPR movie recommender
### Load and split data

In [5]:
# Derive the file name from Movielens_data_size so the declared parameter
# actually selects the data; with the default '100k' this resolves to the same
# 'Movielens_100k.csv' as before.
# NOTE(review): assumes CSVs for other sizes follow the same naming - confirm.
data = pd.read_csv('Movielens_{}.csv'.format(Movielens_data_size))

# Keep only the user, item and rating columns, in that order; the later cells
# refer to them by these names.
data = data[['UserId', 'MovieId', 'Rating']]

# Random 75% / 25% train/test split. The seed is passed explicitly so the split
# is tied to the same SEED used by the dataset and model cells rather than to
# the splitter's implicit default.
train, test = python_random_split(data, ratio=0.75, seed=SEED)

In [6]:
# Build the Cornac training set from the rows of `train`, taken as plain
# tuples in column order (the DataFrame index is excluded).
train_rows = train.itertuples(index=False)
train_set = cornac.data.Dataset.from_uir(train_rows, seed=SEED)

### Train the BPR model
`k`와 `max_iter` 파라미터는 훈련 시간에 영향을 준다.

In [8]:
# Settings for the Cornac BPR model; `k` and `max_iter` come from the
# notebook-level Num_factors / Num_epochs parameters.
bpr_settings = dict(
    k=Num_factors,
    max_iter=Num_epochs,
    learning_rate=0.01,
    lambda_reg=0.001,
    verbose=True,
    seed=SEED,
)
bpr = cornac.models.BPR(**bpr_settings)

# Time the fit; the Timer is formatted into the message once the block exits.
with Timer() as t:
    bpr.fit(train_set)
training_report = "Took {} seconds for training.".format(t)
print(training_report)

  0%|          | 0/100 [00:00<?, ?it/s]

Optimization finished!
Took 1.7654 seconds for training.


### Prediction and Evaluation
BPR 모델은 아이템 랭킹에 효과적이도록 설계되었다. 따라서 랭킹 메트릭으로만 성능을 평가한다.

In [10]:
# Produce ranking predictions from the trained model using `train` as the
# reference data; remove_seen=True is passed so that (per the flag's name)
# items already present in `train` are left out of the predictions.
ranking_args = dict(usercol='UserId', itemcol='MovieId', remove_seen=True)
with Timer() as t:
    all_pred = predict_ranking(bpr, train, **ranking_args)
prediction_report = "Took {} seconds for prediction.".format(t)
print(prediction_report)

Took 0.8041 seconds for prediction.


In [12]:
# Evaluate at the notebook-wide TOP_K cut-off instead of a second hard-coded
# 10, so changing TOP_K actually changes the reported metrics.
k = TOP_K

# Column mapping and cut-off shared by all four ranking metrics; defined once
# so the calls cannot drift apart.
metric_kwargs = dict(
    col_user='UserId',
    col_item='MovieId',
    col_rating='Rating',
    col_prediction='prediction',
    k=k,
)

# Each metric compares the held-out `test` interactions with `all_pred`.
eval_map = map_at_k(test, all_pred, **metric_kwargs)
eval_ndcg = ndcg_at_k(test, all_pred, **metric_kwargs)
eval_precision = precision_at_k(test, all_pred, **metric_kwargs)
eval_recall = recall_at_k(test, all_pred, **metric_kwargs)

print("MAP:\t%f" % eval_map,
      "NDCG:\t%f" % eval_ndcg,
      "Precision@K:\t%f" % eval_precision,
      "Recall@K:\t%f" % eval_recall, sep='\n')


MAP:	0.110293
NDCG:	0.407024
Precision@K:	0.359873
Recall@K:	0.183517


In [13]:
# Record the metrics in the notebook with scrapbook (`sb.glue`) so an automated
# run (e.g. papermill-executed tests) can read them back by name.
recorded_metrics = (
    ("map", eval_map),
    ("ndcg", eval_ndcg),
    ("precision", eval_precision),
    ("recall", eval_recall),
)
for metric_name, metric_value in recorded_metrics:
    sb.glue(metric_name, metric_value)