# 패키지 설치

In [2]:
%pip install implicit



## Import

In [55]:
# 유틸리티
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
# 모델
from implicit.als import AlternatingLeastSquares as ALS
from implicit.bpr import BayesianPersonalizedRanking as BPR
# 평가 지표
from implicit.evaluation import train_test_split;
from implicit.evaluation import AUC_at_k, mean_average_precision_at_k, ndcg_at_k, precision_at_k;

# 데이터 적재

## 평점 데이터 적재

In [9]:
ratingDF = pd.read_csv('drive/MyDrive/Data/philadelphia_rating.csv')

In [16]:
ratingDF = ratingDF[['newUserId', 'newBusinessId', 'stars']]

In [17]:
ratingDF.head()

Unnamed: 0,newUserId,newBusinessId,stars
0,1,1,5
1,2,1,2
2,3,1,5
3,3,1,5
4,4,1,5


## 사용자, 사업체 추출

In [20]:
users = ratingDF["newUserId"].unique()
users

array([     1,      2,      3, ..., 202067, 202068, 202069])

In [22]:
businesses = ratingDF["newBusinessId"].unique()
businesses

array([   1,  120, 3105, ..., 3990, 4176, 6228])

## CSR 구성

In [34]:
data = ratingDF['stars'].to_numpy()
row = ratingDF['newUserId'].to_numpy()
col = ratingDF['newBusinessId'].to_numpy()

csr = csr_matrix((data, (row, col)))

csr

<216945x7077 sparse matrix of type '<class 'numpy.int64'>'
	with 715637 stored elements in Compressed Sparse Row format>

# 훈련

## train-test 분할

In [35]:
(train_set, test_set) = train_test_split(csr, train_percentage=0.8)

In [36]:
train_set

<216945x7077 sparse matrix of type '<class 'numpy.int64'>'
	with 572260 stored elements in Compressed Sparse Row format>

In [37]:
test_set

<216945x7077 sparse matrix of type '<class 'numpy.int64'>'
	with 143377 stored elements in Compressed Sparse Row format>

## 훈련

In [46]:
bprModel = BPR(factors=100, regularization=0.05, iterations=100)

In [47]:
bprModel.fit(train_set)

  0%|          | 0/100 [00:00<?, ?it/s]

# 테스트

In [56]:
userid = 1
ids, scores = bprModel.recommend(userid, csr[userid], N=10, filter_already_liked_items=True)

In [57]:
pd.DataFrame({"business": businesses[ids], "score": scores, "already_liked": np.in1d(ids, csr[userid].indices)})

Unnamed: 0,business,score,already_liked
0,2796,0.319887,False
1,5869,0.313853,False
2,2960,0.302721,False
3,2659,0.295121,False
4,4789,0.293308,False
5,3991,0.290907,False
6,6941,0.286163,False
7,3444,0.278523,False
8,3471,0.275827,False
9,5165,0.275784,False


# 평가

## NDCG@K

In [51]:
ndcgK = ndcg_at_k(bprModel, train_set, test_set)

  0%|          | 0/74740 [00:00<?, ?it/s]

In [52]:
ndcgK

0.012546227703154136

## Precision@K

In [53]:
precK = precision_at_k(bprModel, train_set, test_set)

  0%|          | 0/74740 [00:00<?, ?it/s]

In [54]:
precK

0.020805019919699925