In [1]:
!pip install recbole

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting recbole
  Downloading recbole-1.0.1-py3-none-any.whl (2.0 MB)
[K     |████████████████████████████████| 2.0 MB 4.3 MB/s 
Collecting colorama==0.4.4
  Downloading colorama-0.4.4-py2.py3-none-any.whl (16 kB)
Collecting pyyaml>=5.1.0
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 49.3 MB/s 
[?25hCollecting scipy==1.6.0
  Downloading scipy-1.6.0-cp37-cp37m-manylinux1_x86_64.whl (27.4 MB)
[K     |████████████████████████████████| 27.4 MB 1.5 MB/s 
Collecting colorlog==4.7.2
  Downloading colorlog-4.7.2-py2.py3-none-any.whl (10 kB)
Installing collected packages: scipy, pyyaml, colorlog, colorama, recbole
  Attempting uninstall: scipy
    Found existing installation: scipy 1.4.1
    Uninstalling scipy-1.4.1:
      Successfully uninstalled scipy-1.4.1

In [2]:
import time
import pandas as pd
import numpy as np
from recbole.config import Config
from recbole.data import create_dataset, data_preparation
from recbole.model.general_recommender import BPR
from recbole.trainer import Trainer
from recbole.data.interaction import Interaction

In [3]:
!git clone https://github.com/RUCAIBox/RecDatasets

Cloning into 'RecDatasets'...
remote: Enumerating objects: 1034, done.[K
remote: Counting objects: 100% (197/197), done.[K
remote: Compressing objects: 100% (108/108), done.[K
remote: Total 1034 (delta 89), reused 173 (delta 85), pack-reused 837[K
Receiving objects: 100% (1034/1034), 277.95 KiB | 964.00 KiB/s, done.
Resolving deltas: 100% (436/436), done.


In [4]:
!wget http://files.grouplens.org/datasets/movielens/ml-1m.zip
!unzip ml-1m.zip -d raw_data

--2022-05-28 07:33:20--  http://files.grouplens.org/datasets/movielens/ml-1m.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5917549 (5.6M) [application/zip]
Saving to: ‘ml-1m.zip’


2022-05-28 07:33:22 (4.13 MB/s) - ‘ml-1m.zip’ saved [5917549/5917549]

Archive:  ml-1m.zip
   creating: raw_data/ml-1m/
  inflating: raw_data/ml-1m/movies.dat  
  inflating: raw_data/ml-1m/ratings.dat  
  inflating: raw_data/ml-1m/README   
  inflating: raw_data/ml-1m/users.dat  


In [5]:
!python RecDatasets/conversion_tools/run.py \
--dataset ml-1m \
--input_path raw_data/ml-1m \
--output_path processed_data/ml-1m \
--convert_inter #--convert_item --convert_user

100% 1000209/1000209 [01:51<00:00, 8941.95it/s]


In [6]:
config = Config(model='BPR', dataset='ml-1m')
config['data_path'] = 'processed_data/ml-1m'
config['metrics'].append('MAP')

In [7]:
print(config)


[1;35mGeneral Hyper Parameters:
[0m[1;36mgpu_id[0m =[1;33m 0[0m
[1;36muse_gpu[0m =[1;33m True[0m
[1;36mseed[0m =[1;33m 2020[0m
[1;36mstate[0m =[1;33m INFO[0m
[1;36mreproducibility[0m =[1;33m True[0m
[1;36mdata_path[0m =[1;33m processed_data/ml-1m[0m
[1;36mcheckpoint_dir[0m =[1;33m saved[0m
[1;36mshow_progress[0m =[1;33m True[0m
[1;36msave_dataset[0m =[1;33m False[0m
[1;36mdataset_save_path[0m =[1;33m None[0m
[1;36msave_dataloaders[0m =[1;33m False[0m
[1;36mdataloaders_save_path[0m =[1;33m None[0m
[1;36mlog_wandb[0m =[1;33m False[0m

[1;35mTraining Hyper Parameters:
[0m[1;36mepochs[0m =[1;33m 300[0m
[1;36mtrain_batch_size[0m =[1;33m 2048[0m
[1;36mlearner[0m =[1;33m adam[0m
[1;36mlearning_rate[0m =[1;33m 0.001[0m
[1;36mneg_sampling[0m =[1;33m {'uniform': 1}[0m
[1;36meval_step[0m =[1;33m 1[0m
[1;36mstopping_step[0m =[1;33m 10[0m
[1;36mclip_grad_norm[0m =[1;33m None[0m
[1;36mweight_decay[0m =[1;33m 

In [8]:
dataset = create_dataset(config)
train_data, valid_data, test_data = \
  data_preparation(config, dataset)

In [9]:
model = \
  BPR(config, train_data.dataset).to(config['device'])

In [10]:
trainer = Trainer(config, model)
best_valid_score, best_valid_result = \
  trainer.fit(train_data, valid_data)

In [11]:
test_result = trainer.evaluate(test_data)

In [12]:
test_result

OrderedDict([('recall@10', 0.1647),
             ('mrr@10', 0.449),
             ('ndcg@10', 0.2593),
             ('hit@10', 0.746),
             ('precision@10', 0.2028),
             ('map@10', 0.1544)])

In [13]:
user_feature = \
  dataset.get_user_feature().to(config['device'])
item_feature = \
  dataset.get_item_feature().to(config['device'])

In [14]:
print(user_feature, item_feature)

The batch_size of interaction: 6041
    user_id, torch.Size([6041]), cuda, torch.int64

 The batch_size of interaction: 3707
    item_id, torch.Size([3707]), cuda, torch.int64




In [15]:
topk = 10
rank_list = [i+1 for i in range(topk)]

In [16]:
# 推薦結果を格納するテーブルを作成する
df_recommend_list = pd.DataFrame(
  columns=['user_id', 'item_id', 'score', 'rank']
)

# 各ユーザに対して、トップ10アイテムを絞り込む
for user_id in user_feature['user_id']:
  item_list = item_feature['item_id']
  user_list = \
    [user_id.to('cpu').detach().numpy()] * \
    len(item_feature)

  df_predict = pd.DataFrame()
  df_predict['user_id'] = user_list
  df_predict['item_id'] = \
    item_list.to('cpu').detach().numpy()
  # IDの補正
  df_predict['user_id'] += 1
  df_predict['item_id'] += 1
  
  # 対象ユーザ×全アイテムのインタラクションデータを作成 
  all_inter = Interaction({
    'user_id': user_id.unsqueeze(0), 
    'item_id': item_list
  })
  # スコアの推論
  prediction = model.full_sort_predict(all_inter)
  df_predict['score'] = \
    prediction.to('cpu').detach().numpy()
  # スコアでの並び替え、トップ10で絞り込み
  df_recommend = df_predict.sort_values(
    'score', ascending=False
  )[:topk]
  df_recommend['rank'] = rank_list

  # 全体テーブルに格納する
  df_recommend_list = \
    df_recommend_list.append(
      df_recommend, 
      ignore_index=True
    )

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [17]:
df_recommend_list

Unnamed: 0,user_id,item_id,score,rank
0,1,2393,0.154529,1
1,1,503,0.154065,2
2,1,938,0.149972,3
3,1,1705,0.140344,4
4,1,2141,0.139466,5
...,...,...,...,...
60405,6041,1299,6.028157,6
60406,6041,1243,5.922610,7
60407,6041,1586,5.891199,8
60408,6041,1117,5.762080,9
