# Bilateral Variational Autoencoder
**BiVAE**에 관한 논문은 WSDM'21 컨퍼런스에서 처음 발표되었습니다.   
모델의 시행은 Cornac으로부터 이루어지며, Cornac은 보조적인 데이터를 활용하는 모델들에 집중한 멀티모달 추천 시스템을 위한 프레임워크입니다.

In [1]:
import sys
import os
import torch
import cornac
import papermill as pm
import scrapbook as sb
import pandas as pd
from recommenders.datasets import movielens
from recommenders.datasets.python_splitters import python_random_split
from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k
from recommenders.models.cornac.cornac_utils import predict_ranking
from recommenders.utils.timer import Timer
from recommenders.utils.constants import SEED

print("System version: {}".format(sys.version))
print("PyTorch version: {}".format(torch.__version__))
print("Cornac version: {}".format(cornac.__version__))


System version: 3.7.13 (default, Mar 29 2022, 02:18:16) 
[GCC 7.5.0]
PyTorch version: 1.11.0+cu102
Cornac version: 1.14.2


In [20]:
data_size = '100k'
TOP_k = 10
latent_dim = 50
encoder_dims = [100]
act_func = 'tanh'
likelihood = 'pois'
num_epochs = 500
batch_size = 128
learning_rate = 0.001

## 1. Theory
## 2. Cornac implementation of BiVAE
- Cornac에 있는 BiVAE 모델의 자세한 문서는 [여기](https://cornac.readthedocs.io/en/latest/models.html#module-cornac.models.bivaecf.recom_bivaecf)
- BiVAE 실행에 대한 소스코드는 [여기](https://github.com/PreferredAI/cornac/tree/master/cornac/models/bivaecf)
- BiVAE와 관련된 모든 실험은 [여기](https://github.com/PreferredAI/bi-vae)를 참고

## 3. Experiments on Movielens

In [4]:
# Load and split data
data = pd.read_csv('Movielens_100k.csv')
data = data[['UserId', 'MovieId', 'Rating']]
data.head()

Unnamed: 0,UserId,MovieId,Rating
0,196,242,3.0
1,186,302,3.0
2,22,377,1.0
3,244,51,2.0
4,166,346,1.0


In [5]:
train, test = python_random_split(data, 0.75)

### Cornac Dataset
Cornac의 모델들로 작업하려면 Dataset 클래스를 만들어야 한다. Cornac의 Dataset 클래스는 모델이 직접 상호작용하는 메인 오브젝트이다. 데이터 변형에 더해, 데이터를 순회하는 유용한 iterator들을 제공하고, negative 샘플링 기술들을 지원한다.

In [13]:
train_set = cornac.data.Dataset.from_uir(train.itertuples(index=False), seed=SEED)
print('Number of users: {}'.format(train_set.num_users))
print('Number of items: {}'.format(train_set.num_items))

Number of users: 943
Number of items: 1642


In [11]:
for i in train.itertuples(index=False):
    print(i)

Pandas(UserId=811, MovieId=901, Rating=4.0)
Pandas(UserId=804, MovieId=755, Rating=3.0)
Pandas(UserId=52, MovieId=287, Rating=5.0)
Pandas(UserId=735, MovieId=181, Rating=4.0)
Pandas(UserId=897, MovieId=96, Rating=5.0)
Pandas(UserId=934, MovieId=83, Rating=4.0)
Pandas(UserId=933, MovieId=1, Rating=3.0)
Pandas(UserId=57, MovieId=225, Rating=3.0)
Pandas(UserId=389, MovieId=199, Rating=5.0)
Pandas(UserId=759, MovieId=181, Rating=5.0)
Pandas(UserId=805, MovieId=24, Rating=4.0)
Pandas(UserId=693, MovieId=632, Rating=5.0)
Pandas(UserId=683, MovieId=321, Rating=5.0)
Pandas(UserId=856, MovieId=272, Rating=5.0)
Pandas(UserId=445, MovieId=544, Rating=2.0)
Pandas(UserId=7, MovieId=472, Rating=2.0)
Pandas(UserId=409, MovieId=266, Rating=1.0)
Pandas(UserId=103, MovieId=127, Rating=4.0)
Pandas(UserId=394, MovieId=651, Rating=4.0)
Pandas(UserId=313, MovieId=969, Rating=4.0)
Pandas(UserId=655, MovieId=896, Rating=4.0)
Pandas(UserId=655, MovieId=1008, Rating=3.0)
Pandas(UserId=280, MovieId=449, Rating=3

Pandas(UserId=541, MovieId=196, Rating=4.0)
Pandas(UserId=880, MovieId=1244, Rating=3.0)
Pandas(UserId=749, MovieId=326, Rating=4.0)
Pandas(UserId=115, MovieId=980, Rating=4.0)
Pandas(UserId=521, MovieId=385, Rating=3.0)
Pandas(UserId=346, MovieId=669, Rating=1.0)
Pandas(UserId=586, MovieId=576, Rating=3.0)
Pandas(UserId=583, MovieId=663, Rating=4.0)
Pandas(UserId=303, MovieId=1303, Rating=3.0)
Pandas(UserId=44, MovieId=756, Rating=3.0)
Pandas(UserId=932, MovieId=676, Rating=4.0)
Pandas(UserId=194, MovieId=521, Rating=4.0)
Pandas(UserId=399, MovieId=744, Rating=3.0)
Pandas(UserId=665, MovieId=597, Rating=3.0)
Pandas(UserId=406, MovieId=639, Rating=4.0)
Pandas(UserId=683, MovieId=268, Rating=4.0)
Pandas(UserId=224, MovieId=223, Rating=3.0)
Pandas(UserId=749, MovieId=80, Rating=1.0)
Pandas(UserId=297, MovieId=447, Rating=4.0)
Pandas(UserId=363, MovieId=575, Rating=1.0)
Pandas(UserId=830, MovieId=172, Rating=5.0)
Pandas(UserId=115, MovieId=234, Rating=5.0)
Pandas(UserId=309, MovieId=306, 

Pandas(UserId=648, MovieId=1030, Rating=2.0)
Pandas(UserId=495, MovieId=797, Rating=4.0)
Pandas(UserId=552, MovieId=1051, Rating=3.0)
Pandas(UserId=26, MovieId=300, Rating=4.0)
Pandas(UserId=653, MovieId=451, Rating=2.0)
Pandas(UserId=784, MovieId=268, Rating=3.0)
Pandas(UserId=416, MovieId=254, Rating=2.0)
Pandas(UserId=524, MovieId=525, Rating=3.0)
Pandas(UserId=869, MovieId=240, Rating=4.0)
Pandas(UserId=237, MovieId=238, Rating=4.0)
Pandas(UserId=788, MovieId=572, Rating=3.0)
Pandas(UserId=219, MovieId=347, Rating=1.0)
Pandas(UserId=94, MovieId=506, Rating=5.0)
Pandas(UserId=693, MovieId=197, Rating=3.0)
Pandas(UserId=224, MovieId=148, Rating=3.0)
Pandas(UserId=645, MovieId=427, Rating=5.0)
Pandas(UserId=715, MovieId=761, Rating=3.0)
Pandas(UserId=85, MovieId=215, Rating=4.0)
Pandas(UserId=94, MovieId=1211, Rating=5.0)
Pandas(UserId=187, MovieId=64, Rating=5.0)
Pandas(UserId=442, MovieId=174, Rating=4.0)
Pandas(UserId=909, MovieId=682, Rating=3.0)
Pandas(UserId=825, MovieId=591, Ra

Pandas(UserId=447, MovieId=91, Rating=4.0)
Pandas(UserId=7, MovieId=285, Rating=5.0)
Pandas(UserId=381, MovieId=480, Rating=5.0)
Pandas(UserId=759, MovieId=237, Rating=3.0)
Pandas(UserId=267, MovieId=145, Rating=4.0)
Pandas(UserId=54, MovieId=1012, Rating=2.0)
Pandas(UserId=532, MovieId=316, Rating=4.0)
Pandas(UserId=883, MovieId=331, Rating=3.0)
Pandas(UserId=931, MovieId=845, Rating=3.0)
Pandas(UserId=927, MovieId=7, Rating=3.0)
Pandas(UserId=239, MovieId=9, Rating=5.0)
Pandas(UserId=234, MovieId=1044, Rating=2.0)
Pandas(UserId=650, MovieId=378, Rating=3.0)
Pandas(UserId=518, MovieId=124, Rating=3.0)
Pandas(UserId=429, MovieId=214, Rating=3.0)
Pandas(UserId=796, MovieId=474, Rating=2.0)
Pandas(UserId=92, MovieId=227, Rating=1.0)
Pandas(UserId=679, MovieId=73, Rating=4.0)
Pandas(UserId=833, MovieId=931, Rating=4.0)
Pandas(UserId=601, MovieId=429, Rating=5.0)
Pandas(UserId=13, MovieId=809, Rating=4.0)
Pandas(UserId=780, MovieId=204, Rating=5.0)
Pandas(UserId=671, MovieId=443, Rating=3.

Pandas(UserId=593, MovieId=470, Rating=2.0)
Pandas(UserId=338, MovieId=133, Rating=4.0)
Pandas(UserId=454, MovieId=526, Rating=4.0)
Pandas(UserId=117, MovieId=15, Rating=5.0)
Pandas(UserId=6, MovieId=528, Rating=4.0)
Pandas(UserId=279, MovieId=845, Rating=1.0)
Pandas(UserId=136, MovieId=475, Rating=4.0)
Pandas(UserId=804, MovieId=926, Rating=4.0)
Pandas(UserId=810, MovieId=876, Rating=3.0)
Pandas(UserId=125, MovieId=751, Rating=5.0)
Pandas(UserId=234, MovieId=142, Rating=2.0)
Pandas(UserId=881, MovieId=393, Rating=4.0)
Pandas(UserId=94, MovieId=544, Rating=3.0)
Pandas(UserId=717, MovieId=313, Rating=5.0)
Pandas(UserId=472, MovieId=588, Rating=3.0)
Pandas(UserId=472, MovieId=374, Rating=2.0)
Pandas(UserId=747, MovieId=663, Rating=5.0)
Pandas(UserId=739, MovieId=603, Rating=4.0)
Pandas(UserId=862, MovieId=181, Rating=5.0)
Pandas(UserId=334, MovieId=229, Rating=2.0)
Pandas(UserId=456, MovieId=1129, Rating=4.0)
Pandas(UserId=298, MovieId=527, Rating=5.0)
Pandas(UserId=342, MovieId=237, Rat

Pandas(UserId=498, MovieId=160, Rating=5.0)
Pandas(UserId=42, MovieId=77, Rating=5.0)
Pandas(UserId=892, MovieId=7, Rating=4.0)
Pandas(UserId=903, MovieId=48, Rating=4.0)
Pandas(UserId=596, MovieId=300, Rating=4.0)
Pandas(UserId=404, MovieId=310, Rating=4.0)
Pandas(UserId=682, MovieId=153, Rating=3.0)
Pandas(UserId=843, MovieId=501, Rating=2.0)
Pandas(UserId=846, MovieId=511, Rating=5.0)
Pandas(UserId=337, MovieId=15, Rating=5.0)
Pandas(UserId=522, MovieId=208, Rating=5.0)
Pandas(UserId=97, MovieId=79, Rating=5.0)
Pandas(UserId=409, MovieId=327, Rating=2.0)
Pandas(UserId=528, MovieId=748, Rating=3.0)
Pandas(UserId=164, MovieId=406, Rating=2.0)
Pandas(UserId=903, MovieId=1098, Rating=5.0)
Pandas(UserId=580, MovieId=25, Rating=3.0)
Pandas(UserId=498, MovieId=56, Rating=3.0)
Pandas(UserId=328, MovieId=77, Rating=4.0)
Pandas(UserId=234, MovieId=1369, Rating=3.0)
Pandas(UserId=886, MovieId=209, Rating=4.0)
Pandas(UserId=659, MovieId=88, Rating=2.0)
Pandas(UserId=843, MovieId=99, Rating=2.0)

Pandas(UserId=49, MovieId=290, Rating=2.0)
Pandas(UserId=851, MovieId=748, Rating=3.0)
Pandas(UserId=682, MovieId=248, Rating=3.0)
Pandas(UserId=591, MovieId=194, Rating=4.0)
Pandas(UserId=214, MovieId=269, Rating=3.0)
Pandas(UserId=62, MovieId=498, Rating=4.0)
Pandas(UserId=440, MovieId=886, Rating=5.0)
Pandas(UserId=896, MovieId=721, Rating=4.0)
Pandas(UserId=498, MovieId=183, Rating=4.0)
Pandas(UserId=167, MovieId=318, Rating=5.0)
Pandas(UserId=448, MovieId=271, Rating=4.0)
Pandas(UserId=514, MovieId=135, Rating=4.0)
Pandas(UserId=122, MovieId=708, Rating=5.0)
Pandas(UserId=18, MovieId=612, Rating=4.0)
Pandas(UserId=201, MovieId=676, Rating=2.0)
Pandas(UserId=527, MovieId=496, Rating=4.0)
Pandas(UserId=804, MovieId=71, Rating=4.0)
Pandas(UserId=495, MovieId=1183, Rating=4.0)
Pandas(UserId=587, MovieId=681, Rating=2.0)
Pandas(UserId=891, MovieId=740, Rating=5.0)
Pandas(UserId=407, MovieId=1118, Rating=4.0)
Pandas(UserId=705, MovieId=225, Rating=4.0)
Pandas(UserId=269, MovieId=741, Ra

Pandas(UserId=846, MovieId=403, Rating=3.0)
Pandas(UserId=222, MovieId=1079, Rating=1.0)
Pandas(UserId=128, MovieId=815, Rating=3.0)
Pandas(UserId=763, MovieId=4, Rating=5.0)
Pandas(UserId=489, MovieId=261, Rating=2.0)
Pandas(UserId=586, MovieId=185, Rating=2.0)
Pandas(UserId=848, MovieId=152, Rating=5.0)
Pandas(UserId=730, MovieId=328, Rating=2.0)
Pandas(UserId=805, MovieId=274, Rating=2.0)
Pandas(UserId=148, MovieId=1012, Rating=4.0)
Pandas(UserId=506, MovieId=228, Rating=5.0)
Pandas(UserId=291, MovieId=411, Rating=4.0)
Pandas(UserId=313, MovieId=192, Rating=3.0)
Pandas(UserId=393, MovieId=132, Rating=2.0)
Pandas(UserId=94, MovieId=603, Rating=4.0)
Pandas(UserId=66, MovieId=508, Rating=4.0)
Pandas(UserId=717, MovieId=471, Rating=4.0)
Pandas(UserId=936, MovieId=1, Rating=4.0)
Pandas(UserId=782, MovieId=1038, Rating=4.0)
Pandas(UserId=653, MovieId=441, Rating=3.0)
Pandas(UserId=326, MovieId=4, Rating=1.0)
Pandas(UserId=23, MovieId=433, Rating=5.0)
Pandas(UserId=735, MovieId=304, Rating

Pandas(UserId=201, MovieId=281, Rating=2.0)
Pandas(UserId=477, MovieId=282, Rating=4.0)
Pandas(UserId=714, MovieId=471, Rating=4.0)
Pandas(UserId=68, MovieId=25, Rating=4.0)
Pandas(UserId=393, MovieId=692, Rating=3.0)
Pandas(UserId=406, MovieId=318, Rating=5.0)
Pandas(UserId=487, MovieId=94, Rating=3.0)
Pandas(UserId=698, MovieId=607, Rating=2.0)
Pandas(UserId=292, MovieId=234, Rating=5.0)
Pandas(UserId=454, MovieId=86, Rating=2.0)
Pandas(UserId=8, MovieId=258, Rating=5.0)
Pandas(UserId=551, MovieId=941, Rating=4.0)
Pandas(UserId=711, MovieId=97, Rating=4.0)
Pandas(UserId=514, MovieId=136, Rating=4.0)
Pandas(UserId=806, MovieId=257, Rating=4.0)
Pandas(UserId=59, MovieId=466, Rating=4.0)
Pandas(UserId=587, MovieId=916, Rating=3.0)
Pandas(UserId=907, MovieId=107, Rating=5.0)
Pandas(UserId=558, MovieId=285, Rating=5.0)
Pandas(UserId=830, MovieId=648, Rating=5.0)
Pandas(UserId=532, MovieId=931, Rating=3.0)
Pandas(UserId=647, MovieId=1016, Rating=4.0)
Pandas(UserId=803, MovieId=754, Rating=

Pandas(UserId=461, MovieId=285, Rating=4.0)
Pandas(UserId=470, MovieId=874, Rating=3.0)
Pandas(UserId=106, MovieId=191, Rating=5.0)
Pandas(UserId=48, MovieId=185, Rating=4.0)
Pandas(UserId=92, MovieId=455, Rating=2.0)
Pandas(UserId=11, MovieId=213, Rating=4.0)
Pandas(UserId=913, MovieId=423, Rating=3.0)
Pandas(UserId=881, MovieId=441, Rating=2.0)
Pandas(UserId=256, MovieId=127, Rating=4.0)
Pandas(UserId=148, MovieId=114, Rating=5.0)
Pandas(UserId=130, MovieId=800, Rating=4.0)
Pandas(UserId=201, MovieId=148, Rating=1.0)
Pandas(UserId=707, MovieId=212, Rating=4.0)
Pandas(UserId=559, MovieId=261, Rating=3.0)
Pandas(UserId=416, MovieId=625, Rating=5.0)
Pandas(UserId=480, MovieId=183, Rating=4.0)
Pandas(UserId=269, MovieId=959, Rating=5.0)
Pandas(UserId=259, MovieId=271, Rating=3.0)
Pandas(UserId=474, MovieId=671, Rating=3.0)
Pandas(UserId=71, MovieId=248, Rating=3.0)
Pandas(UserId=291, MovieId=50, Rating=5.0)
Pandas(UserId=141, MovieId=7, Rating=5.0)
Pandas(UserId=474, MovieId=481, Rating=

Pandas(UserId=711, MovieId=269, Rating=5.0)
Pandas(UserId=804, MovieId=566, Rating=4.0)
Pandas(UserId=184, MovieId=1398, Rating=5.0)
Pandas(UserId=456, MovieId=23, Rating=4.0)
Pandas(UserId=600, MovieId=1004, Rating=4.0)
Pandas(UserId=308, MovieId=213, Rating=4.0)
Pandas(UserId=405, MovieId=1027, Rating=1.0)
Pandas(UserId=694, MovieId=385, Rating=4.0)
Pandas(UserId=111, MovieId=304, Rating=4.0)
Pandas(UserId=364, MovieId=262, Rating=3.0)
Pandas(UserId=435, MovieId=763, Rating=5.0)
Pandas(UserId=690, MovieId=1210, Rating=3.0)
Pandas(UserId=883, MovieId=386, Rating=3.0)
Pandas(UserId=788, MovieId=554, Rating=3.0)
Pandas(UserId=305, MovieId=793, Rating=5.0)
Pandas(UserId=603, MovieId=227, Rating=3.0)
Pandas(UserId=616, MovieId=299, Rating=3.0)
Pandas(UserId=49, MovieId=1071, Rating=3.0)
Pandas(UserId=660, MovieId=63, Rating=2.0)
Pandas(UserId=313, MovieId=218, Rating=2.0)
Pandas(UserId=334, MovieId=275, Rating=4.0)
Pandas(UserId=592, MovieId=56, Rating=5.0)
Pandas(UserId=290, MovieId=527,

Pandas(UserId=643, MovieId=176, Rating=5.0)
Pandas(UserId=892, MovieId=186, Rating=3.0)
Pandas(UserId=290, MovieId=474, Rating=3.0)
Pandas(UserId=846, MovieId=463, Rating=5.0)
Pandas(UserId=416, MovieId=241, Rating=5.0)
Pandas(UserId=889, MovieId=655, Rating=4.0)
Pandas(UserId=3, MovieId=341, Rating=1.0)
Pandas(UserId=48, MovieId=302, Rating=4.0)
Pandas(UserId=527, MovieId=192, Rating=4.0)
Pandas(UserId=749, MovieId=24, Rating=2.0)
Pandas(UserId=1, MovieId=218, Rating=3.0)
Pandas(UserId=95, MovieId=24, Rating=3.0)
Pandas(UserId=533, MovieId=525, Rating=3.0)
Pandas(UserId=744, MovieId=963, Rating=5.0)
Pandas(UserId=886, MovieId=772, Rating=1.0)
Pandas(UserId=727, MovieId=542, Rating=2.0)
Pandas(UserId=249, MovieId=318, Rating=5.0)
Pandas(UserId=804, MovieId=238, Rating=4.0)
Pandas(UserId=882, MovieId=988, Rating=5.0)
Pandas(UserId=330, MovieId=11, Rating=4.0)
Pandas(UserId=603, MovieId=326, Rating=4.0)
Pandas(UserId=911, MovieId=98, Rating=2.0)
Pandas(UserId=144, MovieId=632, Rating=4.0

### Train the BiVAE model
- `k`: dimension of the latent space (i.e. the size of theta_u and beta_i).
- `encoder_structure`: dimension(s) of hidden layer(s) of the user and item encoders.
- `act_fn`: non-linear activation function used in the encoders.
- `likelihood`: choice of the likelihood function being optimized.
- `n_epochs`: number of passes through training data.
- `batch_size`: size of mini-batches of data during training.
- `learning_rate`: step size in the gradient update rules.

In [17]:
bivae = cornac.models.BiVAECF(k=latent_dim, encoder_structure=encoder_dims,
                             act_fn=act_func, likelihood=likelihood,
                             n_epochs=num_epochs, batch_size=batch_size,
                             learning_rate=learning_rate, seed=SEED,
                             use_gpu=torch.cuda.is_available(), verbose=True)

with Timer() as t:
    bivae.fit(train_set)
print('Took {} seconds for training'.format(t))

  0%|          | 0/500 [00:00<?, ?it/s]

Took 26.3615 seconds for training


### Prediction and Evaluation
Cornac에서 제공하는 모든 추천 모델은 아이템의 평점을 예측하는 `rate()`와 유저에 대한 아이템 랭크 리스트`rank()` 메소드를 갖고 있다. 현재의 검증 스키마에서 예측을 위해 `predict()`와 `predict_ranking()` 함수를 사용한다.

In [27]:
with Timer() as t:
    all_predictions = predict_ranking(bivae, train, usercol='UserId', itemcol='MovieId', remove_seen=True)
print('Took {} seconds for prediction'.format(t))

Took 0.6089 seconds for prediction


In [32]:
eval_map = map_at_k(test, all_predictions, col_user='UserId', col_item='MovieId', col_rating='Rating', col_prediction='prediction', k=TOP_k)
eval_ndcg = ndcg_at_k(test, all_predictions, col_user='UserId', col_item='MovieId', col_rating='Rating', col_prediction='prediction', k=TOP_k)
eval_precision = precision_at_k(test, all_predictions, col_user='UserId', col_item='MovieId', col_rating='Rating', col_prediction='prediction', k=TOP_k)
eval_recall = recall_at_k(test, all_predictions, col_user='UserId', col_item='MovieId', col_rating='Rating', col_prediction='prediction', k=TOP_k)

print("MAP:\t%f" % eval_map,
      "NDCG:\t%f" % eval_ndcg,
      "Precision@K:\t%f" % eval_precision,
      "Recall@K:\t%f" % eval_recall, sep='\n')


MAP:	0.142060
NDCG:	0.465813
Precision@K:	0.404671
Recall@K:	0.222637


In [28]:
# Record results with papermill for tests
sb.glue("map", eval_map)
sb.glue("ndcg", eval_ndcg)
sb.glue("precision", eval_precision)
sb.glue("recall", eval_recall)


Unnamed: 0,UserId,MovieId,Rating
75721,877,381,4.0
80184,815,602,3.0
19864,94,431,4.0
76699,416,875,2.0
92991,500,182,2.0
...,...,...,...
21271,399,684,3.0
34014,222,580,3.0
81355,551,162,5.0
65720,803,988,1.0
