[View in Colaboratory](https://colab.research.google.com/github/ylongqi/openrec/blob/master/tutorials/Youtube_Recommender_example.ipynb)

<p align="center">
  <img src ="https://recsys.acm.org/wp-content/uploads/2017/07/recsys-18-small.png" height="40" /> <font size="4">Recsys 2018 Tutorial</font>
</p>
<p align="center">
  <font size="4"><b>Modularizing Deep Neural Network-Inspired Recommendation Algorithms</b></font>
</p>
<p align="center">
  <font size="4">Hands on: Customizing Deep YouTube Video Recommendation. Youtube example</font>
</p>

# The YouTube Recommender

The training graph of YouTube-Rec can be decomposed as follows.

<p align="center">
  <img src ="https://s3.amazonaws.com/cornell-tech-sdl-openrec/tutorials/youtube_rec_module.png" height="400" />
</p>




* **inputgraph**: user demographics, item consumption history, and the ground-truth label.
* **usergraph**: extract user-specific latent factor.
* **itemgraph**: extract latent factors for items.
* **interactiongraph**: uses MLP and softmax to model user-item interactions.

After defining subgraphs, their interfaces and connections need to be specified. A sample specification of YouTube-Rec can be as follows.
<p align="center">
  <img src ="https://s3.amazonaws.com/cornell-tech-sdl-openrec/tutorials/youtube_rec.png" height="300" />
</p>

# Install OpenRec and download dataset

In [0]:
!pip install openrec

import urllib.request

dataset_prefix = 'http://s3.amazonaws.com/cornell-tech-sdl-openrec'

# (remote URL template, local filename) for every file the tutorial needs.
_dataset_files = (
    ('%s/lastfm/lastfm_test.npy', 'lastfm_test.npy'),
    ('%s/lastfm/lastfm_train.npy', 'lastfm_train.npy'),
    ('%s/lastfm/user_feature.npy', 'user_feature.npy'),
)

# Fetch each file into the current working directory, in the order listed.
for _url_template, _local_name in _dataset_files:
    urllib.request.urlretrieve(_url_template % dataset_prefix, _local_name)

# Your task 
-  understand how to reuse and extend an existing recommender
-  fill in the placeholders in the implementation of the `Tutorial_YouTubeRec` function
-  successfully run the experimental code with the recommender you just built. 

In [0]:
from openrec.recommenders import VanillaYouTubeRec  # load the vanilla version and extend it with user demographic information
from openrec.modules.extractions import LatentFactor
from openrec.modules.interactions import MLPSoftmax
import tensorflow as tf


def Tutorial_YouTubeRec(batch_size, user_dict, item_dict, dim_user_embed, dim_item_embed,
        max_seq_len, l2_reg_embed=None, l2_reg_mlp=None, dropout=None,
        init_model_dir=None, save_model_dir='Youtube/', train=True, serve=False):
    """Build a YouTube-style recommender by extending ``VanillaYouTubeRec``.

    This is a tutorial template: every ``'FILL_IN_VARIABLE'`` string below is a
    placeholder that must be replaced with a real variable name before the
    function can be used. The template adds two integer user features
    (gender and geo) to the input graphs, embeds them in the user graph, and
    feeds the concatenated result to an ``MLPSoftmax`` interaction graph.

    Args:
        batch_size: First dimension of the training placeholders; also
            forwarded to ``VanillaYouTubeRec``.
        user_dict: Mapping with ``'gender'`` and ``'geo'`` entries, used as the
            first dimension of the corresponding embedding tables.
        item_dict: Mapping whose ``'id'`` entry is forwarded as ``total_items``
            and used as the last entry of the ``MLPSoftmax`` ``dims``.
        dim_user_embed: Mapping with ``'gender'``, ``'geo'`` and ``'total'``
            entries (embedding widths).
        dim_item_embed: Mapping with ``'id'`` and ``'total'`` entries.
        max_seq_len: Forwarded to ``VanillaYouTubeRec`` and ``MLPSoftmax``.
        l2_reg_embed: Forwarded to ``VanillaYouTubeRec`` and used as ``l2_reg``
            for both ``LatentFactor`` embeddings.
        l2_reg_mlp: Forwarded to ``VanillaYouTubeRec`` and used as ``l2_reg``
            for ``MLPSoftmax``.
        dropout: Forwarded to ``VanillaYouTubeRec`` and to the training
            ``MLPSoftmax`` (the serving one does not receive it).
        init_model_dir: Forwarded unchanged to ``VanillaYouTubeRec``.
        save_model_dir: Forwarded unchanged to ``VanillaYouTubeRec``.
        train: Forwarded unchanged to ``VanillaYouTubeRec``.
        serve: Forwarded unchanged to ``VanillaYouTubeRec``.

    Returns:
        The extended recommender object.
    """
    rec = VanillaYouTubeRec(batch_size=batch_size,
                            dim_item_embed=dim_item_embed['id'],
                            max_seq_len=max_seq_len,
                            total_items=item_dict['id'],
                            l2_reg_embed=l2_reg_embed,
                            # Pass the MLP regulariser through; the original
                            # mistakenly reused l2_reg_embed here.
                            l2_reg_mlp=l2_reg_mlp,
                            dropout=dropout,
                            init_model_dir=init_model_dir,
                            save_model_dir=save_model_dir,
                            train=train,
                            serve=serve)

    #TODO: fill in training inputgraph extension
    @rec.traingraph.inputgraph.extend(outs=['FILL_IN_VARIABLE', 'FILL_IN_VARIABLE'])
    def add_train_feature(subgraph):
        # Two int32 feature placeholders with a fixed batch dimension.
        subgraph['FILL_IN_VARIABLE'] = tf.placeholder(tf.int32, shape=[batch_size], name='FILL_IN_VARIABLE')
        subgraph['FILL_IN_VARIABLE'] = tf.placeholder(tf.int32, shape=[batch_size], name='FILL_IN_VARIABLE')

        # Register the new placeholders in the global input mapping.
        subgraph.update_global_input_mapping({'FILL_IN_VARIABLE': subgraph['FILL_IN_VARIABLE'],
                                              'FILL_IN_VARIABLE': subgraph['FILL_IN_VARIABLE']})

    #TODO: fill in serve inputgraph extension [Hint: similar to training graph]
    @rec.servegraph.inputgraph.extend(outs=['FILL_IN_VARIABLE', 'FILL_IN_VARIABLE'])
    def add_serve_feature(subgraph):
        # Same features as in training, but with an unconstrained batch dimension.
        subgraph['FILL_IN_VARIABLE'] = tf.placeholder(tf.int32, shape=[None], name='FILL_IN_VARIABLE')
        subgraph['FILL_IN_VARIABLE'] = tf.placeholder(tf.int32, shape=[None], name='FILL_IN_VARIABLE')

        # The keys were undefined names (v1, v2) in the original template;
        # they are placeholders like every other name in this exercise.
        subgraph.update_global_input_mapping({'FILL_IN_VARIABLE': subgraph['FILL_IN_VARIABLE'],
                                              'FILL_IN_VARIABLE': subgraph['FILL_IN_VARIABLE']})

    #TODO: fill in usergraph
    @rec.traingraph.usergraph(ins=['FILL_IN_VARIABLE', 'FILL_IN_VARIABLE'], outs=['FILL_IN_VARIABLE'])
    @rec.servegraph.usergraph(ins=['FILL_IN_VARIABLE', 'FILL_IN_VARIABLE'], outs=['FILL_IN_VARIABLE'])
    def user_graph(subgraph):
        # Embed the gender feature.
        _, gender_vec = LatentFactor(l2_reg=l2_reg_embed,
                                     shape=[user_dict['gender'], dim_user_embed['gender']],
                                     id_=subgraph['FILL_IN_VARIABLE'],
                                     subgraph=subgraph,
                                     init='normal',
                                     scope='FILL_IN_VARIABLE')

        # Embed the geo feature.
        _, geo_vec = LatentFactor(l2_reg=l2_reg_embed,
                                  shape=[user_dict['geo'], dim_user_embed['geo']],
                                  id_=subgraph['FILL_IN_VARIABLE'],
                                  subgraph=subgraph,
                                  init='normal',
                                  scope='FILL_IN_VARIABLE')

        # The user representation is the two embeddings joined along axis 1.
        subgraph['FILL_IN_VARIABLE'] = tf.concat([gender_vec, geo_vec], axis=1)

    #TODO: fill in training interactiongraph
    @rec.traingraph.interactiongraph(ins=['FILL_IN_VARIABLE', 'seq_item_vec', 'seq_len', 'label'])
    def train_interaction_graph(subgraph):
        MLPSoftmax(user=subgraph['FILL_IN_VARIABLE'],
                   item=subgraph['seq_item_vec'],
                   seq_len=subgraph['seq_len'],
                   max_seq_len=max_seq_len,
                   dims=[dim_user_embed['total'] + dim_item_embed['total'], item_dict['id']],
                   l2_reg=l2_reg_mlp,
                   labels=subgraph['label'],
                   dropout=dropout,
                   train=True,
                   subgraph=subgraph,
                   scope='MLPSoftmax')

    #TODO: fill in serve interactiongraph
    @rec.servegraph.interactiongraph(ins=['FILL_IN_VARIABLE', 'seq_item_vec', 'seq_len'])
    def serve_interaction_graph(subgraph):
        # Same scope as the training graph; no labels or dropout when serving.
        MLPSoftmax(user=subgraph['FILL_IN_VARIABLE'],
                   item=subgraph['seq_item_vec'],
                   seq_len=subgraph['seq_len'],
                   max_seq_len=max_seq_len,
                   dims=[dim_user_embed['total'] + dim_item_embed['total'], item_dict['id']],
                   l2_reg=l2_reg_mlp,
                   train=False,
                   subgraph=subgraph,
                   scope='MLPSoftmax')

    #TODO: fill in connector extension
    @rec.traingraph.connector.extend
    @rec.servegraph.connector.extend
    def connect(graph):
        # Wire the new input features into the user graph, then the user
        # graph's output into the interaction graph.
        graph.usergraph['FILL_IN_VARIABLE'] = graph.inputgraph['FILL_IN_VARIABLE']
        graph.usergraph['FILL_IN_VARIABLE'] = graph.inputgraph['FILL_IN_VARIABLE']
        graph.interactiongraph['FILL_IN_VARIABLE'] = graph.usergraph['FILL_IN_VARIABLE']

    return rec

# Experiment
We will use the recommender you implemented to run a toy experiment on the LastFM dataset.

## load lastfm dataset

In [0]:
import numpy as np

# Load the three arrays downloaded at the start of the tutorial.
train_data = np.load('lastfm_train.npy')
test_data = np.load('lastfm_test.npy')
user_feature = np.load('user_feature.npy')

# Dataset dimensions used by the rest of the notebook.
total_users, total_items = 992, 14598

# Sizes used as the first dimension of the embedding tables.
item_dict = {'id': total_items}
user_dict = {
    'gender': 3,
    'geo': 67,
}

In [0]:
# Preview the first ten entries of the user features and the test data.
(user_feature[:10], test_data[:10])

## preprocessing dataset

In [0]:
from openrec.utils import Dataset

# Both splits are wrapped with the same options; only the data and name differ.
_dataset_options = {'sortby': 'ts'}

train_dataset = Dataset(train_data, total_users, total_items,
                        name='Train', **_dataset_options)
test_dataset = Dataset(test_data, total_users, total_items,
                       name='Test', **_dataset_options)

## hyperparameters and training parameters

In [0]:
# Embedding widths for the user features.
dim_user_embed = {
    'geo': 40,     # width of the user geographic embedding
    'gender': 10,  # width of the user gender embedding
    'total': 50,
}
# Embedding widths for the items.
dim_item_embed = {
    'id': 50,
    'total': 50,
}

# Sequence and batching settings.
max_seq_len = 100  # the maximum length of a user's listen history
batch_size = 100   # training batch size

# Training schedule.
total_iter = 1000      # iterations for training
eval_iter = 100        # iteration of evaluation
save_iter = eval_iter  # iteration of saving model

## define sampler
We use `YouTubeSampler`  and `YouTubeEvaluationSampler` to sample sequences of training and testing samples. 

In [0]:
from openrec.utils.samplers import YouTubeSampler, YouTubeEvaluationSampler
  
# Training sampler: draws batches of `batch_size` from the training dataset.
train_sampler = YouTubeSampler(
    dataset=train_dataset,
    user_feature=user_feature,
    batch_size=batch_size,
    max_seq_len=max_seq_len,
    num_process=1,
)

# Evaluation sampler: built over the test dataset with the same user features.
test_sampler = YouTubeEvaluationSampler(
    dataset=test_dataset,
    user_feature=user_feature,
    max_seq_len=max_seq_len,
)

## define evaluator

In [0]:
from openrec.utils.evaluators import AUC, Recall

# Metrics passed to the trainer: AUC, plus recall at 100, 200, ..., 500.
auc_evaluator = AUC()
recall_evaluator = Recall(recall_at=list(range(100, 501, 100)))

## define model trainer

We use the `Tutorial_YouTubeRec` recommender implemented above, which extends the vanilla version of the YouTube recommender, to train our model.

In [0]:
from openrec import ModelTrainer
# from openrec.recommenders import YouTubeRec # load YouTubeRec recommender from openrec library

# Collect the constructor arguments first so the configuration reads as data.
_model_config = {
    'batch_size': batch_size,
    'user_dict': user_dict,
    'item_dict': item_dict,
    'max_seq_len': max_seq_len,
    'dim_item_embed': dim_item_embed,
    'dim_user_embed': dim_user_embed,
    'save_model_dir': 'youtube_recommender/',
    'train': True,
    'serve': True,
}

# Build the recommender with both its training and serving graphs enabled,
# then hand it to the trainer.
model = Tutorial_YouTubeRec(**_model_config)
model_trainer = ModelTrainer(model=model)

## training and testing

In [0]:
# Run training with the samplers and evaluators defined above.
model_trainer.train(
    train_sampler=train_sampler,
    eval_samplers=[test_sampler],
    evaluators=[auc_evaluator, recall_evaluator],
    total_iter=total_iter,
    eval_iter=eval_iter,
    save_iter=save_iter,
)