[View in Colaboratory](https://colab.research.google.com/github/ylongqi/unbiased-offline-recommender-evaluation/blob/master/experiments_section_5/experiment_cml_yahoo_val.ipynb)

## Install framework and download datasets

In [0]:
!pip install openrec

Collecting openrec
[?25l  Downloading https://files.pythonhosted.org/packages/6d/ef/9ff797867220d00f672f3917600392b4201d08b526f4e48a6039193e28bb/openrec-0.2.2.tar.gz (46kB)
[K    22% |███████                         | 10kB 20.6MB/s eta 0:00:01[K    44% |██████████████▏                 | 20kB 3.2MB/s eta 0:00:01[K    66% |█████████████████████▎          | 30kB 3.7MB/s eta 0:00:01[K    88% |████████████████████████████▍   | 40kB 3.2MB/s eta 0:00:01[K    100% |████████████████████████████████| 51kB 3.6MB/s 
Building wheels for collected packages: openrec
  Running setup.py bdist_wheel for openrec ... [?25l- done
[?25h  Stored in directory: /root/.cache/pip/wheels/06/42/c6/67dbcffd63a7becc08004e97b7b23572f46e21d43f1c50e175
Successfully built openrec
Installing collected packages: openrec
Successfully installed openrec-0.2.2


In [0]:
!rm -rf *
!wget https://s3.amazonaws.com/cornell-tech-sdl-rec-bias/dataset/yahoo/training_arr.npy
!wget https://s3.amazonaws.com/cornell-tech-sdl-rec-bias/dataset/yahoo/validation_arr.npy

!wget https://s3.amazonaws.com/cornell-tech-sdl-rec-bias/best-models/cml-yahoo/cml-yahoo.data-00000-of-00001
!wget https://s3.amazonaws.com/cornell-tech-sdl-rec-bias/best-models/cml-yahoo/cml-yahoo.meta
!wget https://s3.amazonaws.com/cornell-tech-sdl-rec-bias/best-models/cml-yahoo/cml-yahoo.index

In [0]:
import numpy as np
import pickle
import random
from __future__ import division

from openrec.legacy import ImplicitModelTrainer
from openrec.legacy.utils import ImplicitDataset
from openrec.legacy.utils.evaluators import ImplicitEvalManager
from openrec.legacy.recommenders import CML
from openrec.legacy.utils.evaluators import AUC
from openrec.legacy.utils.samplers import PairwiseSampler

## Function Definition

In [0]:
# Function that calculates the AUC and recall of the evaluation result

def calc_metrics(infilename, trainfilename, gamma=0.2, K=10):
    """Compute item-frequency-weighted AUC and recall from a saved evaluation dump.

    Each positive item ``i`` of a user is weighted by ``1 / Ni ** ((gamma + 1) / 2)``,
    where ``Ni`` is the number of times ``i`` occurs in the training data. Positive
    items that never occur in the training data are ignored.

    Args:
        infilename: Path to a pickle file holding a dict with the keys
            ``"num_negatives"``, ``"users"``, ``"user_items"`` and ``"results"``.
            For every user, ``"user_items"`` and ``"results"`` are parallel
            sequences of item ids and scores; entries after the first
            ``num_negatives`` ones are treated as the user's positive items.
        trainfilename: Path to a ``.npy`` file whose array exposes an
            ``'item_id'`` field.
        gamma: Exponent parameter of the per-item weight (see above).
        K: A positive item counts as a recall hit when fewer than ``K`` of the
            user's retained scores are strictly greater than its own score.

    Returns:
        A dict with the keys ``"auc"`` and ``"recall"``, each averaged over the
        users that have at least one positive item present in the training data.
        Both values are NaN when there is no such user.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only pass files
    # produced by a trusted evaluation run.
    with open(infilename, 'rb') as infile:
        P = pickle.load(infile)
    NUM_NEGATIVES = P["num_negatives"]

    # Ni: item id -> number of occurrences in the training data.
    trainset = np.load(trainfilename)
    item_ids, item_counts = np.unique(trainset['item_id'], return_counts=True)
    Ni = dict(zip(item_ids.tolist(), item_counts.tolist()))
    del trainset

    exponent = (gamma + 1) / 2.0
    nonzero_user_count = 0  # users with at least one positive item seen in training
    sum_user_auc = 0.0
    sum_user_recall = 0.0

    for theuser in P["users"]:
        num_pos = len(P["user_items"][theuser][NUM_NEGATIVES:])
        if num_pos == 0:
            # Without this guard seq[-0:] would return the WHOLE sequence and
            # every negative would be scored as if it were a positive.
            continue

        # Only the trailing 300 entries take part in the ranking (hard-coded;
        # presumably matches the evaluation run's setting -- TODO confirm).
        items = list(P["user_items"][theuser])[-300:]
        all_scores = np.array(list(P["results"][theuser])[-300:])
        pos_items = items[-num_pos:]
        pos_scores = all_scores[-num_pos:]

        # Zui: number of retained scores strictly greater than the positive's score.
        # Keyed by item so a repeated item keeps the value of its last occurrence.
        Zui = dict()
        for pos_item, pos_score in zip(pos_items, pos_scores):
            Zui[pos_item] = float(np.sum(all_scores > pos_score))

        numerator_auc = 0.0
        numerator_recall = 0.0
        denominator = 0.0
        seen_in_training = False
        for theitem in pos_items:
            if theitem not in Ni:
                continue
            seen_in_training = True
            pui = np.power(Ni[theitem], exponent)
            numerator_auc += (1 - Zui[theitem] / len(items)) / pui
            if Zui[theitem] < K:
                numerator_recall += 1.0 / pui
            denominator += 1 / pui

        if seen_in_training:
            nonzero_user_count += 1
        if denominator > 0:
            sum_user_auc += numerator_auc / denominator
            sum_user_recall += numerator_recall / denominator

    if nonzero_user_count == 0:
        # No user contributes anything: the averages are undefined.
        return {
            "auc"       : float("nan"),
            "recall"    : float("nan")
        }

    return {
        "auc"       : sum_user_auc / nonzero_user_count,
        "recall"    : sum_user_recall / nonzero_user_count
    }


## Model Serving

In [0]:
# Load the downloaded training / validation arrays and record the id bounds
# that are handed to the dataset wrappers below.
raw_data = {
    'train_data': np.load("training_arr.npy"),
    'val_data': np.load("validation_arr.npy"),
    'max_user': 15401,
    'max_item': 1001,
}

# Batch sizes / display interval shared by the following cells.
batch_size = 8000
test_batch_size = 1000
display_itr = 1000

# Wrap both splits as OpenRec implicit-feedback datasets.
train_dataset = ImplicitDataset(
    raw_data['train_data'],
    raw_data['max_user'],
    raw_data['max_item'],
    name='Train',
)
val_dataset = ImplicitDataset(
    raw_data['val_data'],
    raw_data['max_user'],
    raw_data['max_item'],
    name='Val',
)

In [0]:
# CML recommender sized from the training dataset.
cml_model = CML(
    batch_size=batch_size,
    max_user=train_dataset.max_user(),
    max_item=train_dataset.max_item(),
    dim_embed=50,
    l2_reg=0.001,
    opt='Adam',
    sess_config=None,
)

# Pairwise sampler over the training dataset, using 4 worker processes.
sampler = PairwiseSampler(
    batch_size=batch_size,
    dataset=train_dataset,
    num_process=4,
)

# Trainer object; in this notebook it is only used to drive evaluation.
model_trainer = ImplicitModelTrainer(
    batch_size=batch_size,
    test_batch_size=test_batch_size,
    train_dataset=train_dataset,
    model=cml_model,
    sampler=sampler,
    eval_save_prefix="./yahoo",
    item_serving_size=500,
)

auc_evaluator = AUC()

Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [0]:
# Restore the pre-trained CML parameters from the downloaded "cml-yahoo" checkpoint files.
cml_model.load("cml-yahoo")

INFO:tensorflow:Restoring parameters from cml-yahoo


In [0]:
# Evaluate the restored model on the validation dataset by driving the trainer's
# underscore-prefixed helpers directly, without running any training.
# NOTE(review): these are private OpenRec members; the per-line notes below are
# inferred from their names -- confirm against the OpenRec source.
model_trainer._eval_manager = ImplicitEvalManager(evaluators=[auc_evaluator])
model_trainer._num_negatives = 300  # presumably the number of negatives sampled per user
model_trainer._exclude_positives([train_dataset, val_dataset])  # presumably keeps known positives out of the negatives
model_trainer._sample_negatives(seed=10)  # fixed seed
# The prefix matches the "cml-yahoo-val-new_evaluate_partial.pickle" file read by the next cell.
model_trainer._eval_save_prefix = "cml-yahoo-val-new"
model_trainer._evaluate_partial(val_dataset)

## Evaluation on the validation set under different gamma values

In [0]:
# Score the validation-set evaluation dump for several gamma values (K = 1).

test_eval_file = "cml-yahoo-val-new_evaluate_partial.pickle"
trainfilename = "training_arr.npy"

for gamma in (1.5, 2.0, 2.5, 3.0):
    print("{} with gamma @{} :".format(test_eval_file, gamma))
    print(calc_metrics(test_eval_file, trainfilename, gamma=gamma, K=1))

cml-yahoo-val-new_evaluate_partial.pickle with gamma @1.5 :
{'recall': 0.07239385465623455, 'auc': 0.8537802322166608}
cml-yahoo-val-new_evaluate_partial.pickle with gamma @2.0 :
{'recall': 0.06970855806988434, 'auc': 0.8499550597403158}
cml-yahoo-val-new_evaluate_partial.pickle with gamma @2.5 :
{'recall': 0.0679062533205266, 'auc': 0.8471228812596456}
cml-yahoo-val-new_evaluate_partial.pickle with gamma @3.0 :
{'recall': 0.06665414398744902, 'auc': 0.8449859840114543}
