[View in Colaboratory](https://colab.research.google.com/github/ylongqi/unbiased-offline-recommender-evaluation/blob/master/experiments_section_4/experiment_pmf_citeulike_serve.ipynb)

## Install framework and download datasets

In [1]:
!pip install openrec

Collecting openrec
[?25l  Downloading https://files.pythonhosted.org/packages/aa/e5/0c4688f97a3e22c0ecee1ac295aa5ebf46ef49d3e9fe14bb5a8e01d838e2/openrec-0.2.3.tar.gz (46kB)
[K    100% |████████████████████████████████| 51kB 3.0MB/s 
Building wheels for collected packages: openrec
  Running setup.py bdist_wheel for openrec ... [?25l- \ done
[?25h  Stored in directory: /root/.cache/pip/wheels/af/f0/59/fdb60eccb7921f144d67a3b6a1098453e85424b75329b9607d
Successfully built openrec
Installing collected packages: openrec
Successfully installed openrec-0.2.3


In [0]:
!rm -rf *

!wget https://s3.amazonaws.com/cornell-tech-sdl-rec-bias/dataset/citeulike/rsrf_user_data_train.npy
!wget https://s3.amazonaws.com/cornell-tech-sdl-rec-bias/dataset/citeulike/rsrf_user_data_val.npy
!wget https://s3.amazonaws.com/cornell-tech-sdl-rec-bias/dataset/citeulike/rsrf_user_data_test.npy

!wget https://s3.amazonaws.com/cornell-tech-sdl-rec-bias/best-models/pmf-citeulike/pmf-citeulike-all.data-00000-of-00001
!wget https://s3.amazonaws.com/cornell-tech-sdl-rec-bias/best-models/pmf-citeulike/pmf-citeulike-all.index
!wget https://s3.amazonaws.com/cornell-tech-sdl-rec-bias/best-models/pmf-citeulike/pmf-citeulike-all.meta

In [0]:
import numpy as np
import pickle
import random
from __future__ import division

from openrec.legacy import ImplicitModelTrainer
from openrec.legacy.utils import ImplicitDataset
from openrec.legacy.utils.evaluators import ImplicitEvalManager
from openrec.legacy.recommenders import PMF
from openrec.legacy.utils.evaluators import AUC, Recall, NDCG
from openrec.legacy.utils.samplers import PointwiseSampler

## Function Definition

In [0]:
# Calculates the propensity-weighted AUC, recall and DCG of an evaluation result.
def eq12_test(infilename, trainfilename, mimic_uniform_sampling=False, gamma=0.2, K=10):
    """Score a pickled evaluation dump with popularity-based propensity weights.

    Every positive item ``i`` of a user is weighted by ``1 / p_i`` with
    ``p_i = N_i ** ((gamma + 1) / 2)``, where ``N_i`` is the number of times the
    item occurs in the training interactions. Each per-user metric is
    normalised by the sum of those weights and the result is averaged over the
    users that have at least one positive item seen in training.

    Args:
        infilename: Path to a pickle file holding a dict with the keys
            ``"num_negatives"``, ``"users"``, ``"user_items"`` and
            ``"results"``. For each user, ``"user_items"`` and ``"results"``
            are parallel sequences of candidate items and their scores; the
            entries after the first ``num_negatives`` are treated as positives.
        trainfilename: Path to a ``.npy`` array exposing an ``'item_id'`` field.
        mimic_uniform_sampling: Unused; kept so existing callers keep working.
        gamma: Exponent parameter of the propensity weight (see above).
        K: Rank cutoff. A positive item contributes to ``"dcg"`` and
            ``"recall"`` only when fewer than ``K`` candidates score strictly
            higher than it.

    Returns:
        dict with the keys ``"auc"``, ``"dcg"``, ``"recall"`` and ``"dcg@all"``.
        All four values are NaN when no user has a positive item that also
        occurs in the training data.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only pass files
    # written by a trusted evaluation run.
    with open(infilename, 'rb') as infile:
        P = pickle.load(infile)
    num_negatives = P["num_negatives"]

    # Only the trailing entries of each candidate list are scored.
    # NOTE(review): 300 presumably covers the sampled negatives plus a cap on
    # positives -- confirm against the evaluation run that wrote the pickle.
    window = 300

    # Ni: how often each item occurs in the training interactions.
    trainset = np.load(trainfilename)
    item_ids, item_counts = np.unique(trainset['item_id'], return_counts=True)
    Ni = dict(zip(item_ids.tolist(), item_counts.tolist()))
    del trainset

    sum_user_auc = 0.0
    sum_user_dcg = 0.0
    sum_user_recall = 0.0
    sum_user_dcg_all = 0.0
    nonzero_user_count = 0  # users with >= 1 positive item seen in training

    for theuser in P["users"]:
        num_pos = len(P["user_items"][theuser][num_negatives:])
        if num_pos == 0:
            # Without this guard ``lst[-0:]`` would return the whole list and
            # every sampled negative would be scored as a positive.
            continue

        items = list(P["user_items"][theuser])[-window:]
        scores = list(P["results"][theuser])[-window:]
        pos_items = items[-num_pos:]
        pos_scores = scores[-num_pos:]

        if not any(pos_item in Ni for pos_item in pos_items):
            continue
        nonzero_user_count += 1

        # Zui: number of candidates scored strictly higher than each positive.
        all_scores = np.array(scores)
        Zui = dict()
        for pos_item, pos_score in zip(pos_items, pos_scores):
            Zui[pos_item] = float(np.sum(all_scores > pos_score))

        num_candidates = len(items)
        numerator_auc = 0.0
        numerator_dcg_all = 0.0
        numerator_recall = 0.0
        numerator_dcg = 0.0
        denominator = 0.0
        for theitem in pos_items:
            if theitem not in Ni:
                continue
            pui = np.power(Ni[theitem], (gamma + 1) / 2.0)
            rank = Zui[theitem]
            discounted_gain = 1 / np.log2(rank + 2)
            numerator_auc += (1 - rank / num_candidates) / pui
            numerator_dcg_all += discounted_gain / pui
            if rank < K:
                numerator_dcg += discounted_gain / pui
                numerator_recall += 1.0 / pui
            denominator += 1 / pui

        if denominator > 0:
            sum_user_auc += numerator_auc / denominator
            sum_user_dcg += numerator_dcg / denominator
            sum_user_recall += numerator_recall / denominator
            sum_user_dcg_all += numerator_dcg_all / denominator

    if nonzero_user_count == 0:
        # Nothing to average over: report NaN instead of dividing by zero.
        undefined = float('nan')
        return {
            "auc"       : undefined,
            "dcg"       : undefined,
            "recall"    : undefined,
            "dcg@all"   : undefined
        }

    return {
        "auc"       : sum_user_auc / nonzero_user_count,
        "dcg"       : sum_user_dcg / nonzero_user_count,
        "recall"    : sum_user_recall / nonzero_user_count,
        "dcg@all"   : sum_user_dcg_all / nonzero_user_count
    }


## Model Serving

In [0]:
# Read the three interaction splits downloaded earlier and record the id
# bounds that are handed to ImplicitDataset when the datasets are built.

raw_data = {
    'train_data': np.load("rsrf_user_data_train.npy"),
    'val_data': np.load("rsrf_user_data_val.npy"),
    'test_data': np.load("rsrf_user_data_test.npy"),
    'max_user': 5551,
    'max_item': 16980,
}

# Batch sizes passed to the model, sampler and trainer below.
batch_size = 8000
test_batch_size = 1000
# NOTE(review): display_itr is not referenced in the visible cells.
display_itr = 5000

In [0]:
# Wrap every split in an ImplicitDataset; all three share the same id bounds.
train_dataset = ImplicitDataset(
    raw_data['train_data'], raw_data['max_user'], raw_data['max_item'], name='Train')
val_dataset = ImplicitDataset(
    raw_data['val_data'], raw_data['max_user'], raw_data['max_item'], name='Val')
test_dataset = ImplicitDataset(
    raw_data['test_data'], raw_data['max_user'], raw_data['max_item'], name='Test')

# Build the PMF model and the pointwise sampler over the training split.
model = PMF(
    batch_size=batch_size,
    max_user=train_dataset.max_user(),
    max_item=train_dataset.max_item(),
    dim_embed=50,
    opt='Adam',
    sess_config=None,
    l2_reg=0.0,
)
sampler = PointwiseSampler(
    batch_size=batch_size,
    dataset=train_dataset,
    pos_ratio=0.2,
    num_process=5,
)

# Trainer plus the metrics to report at cutoffs 10, 20, ..., 100.
model_trainer = ImplicitModelTrainer(
    batch_size=batch_size,
    test_batch_size=test_batch_size,
    train_dataset=train_dataset,
    model=model,
    sampler=sampler,
    eval_save_prefix="./0.0-pmf-citeulike",
)
auc_evaluator = AUC()
recall_evaluator = Recall(recall_at=list(range(10, 101, 10)))
dcg_evaluator = NDCG(ndcg_at=list(range(10, 101, 10)))


# Restore the pretrained weights from the downloaded "pmf-citeulike-all" checkpoint.
model.load("pmf-citeulike-all")

# No training happens here: the trainer's private evaluation hooks are driven
# by hand, in this exact order, to score the test split with 200 sampled
# negatives per user (fixed seed).
model_trainer._eval_manager = ImplicitEvalManager(
    evaluators=[auc_evaluator, recall_evaluator, dcg_evaluator])
model_trainer._num_negatives = 200
model_trainer._exclude_positives([train_dataset, val_dataset, test_dataset])
model_trainer._sample_negatives(seed=10)
# Overrides the prefix given to the constructor; presumably this yields the
# "pmf-citeulike-test_evaluate_partial.pickle" file read by the next cell.
model_trainer._eval_save_prefix = "pmf-citeulike-test"
model_trainer._evaluate_partial(test_dataset)

## Evaluation on the test set under different gamma values

In [7]:
# Score the saved test-set evaluation once per gamma value (rank cutoff K=1).

test_eval_file = "pmf-citeulike-test_evaluate_partial.pickle"
trainfilename = "rsrf_user_data_train.npy"

for gamma in [-1.0, 1.55, 1.69, 1.89]:
    header = test_eval_file + " with gamma @" + str(gamma) + " :"
    print(header)
    metrics = eq12_test(test_eval_file, trainfilename,
                        mimic_uniform_sampling=False, gamma=gamma, K=1)
    print(metrics)

pmf-citeulike-test_evaluate_partial.pickle with gamma @-1.0 :
{'dcg': 0.1705733547559378, 'auc': 0.9352016691878766, 'dcg@all': 0.4700232822328804, 'recall': 0.1705733547559378}
pmf-citeulike-test_evaluate_partial.pickle with gamma @1.55 :
{'dcg': 0.0999129124771798, 'auc': 0.9110870669662351, 'dcg@all': 0.3993879306455869, 'recall': 0.0999129124771798}
pmf-citeulike-test_evaluate_partial.pickle with gamma @1.69 :
{'dcg': 0.09793321878764621, 'auc': 0.910129043348133, 'dcg@all': 0.3971521688853667, 'recall': 0.09793321878764621}
pmf-citeulike-test_evaluate_partial.pickle with gamma @1.89 :
{'dcg': 0.09532463864051804, 'auc': 0.9088317026258013, 'dcg@all': 0.394176410182987, 'recall': 0.09532463864051804}
