In [1]:
import argparse
import datetime as dt
import json
import os
import random
import sys
import time
from collections import Counter

import numpy as np
import pandas as pd
# Command-line style configuration. Inside the notebook nothing is read from
# sys.argv: parse_args(args=[]) below yields the defaults declared here.
parser = argparse.ArgumentParser(description='Sequential or session-based recommendation')
parser.add_argument('--model', type=str, default='fossil', help='model: fossil/fpmc. (default: fossil)')
parser.add_argument('--batch_size', type=int, default=128, help='batch size (default: 128)')
parser.add_argument('--seq_len', type=int, default=20, help='max sequence length (default: 20)')
parser.add_argument('--l2_reg', type=float, default=0.0, help='regularization scale (default: 0.0)')
parser.add_argument('--lr', type=float, default=0.01, help='initial learning rate for Adam (default: 0.01)')
parser.add_argument('--lr_decay', type=float, default=0.5, help='learning rate decay rate (default: 0.5)')
parser.add_argument('--emsize', type=int, default=100, help='dimension of item embedding (default: 100)')
parser.add_argument('--neg_size', type=int, default=1, help='size of negative samples (default: 1)')
parser.add_argument('--worker', type=int, default=10, help='number of sampling workers (default: 10)')
parser.add_argument('--seed', type=int, default=1111, help='random seed (default: 1111)')
parser.add_argument('--data', type=str, default='gowalla', help='data set name (default: gowalla)')
# argparse applies `type` only to string defaults, so a float literal such as
# 1e2 would be stored as-is (100.0) despite type=int. Use real ints.
parser.add_argument('--log_interval', type=int, default=100, help='log interval (default: 100)')
parser.add_argument('--eval_interval', type=int, default=1000, help='eval/test interval (default: 1000)')
parser.add_argument('--optim', type=str, default='adam', help='optimizer: sgd/adam (default: adam)')
parser.add_argument('--warm_up', type=int, default=0, help='warm up step (default: 0)')
# ****************************** unique arguments for FOSSIL *******************************************************
parser.add_argument('--alpha', type=float, default=0.2, help='alpha (default: 0.2)')
parser.add_argument('--order', type=int, default=1, help='order of Fossil (default: 1)')

# ****************************** unique arguments for FPMC *******************************************************
# None
args = parser.parse_args(args=[])
# Root directory of the datasets; every path below is built by string
# concatenation, so the trailing slash is required.
data_path = 'data/'

In [2]:
def prepare_data(corpus_item, corpus_user, data, dname, path_to_data):
    """Re-index one data split and dump it to ``<path_to_data><dname>.json``.

    Args:
        corpus_item: object exposing ``.dict.item2idx`` for raw item ids.
        corpus_user: object exposing ``.dict.item2idx`` for raw user ids.
        data: mapping of raw user id -> iterable of raw item ids.
        dname: file stem of the JSON file to write.
        path_to_data: directory prefix, concatenated verbatim with ``dname``.

    Returns:
        dict mapping integer user index -> list of integer item indices.

    Raises:
        KeyError: if a user or item id is missing from the corresponding
            ``item2idx`` mapping.
    """
    ret = {
        corpus_user.dict.item2idx[raw_user]: [
            corpus_item.dict.item2idx[raw_item] for raw_item in data[raw_user]
        ]
        for raw_user in data.keys()
    }

    target_file = path_to_data + dname + '.json'
    with open(target_file, 'w') as fp:
        json.dump(ret, fp)

    return ret

In [3]:
class Dictionary(object):
    """Vocabulary built from counted items.

    Indices handed out by ``prep_dict`` start at 1, so index 0 is never
    assigned to an item; ``idx2item[i - 1]`` is the item whose index is ``i``.
    """

    def __init__(self):
        self.item2idx = {}        # item -> 1-based integer index
        self.idx2item = []        # items in the order their indices were assigned
        self.counter = Counter()  # occurrence count per item

    def add_item(self, item):
        """Record one more occurrence of ``item``."""
        self.counter.update((item,))

    def prep_dict(self):
        """Assign an index to every counted item that does not have one yet."""
        for candidate in self.counter:
            if candidate in self.item2idx:
                continue
            self.idx2item.append(candidate)
            # The length after appending is the 1-based position of the item.
            self.item2idx[candidate] = len(self.idx2item)

    def __len__(self):
        """Number of items that have been assigned an index."""
        return len(self.idx2item)

In [4]:
class Corpus(object):
    """Wraps a ``Dictionary`` populated from an iterable of ids."""

    def __init__(self, ItemId):
        """Count every element of ``ItemId`` and index the distinct values."""
        vocabulary = Dictionary()
        for entry in ItemId:
            vocabulary.add_item(entry)
        vocabulary.prep_dict()
        self.dict = vocabulary

In [5]:
def prepare_eval_test(data, batch_size, max_test_len=100):
    """Expand each user's sequence into next-item examples and batch them.

    For a user with items ``[x0, x1, ...]`` the examples are
    ``(prefix x0..x_{i-1}, target x_i)`` for ``i = 1 .. min(max_test_len,
    len(items)) - 1``. Each prefix is right-aligned in an int32 vector of
    length ``max_test_len`` and left-padded with zeros.

    Args:
        data: mapping of user id (anything ``int()`` accepts) -> item list.
        batch_size: examples per batch; values below 2 are raised to 2.
        max_test_len: length of every input vector.

    Returns:
        list of ``(users, inputs, targets)`` tuples of parallel lists; the
        final batch may hold fewer than ``batch_size`` examples.
    """
    batch_size = max(batch_size, 2)

    all_u, all_inp, all_pos = [], [], []
    for u, itemids in data.items():
        nb_test = min(max_test_len, len(itemids)) - 1
        all_u.extend([int(u)] * nb_test)
        for n_ctx in range(1, nb_test + 1):
            inp = np.zeros([max_test_len], dtype=np.int32)
            # The n_ctx items preceding the target fill the tail of the vector.
            inp[max_test_len - n_ctx:] = itemids[:n_ctx]
            all_inp.append(inp)
            all_pos.append(itemids[n_ctx])

    # Striding by batch_size yields every full batch followed by the remainder.
    return [
        (all_u[lo:lo + batch_size], all_inp[lo:lo + batch_size], all_pos[lo:lo + batch_size])
        for lo in range(0, len(all_u), batch_size)
    ]

def preprocess_session(dname):
    """Filter a session log and write train / validation / test splits.

    Reads ``<data_path><dname>/<dname>.tsv`` (tab separated, no header) and
    writes ``_train_tr.txt``, ``_test.txt`` and ``_train_valid.txt`` next to
    it. Only ``dname == 'tmall'`` is supported.

    Raises:
        NotImplementedError: for any other dataset name (raised after the
            input file has been read).
    """
    def _restrict(frame, column, allowed):
        # Keep the rows whose `column` value occurs in `allowed`.
        return frame[np.in1d(frame[column], allowed)]

    def _session_sizes(frame):
        return frame.groupby('SessionId').size()

    prefix = data_path + dname + '/' + dname
    data = pd.read_csv(prefix + '.tsv', sep='\t', header=None)
    if dname != 'tmall':
        raise NotImplementedError
    data.columns = ['SessionId', 'ItemId', 'Time']

    # Sessions with more than two events.
    session_lengths = _session_sizes(data)
    data = _restrict(data, 'SessionId', session_lengths[session_lengths > 2].index)

    # Items that occur at least ten times.
    item_supports = data.groupby('ItemId').size()
    data = _restrict(data, 'ItemId', item_supports[item_supports >= 10].index)
    print('Unique items: {}'.format(data.ItemId.nunique()))

    # Removing items may have shortened sessions, so filter sessions again.
    session_lengths = _session_sizes(data)
    print('Average session length: {}'.format(session_lengths.mean()))
    data = _restrict(data, 'SessionId', session_lengths[session_lengths > 2].index)

    print('Average session length after removing sessions with less than two event: {}'.format(
        _session_sizes(data).mean()))

    # Sessions whose last event lies within 86400*2 of the newest timestamp
    # form the held-out pool (two days, assuming Time is in seconds - TODO confirm).
    session_max_times = data.groupby('SessionId').Time.max()
    cutoff = data.Time.max() - 86400 * 2
    train = _restrict(data, 'SessionId', session_max_times[session_max_times < cutoff].index)
    test = _restrict(data, 'SessionId', session_max_times[session_max_times >= cutoff].index)
    # Held-out events must refer to items seen in training.
    test = _restrict(test, 'ItemId', train.ItemId)

    tslength = _session_sizes(test)
    test = _restrict(test, 'SessionId', tslength[tslength > 2].index)

    # A random half of the held-out sessions becomes the test set, the rest validation.
    test_session = test.SessionId.unique()
    test_session_ = np.random.choice(test_session, int(len(test_session) / 2), replace=False)
    picked_for_test = test['SessionId'].isin(test_session_)
    test_ = test.loc[picked_for_test]
    val_ = test.loc[~picked_for_test]
    print('Train size: {}'.format(len(train)))
    print('Dev size: {}'.format(len(val_)))
    print('Test size: {}'.format(len(test_)))

    columns = ['SessionId', 'ItemId', 'Time']
    for frame, suffix in ((train, '_train_tr.txt'), (test_, '_test.txt'), (val_, '_train_valid.txt')):
        frame.to_csv(prefix + suffix, sep='\t', columns=columns, header=columns, index=False)


def preprocess_sequence(dname=None):
    """Filter a raw check-in log and write train / validation / test splits.

    Reads ``<data_path><dname>/<dname>.tsv`` (tab separated, no header, columns
    user / timestamp string / lat / long / item) and writes ``_train_tr.txt``,
    ``_test.txt`` and ``_train_valid.txt`` next to it with the header
    ``UserId, ItemId, Time``.

    Args:
        dname: dataset name. The original implementation took no argument and
            read an undefined name ``dname``, which raises NameError on a
            fresh kernel. When omitted, a module-level ``dname`` is still
            honoured if one exists; otherwise ``args.data`` is used.
    """
    if dname is None:
        dname = globals().get('dname', args.data)

    data = pd.read_csv(data_path + dname + '/' + dname + '.tsv', sep='\t', header=None)

    data.columns = ['user', 'TimeStr', 'lat', 'long', 'item']
    # NOTE(review): strptime yields a naive datetime and .timestamp() interprets
    # it in the local timezone although the strings end in 'Z'. Left unchanged
    # so previously generated splits stay reproducible - confirm whether UTC
    # was intended.
    data['Time'] = data.TimeStr.apply(lambda x: dt.datetime.strptime(x, '%Y-%m-%dT%H:%M:%SZ').timestamp())
    del(data['lat'])
    del(data['long'])
    del(data['TimeStr'])

    # Users with more than 10 events.
    event_lengths = data.groupby('user').size()
    print('Average check-ins per user: {}'.format(event_lengths.mean()))
    data = data[np.in1d(data.user, event_lengths[event_lengths>10].index)]

    item_supports = data.groupby('item').size()
    # 50 for delicious, 10 for gowalla
    data = data[np.in1d(data.item, item_supports[item_supports>=10].index)]
    print('Unique items: {}'.format(data.item.nunique()))

    # Removing items may have shortened user histories, so filter users again.
    event_lengths = data.groupby('user').size()
    data = data[np.in1d(data.user, event_lengths[event_lengths>=10].index)]

    event_lengths = data.groupby('user').size()
    print('Average check-ins per user after removing sessions with one event: {}'.format(event_lengths.mean()))

    # Chronological split: the first 90% of the time span is training data.
    tmin = data.Time.min()
    tmax = data.Time.max()
    pivot = (tmax-tmin) * 0.9 + tmin
    train = data.loc[data['Time'] < pivot]
    test = data.loc[data['Time'] >= pivot]

    tr_event_lengths = train.groupby('user').size()
    train = train[np.in1d(train.user, tr_event_lengths[tr_event_lengths>3].index)]
    # The mean printed here is computed before the >3 filter above is applied.
    print('Average (train) check-ins per user: {}'.format(tr_event_lengths.mean()))

    # Held-out events must refer to users and items present in training.
    user_to_predict = train.user.unique()
    test = test[test['user'].isin(user_to_predict)]
    item_to_predict = train.item.unique()
    test = test[test['item'].isin(item_to_predict)]
    test_event_lengths = test.groupby('user').size()
    test = test[np.in1d(test.user, test_event_lengths[test_event_lengths>3].index)]
    print('Average (test) check-ins per user: {}'.format(test_event_lengths.mean()))

    # A random half of the held-out users becomes the test set, the rest validation.
    test_user = test.user.unique()
    test_user_ = np.random.choice(test_user, int(len(test_user) / 2), replace=False)
    test_ = test.loc[test['user'].isin(test_user_)]
    val_ = test.loc[~test['user'].isin(test_user_)]
    print('Train size: {}'.format(len(train)))
    print('Dev size: {}'.format(len(val_)))
    print('Test size: {}'.format(len(test_)))

    # Frame columns are renamed on output via `header`.
    columns = ['user', 'item', 'Time']
    header = ['UserId', 'ItemId', 'Time']
    train.to_csv(data_path + dname + '/' + dname + '_train_tr.txt', sep='\t', columns=columns, header=header, index=False)
    test_.to_csv(data_path + dname + '/' + dname + '_test.txt', sep='\t',columns=columns, header=header, index=False)
    val_.to_csv(data_path + dname + '/' + dname + '_train_valid.txt', sep='\t', columns=columns, header=header, index=False)


In [6]:
def data_generator(args):
    """Load the re-indexed train / validation / test data, building the cache if needed.

    On first use the three ``.txt`` splits under ``<data_path><args.data>/``
    are read, user and item vocabularies are built from the training split,
    and the re-indexed splits (JSON) plus both vocabularies (``.npy``) are
    written to the same directory. Later calls only read those artifacts.

    Args:
        args: namespace providing ``data`` (the dataset name).

    Returns:
        ``[train_data, val_data, test_data, n_items, n_users]``. The three
        data objects are dicts as loaded from JSON (keys are strings);
        ``n_items`` / ``n_users`` are the sizes of the saved vocabularies.
    """
    path_to_data = data_path + args.data + '/'
    prefix = path_to_data + args.data
    split_stems = ['_train_tr', '_train_valid', '_test']
    artifacts = [prefix + stem + '.json' for stem in split_stems]
    artifacts += [prefix + '_item_dict.npy', prefix + '_user_dict.npy']

    # Rebuild unless every artifact exists. Checking only the training JSON
    # would leave a half-written cache (e.g. from an interrupted run) to fail
    # later at load time.
    if not all(os.path.exists(artifact) for artifact in artifacts):
        tr_df = pd.read_csv(prefix + '_train_tr.txt', sep='\t')
        val_df = pd.read_csv(prefix + '_train_valid.txt', sep='\t')
        test_df = pd.read_csv(prefix + '_test.txt', sep='\t')
        # Vocabularies are built from the training split only.
        corpus_item = Corpus(tr_df['ItemId'])
        corpus_user = Corpus(tr_df['UserId'])
        np.save(prefix + '_item_dict', np.asarray(corpus_item.dict.idx2item))
        np.save(prefix + '_user_dict', np.asarray(corpus_user.dict.idx2item))

        # One chronologically ordered item list per user.
        tr = tr_df.sort_values(['UserId', 'Time']).groupby('UserId')['ItemId'].apply(list).to_dict()
        val = val_df.sort_values(['UserId', 'Time']).groupby('UserId')['ItemId'].apply(list).to_dict()
        test = test_df.sort_values(['UserId', 'Time']).groupby('UserId')['ItemId'].apply(list).to_dict()

        prepare_data(corpus_item, corpus_user, tr, args.data + '_train_tr', path_to_data)
        prepare_data(corpus_item, corpus_user, val, args.data + '_train_valid', path_to_data)
        prepare_data(corpus_item, corpus_user, test, args.data + '_test', path_to_data)

    with open(prefix + '_train_tr.json', 'r') as fp:
        train_data = json.load(fp)
    with open(prefix + '_train_valid.json', 'r') as fp:
        val_data = json.load(fp)
    with open(prefix + '_test.json', 'r') as fp:
        test_data = json.load(fp)

    # The saved arrays are the idx -> id lists; only their sizes are needed here.
    idx2item = np.load(prefix + '_item_dict.npy')
    idx2user = np.load(prefix + '_user_dict.npy')
    n_items = idx2item.size
    n_users = idx2user.size

    return [train_data, val_data, test_data, n_items, n_users]


In [7]:
import tensorflow as tf

# Apply --seed to every RNG in use, not just TensorFlow: numpy draws the
# per-worker sampler seeds and the validation/test split, so seeding only TF
# leaves runs irreproducible. (The order in which worker processes deliver
# batches can still vary between runs.)
random.seed(args.seed)
np.random.seed(args.seed)
tf.set_random_seed(args.seed)
train_data, val_data, test_data, n_items, n_users = data_generator(args)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [8]:
#sampler.py
import numpy as np
from multiprocessing import Process, Queue

def random_neg(pos, n, s):
    '''
    Draw `s` distinct negative item ids uniformly from 1..n by rejection sampling.

    pos: collection of positive item ids that must not be returned.
    n: number of items; candidates are the integers 1..n inclusive.
    s: size of samples.

    Returns a list of `s` distinct ids, none of which is in `pos`
    (in set iteration order, i.e. not in draw order).

    Raises ValueError if fewer than `s` candidates remain once `pos` is
    excluded; without this check the rejection loop would never terminate.
    '''
    excluded = set(pos)
    available = n - sum(1 for p in excluded if 1 <= p <= n)
    if s > max(available, 0):
        raise ValueError(
            'cannot draw {} negatives: only {} of {} items are not positives'.format(
                s, max(available, 0), n))

    neg = set()
    for _ in range(s):
        t = np.random.randint(1, n+1)
        # Redraw until the candidate is neither a positive nor already chosen.
        while t in excluded or t in neg:
            t = np.random.randint(1, n+1)
        neg.add(t)
    return list(neg)

def sample_function(data, n_items, n_users, batch_size, max_len, neg_size, result_queue, SEED, neg_method='rand'):
    """Worker loop: build training batches forever and push them onto a queue.

    Users are drawn with probability proportional to the length of their
    history. For the chosen user a target position ``idx >= 1`` is drawn
    uniformly; the (up to ``max_len``) items before it form the input, which is
    right-aligned and zero-padded on the left.

    Args:
        data: mapping ``str(user index) -> item list`` for users ``1..n_users``.
        n_items: negatives are drawn from ``1..n_items``.
        n_users: number of users.
        batch_size: examples per batch.
        max_len: length of every input sequence.
        neg_size: negatives drawn per example.
        result_queue: object with ``put``; receives
            ``[users, seqs, positives, negatives]`` (one tuple per field).
        SEED: seed passed to ``np.random.seed`` before sampling starts.
        neg_method: only ``'rand'`` is implemented.

    Raises:
        NotImplementedError: when sampling with any other ``neg_method``.

    This function never returns.
    """
    user_ids = range(1, 1 + n_users)
    history_sizes = np.array([len(data[str(uid)]) for uid in user_ids])
    user_probs = history_sizes / (1.0 * np.sum(history_sizes))

    def draw_example():
        user = np.random.choice(a=user_ids, p=user_probs)
        history = data[str(user)]

        # Target position; everything before it is context.
        target_idx = np.random.randint(1, len(history))
        n_ctx = min(target_idx, max_len)

        seq = np.zeros([max_len], dtype=np.int32)
        seq[max_len - n_ctx:] = history[target_idx - n_ctx:target_idx]

        pos = history[target_idx]
        if neg_method != 'rand':
            raise NotImplementedError
        neg = random_neg([pos], n_items, neg_size)

        return (user, seq, pos, neg)

    np.random.seed(SEED)
    while True:
        one_batch = [draw_example() for _ in range(batch_size)]
        # Transpose the list of examples into per-field tuples.
        result_queue.put(list(zip(*one_batch)))

class Sampler(object):
    """Pool of daemon processes that fill a shared queue with training batches."""

    def __init__(self, data, n_items, n_users, batch_size=128, max_len=20, neg_size=10, n_workers=10, neg_method='rand'):
        """Start ``n_workers`` processes, each running ``sample_function``.

        Every worker receives its own seed, drawn from the parent's numpy RNG.
        The queue holds at most ``int(2e5)`` batches.
        """
        self.result_queue = Queue(maxsize=int(2e5))
        self.processors = []
        for _ in range(n_workers):
            worker_seed = np.random.randint(2e9)
            worker = Process(
                target=sample_function,
                args=(data, n_items, n_users, batch_size, max_len, neg_size,
                      self.result_queue, worker_seed, neg_method))
            # Daemon processes are terminated when the parent process exits.
            worker.daemon = True
            self.processors.append(worker)
            worker.start()

    def next_batch(self):
        """Block until a batch is available and return it."""
        return self.result_queue.get()

    def close(self):
        """Terminate every worker and wait for it to exit."""
        for worker in self.processors:
            worker.terminate()
            worker.join()

In [9]:
#model.py
def log2(x):
    """Base-2 logarithm of tensor ``x`` via change of base: ln(x) / ln(2)."""
    ln_x = tf.log(x)
    # The constant takes the dtype of ln(x) so the division is well-typed.
    return ln_x / tf.log(tf.constant(2, dtype=ln_x.dtype))


class FOSSIL(object):
    """TF1 graph for the FOSSIL next-item model.

    The score of a candidate item is its bias plus the dot product between its
    output embedding and the sum of two vectors: a "long term" vector (scaled
    sum of the input-sequence embeddings) and a "short term" vector (the last
    ``args.order`` sequence embeddings, weighted per user).

    After construction the instance exposes the placeholders ``inp``, ``user``,
    ``pos``, ``neg`` and ``lr``, the tensors ``prediction``, ``loss``,
    ``recall_at_k`` and ``ndcg_at_k``, the op ``train_op`` and a ``saver``.
    """
    def __init__(self, args, n_items, n_users):
        """Store the configuration, build the graph and create a Saver.

        Reads ``neg_size``, ``emsize``, ``l2_reg``, ``order``, ``alpha`` and
        ``optim`` from ``args``.
        """
        self.args = args
        self.n_items = n_items
        self.n_users = n_users
        self._build()

        self.saver = tf.train.Saver()

    def _build(self):
        """Create placeholders, variables, loss, train op and metric tensors.

        Raises:
            NotImplementedError: if ``args.optim`` is neither 'adam' nor 'sgd'.
        """
        self.inp = tf.placeholder(tf.int32, shape=(None, None), name='inp') # if maxlen is 5, valid len of sample i is 3, then inp[i] = [0, 0, x, x, x]
        self.user = tf.placeholder(tf.int32, shape=(None), name='user')
        self.pos = tf.placeholder(tf.int32, shape=(None), name='pos')
        self.neg = tf.placeholder(tf.int32, shape=(None, self.args.neg_size), name='neg')  

        # Learning rate is fed at every step.
        self.lr = tf.placeholder(tf.float32, shape=None, name='lr')

        # item_embedding1 embeds the items of the input sequence;
        # item_embedding2 embeds the candidate (positive / negative / all) items.
        # Tables have n_items+1 (n_users+1) rows: id 0 is treated as padding below.
        self.item_embedding1 = tf.get_variable('item_embedding1', 
                                               shape=(self.n_items+1, self.args.emsize),
                                               dtype=tf.float32,
                                               regularizer=tf.contrib.layers.l2_regularizer(self.args.l2_reg),
                                               initializer=tf.truncated_normal_initializer(stddev=0.01))
        self.item_embedding2 = tf.get_variable('item_embedding2', 
                                               shape=(self.n_items+1, self.args.emsize),
                                               dtype=tf.float32,
                                               regularizer=tf.contrib.layers.l2_regularizer(self.args.l2_reg),
                                               initializer=tf.truncated_normal_initializer(stddev=0.01))     
        # Per-user and global weights, one per position of the last `order` items.
        self.user_bias = tf.get_variable('user_bias', 
                                               shape=(self.n_users+1, self.args.order),
                                               dtype=tf.float32,
                                               regularizer=tf.contrib.layers.l2_regularizer(self.args.l2_reg),
                                               initializer=tf.truncated_normal_initializer(stddev=0.01))
        self.global_bias = tf.get_variable('global_bias', 
                                               shape=(self.args.order),
                                               dtype=tf.float32,
                                               regularizer=tf.contrib.layers.l2_regularizer(self.args.l2_reg),
                                               initializer=tf.constant_initializer(0.))
        self.item_bias = tf.get_variable('item_bias', 
                                               shape=(self.n_items+1),
                                               dtype=tf.float32,
                                               regularizer=tf.contrib.layers.l2_regularizer(self.args.l2_reg),
                                               initializer=tf.constant_initializer(0.))        

        # Positions holding id 0 are masked out of the sequence representation.
        mask_inp = tf.expand_dims(tf.to_float(tf.not_equal(self.inp, 0)), -1) #(batch, maxlen, 1)
        len_inp = tf.reduce_sum(tf.squeeze(mask_inp, axis=2), axis=1) #(batch)
        item_embed = tf.nn.embedding_lookup(self.item_embedding1, self.inp) * mask_inp #(batch, maxlen, k)
        # Long-term part: sum of the masked embeddings scaled by len^(-alpha).
        long_term = tf.reduce_sum(item_embed, axis=1) #(batch, k)
        long_term = tf.expand_dims(tf.pow(len_inp, -self.args.alpha), -1) * long_term #(batch, k)

        # Samples with fewer than `order` valid items only use the positions they have.
        effective_order = tf.minimum(len_inp, self.args.order) #(batch)
        effective_order = tf.expand_dims(tf.to_float(tf.sequence_mask(effective_order,self.args.order)), -1) #(batch, order, 1)

        short_term = tf.nn.embedding_lookup(self.user_bias, self.user) #(batch, order)
        short_term = tf.expand_dims(short_term + self.global_bias, axis=-1) #(batch, order, 1)
        # The reversed slice takes the last `order` positions, most recent first.
        short_term = short_term * item_embed[:, :-1-self.args.order:-1]  #(batch, order, k)
        short_term = tf.reduce_sum(short_term * effective_order, axis=1) #(batch, k)

        ### for train only
        pos_bias = tf.nn.embedding_lookup(self.item_bias, self.pos) #(batch)
        pos_embed = tf.nn.embedding_lookup(self.item_embedding2, self.pos) #(batch, k)
        neg_bias = tf.nn.embedding_lookup(self.item_bias, self.neg) #(batch, neg_size)
        neg_embed = tf.nn.embedding_lookup(self.item_embedding2, self.neg) #(batch, neg_size, k)

        temp_vec = short_term + long_term #(batch, k)

        pos_score = pos_bias + tf.reduce_sum(temp_vec*pos_embed, axis=1) #(batch)
        neg_score = neg_bias + tf.reduce_sum(tf.expand_dims(temp_vec, axis=1) * neg_embed, axis=2) #(batch, neg_size)
        # Negatives are averaged into one score per sample before comparison.
        neg_score = tf.reduce_mean(neg_score, axis=1) #(batch)

        # Pairwise loss -log(sigmoid(pos - neg)); the sigmoid is clipped before the log.
        loss = -tf.reduce_mean(tf.log(tf.clip_by_value(tf.sigmoid(pos_score-neg_score), 1e-24, 1-1e-24)))

        ### for prediction only
        full_score = self.item_bias + tf.matmul(temp_vec, self.item_embedding2, transpose_b=True) #(batch, n_items+1)
        self.prediction = full_score
        self.loss = loss
        # Add every L2 term registered in the graph's REGULARIZATION_LOSSES collection.
        reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        self.loss += sum(reg_losses)
        if self.args.optim == 'adam':
            self.optimizer = tf.train.AdamOptimizer(self.lr)
        elif self.args.optim == 'sgd':
            self.optimizer = tf.train.GradientDescentOptimizer(self.lr)
        else:
            raise NotImplementedError

        self.train_op = self.optimizer.minimize(self.loss)
        self.recall_at_k, self.ndcg_at_k = self._metric_at_k()

    def _metric_at_k(self, k=20):
        """Build batch-summed hit@k and NDCG@k tensors for the labels in ``self.pos``.

        Returns:
            ``(sum of hits, sum of NDCG)`` over the batch; both are sums, not
            means, so the caller has to divide by the number of examples.
        """
        prediction = self.prediction #(batch, n_items+1)
        prediction_transposed = tf.transpose(prediction)
        labels = tf.reshape(self.pos, shape=(-1,))
        # The lookup yields a (batch, batch) matrix whose diagonal holds each
        # sample's score for its own label.
        pred_values = tf.expand_dims(tf.diag_part(tf.nn.embedding_lookup(prediction_transposed, labels)), -1)
        tile_pred_values = tf.tile(pred_values, [1, self.n_items])
        # Rank = 1 + number of items in columns 1..n_items scoring strictly higher.
        # NOTE(review): column 0 is skipped here but in_top_k below ranks over
        # all n_items+1 columns - confirm the mismatch is intended.
        ranks = tf.reduce_sum(tf.cast(prediction[:,1:] > tile_pred_values, dtype=tf.float32), -1) + 1
        ndcg = 1. / (log2(1.0 + ranks))
        hit_at_k = tf.nn.in_top_k(prediction, labels, k=k)
        hit_at_k = tf.cast(hit_at_k, dtype=tf.float32)
        # NDCG only counts for samples whose label is within the top k.
        ndcg_at_k = ndcg * hit_at_k
        return tf.reduce_sum(hit_at_k), tf.reduce_sum(ndcg_at_k)





class FPMC(object):
    """TF1 graph for the FPMC next-item model.

    The score of a candidate item is the sum of two dot products: user factors
    with the item's user-side factors, and the factors of the previous item
    (last position of the input sequence) with the item's transition factors.

    After construction the instance exposes the placeholders ``inp``, ``user``,
    ``pos``, ``neg`` and ``lr``, the tensors ``prediction``, ``loss``,
    ``recall_at_k`` and ``ndcg_at_k``, the op ``train_op`` and a ``saver``.
    """
    def __init__(self, args, n_items, n_users):
        """Store the configuration, build the graph and create a Saver.

        Reads ``neg_size``, ``emsize``, ``l2_reg`` and ``optim`` from ``args``.
        """
        self.args = args
        self.n_items = n_items
        self.n_users = n_users
        self._build()

        self.saver = tf.train.Saver()

    def _build(self):
        """Create placeholders, variables, loss, train op and metric tensors.

        Raises:
            NotImplementedError: if ``args.optim`` is neither 'adam' nor 'sgd'.
        """
        self.inp = tf.placeholder(tf.int32, shape=(None, None), name='inp') # if maxlen is 5, valid len of sample i is 3, then inp[i] = [0, 0, x, x, x]
        self.user = tf.placeholder(tf.int32, shape=(None), name='user')
        self.pos = tf.placeholder(tf.int32, shape=(None), name='pos')
        self.neg = tf.placeholder(tf.int32, shape=(None, self.args.neg_size), name='neg')  

        # Learning rate is fed at every step.
        self.lr = tf.placeholder(tf.float32, shape=None, name='lr')


        # Four factor tables:
        #   VUI - user factors, paired with VIU (candidate item, user side)
        #   VLI - previous-item factors, paired with VIL (candidate item, transition side)
        self.VUI = tf.get_variable('user_item', 
                                               shape=(self.n_users+1, self.args.emsize),
                                               dtype=tf.float32,
                                               regularizer=tf.contrib.layers.l2_regularizer(self.args.l2_reg),
                                               initializer=tf.truncated_normal_initializer(stddev=0.01))
        self.VIU = tf.get_variable('item_user', 
                                               shape=(self.n_items+1, self.args.emsize),
                                               dtype=tf.float32,
                                               regularizer=tf.contrib.layers.l2_regularizer(self.args.l2_reg),
                                               initializer=tf.truncated_normal_initializer(stddev=0.01))     
        self.VIL = tf.get_variable('item_prev', 
                                               shape=(self.n_items+1, self.args.emsize),
                                               dtype=tf.float32,
                                               regularizer=tf.contrib.layers.l2_regularizer(self.args.l2_reg),
                                               initializer=tf.truncated_normal_initializer(stddev=0.01))
        self.VLI = tf.get_variable('prev_item', 
                                               shape=(self.n_items+1, self.args.emsize),
                                               dtype=tf.float32,
                                               regularizer=tf.contrib.layers.l2_regularizer(self.args.l2_reg),
                                               initializer=tf.truncated_normal_initializer(stddev=0.01))


        # Only the last position of the input sequence is used.
        self.prev = self.inp[:, -1] #(batch)
        u = tf.nn.embedding_lookup(self.VUI, self.user) #(batch, k)
        prev = tf.nn.embedding_lookup(self.VLI, self.prev) #(batch, k)     

        ### for train only
        pos_iu = tf.nn.embedding_lookup(self.VIU, self.pos) #(batch, k)
        pos_il = tf.nn.embedding_lookup(self.VIL, self.pos) #(batch, k)
        pos_score = tf.reduce_sum(u*pos_iu, axis=1) + tf.reduce_sum(prev*pos_il, axis=1) #(batch)

        neg_iu = tf.nn.embedding_lookup(self.VIU, self.neg) #(batch, neg, k)
        neg_il = tf.nn.embedding_lookup(self.VIL, self.neg) #(batch, neg, k)
        neg_score = tf.reduce_sum(tf.expand_dims(u, 1)*neg_iu, axis=2) + tf.reduce_sum(tf.expand_dims(prev, 1)*neg_il, axis=2) #(batch, neg)
        # Negatives are averaged into one score per sample before comparison.
        neg_score = tf.reduce_mean(neg_score, axis=1) #(batch)

        # Pairwise loss -log(sigmoid(pos - neg)); the sigmoid is clipped before the log.
        loss = -tf.reduce_mean(tf.log(tf.clip_by_value(tf.sigmoid(pos_score-neg_score), 1e-24, 1-1e-24)))

        ### for prediction only
        full_score = tf.matmul(u, self.VIU, transpose_b=True) + tf.matmul(prev, self.VIL, transpose_b=True)  #(batch, n_items+1)

        self.prediction = full_score
        self.loss = loss
        # Add every L2 term registered in the graph's REGULARIZATION_LOSSES collection.
        reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        self.loss += sum(reg_losses)
        if self.args.optim == 'adam':
            self.optimizer = tf.train.AdamOptimizer(self.lr)
        elif self.args.optim == 'sgd':
            self.optimizer = tf.train.GradientDescentOptimizer(self.lr)
        else:
            raise NotImplementedError

        self.train_op = self.optimizer.minimize(self.loss)
        self.recall_at_k, self.ndcg_at_k = self._metric_at_k()


    def _metric_at_k(self, k=20):
        """Build batch-summed hit@k and NDCG@k tensors for the labels in ``self.pos``.

        Returns:
            ``(sum of hits, sum of NDCG)`` over the batch; both are sums, not
            means, so the caller has to divide by the number of examples.
        """
        prediction = self.prediction #(batch, n_items+1)
        prediction_transposed = tf.transpose(prediction)
        labels = tf.reshape(self.pos, shape=(-1,))
        # The lookup yields a (batch, batch) matrix whose diagonal holds each
        # sample's score for its own label.
        pred_values = tf.expand_dims(tf.diag_part(tf.nn.embedding_lookup(prediction_transposed, labels)), -1)
        tile_pred_values = tf.tile(pred_values, [1, self.n_items])
        # Rank = 1 + number of items in columns 1..n_items scoring strictly higher.
        # NOTE(review): column 0 is skipped here but in_top_k below ranks over
        # all n_items+1 columns - confirm the mismatch is intended.
        ranks = tf.reduce_sum(tf.cast(prediction[:,1:] > tile_pred_values, dtype=tf.float32), -1) + 1
        ndcg = 1. / (log2(1.0 + ranks))
        hit_at_k = tf.nn.in_top_k(prediction, labels, k=k)
        hit_at_k = tf.cast(hit_at_k, dtype=tf.float32)
        # NDCG only counts for samples whose label is within the top k.
        ndcg_at_k = ndcg * hit_at_k
        return tf.reduce_sum(hit_at_k), tf.reduce_sum(ndcg_at_k)


In [None]:
#train.py continue:

# Background workers that keep a queue of training batches filled.
train_sampler = Sampler(
    train_data, n_items, n_users,
    batch_size=args.batch_size,
    max_len=args.seq_len,
    neg_size=args.neg_size,
    n_workers=args.worker,
    neg_method='rand',
)

# Replace the raw validation / test dicts by pre-built evaluation batches.
# The names are rebound, so this cell must not be run twice on the same kernel.
val_data = prepare_eval_test(val_data, batch_size=100, max_test_len=20)
test_data = prepare_eval_test(test_data, batch_size=100, max_test_len=20)

# Checkpoint directory name encodes dataset, model and key hyper-parameters.
checkpoint_dir = 'save_{}_{}_{}_{}_{}'.format(
    args.data, args.model, args.lr, args.l2_reg, args.emsize)

print(args)
print ('#Item: ', n_items)
print ('#User: ', n_users)

model_dict = {'fossil': FOSSIL, 'fpmc': FPMC}
assert args.model in model_dict

model = model_dict[args.model](args, n_items, n_users)

# Current learning rate; main() decays it in place through `global lr`.
lr = args.lr

def evaluate(source, sess):
    """Compute mean recall@20 and NDCG@20 of the global ``model`` over ``source``.

    Args:
        source: iterable of ``(users, inputs, targets)`` batches.
        sess: TensorFlow session used to run the metric tensors.

    Returns:
        ``[recall, ndcg]`` averaged over all examples, or ``[0.0, 0.0]`` when
        ``source`` contains no examples (previously a ZeroDivisionError).
    """
    total_recall = 0.0
    total_ndcg = 0.0
    count = 0.0
    for batch in source:
        users, inputs, targets = batch[0], batch[1], batch[2]
        feed_dict = {model.inp: inputs, model.user: users, model.pos: targets}
        # The model returns per-batch sums, so accumulate and divide once at the end.
        recall, ndcg = sess.run([model.recall_at_k, model.ndcg_at_k], feed_dict=feed_dict)
        count += len(users)
        total_recall += recall
        total_ndcg += ndcg

    if count == 0:
        # Nothing to evaluate (e.g. an empty validation split).
        return [0.0, 0.0]

    return [total_recall / count, total_ndcg / count]

def main():
    """Train the global ``model`` with early stopping, then report test metrics.

    Every ``args.log_interval`` steps the mean training loss is printed. Every
    ``args.eval_interval`` steps (once past ``args.warm_up``) the model is
    evaluated on ``val_data``:

    * if recall did not improve over the previous evaluation, the learning
      rate is decayed (floored at 1e-6) and a patience counter is increased;
      three such evaluations in a row stop training;
    * a checkpoint is written only when recall beats the best value seen so
      far, so the checkpoint restored for testing is always the best one
      (comparing against the previous evaluation alone could overwrite a
      better checkpoint after a dip-and-partial-recovery).

    The sampler workers are shut down however training ends, and the session
    is closed on exit. On an exception during training the message is printed
    and the process exits with status 1.
    """
    global lr
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        all_val_recall = [-1]  # sentinel so the first evaluation counts as an improvement
        best_val_recall = -1
        early_stop_cn = 0
        step_count = 0
        train_loss_l = 0.
        start_time = time.time()
        print('Start training...')
        try:
            while True:
                cur_batch = train_sampler.next_batch()
                feed_dict = {
                    model.inp: np.array(cur_batch[1]),
                    model.lr: lr,
                    model.pos: np.array(cur_batch[2]),
                    model.neg: np.array(cur_batch[3]),
                    model.user: np.array(cur_batch[0]),
                }

                _, train_loss = sess.run([model.train_op, model.loss], feed_dict=feed_dict)
                train_loss_l += train_loss
                step_count += 1

                if step_count % args.log_interval == 0:
                    cur_loss = train_loss_l / args.log_interval
                    elapsed = time.time() - start_time
                    print('| Totol step {:10d} | lr {:02.5f} | ms/batch {:5.2f} | loss {:5.3f}'.format(
                            step_count, lr, elapsed * 1000 / args.log_interval, cur_loss))
                    sys.stdout.flush()
                    train_loss_l = 0.
                    start_time = time.time()

                if step_count % args.eval_interval == 0 and step_count > args.warm_up:
                    val_recall, val_ndcg = evaluate(val_data, sess)
                    all_val_recall.append(val_recall)
                    print('-' * 90)
                    print('| End of step {:10d} | valid recall@20 {:8.5f} | valid ndcg@20 {:8.5f}'.format(
                            step_count, val_recall, val_ndcg))
                    print('=' * 90)
                    sys.stdout.flush()

                    if all_val_recall[-1] <= all_val_recall[-2]:
                        lr = lr * args.lr_decay
                        lr = max(lr, 1e-6)
                        early_stop_cn += 1
                    else:
                        early_stop_cn = 0
                    if val_recall > best_val_recall:
                        # Keep only the best model on disk.
                        best_val_recall = val_recall
                        model.saver.save(sess, checkpoint_dir + '/model.ckpt')
                    if early_stop_cn == 3:
                        print('Validation recall decreases in three consecutive epochs. Stop Training!')
                        sys.stdout.flush()
                        break
                    # Do not count evaluation time towards ms/batch.
                    start_time = time.time()
        except Exception as e:
            print(str(e))
            sys.exit(1)
        finally:
            # Also runs on KeyboardInterrupt / SystemExit so no worker is left behind.
            train_sampler.close()
        print('Done')

        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            model.saver.restore(sess, '{}/{}'.format(checkpoint_dir, 'model.ckpt'))
            print('Restore model successfully')
        else:
            # Falls through and tests the in-memory weights from the last step.
            print('Restore model failed!!!!!')
        test_recall, test_ndcg = evaluate(test_data, sess)
        print('-' * 90)
        print('test recall@20 {:8.5f} | test ndcg@20 {:8.5f}'.format(
                test_recall, test_ndcg))
        print('=' * 90)

if __name__ == '__main__':
    # exist_ok replaces the exists()-then-mkdir() pair: no race if the directory
    # appears in between, and an immediate error if the path is a regular file.
    os.makedirs(checkpoint_dir, exist_ok=True)
    main()

