In [1]:
import scipy.io
import numpy as np
import tensorflow as tf
import os
import logging
import datetime
import shutil
import argparse
from collections import defaultdict
import random
from collections import Counter
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

## Data preprocessing

### Data partitioning

In [2]:
# Seed the stdlib and NumPy generators so the per-user shuffles in the split
# below are repeatable. Only these two generators are seeded in this cell.
SEED = 7
random.seed(SEED)
np.random.seed(SEED)

In [3]:
# Fraction of each user's interactions that goes to training; the remainder is
# divided evenly between validation and test by the split cell below.
ratio = 0.6  # train:val:test = ratio:(1-ratio)/2:(1-ratio)/2

In [4]:
# Read the .mat file and unpack its five relation matrices (in stored order)
# as COO matrices, whose .row/.col/.data arrays are used directly later on.
mat = scipy.io.loadmat('yelp.mat')
UB, UU, UCom, BCat, BCity = [relation.tocoo() for relation in mat['relation'][0]]

In [5]:
# Group every stored (user, business) entry of UB by user.
user_biz = defaultdict(list)
for user, biz in zip(UB.row, UB.col):
    user_biz[user].append(biz)

# Per-user random split. Users with fewer than 5 interactions are left out of
# all three splits. Each kept user's list is shuffled in place and cut at
# `train_end` / `val_end` into train, validation and test slices.
user_biz_train, user_biz_val, user_biz_test = defaultdict(list), defaultdict(list), defaultdict(list)
train_set, val_set, test_set = set(), set(), set()
for user, items in user_biz.items():
    if len(items) < 5:
        continue
    train_end = int(len(items) * ratio)
    val_end = int(len(items) * (ratio+(1-ratio)/2))
    random.shuffle(items)

    user_biz_train[user] = items[:train_end]
    train_set.update((user, biz) for biz in items[:train_end])

    user_biz_val[user] = items[train_end:val_end]
    val_set.update((user, biz) for biz in items[train_end:val_end])

    user_biz_test[user] = items[val_end:]
    test_set.update((user, biz) for biz in items[val_end:])

In [6]:
def _split_positions(pair_set):
    """Return the positions in UB.row/UB.col whose (row, col) pair is in `pair_set`."""
    return np.array([pos for pos, pair in enumerate(zip(UB.row, UB.col)) if pair in pair_set])


# Despite the name, each "mask" is an array of integer positions into UB's
# COO arrays, not a boolean mask.
train_mask = _split_positions(train_set)
val_mask = _split_positions(val_set)
test_mask = _split_positions(test_set)

### Data preparation

In [7]:
# Dense user x business count matrix built from the training interactions only.
# np.add.at accumulates repeated (user, business) pairs, like a += loop would.
user_biz_train = np.zeros(UB.shape)
np.add.at(user_biz_train, (UB.row[train_mask], UB.col[train_mask]), 1.)

In [8]:
# filter out cold start users and items
# Users are kept when they have at least one training interaction.
user_train_totals = np.sum(user_biz_train, -1)
user_non_zero_ind = np.array([u for u, total in enumerate(user_train_totals) if total > 0])
user_non_zero_set = set(user_non_zero_ind)

# Businesses are kept when at least one kept user interacted with them in training.
biz_train_totals = np.sum(user_biz_train[user_non_zero_ind, :], 0)
biz_non_zero_mask = (biz_train_totals != 0).astype(float)
biz_non_zero_set = set(b for b, total in enumerate(biz_train_totals) if total > 0)

In [9]:
def _held_out_targets(positions):
    """Build the dense count matrix and per-user item lists for one held-out split.

    The dense matrix only counts pairs whose user and business both survived
    the cold-start filter; the per-user dict records every held-out business,
    filtered or not.
    """
    dense = np.zeros([UB.shape[0], UB.shape[1]])
    by_user = defaultdict(list)
    for user, biz in zip(UB.row[positions], UB.col[positions]):
        by_user[user].append(biz)
        if user in user_non_zero_set and biz in biz_non_zero_set:
            dense[user, biz] += 1.
    return dense, by_user


user_biz_val, user_biz_val_dict = _held_out_targets(val_mask)
user_biz_test, user_biz_test_dict = _held_out_targets(test_mask)

In [10]:
# data structure needed for the convenience of evaluation
# For every kept user: a candidate vector (kept businesses minus the user's
# training items and the *other* held-out split's items) and the set of
# ground-truth businesses for the split being evaluated.
val_set_u = list(user_non_zero_set)
val_set_mask = [biz_non_zero_mask - user_biz_train[user, :] - user_biz_test[user, :]
                for user in val_set_u]
val_set_set = [set(user_biz_val_dict[user]) for user in val_set_u]

test_set_u = list(user_non_zero_set)
test_set_mask = [biz_non_zero_mask - user_biz_train[user, :] - user_biz_val[user, :]
                 for user in test_set_u]
test_set_set = [set(user_biz_test_dict[user]) for user in test_set_u]

In [11]:
# normalize the training data
# Keep only the rows of users that passed the cold-start filter, then scale
# each row to sum to 1. This rebinds user_biz_train from itself, so the cell
# is not idempotent: running it twice re-indexes the already reduced matrix.
user_biz_train = user_biz_train[user_non_zero_ind, :]
user_biz_train /= user_biz_train.sum(axis=-1, keepdims=True)

## Model

### HINs & parameters

In [12]:
# sparse tensors for adjacency matrices
def _sparse_from_coo(rows, cols, values, shape):
    """Wrap COO index/value arrays in a tf.SparseTensor with one [row, col] pair per entry."""
    return tf.SparseTensor(indices=np.stack([rows, cols], axis=1),
                           values=values,
                           dense_shape=shape)


# User-business adjacency restricted to training interactions, with unit weights.
UB_train = _sparse_from_coo(UB.row[train_mask], UB.col[train_mask],
                            np.ones(len(train_mask)).astype(np.float32), UB.shape)
# The remaining relations keep their stored values, cast to float32.
UU_t = _sparse_from_coo(UU.row, UU.col, UU.data.astype(np.float32), UU.shape)
UCom_t = _sparse_from_coo(UCom.row, UCom.col, UCom.data.astype(np.float32), UCom.shape)
BCat_t = _sparse_from_coo(BCat.row, BCat.col, BCat.data.astype(np.float32), BCat.shape)
BCity_t = _sparse_from_coo(BCity.row, BCity.col, BCity.data.astype(np.float32), BCity.shape)

In [13]:
# paras
# Entity counts, read off the shapes of the relation matrices.
num_U, num_B = UB.shape
num_Com = UCom.shape[-1]
num_Cat = BCat.shape[-1]
num_City = BCity.shape[-1]

dim = 64         # width of every embedding table
batch_size = 64  # users per training / evaluation step

In [14]:
# embeddings
# One L2 regularizer shared by all regularized tables; each regularized
# variable contributes a term to tf.GraphKeys.REGULARIZATION_LOSSES.
l2_reg = tf.contrib.layers.l2_regularizer(scale=0.1)

U_embeddings = tf.get_variable("U_embeddings", [num_U, dim], trainable=True, regularizer=l2_reg)
U_b = tf.get_variable("U_b", [num_U, dim])
B_embeddings = tf.get_variable("B_embeddings", [num_B, dim], trainable=True, regularizer=l2_reg)
# Fixed: this business-side table was sized by num_U (copy-paste from U_b).
B_b = tf.get_variable("B_b", [num_B, dim])
Com_embeddings = tf.get_variable("Com_embeddings", [num_Com, dim], regularizer=l2_reg)
Cat_embeddings = tf.get_variable("Cat_embeddings", [num_Cat, dim], regularizer=l2_reg)
City_embeddings = tf.get_variable("City_embeddings", [num_City, dim], regularizer=l2_reg)
U_embeddings_mlp = tf.get_variable("U_embeddings_mlp", [num_U, dim], trainable=True, regularizer=l2_reg)
# Fixed: sized by num_B rather than num_U, matching B_embeddings.
B_embeddings_mlp = tf.get_variable("B_embeddings_mlp", [num_B, dim], trainable=True, regularizer=l2_reg)

In [15]:
# user & item lookup table
def _softmax_aggregate(adjacency, embeddings):
    """Softmax-normalise the stored entries of `adjacency`, then multiply by `embeddings`."""
    return tf.sparse_tensor_dense_matmul(tf.sparse_softmax(adjacency), embeddings)


# User vector: own embedding + aggregated training businesses + aggregated UU neighbours.
U_vec = tf.nn.tanh(
    U_embeddings
    + _softmax_aggregate(UB_train, B_embeddings)
    + _softmax_aggregate(UU_t, U_embeddings))

# Business vector: own embedding + aggregated training users + city + categories.
B_vec = tf.nn.tanh(
    B_embeddings
    + _softmax_aggregate(tf.sparse_transpose(UB_train), U_embeddings)
    + _softmax_aggregate(BCity_t, City_embeddings)
    + _softmax_aggregate(BCat_t, Cat_embeddings))

In [16]:
# placeholders
# ux: 1-D batch of user indices; it is looked up in U_vec and one-hot encoded
# over UB.shape[0] users further down.
ux = tf.placeholder(tf.int32, shape=(None,))
# uy: one target row per batch user over all UB.shape[1] businesses; the
# training loop feeds rows of the normalised user_biz_train matrix.
uy = tf.placeholder(tf.float32, shape=(None, UB.shape[1]))

### Embedding-based model

In [17]:
# Pair every batch user with every business: both tensors are tiled to
# [batch, num_businesses, dim].
uvec = tf.tile(tf.expand_dims(tf.nn.embedding_lookup(U_vec, ux), 1), [1, UB.shape[1], 1])
bvec = tf.tile(tf.expand_dims(tf.nn.embedding_lookup(B_vec, tf.range(0, UB.shape[1])), 0), [tf.shape(uvec)[0], 1, 1])

# Element-wise interaction features (the original wrapped this single tensor
# in a one-element tf.concat, which is an identity).
x = uvec * bvec
# Squeeze ONLY the trailing unit axis left by the 1-unit dense layer. A bare
# tf.squeeze would also drop the batch axis when a batch holds a single user,
# breaking the loss and the in-place masking during evaluation.
y_emb_logit = tf.squeeze(
    tf.layers.dense(x, 1, name='output_2', kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=0.1)),
    axis=[-1])
# Per-user distribution over businesses; this is what the evaluation loops run.
y_inference = tf.nn.softmax(y_emb_logit, -1)

### Path-based model

In [18]:
# learnable adjacency matrices
# One trainable scalar per stored edge, initialised to 0 and passed through
# softplus below, so every edge weight is positive and starts at softplus(0) = ln 2.
# These tf.Variables are unnamed, so TensorFlow assigns default names in creation order.

UB_e = tf.Variable(np.zeros(shape=len(train_mask)), trainable=True, dtype=tf.float32)
# NOTE(review): B_p is not referenced in any other cell shown here.
B_p = tf.Variable(np.zeros(shape=UB.shape[1]), trainable=True, dtype=tf.float32)
UU_e = tf.Variable(np.zeros(shape=len(UU.row)), trainable=True, dtype=tf.float32)
BCat_e = tf.Variable(np.zeros(shape=len(BCat.row)), trainable=True, dtype=tf.float32)
BCity_e = tf.Variable(np.zeros(shape=len(BCity.row)), trainable=True, dtype=tf.float32)


# Sparse tensors with the same index structure as the data but learnable values.
# UB_t only contains the training interactions.
UB_t = tf.SparseTensor(indices=np.array([UB.row[train_mask], UB.col[train_mask]]).transpose(),
                       values=tf.nn.softplus(UB_e),
                       dense_shape=UB.shape)
# NOTE: UU_t, BCat_t and BCity_t are REBOUND here and shadow the fixed-weight
# tensors defined in the "sparse tensors for adjacency matrices" cell. U_vec and
# B_vec were already built from the fixed versions; re-running that lookup-table
# cell after this one would silently pick up these learnable versions instead.
UU_t = tf.SparseTensor(indices=np.array([UU.row, UU.col]).transpose(),
                       values=tf.nn.softplus(UU_e),
                       dense_shape=UU.shape)
BCat_t = tf.SparseTensor(indices=np.array([BCat.row, BCat.col]).transpose(),
                         values=tf.nn.softplus(BCat_e),
                         dense_shape=BCat.shape)
BCity_t = tf.SparseTensor(indices=np.array([BCity.row, BCity.col]).transpose(),
                          values=tf.nn.softplus(BCity_e),
                          dense_shape=BCity.shape)

In [19]:
# Start every walk from the batch users: one one-hot row per user.
u_one_hot = tf.one_hot(ux, depth=UB.shape[0])


def _hop(adjacency, dist, to_columns):
    """Take one step of a walk along `adjacency` and renormalise each row.

    `dist` holds one row per batch user. With `to_columns=True` the step
    multiplies by the transposed adjacency (result indexed by the adjacency's
    columns); with `to_columns=False` it multiplies by the adjacency itself
    (result indexed by its rows). 1e-10 is added before normalising so no row
    sums to zero.
    """
    stepped = tf.transpose(
        tf.sparse_tensor_dense_matmul(adjacency, dist, adjoint_a=to_columns, adjoint_b=True)) + 1e-10
    return stepped / tf.reshape(tf.reduce_sum(stepped, axis=1), [-1, 1])


# meta-paths

# P1: U-B-U-B
B_1 = _hop(UB_t, u_one_hot, True)
U_1 = _hop(UB_t, B_1, False)
B_2 = _hop(UB_t, U_1, True)

# P2: P1 extended by one more B-U-B round trip (U-B-U-B-U-B)
U_3 = _hop(UB_t, B_2, False)
B_3 = _hop(UB_t, U_3, True)

# P3: U-U-B
U_2 = _hop(UU_t, u_one_hot, False)
B_4 = _hop(UB_t, U_2, True)

# P4: U-B-Cat-B
Cat = _hop(BCat_t, B_1, True)
B_5 = _hop(BCat_t, Cat, False)

# P5: U-B-City-B
City = _hop(BCity_t, B_1, True)
B_6 = _hop(BCity_t, City, False)

In [20]:
# Mixing weights over the five meta-path outputs: softmax of a trainable
# 5-vector initialised to ones, so all paths start with equal weight.
r = tf.nn.softmax(tf.Variable(np.ones(shape=[5]), dtype=tf.float32, trainable=True))
# Weighted sum over the stacked path outputs (axis 0), leaving one row per batch user.
y_path = tf.einsum('i,ijk->jk', r, tf.stack([B_2, B_3, B_4, B_5, B_6], axis=0))

### Loss function

In [21]:
# L1: cross-entropy between the embedding model's logits and the targets.
# Uses the _v2 op in place of the deprecated softmax_cross_entropy_with_logits;
# stop_gradient on the labels keeps the old "no gradient into labels" behaviour.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=y_emb_logit, labels=tf.stop_gradient(uy)))
# L2: cross-entropy between the path model's distribution and the targets.
loss_path = tf.reduce_mean(tf.reduce_sum(- uy * tf.log(y_path), -1))
# L3: KL(y_path || softmax(y_emb_logit)). log_softmax is used instead of
# log(softmax(.)) so a softmax entry that underflows to 0 cannot yield -inf/NaN.
kl_div = tf.reduce_mean(
    tf.reduce_sum(- tf.nn.log_softmax(y_emb_logit, -1) * y_path + y_path * tf.log(y_path), -1))

# List of the regularization terms registered by the regularized variables/layers.
reg = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [22]:
# optimizer
# Joint objective: L1 + L3 (KL) + L2 (path loss) plus the summed regularization
# terms scaled by 1e-5. The training loop uses this op for epochs < 17.
opt = tf.train.AdamOptimizer().minimize(loss + kl_div + loss_path + 1e-5 * tf.reduce_sum(reg))
# Same terms with L1 and L2 down-weighted to 0.1 (KL unchanged); the training
# loop switches to this op from epoch 17 onwards.
opt_p = tf.train.AdamOptimizer().minimize(0.1 * loss + kl_div + 0.1 * loss_path + 1e-5 * tf.reduce_sum(reg)) 

In [23]:
# Baseline objective: embedding-model cross-entropy (L1) plus regularization only;
# used by the training loop when `base` is True.
opt_base = tf.train.AdamOptimizer().minimize(loss + 1e-5 * tf.reduce_sum(reg))

## Training & Evaluation

In [24]:
def evaluate(u_batch, mask_batch, set_batch, y_val, cutoff=(20, 20, 100)):
    """Compute per-user hit, recall and NDCG for one batch of score rows.

    Args:
        u_batch: the users in the batch; only its length is used.
        mask_batch: unused, kept so existing calls keep working.
        set_batch: for each user, the set of ground-truth item ids.
        y_val: 2-D array of scores, one row per user; higher ranks first.
        cutoff: three rank cut-offs `(hit_k, recall_k, ndcg_k)`.

    Returns:
        Three lists with one entry per user: `hit` (1 if a ground-truth item is
        ranked within `hit_k`, else 0), `recall` (ground-truth items within
        `recall_k`, divided by `min(len(truth), recall_k)`) and `ndcg`
        (DCG over the top `ndcg_k` divided by the ideal DCG).

    A user with no ground-truth items, or a zero cut-off, yields 0 for the
    affected metric instead of raising ZeroDivisionError.
    """
    hit_k, recall_k, ndcg_k = cutoff[0], cutoff[1], cutoff[2]
    hit = []
    recall = []
    ndcg = []
    y_val_argsort = np.argsort(-y_val, axis=-1)[:, :ndcg_k]
    for i in range(len(u_batch)):
        has_hit = 0
        n_recalled = 0.
        dcg_max = 0.
        dcg = 0.
        h = set_batch[i]
        for ind, b_rec in enumerate(y_val_argsort[i]):
            gain = 1. / np.log2(ind + 2)
            # The ideal ranking puts all len(h) relevant items first.
            if ind < len(h):
                dcg_max += gain
            if b_rec in h:
                if ind < hit_k:
                    has_hit = 1
                if ind < recall_k:
                    n_recalled += 1.
                dcg += gain

        recall_denom = min(len(h), recall_k)
        hit.append(has_hit)
        ndcg.append(dcg / dcg_max if dcg_max > 0 else 0.)
        recall.append(n_recalled / recall_denom if recall_denom > 0 else 0.)
    return hit, recall, ndcg

In [25]:
# Create the session with allow_growth enabled (GPU memory is allocated on
# demand rather than reserved up front).
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
# Initialise every variable defined in the graph so far.
sess.run(tf.global_variables_initializer())

In [26]:
# Train, early-stop on validation NDCG, then report test metrics for the best
# checkpoint. Prints one "epoch, mean L1, mean KL" line and one
# "hit, recall, ndcg" validation line per epoch, plus a final test line.
best_ndcg = 0.
counter = 0
base = False  # whether to train the base model alone
saver = tf.train.Saver(max_to_keep=None)
ckpt_path = './ckpt/saved_model_%s.bin'%('base' if base else 'proposed')
# Saver.save needs the parent directory to exist; create it so the notebook
# also runs from a fresh checkout.
if not os.path.isdir(os.path.dirname(ckpt_path)):
    os.makedirs(os.path.dirname(ckpt_path))

for epoch in range(50):
    # The training op depends only on the mode and the epoch, so pick it once
    # per epoch: the baseline objective, or the joint objective for the first
    # 17 epochs followed by the KL-dominated one.
    if base:
        train_op = opt_base
    elif epoch < 17:
        train_op = opt
    else:
        train_op = opt_p

    train_loss = 0.
    train_kl = 0.
    loss_cnt = 0.

    # Rows of user_biz_train are aligned with user_non_zero_ind, so the same
    # slice selects matching targets and user ids.
    for i in range(0, user_biz_train.shape[0], batch_size):
        uy_batch = user_biz_train[i:i+batch_size, :]
        ux_batch = user_non_zero_ind[i:i+batch_size]
        _, loss_val, kl_val = sess.run([train_op, loss, kl_div], feed_dict={ux: ux_batch, uy: uy_batch})

        train_loss += loss_val
        train_kl += kl_val
        loss_cnt += 1

    print("{}, {}, {}".format(epoch, train_loss/loss_cnt, train_kl/loss_cnt))

    val_hit = []
    val_recall = []
    val_ndcg = []
    for i in range(0, len(val_set_u), batch_size):
        u_batch = val_set_u[i:i+batch_size]
        mask_batch = val_set_mask[i:i+batch_size]
        set_batch = val_set_set[i:i+batch_size]
        y_val = sess.run(y_inference, feed_dict={ux: u_batch})
        # In-place on purpose: keeps the score dtype and zeroes non-candidates.
        y_val *= np.array(mask_batch)
        hit_, recall_, ndcg_ = evaluate(u_batch, mask_batch, set_batch, y_val)
        val_hit += hit_
        val_recall += recall_
        val_ndcg += ndcg_
    val_hit = np.array(val_hit)
    val_recall = np.array(val_recall)
    val_ndcg = np.array(val_ndcg)
    print("{}, {}, {}".format(val_hit.mean(), val_recall.mean(), val_ndcg.mean()))

    # Checkpoint whenever validation NDCG matches or beats the best so far.
    if val_ndcg.mean() >= best_ndcg:
        best_ndcg = val_ndcg.mean()
        counter = 0
        saver.save(sess, ckpt_path)

    # counter is 1 right after an improvement, so this stops after two
    # consecutive epochs without one.
    counter += 1
    if counter > 2:
        break

# Score the best checkpoint on the test split.
saver.restore(sess, ckpt_path)
test_hit = []
test_recall = []
test_ndcg = []
for i in range(0, len(test_set_u), batch_size):
    u_batch = test_set_u[i:i+batch_size]
    mask_batch = test_set_mask[i:i+batch_size]
    set_batch = test_set_set[i:i+batch_size]
    y_val = sess.run(y_inference, feed_dict={ux: u_batch})
    y_val *= np.array(mask_batch)
    hit_, recall_, ndcg_ = evaluate(u_batch, mask_batch, set_batch, y_val)
    test_hit += hit_
    test_recall += recall_
    test_ndcg += ndcg_
test_hit = np.array(test_hit)
test_recall = np.array(test_recall)
test_ndcg = np.array(test_ndcg)
print("{}, {}, {}".format(test_hit.mean(), test_recall.mean(), test_ndcg.mean()))

0, 9.52600564209, 0.819747501729
0.0911182696733, 0.0212318609073, 0.0236917557847
INFO:tensorflow:./ckpt/saved_model_proposed.bin is not in all_model_checkpoint_paths. Manually adding it.
1, 9.27545319351, 0.661956438831
0.152017180549, 0.0378696462174, 0.0445312683817
INFO:tensorflow:./ckpt/saved_model_proposed.bin is not in all_model_checkpoint_paths. Manually adding it.
2, 8.97072987463, 0.582255990482
0.206319987728, 0.056016515972, 0.0619171287606
INFO:tensorflow:./ckpt/saved_model_proposed.bin is not in all_model_checkpoint_paths. Manually adding it.
3, 8.68978257273, 0.538929787921
0.239760699494, 0.0668493110019, 0.0715801799243
INFO:tensorflow:./ckpt/saved_model_proposed.bin is not in all_model_checkpoint_paths. Manually adding it.
4, 8.44811814439, 0.508720795898
0.250805338242, 0.0725702561961, 0.0775220789355
INFO:tensorflow:./ckpt/saved_model_proposed.bin is not in all_model_checkpoint_paths. Manually adding it.
5, 8.23964913686, 0.48322818326
0.254793680012, 0.0740050364

In [27]:
# Start the next run from freshly initialised variables. Close the session from
# the previous run first: rebinding `sess` without closing it would leave that
# session's resources allocated for the rest of the kernel's life.
sess.close()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
sess.run(tf.global_variables_initializer())

In [28]:
# Train, early-stop on validation NDCG, then report test metrics for the best
# checkpoint. Prints one "epoch, mean L1, mean KL" line and one
# "hit, recall, ndcg" validation line per epoch, plus a final test line.
best_ndcg = 0.
counter = 0
base = True  # whether to train the base model alone
saver = tf.train.Saver(max_to_keep=None)
ckpt_path = './ckpt/saved_model_%s.bin'%('base' if base else 'proposed')
# Saver.save needs the parent directory to exist; create it so the notebook
# also runs from a fresh checkout.
if not os.path.isdir(os.path.dirname(ckpt_path)):
    os.makedirs(os.path.dirname(ckpt_path))

for epoch in range(50):
    # The training op depends only on the mode and the epoch, so pick it once
    # per epoch: the baseline objective, or the joint objective for the first
    # 17 epochs followed by the KL-dominated one.
    if base:
        train_op = opt_base
    elif epoch < 17:
        train_op = opt
    else:
        train_op = opt_p

    train_loss = 0.
    train_kl = 0.
    loss_cnt = 0.

    # Rows of user_biz_train are aligned with user_non_zero_ind, so the same
    # slice selects matching targets and user ids.
    for i in range(0, user_biz_train.shape[0], batch_size):
        uy_batch = user_biz_train[i:i+batch_size, :]
        ux_batch = user_non_zero_ind[i:i+batch_size]
        _, loss_val, kl_val = sess.run([train_op, loss, kl_div], feed_dict={ux: ux_batch, uy: uy_batch})

        train_loss += loss_val
        train_kl += kl_val
        loss_cnt += 1

    print("{}, {}, {}".format(epoch, train_loss/loss_cnt, train_kl/loss_cnt))

    val_hit = []
    val_recall = []
    val_ndcg = []
    for i in range(0, len(val_set_u), batch_size):
        u_batch = val_set_u[i:i+batch_size]
        mask_batch = val_set_mask[i:i+batch_size]
        set_batch = val_set_set[i:i+batch_size]
        y_val = sess.run(y_inference, feed_dict={ux: u_batch})
        # In-place on purpose: keeps the score dtype and zeroes non-candidates.
        y_val *= np.array(mask_batch)
        hit_, recall_, ndcg_ = evaluate(u_batch, mask_batch, set_batch, y_val)
        val_hit += hit_
        val_recall += recall_
        val_ndcg += ndcg_
    val_hit = np.array(val_hit)
    val_recall = np.array(val_recall)
    val_ndcg = np.array(val_ndcg)
    print("{}, {}, {}".format(val_hit.mean(), val_recall.mean(), val_ndcg.mean()))

    # Checkpoint whenever validation NDCG matches or beats the best so far.
    if val_ndcg.mean() >= best_ndcg:
        best_ndcg = val_ndcg.mean()
        counter = 0
        saver.save(sess, ckpt_path)

    # counter is 1 right after an improvement, so this stops after two
    # consecutive epochs without one.
    counter += 1
    if counter > 2:
        break

# Score the best checkpoint on the test split.
saver.restore(sess, ckpt_path)
test_hit = []
test_recall = []
test_ndcg = []
for i in range(0, len(test_set_u), batch_size):
    u_batch = test_set_u[i:i+batch_size]
    mask_batch = test_set_mask[i:i+batch_size]
    set_batch = test_set_set[i:i+batch_size]
    y_val = sess.run(y_inference, feed_dict={ux: u_batch})
    y_val *= np.array(mask_batch)
    hit_, recall_, ndcg_ = evaluate(u_batch, mask_batch, set_batch, y_val)
    test_hit += hit_
    test_recall += recall_
    test_ndcg += ndcg_
test_hit = np.array(test_hit)
test_recall = np.array(test_recall)
test_ndcg = np.array(test_ndcg)
print("{}, {}, {}".format(test_hit.mean(), test_recall.mean(), test_ndcg.mean()))

0, 9.53830493665, 0.863836005622
0.113054149409, 0.0278419288381, 0.0343883493011
INFO:tensorflow:./ckpt/saved_model_base.bin is not in all_model_checkpoint_paths. Manually adding it.
1, 9.28183423772, 0.749057843989
0.151250191747, 0.0384134763391, 0.0456546621902
INFO:tensorflow:./ckpt/saved_model_base.bin is not in all_model_checkpoint_paths. Manually adding it.
2, 8.90374230404, 0.69897037803
0.209541340696, 0.0563067861711, 0.0615108513308
INFO:tensorflow:./ckpt/saved_model_base.bin is not in all_model_checkpoint_paths. Manually adding it.
3, 8.55251574984, 0.741602438922
0.240374290535, 0.0678058194534, 0.0734374446276
INFO:tensorflow:./ckpt/saved_model_base.bin is not in all_model_checkpoint_paths. Manually adding it.
4, 8.25497357986, 0.833936821012
0.253259702408, 0.0723386967016, 0.0796932655933
INFO:tensorflow:./ckpt/saved_model_base.bin is not in all_model_checkpoint_paths. Manually adding it.
5, 8.01133123099, 0.937372676882
0.260469397147, 0.0753592186595, 0.0830772757265

### Reload the proposed model for significance tests

In [30]:
# Load the proposed model's checkpoint into the current session and recompute
# its per-user test metrics. The trailing underscore keeps these arrays
# separate from test_hit / test_recall / test_ndcg produced by the run above.
saver.restore(sess, './ckpt/saved_model_proposed.bin')
test_hit_, test_recall_, test_ndcg_ = [], [], []
for start in range(0, len(test_set_u), batch_size):
    stop = start + batch_size
    u_batch = test_set_u[start:stop]
    mask_batch = test_set_mask[start:stop]
    set_batch = test_set_set[start:stop]
    y_val = sess.run(y_inference, feed_dict={ux: u_batch})
    # In-place so the scores keep their dtype; non-candidates are zeroed.
    y_val *= np.array(mask_batch)
    hit_, recall_, ndcg_ = evaluate(u_batch, mask_batch, set_batch, y_val)
    test_hit_.extend(hit_)
    test_recall_.extend(recall_)
    test_ndcg_.extend(ndcg_)
test_hit_, test_recall_, test_ndcg_ = np.array(test_hit_), np.array(test_recall_), np.array(test_ndcg_)
print("{}, {}, {}".format(test_hit_.mean(), test_recall_.mean(), test_ndcg_.mean()))

INFO:tensorflow:Restoring parameters from ./ckpt/saved_model_proposed.bin
0.355269213069, 0.102983825328, 0.109304468154


In [34]:
from scipy import stats

# Paired t-tests over per-user metrics: arrays from the preceding training run
# (test_*) against the reloaded proposed model (test_*_), in the order
# hit, recall, NDCG.
for first_scores, second_scores in ((test_hit, test_hit_),
                                    (test_recall, test_recall_),
                                    (test_ndcg, test_ndcg_)):
    print(stats.ttest_rel(first_scores, second_scores))

Ttest_relResult(statistic=-8.013781477612055, pvalue=1.307992963708771e-15)
Ttest_relResult(statistic=-7.86187488249895, pvalue=4.3979860011059035e-15)
Ttest_relResult(statistic=-9.053271973976338, pvalue=1.802576566104731e-19)
