## BPR: Bayesian Personalized Ranking from Implicit Feedback

Ref: 
* https://arxiv.org/pdf/1205.2618
* https://medium.com/radon-dev/implicit-bayesian-personalized-ranking-in-tensorflow-b4dfa733c478

In [1]:
import tensorflow as tf
tf.enable_eager_execution()
import tensorflow.contrib.eager as tfe
import pandas as pd
import numpy as np
import scipy.sparse as sp
import math

from tqdm import tqdm

In [2]:
#---------------------------
# LOAD AND PREPARE THE DATA
#---------------------------

# Read the comma-separated ratings file and immediately discard its fourth
# column (selected by position), keeping the first three columns.
df = (
    pd.read_csv('data/movielens/ml-latest-small/ratings.csv', sep=',')
      .pipe(lambda frame: frame.drop(columns=frame.columns[3]))
)

# Movie metadata, loaded with pandas' default CSV settings.
df_movie = pd.read_csv('data/movielens/ml-latest-small/movies.csv')

In [3]:
# Preview the first rows of the ratings frame.
df.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [4]:
# Preview the first rows of the movie metadata frame.
df_movie.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [5]:

# Drop rows with missing values and rows with a 0 rating (one pass is enough;
# nothing below can reintroduce zeros). The .copy() makes the filtered frame
# independent, so the column assignments below do not write into a slice.
df = df.dropna()
df = df.loc[df.rating != 0].copy()

# Convert the raw user and movie identifiers into contiguous integer codes
# (0..n-1) so they can be used directly as matrix / embedding row indices.
df['user_id'] = df['userId'].astype("category").cat.codes
df['movie_id'] = df['movieId'].astype("category").cat.codes

# Create a lookup frame (internal movie_id -> original movieId) so we can
# get the movie names back in readable form later.
# Note: movie_id is stored as a string in this lookup frame.
item_lookup = df[['movie_id', 'movieId']].drop_duplicates()
item_lookup['movie_id'] = item_lookup.movie_id.astype(str)

# We drop our old user and item columns
df = df.drop(['userId', 'movieId'], axis=1)

# Create lists of all users, movies and ratings
users = list(np.sort(df.user_id.unique()))
movies = list(np.sort(df.movie_id.unique()))
ratings = list(df.rating)
print(f"#users: {len(users):,}, #items: {len(movies):,} #ratings: {len(ratings):,}" )

# Row (user) and column (movie) indices for the sparse matrix.
# These are positions, so they must be integers, not floats.
rows = df.user_id.astype(int)
cols = df.movie_id.astype(int)

# Construct a sparse user x item matrix whose stored values are the ratings.
data_sparse = sp.csr_matrix((ratings, (rows, cols)), shape=(len(users), len(movies)))

# Get the coordinates of the stored non-zero entries as two parallel arrays
# of user ids and item ids. Both arrays have the same length: each user id
# repeats once for every movie that user rated.
uids, iids = data_sparse.nonzero()

#users: 610, #items: 9,724 #ratings: 100,836


In [6]:
#-------------
# HYPERPARAMS
#-------------

# Number of full passes over the observed (user, item) pairs
epochs = 50
# Number of (u,i,j) triplets in each batch
samples = 1500
# Number of batches needed to cover every observed pair once
batches = math.ceil(len(uids) / samples)
# batches = min(batches,30)
# Number of latent features per user / item
num_factors = 64

# Independent L2 regularization strengths for the
# user factors, item factors and item bias.
lambda_user = 1e-7
lambda_item = 1e-7
lambda_bias = 1e-7

# Learning rate handed to the optimizer
lr = 5e-3



## Iterating through a dataset in eager mode

In [7]:
# import tensorflow as tf
# tf.enable_eager_execution()
# import tensorflow.contrib.eager as tfe
# uids=list(range(10))
# iids=list(range(10,20))
# train_data = tf.data.Dataset.from_tensor_slices((uids, iids))
# train_data = train_data.shuffle(len(uids))
# train_data = train_data.batch(2)

# for eps in range(3):
#     batch_id = 0
#     for x, y in train_data:
#         print(f"eps: {eps}, batch: {batch_id}, data: {x}, {y} ")
#         batch_id+=1 

In [8]:
#-------------------------
# MODEL DEFINITION (EAGER EXECUTION)
#-------------------------

def init_variable(size, dim, name=None):
    '''
    Build a tf.Variable of shape [size, dim] filled with uniform random values.

    The values are sampled between -b and b, where b = sqrt(2 / dim).
    `name` is forwarded to tf.Variable unchanged.
    '''
    bound = np.sqrt(2 / dim)
    initial_value = tf.random_uniform([size, dim], minval=-bound, maxval=bound)
    return tf.Variable(initial_value, name=name)




In [9]:
# U matrix: one num_factors-dimensional row per user
user_factors = init_variable(size=len(users), dim=num_factors, name="user_factors")
# V matrix: one num_factors-dimensional row per movie
item_factors = init_variable(size=len(movies), dim=num_factors, name="item_factors")
# One scalar bias per movie, stored as a [len(movies), 1] variable
item_bias = init_variable(size=len(movies), dim=1, name="item_bias")
def compute_loss(u, i, j):
    '''
    BPR loss and a ranking-accuracy metric for a batch of (u, i, j) triplets.

    Loss function:
        -MEAN ln σ(xuij) + λ_user*||U_u||**2
                         + λ_item*(||V_i||**2 + ||V_j||**2)
                         + λ_bias*(||b_i||**2 + ||b_j||**2)
    where
        xui  = b_i + <U_u, V_i>   (score of item i for user u)
        xuij = xui - xuj
        ln   = the natural log
        σ    = the sigmoid function
        λ    = the lambda regularization values
    The ranking term is averaged over the batch; the regularization terms
    are summed over the batch.

    Args:
        u: 1-D integer tensor of user ids (row indices into user_factors).
        i: 1-D integer tensor of item ids paired with each user in `u`
           ("known" items).
        j: 1-D integer tensor of item ids to rank below `i` ("unknown"
           items), same length as `u` and `i`.

    Returns:
        (loss, u_auc): the scalar loss and the fraction of triplets in the
        batch for which xui > xuj.
    '''

    # User feature embedding
    u_factors = tf.nn.embedding_lookup(user_factors, u)
    # Known and unknown item embeddings
    i_factors = tf.nn.embedding_lookup(item_factors, i)
    j_factors = tf.nn.embedding_lookup(item_factors, j)

    # i and j bias embeddings. The lookup yields shape [batch, 1]; squeeze
    # only that trailing axis so the batch axis is always preserved.
    i_bias = tf.squeeze(tf.nn.embedding_lookup(item_bias, i), axis=1)
    j_bias = tf.squeeze(tf.nn.embedding_lookup(item_bias, j), axis=1)

    # Calculate the dot product + bias for known and unknown
    # item to get xui and xuj.
    xui = i_bias + tf.reduce_sum(u_factors * i_factors, axis=1)
    xuj = j_bias + tf.reduce_sum(u_factors * j_factors, axis=1)

    # We calculate xuij.
    xuij = xui - xuj

    # Fraction of triplets ranked correctly: if xuij is greater than 0,
    # xui is greater than xuj (and that is what we want).
    u_auc = tf.reduce_mean(tf.cast(xuij > 0, tf.float32))

    # Calculate the squared L2 norms ||W||**2 multiplied by their λ.
    l2_norm = tf.add_n([
        lambda_user * tf.reduce_sum(tf.multiply(u_factors, u_factors)),
        lambda_item * tf.reduce_sum(tf.multiply(i_factors, i_factors)),
        lambda_item * tf.reduce_sum(tf.multiply(j_factors, j_factors)),
        lambda_bias * tf.reduce_sum(tf.multiply(i_bias, i_bias)),
        lambda_bias * tf.reduce_sum(tf.multiply(j_bias, j_bias))
        ])

    # Calculate the loss as -mean(ln σ(xuij)) + regularization.
    # tf.log_sigmoid is used instead of tf.log(tf.sigmoid(...)): for strongly
    # negative xuij the sigmoid underflows to 0 and log(0) gives -inf, which
    # turns the loss into inf and the gradients into NaN.
    loss = -tf.reduce_mean(tf.log_sigmoid(xuij)) + l2_norm
    return loss, u_auc
# grad_fn(u, i, j) evaluates compute_loss and also returns the gradients in
# the form that opt.apply_gradients consumes in the training loop.
grad_fn = tfe.implicit_value_and_gradients(compute_loss)
opt = tf.train.AdamOptimizer(learning_rate=lr)

# One dataset element per observed (user, item) pair.
train_data = tf.data.Dataset.from_tensor_slices((uids, iids))
# Shuffle before batching. uids/iids come straight from
# data_sparse.nonzero(), so without shuffling every batch would follow that
# fixed order (presumably grouped by user row) and be identical every epoch.
# A buffer as large as the dataset gives a full shuffle.
train_data = train_data.shuffle(buffer_size=len(uids))
train_data = train_data.batch(samples)

In [10]:
%%time
#------------------
# GRAPH EXECUTION
#------------------

# This has noting to do with tensorflow but gives
# us a nice progress bar for the training.
progress = tqdm(total=batches*epochs)

for _ in range(epochs):
    for u, i in train_data:
        j = tf.random_uniform(tf.shape(i), minval=0, maxval=len(movies), dtype=tf.int32)
        (l, auc), grads = grad_fn(u, i, j)
        opt.apply_gradients(grads)
    progress.update(batches)
    progress.set_description('Loss: %.3f | AUC: %.3f' % (l, auc))

progress.close()

Loss: 0.123 | AUC: 0.944:   8%|▊         | 2692/33650 [01:02<11:53, 43.41it/s]

KeyboardInterrupt: 