In [40]:
import logging
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from torch.distributions import constraints
from sklearn.model_selection import train_test_split
from scipy import sparse

import pyro
import pyro.distributions as dist
import pyro.optim as optim

from pyro.infer import EmpiricalMarginal, SVI, Trace_ELBO, Predictive
from pyro.contrib.autoguide import AutoMultivariateNormal
from pyro.infer.mcmc.api import MCMC
from pyro.infer.mcmc import NUTS

pyro.set_rng_seed(1)
assert pyro.__version__.startswith('1.4.0')
np.random.seed(seed=1)

In [61]:
# load data
long_data = pd.read_csv('ratings_100_200_real.csv') # need to transform to array when fitting the model
# long_data_array = long_data.to_numpy()
# data = long_data.pivot(index='user_id', columns='anime_id', values='rating')
# data preparation
# long_data = pd.read_csv('ratings_hu_200.csv')
# data = long_data.pivot(index='user_id', columns='anime_id', values='rating')

In [62]:
print(long_data.shape)

(15491, 3)


In [63]:
# some important parameters
num_users = long_data.user_id.nunique() # Number of users: 69600 
num_anime = long_data.anime_id.nunique() # Number of anime: 9927
num_feats = 30 # 10 should be faster

print("Number of Users", str(num_users))
print("Number of Anime", str(num_anime))

Number of Users 100
Number of Anime 200


In [35]:
# train/test split
train_df, valid_df = train_test_split(long_data, test_size=0.2)

#resetting indices to avoid indexing errors in the future
train_df = train_df.reset_index()[['user_id', 'anime_id', 'rating']]
valid_df = valid_df.reset_index()[['user_id', 'anime_id', 'rating']]

MF
Users feature (uf): 69600 * 30
Anime feature (af): 9927 * 30 or 30 * 9927

In [36]:
def encode_column(column):
    """ Encodes a pandas column with continous IDs"""
    keys = column.unique()
    key_to_id = {key:idx for idx,key in enumerate(keys)}
    return key_to_id, np.array([key_to_id[x] for x in column]), len(keys)

def encode_df(anime_df):
    """Encodes rating data with continuous user and anime ids"""
    
    anime_ids, anime_df['anime_id'], num_anime = encode_column(anime_df['anime_id'])
    user_ids, anime_df['user_id'], num_users = encode_column(anime_df['user_id'])
    return anime_df, num_users, num_anime, user_ids, anime_ids

In [37]:
anime_df, num_users, num_anime, user_ids, anime_ids = encode_df(train_df)
print("Number of users :", num_users)
print("Number of anime :", num_anime)
anime_df.head()

Number of users : 68854
Number of anime : 9733


Unnamed: 0,user_id,anime_id,rating
0,0,0,7
1,1,1,8
2,2,2,8
3,3,3,8
4,4,4,9


In [38]:
def create_sparse_matrix(df, rows, cols, column_name="rating"):
    """ Returns a sparse utility matrix""" 
    return sparse.csc_matrix((df[column_name].values,(df['user_id'].values, df['anime_id'].values)),shape=(rows, cols))

In [41]:
anime_df, num_users, num_anime, user_ids, anime_ids = encode_df(train_df)
Y = create_sparse_matrix(anime_df, num_users, num_anime)

In [44]:
# to view matrix
Y.todense()
Y

<68854x9733 sparse matrix of type '<class 'numpy.int64'>'
	with 5069788 stored elements in Compressed Sparse Column format>

In [45]:
def cost(df, emb_user, emb_anime):
    """ Computes RMSE"""
    Y = create_sparse_matrix(df, emb_user.shape[0], emb_anime.shape[0])
    predicted = create_sparse_matrix(predict(df, emb_user, emb_anime), emb_user.shape[0], emb_anime.shape[0], 'prediction')
#     return np.sum((Y-predicted).power(2))/df.shape[0] 
    return np.sqrt(np.mean(np.square(predicted-Y)))

In [47]:
def model_1():
    """
    num_u/a: number of users/anime
    num_feats: number of latent features
    """
    uf_mean0 = torch.zeros([num_users, num_feats])
    uf_std0 = torch.ones([num_users, num_feats])
    # Anime
    af_mean0 = torch.zeros([num_feats, num_anime])
    af_std0 = torch.ones([num_feats, num_anime])

    uf = 0.1*(pyro.sample("s", pyro.distributions.Normal(loc = uf_mean0, scale = uf_std0).to_event(2)).numpy())
    af = 0.1*(pyro.sample("e", pyro.distributions.Normal(loc = af_mean0, scale = af_std0).to_event(2)).numpy())
    
    return uf, af

In [48]:
def guide_1(): 
    q_uf_mean = pyro.param("q_uf_mean", torch.zeros([num_users, num_feats]))
    q_uf_stddv = pyro.param("q_uf_stddv", torch.ones([num_users, num_feats]), constraint=constraints.positive)
    
    q_af_mean = pyro.param("q_af_mean", torch.zeros([num_feats, num_anime]))
    q_af_stddv = pyro.param("q_af_stddv", torch.ones([num_feats, num_anime]), constraint=constraints.positive)
    
    uf = pyro.sample("s", pyro.distributions.Normal(loc = q_uf_mean, scale = q_uf_stddv).to_event(2))
    af = pyro.sample("e", pyro.distributions.Normal(loc = q_af_mean, scale = q_af_stddv).to_event(2))
    
    expectation = torch.mm(uf, af)

In [53]:
def train_gd(model, guide): 
    pyro.clear_param_store()
    svi = SVI(model, guide_map, optim.SGD({"lr": 50, "momentum":0.8}), loss=Trace_ELBO())

    n_steps = 1000
    for step in range(n_steps):
        loss = svi.step(data)
        if step % 50 == 0:
            print('[iter {}]  loss: {:.4f}'.format(step, loss))

In [54]:
train_gd(model_1, guide_1)

TypeError: model_1() takes 0 positional arguments but 1 was given

In [20]:
def model(data):
    # Users
    uf_mean0 = torch.zeros([num_users, num_feats])
    uf_std0 = torch.ones([num_users, num_feats])
    # Anime
    af_mean0 = torch.zeros([num_feats, num_anime])
    af_std0 = torch.ones([num_feats, num_anime])

    uf = pyro.sample("s", pyro.distributions.Normal(loc = uf_mean0, scale = uf_std0).to_event(2))
    af = pyro.sample("e", pyro.distributions.Normal(loc = af_mean0, scale = af_std0).to_event(2))
    
    expectation = torch.mm(uf, af)
    
    sigma = pyro.sample("sigma", dist.Uniform(0., 10.))
    is_observed = (~np.isnan(data))
    is_observed = torch.tensor(is_observed.values)
    valid_matrix = torch.tensor(data.values).clone()
    valid_matrix[~is_observed] = 0  # ensure all values are valid
    
    with pyro.plate("user", num_users, dim=-2): 
        with pyro.plate("anime", num_anime, dim=-3):
            with pyro.poutine.mask(mask=is_observed):
                pyro.sample("obs", dist.Normal(expectation, sigma), 
                        obs = valid_matrix)

In [5]:
def guide_map(data):
    q_uf_mean = pyro.param("q_uf_mean", torch.zeros([num_users, num_feats]))
    q_uf_stddv = pyro.param("q_uf_stddv", torch.ones([num_users, num_feats]), constraint=constraints.positive)
    
    q_af_mean = pyro.param("q_af_mean", torch.zeros([num_feats, num_anime]))
    q_af_stddv = pyro.param("q_af_stddv", torch.ones([num_feats, num_anime]), constraint=constraints.positive)
    
    uf = pyro.sample("s", pyro.distributions.Normal(loc = q_uf_mean, scale = q_uf_stddv).to_event(2))
    af = pyro.sample("e", pyro.distributions.Normal(loc = q_af_mean, scale = q_af_stddv).to_event(2))
    
    expectation = torch.mm(uf, af)

In [6]:
def train_via_opt(model, guide, lr=5):
    pyro.clear_param_store()
    svi = SVI(model, guide_map, optim.Adam({"lr": lr}), loss=Trace_ELBO())

    n_steps = 1000
    for step in range(n_steps):
        loss = svi.step(data)
        if step % 50 == 0:
            print('[iter {}]  loss: {:.4f}'.format(step, loss))

In [21]:
train_via_opt(model, guide_map)

[iter 0]  loss: 12105759.2395


  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, 

[iter 50]  loss: nan


  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")
  warn_if_nan(loss, "loss")


KeyboardInterrupt: 