In [1]:
import time
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from scipy.linalg import svd

In [2]:
%%time
dataset = pd.read_csv('http://files.grouplens.org/datasets/movielens/ml-100k/u.data', names=["user_id", "item_id", "rating", "timestamp"], sep="\t")
uq_users = np.sort(dataset.user_id.unique().tolist())
uq_items = np.sort(dataset.item_id.unique().tolist())
n_users = len(uq_users)
n_items = len(uq_items)

CPU times: user 42.9 ms, sys: 87.6 ms, total: 131 ms
Wall time: 295 ms


In [3]:
topk = 10
rank_list = [i+1 for i in range(topk)]
latent = 50

In [4]:
%%time
matrix_data = csr_matrix((dataset.rating, (dataset.user_id, dataset.item_id)))
u, s, vh = svd(matrix_data.toarray(), full_matrices=False)

CPU times: user 2.09 s, sys: 443 ms, total: 2.54 s
Wall time: 1.54 s


In [5]:
print(u.shape)
print(s.shape)
print(vh.shape)

(944, 944)
(944,)
(944, 1683)


In [6]:
s_k = np.array(s)
s_k[latent:] = 0
u_s_vh = np.dot(np.dot(u, np.diag(s_k)),vh)

In [7]:
u_s_vh[1:5, 1:5].round(3)

array([[ 6.448e+00,  2.950e+00,  1.626e+00,  3.045e+00],
       [ 2.334e+00,  1.160e-01, -1.010e-01,  3.100e-01],
       [ 3.280e-01, -2.740e-01, -1.440e-01, -1.570e-01],
       [ 3.830e-01, -4.350e-01,  4.400e-02,  6.000e-03]])

In [8]:
recommend_matrix = np.where(matrix_data.toarray(), 0 ,u_s_vh)

In [9]:
%%time
df_recommend_list = pd.DataFrame(columns=['user_id', 'item_id', 'score', 'rank'])
for user_id in uq_users:
  item_scores = recommend_matrix[user_id]
  df_recommend = pd.DataFrame()
  df_recommend['user_id'] = [user_id] * topk
  df_recommend['item_id'] = np.argsort(item_scores)[::-1][:topk]
  df_recommend['score'] = np.sort(item_scores)[::-1][:topk]
  df_recommend["rank"] = rank_list
  df_recommend_list = df_recommend_list.append(df_recommend, ignore_index=True)

CPU times: user 7.38 s, sys: 74.2 ms, total: 7.45 s
Wall time: 9.68 s


In [10]:
df_recommend_list

Unnamed: 0,user_id,item_id,score,rank
0,1,423,3.480956,1
1,1,403,3.107652,2
2,1,732,2.901051,3
3,1,357,2.767992,4
4,1,385,2.608116,5
...,...,...,...,...
9425,943,735,2.562035,6
9426,943,684,2.447764,7
9427,943,742,2.282232,8
9428,943,357,2.246284,9
