## Matrix Factorization: SVD

In [1]:
import math
import numpy as np
from numpy import linalg as LA
from scipy.sparse import coo_matrix
from scipy.linalg import sqrtm
import pandas as pd
from IPython.display import HTML, display

In [2]:
np.set_printoptions(precision=2)
pd.set_option('display.precision', 2)

Read Movies and Define displayMovies

In [3]:
movies = pd.read_csv('data/movies_w_imgurl.csv')

In [4]:
def displayMovies(movieIds, ratings=[]):
    i = 0
    html = ""
    for movieId in movieIds:
        mov = movies[movies['movieId'] == movieId].iloc[0]        
        html += "<div style='display:inline-block;min-width:150px;max-width:150px;vertical-align: top;'>"
        html += "<img src='%s' width='120'><br/>" % mov.imgurl
        if i < len(ratings):
            html += "<span>%.4f</span><br/>" % ratings[i]
        html += "%s<br/>" % mov.title
        if mov.genres != '':
            ul = "<ul>"
            for genre in mov.genres.split('|'):
                ul += "<li>%s</li>" % genre
            ul += "</ul>"
            html += "%s<br/>" % ul
        html += "</div>"
        i += 1
    display(HTML(html))

Read Rating Data

In [5]:
ratings = pd.read_csv('data/ratings-9_1.csv')
train = ratings[ratings['type'] == 'train'][['userId', 'movieId', 'rating']]
test = ratings[ratings['type'] == 'test'][['userId', 'movieId', 'rating']]

### Convert Ratings to User-Item Sparse Matrix
Create Index to Id Maps

In [6]:
movieIdToIndex = {}
indexToMovieId = {}
colIdx = 0
for movieId in movies.movieId:
    movieIdToIndex[movieId] = colIdx
    indexToMovieId[colIdx] = movieId
    colIdx += 1

In [7]:
userIdToIndex = {}
indexToUserId = {}
rowIdx = 0
for userId in ratings.userId.unique():
    userIdToIndex[userId] = rowIdx
    indexToUserId[rowIdx] = userId
    rowIdx += 1

Create User-Item Sparse Matrix

In [8]:
rows = []
cols = []
vals = []
for row in ratings.itertuples():
    rows.append(userIdToIndex[row.userId])
    cols.append(movieIdToIndex[row.movieId])
    vals.append(row.rating)
coomat = coo_matrix((vals, (rows, cols)), shape=(rowIdx, colIdx))

### Singular Value Decomposition

In [9]:
U, s, V = LA.svd(coomat.toarray(), full_matrices=False)

Define user and item feature matrix

In [10]:
dim = 671
sqrtS = sqrtm(np.matrix(np.diag(s[0:dim])))

In [11]:
userFeatures = np.matmul(U.compress(np.ones(dim), axis=1), sqrtS)
itemFeatures = np.matmul(V.T.compress(np.ones(dim), axis=1), sqrtS.T)

Compute user and item similarity matrices

In [12]:
itemNorms = LA.norm(itemFeatures, ord=2, axis=1)
userNorms = LA.norm(userFeatures, ord=2, axis=1)

In [13]:
normalizedItemFeatures = np.divide(itemFeatures.T, itemNorms).T
normalizedUserFeatures = np.divide(userFeatures.T, userNorms).T

  normalizedItemFeatures = np.divide(itemFeatures.T, itemNorms).T


In [14]:
itemSims = pd.DataFrame(data=np.matmul(normalizedItemFeatures, normalizedItemFeatures.T), index=movies.movieId, columns=movies.movieId)
userSims = np.matmul(normalizedUserFeatures, normalizedUserFeatures.T)

Example 

In [15]:
movieIdx = 7
rels = itemSims.iloc[movieIdx,:].sort_values(ascending=False).head(6)
displayMovies([indexToMovieId[movieIdx]])
displayMovies(rels.index, rels.values)