In [1]:
import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds

In [2]:
def getratingmat( df_rating ):
    """Pivot a long-format ratings table into a movie-by-user rating matrix.

    Parameters
    ----------
    df_rating : pandas.DataFrame
        Must contain the columns 'movieId', 'userId' and 'rating', with at
        most one row per (movieId, userId) pair (``pivot`` raises otherwise).

    Returns
    -------
    df : pandas.DataFrame
        Rows are movies (index named 'Movie ID'), columns are users (columns
        named 'User ID'); missing ratings are filled with 0.
    Y : numpy.ndarray
        The values of ``df`` as a 2-D array.
    R : numpy.ndarray
        Integer indicator array of the same shape as ``Y``: 1 where the
        rating is strictly positive, 0 elsewhere.
    """
    df = df_rating.pivot(index='movieId', columns='userId', values='rating')
    df = df.fillna(0)
    df.index.name = 'Movie ID'
    df.columns.name = 'User ID'
    # DataFrame.as_matrix() was removed in pandas 1.0; .values is available
    # in every pandas release and returns the same 2-D ndarray.
    Y = df.values
    # A cell counts as "rated" only when its value is > 0, because 0 is the
    # fill value used above for missing ratings.
    R = ( Y > 0. ).astype(int)
    print( 'convert rating dataframe to rating matrix for gradient descent')
    return( df, Y, R );

def normalizeratings2( Y, R ):
    """Mean-centre the observed ratings of every column.

    Parameters
    ----------
    Y : array-like, 2-D
        Rating matrix; entries where ``R`` is 0 are expected to be 0, since
        the column sum is taken over all entries.
    R : array-like, 2-D
        Indicator matrix of the same shape as ``Y``; 1 marks an observed
        rating.

    Returns
    -------
    Y_norm : numpy.ndarray
        Float array shaped like ``Y``. Observed entries hold the rating minus
        the column mean; unobserved entries are 0.
    Y_mean : numpy.ndarray
        1-D array with the mean observed rating of each column. Columns with
        no observed rating get a mean of 0 instead of NaN, so adding the mean
        back later cannot introduce NaNs.
    """
    Y = np.asarray( Y )
    R = np.asarray( R )
    Y_sum = np.sum( Y, axis = 0 )
    R_sum = np.sum( R, axis = 0 )
    # Guarded division: columns without any observed rating keep the
    # pre-filled 0 rather than triggering a divide-by-zero.
    Y_mean = np.divide( Y_sum, R_sum,
                        out = np.zeros( Y_sum.shape, dtype = float ),
                        where = R_sum > 0 )
    # Subtract the column mean on observed cells only; Y_mean broadcasts
    # across rows because it has one entry per column.
    Y_norm = np.where( R == 1, Y - Y_mean, 0. )
    return( Y_norm, Y_mean );

def getrecommendation( df_pred, df_mov, df_small, userId, top_n = 15 ):
    """Show a user's rated movies and return the best unseen predictions.

    The user's own ratings (joined with movie metadata, highest rating
    first) are passed to ``display``. The returned frame holds the
    ``top_n`` highest predicted movies that the user has not rated, with
    columns 'movieId', 'prediction' plus the metadata columns of ``df_mov``.

    ``df_pred`` is indexed by movie id and has one column per user id.
    """
    # Movie metadata keyed by movieId, reused for both joins below.
    movie_lookup = df_mov.set_index('movieId')

    # What the user has already rated, best rating first.
    is_this_user = df_small['userId'] == userId
    rated = df_small.loc[ is_this_user ].join( movie_lookup, on = 'movieId' )
    rated = rated.reset_index( drop = True )
    rated = rated.sort_values( 'rating', ascending = False )
    display( rated )

    # Predicted ratings of this user for every movie.
    predicted = df_pred[ userId ].reset_index()
    predicted.columns = [ 'movieId', 'prediction' ]

    # Keep only movies the user has not rated, then rank by prediction.
    already_rated = predicted['movieId'].isin( rated['movieId'] )
    candidates = predicted[ ~already_rated ].join( movie_lookup, on = 'movieId' )
    ranked = candidates.sort_values( 'prediction', ascending = False )
    return( ranked.head( top_n ) )

In [3]:
# Ratings table; the timestamp column is dropped right after loading.
df_small = (
    pd.read_csv('../movielens_small/ratings_small.csv')
    .drop(columns='timestamp')
)
# Movie metadata, joined on 'movieId' when recommendations are built.
df_mov = pd.read_csv('../movielens_small/movies.csv')

In [4]:
# Build the movie-by-user rating frame (df), its value array (Y) and the
# 0/1 "has a rating" indicator array (R) from the long-format ratings.
df, Y, R = getratingmat( df_small ); 

convert rating dataframe to rating matrix for gradient descent


In [5]:
# Centre each user's observed ratings on that user's mean rating; Y_mean is
# kept so it can be added back to the reconstructed predictions.
Y_norm, Y_mean = normalizeratings2( Y, R );

In [6]:
# Truncated SVD of the mean-centred rating matrix, keeping k = 100
# singular values/vectors.
U, sigma, Vt = svds(Y_norm, k = 100)
# svds returns the singular values as a 1-D array; expand them into a
# diagonal matrix so they can be used in a matrix product.
sigma = np.diag(sigma)

In [7]:
# Low-rank reconstruction U * sigma * Vt, with the per-user mean added back
# (Y_mean broadcasts across rows, one entry per user column).
all_user_predicted_ratings = U.dot(sigma).dot(Vt) + Y_mean
# Re-attach the movie index and user columns of the original rating frame.
df_pred = pd.DataFrame(
    all_user_predicted_ratings,
    index = df.index,
    columns = df.columns,
)

In [8]:
# Display what user 668 has already rated, then show the highest predicted
# ratings among the movies that user has not rated (default top_n).
getrecommendation( df_pred, df_mov, df_small, 668 )

Unnamed: 0,userId,movieId,rating,title,genres
0,668,296,5.0,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller
2,668,593,5.0,"Silence of the Lambs, The (1991)",Crime|Horror|Thriller
3,668,608,5.0,Fargo (1996),Comedy|Crime|Drama|Thriller
14,668,2997,5.0,Being John Malkovich (1999),Comedy|Drama|Fantasy
13,668,2908,5.0,Boys Don't Cry (1999),Drama
6,668,1213,5.0,Goodfellas (1990),Crime|Drama
7,668,1221,5.0,"Godfather: Part II, The (1974)",Crime|Drama
11,668,2324,5.0,Life Is Beautiful (La Vita è bella) (1997),Comedy|Drama|Romance|War
1,668,318,4.0,"Shawshank Redemption, The (1994)",Crime|Drama
9,668,1358,4.0,Sling Blade (1996),Drama


Unnamed: 0,movieId,prediction,title,genres
695,858,4.231737,"Godfather, The (1972)",Crime|Drama
48,50,3.98404,"Usual Suspects, The (1995)",Crime|Mystery|Thriller
455,509,3.947973,"Piano, The (1993)",Drama|Romance
472,527,3.935225,Schindler's List (1993),Drama|War
15,16,3.906124,Casino (1995),Crime|Drama
203,231,3.892004,Dumb & Dumber (Dumb and Dumber) (1994),Adventure|Comedy
1116,1376,3.885298,Star Trek IV: The Voyage Home (1986),Adventure|Comedy|Sci-Fi
2532,3148,3.877128,"Cider House Rules, The (1999)",Drama
2288,2858,3.875766,American Beauty (1999),Drama|Romance
232,260,3.873094,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Sci-Fi
