In [9]:
import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds

In [10]:
def getratingmat( df_rating ):
    """Build the movie-by-user rating matrix from a long-format ratings table.

    Parameters
    ----------
    df_rating : pandas.DataFrame
        Long-format ratings with 'movieId', 'userId' and 'rating' columns.
        Each (movieId, userId) pair must appear at most once; ``pivot``
        raises ``ValueError`` on duplicates.

    Returns
    -------
    df : pandas.DataFrame
        Pivoted ratings: one row per movieId (index named 'Movie ID'), one
        column per userId (columns named 'User ID'), missing ratings = 0.
    Y : numpy.ndarray
        The values of ``df`` as a 2-D array.
    R : numpy.ndarray of int
        Indicator array with the same shape as ``Y``: 1 where ``Y > 0``,
        otherwise 0. Note that an explicit rating of 0 is therefore
        indistinguishable from "not rated".
    """
    df = df_rating.pivot(index='movieId', columns='userId', values='rating').fillna(0)
    df.index.name = 'Movie ID'
    df.columns.name = 'User ID'
    # DataFrame.as_matrix() was removed in pandas 1.0; .values is available
    # on both old and current pandas versions and returns the same ndarray.
    Y = df.values
    R = ( Y > 0. ).astype(int)
    return( df, Y, R )

def normalizeratings2( Y, R ):
    """Mean-center every column of Y over its rated entries only.

    Parameters
    ----------
    Y : numpy.ndarray, 2-D
        Rating matrix.
    R : numpy.ndarray, 2-D, same shape as Y
        Indicator matrix; entries equal to 1 mark the rated positions.

    Returns
    -------
    Y_norm : numpy.ndarray of float
        Same shape as Y. Where ``R == 1`` it holds ``Y - column mean``;
        everywhere else it is 0.
    Y_mean : numpy.ndarray of float, 1-D
        Per-column mean, computed as column sum of Y divided by column sum
        of R. Columns whose R sum is 0 get a mean of 0 instead of NaN.
    """
    Y = np.asarray( Y, dtype = float )
    R = np.asarray( R )
    Y_sum = np.sum( Y, axis = 0 )
    R_sum = np.sum( R, axis = 0 ).astype( float )
    # Guarded division: a column with no rated entries would otherwise be
    # 0/0 = NaN, and that NaN would spread to every prediction for the
    # column once the mean is added back after factorization.
    Y_mean = np.divide( Y_sum, R_sum, out = np.zeros_like( Y_sum ), where = R_sum > 0 )
    # Vectorized form of the per-column loop: subtract the column mean on
    # rated entries, leave unrated entries at exactly 0.
    Y_norm = np.where( R == 1, Y - Y_mean, 0.0 )
    return( Y_norm, Y_mean )

def getrecommendation( df_pred, df_mov, df_small, userId, top_n = 10 ):
    """Return the top_n highest-predicted movies the user has not rated yet.

    Parameters
    ----------
    df_pred : pandas.DataFrame
        Predicted ratings, indexed by movie id with one column per user id.
    df_mov : pandas.DataFrame
        Movie metadata containing a 'movieId' column.
    df_small : pandas.DataFrame
        Ratings table containing 'userId' and 'movieId' columns.
    userId
        Column label of the user in ``df_pred``; ``KeyError`` if absent.
    top_n : int, default 10
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Columns 'movieId' and 'prediction' followed by the metadata columns
        of ``df_mov``, sorted by 'prediction' in descending order. Movies
        missing from ``df_mov`` are dropped by the inner merge.
    """
    # Only the ids of the movies the user already rated are needed. Merging
    # the history with the metadata and sorting it by 'rating' was unused
    # work, and it raised KeyError whenever df_mov had its own 'rating'
    # column (the merge renames both to rating_x / rating_y).
    rated_ids = df_small.loc[ df_small['userId'] == userId, 'movieId' ]
    df_userp = df_pred[ userId ].reset_index()
    df_userp.columns = [ 'movieId', 'prediction' ]
    df_p = ( df_userp[~df_userp['movieId'].isin(rated_ids)].
            merge(df_mov, on = 'movieId' ).
            sort_values('prediction', ascending=False) )
    return( df_p.head(top_n) )

In [11]:
# Load the ratings table and the movie-metadata table from CSV.
# Both paths are relative to the notebook's working directory.
df_small = pd.read_csv('../movielens_small/ratings_clean.csv')
df_mov = pd.read_csv( '../movielens_small/metadata_clean.csv' )

In [12]:
# Pivot the ratings into a movie-by-user frame (df), its value array (Y)
# and the 0/1 indicator of rated entries (R).
df, Y, R = getratingmat( df_small ); 

In [13]:
# Mean-center each column (user) of Y over its rated entries; Y_mean keeps
# the per-column means so they can be added back after factorization.
Y_norm, Y_mean = normalizeratings2( Y, R );

In [14]:
# Truncated SVD of the mean-centered matrix with k = 50 singular triplets.
U, sigma, Vt = svds(Y_norm, k = 50)
# svds returns the singular values as a 1-D array; turn them into a
# diagonal matrix so they can be used in the matrix product below.
sigma = np.diag(sigma)

In [15]:
# Low-rank reconstruction U * sigma * Vt, then add the per-column means
# back (Y_mean broadcasts across the columns of the product).
all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt) + Y_mean
# Re-attach the row/column labels of the pivoted ratings frame.
df_pred = pd.DataFrame(all_user_predicted_ratings, columns = df.columns, index = df.index)

In [16]:
# Show the recommendations for userId 50 (top_n left at its default of 10).
getrecommendation( df_pred, df_mov, df_small, 50 )

Unnamed: 0,movieId,prediction,index,tmdbId,title,vote_average,vote_count,cast,director,keywords,genres
265,318,3.737711,284,278,The Shawshank Redemption,8.5,8358,"['Tim Robbins', 'Morgan Freeman', 'Bob Gunton'...",Frank Darabont,"['prison', 'corruption', 'police brutality', '...","['Drama', 'Crime']"
482,593,3.499678,525,274,The Silence of the Lambs,8.1,4549,"['Jodie Foster', 'Anthony Hopkins', 'Scott Gle...",Jonathan Demme,"['based on novel', 'psychopath', 'horror', 'su...","['Crime', 'Drama', 'Thriller']"
1540,2028,3.478345,1586,857,Saving Private Ryan,7.9,5148,"['Tom Hanks', 'Matt Damon', 'Vin Diesel', 'Tom...",Steven Spielberg,"['war crimes', 'self sacrifice', 'war veteran'...","['Drama', 'History', 'War']"
184,223,3.435785,196,2292,Clerks,7.4,769,"[""Brian O'Halloran"", 'Jeff Anderson', 'Jason M...",Kevin Smith,"['salesclerk', 'loser', 'aftercreditsstinger']",['Comedy']
1430,1917,3.429687,1476,95,Armageddon,6.5,2540,"['Bruce Willis', 'Billy Bob Thornton', 'Ben Af...",Michael Bay,"['saving the world', 'paris', 'moon', 'catacly...","['Action', 'Thriller', 'Science Fiction', 'Adv..."
247,293,3.422716,263,101,Leon: The Professional,8.2,4293,"['Jean Reno', 'Natalie Portman', 'Gary Oldman'...",Luc Besson,"['new york', 'corruption', 'assassin', 'police...","['Thriller', 'Crime', 'Drama']"
218,260,3.414686,232,11,Star Wars,8.1,6778,"['Mark Hamill', 'Harrison Ford', 'Carrie Fishe...",George Lucas,"['android', 'galaxy', 'hermit', 'death star', ...","['Adventure', 'Action', 'Science Fiction']"
2012,2571,3.408879,2058,603,The Matrix,7.9,9079,"['Keanu Reeves', 'Laurence Fishburne', 'Carrie...",Lana Wachowski,"['saving the world', 'artificial intelligence'...","['Action', 'Science Fiction']"
1282,1676,3.402277,1328,563,Starship Troopers,6.7,1584,"['Casper Van Dien', 'Dina Meyer', 'Denise Rich...",Paul Verhoeven,"['moon', 'asteroid', 'space marine', 'intellig...","['Adventure', 'Action', 'Thriller', 'Science F..."
2480,3147,3.40192,2526,497,The Green Mile,8.2,4166,"['Tom Hanks', 'Michael Clarke Duncan', 'David ...",Frank Darabont,"['southern usa', 'black people', 'mentally dis...","['Fantasy', 'Drama', 'Crime']"
