In [68]:
import pandas as pd
from collections import defaultdict
from surprise import SVD
from surprise import Dataset
from surprise import Reader
import ast

In [69]:
def getprediction( predictions, userId ):
    """Collect the estimated ratings that belong to a single user.

    Parameters
    ----------
    predictions : iterable
        Items that each unpack into five values
        ``(uid, iid, true_r, est, details)``.
    userId
        Only items whose ``uid`` equals this value are kept.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``movieId`` (cast to int) and ``prediction``, with one
        row per matching item in the order they were encountered.
    """
    rows = [
        (item_id, estimate)
        for uid, item_id, _true_r, estimate, _details in predictions
        if uid == userId
    ]
    table = pd.DataFrame( rows, columns = [ 'movieId', 'prediction'] )
    # Item ids are cast to int so they can be matched against integer movie ids.
    return table.astype( { 'movieId': int } )

In [70]:
# Read the ratings table and the movie-metadata table from the
# movielens_small directory (paths are relative to the notebook location).
df_small, df_md_small = (
    pd.read_csv('../movielens_small/ratings_clean.csv'),
    pd.read_csv( '../movielens_small/metadata_clean.csv' ),
)

In [71]:
def _parse_literal( value ):
    """Turn a string holding a Python literal into the object it describes.

    Values that are not strings are returned unchanged, so applying this to a
    column that has already been parsed is a no-op instead of an error.
    """
    return ast.literal_eval(value) if isinstance(value, str) else value


# The CSV stores these columns as the text of Python literals; convert them
# back to Python objects. The columns are overwritten in place, so the
# isinstance guard in _parse_literal is what keeps this cell safe to re-run.
for _column in ( 'genres', 'cast', 'keywords' ):
    df_md_small[_column] = df_md_small[_column].apply(_parse_literal)

In [72]:
# Reader() with no arguments declares a rating scale of (1, 5). Take the bounds
# from the observed ratings instead, so that any rating below 1 (e.g. half-star
# ratings, if ratings_clean.csv contains them -- TODO confirm) is inside the
# scale Surprise uses when clipping its estimates.
reader = Reader( rating_scale = ( float(df_small['rating'].min()),
                                  float(df_small['rating'].max()) ) )
# Surprise expects exactly three columns, in the order user, item, rating.
rating_data = Dataset.load_from_df(df_small[['userId', 'movieId', 'rating']], reader)

In [73]:
# Biased SVD matrix factorisation with 50 latent factors, trained for 50 epochs.
# random_state pins the random initialisation so that re-running the notebook
# on a fresh kernel reproduces the same model and the same recommendations.
algo = SVD( biased = True, n_factors = 50, n_epochs = 50, random_state = 0 )
# Train on every available rating (no hold-out split).
trainset = rating_data.build_full_trainset()
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1b51fa9e7f0>

In [74]:
# Build every (user, item) pair that has no rating in the training set, then
# predict a rating for each of those pairs.
# NOTE(review): this scores the unrated items of all users although only one
# user is inspected further down; expect it to be slow and memory-hungry on
# larger datasets.
testset = trainset.build_anti_testset()
predictions = algo.test(testset)

In [75]:
# Id of the user whose predictions and past ratings are inspected in the
# following cells.
user_id = 50

In [76]:
# Predicted ratings for the selected user, enriched with the movie metadata
# (matched on movieId) and ordered from highest to lowest prediction.
df_userp = (
    getprediction( predictions, user_id )
    .join( df_md_small.set_index('movieId'), on = 'movieId' )
    .sort_values( by = 'prediction', ascending = False )
)
# Show the ten highest-scoring recommendations.
df_userp.head(10)

Unnamed: 0,movieId,prediction,index,tmdbId,title,vote_average,vote_count,cast,director,keywords,genres
1392,8961,4.599858,5616,9806,The Incredibles,7.4,5290,"[Craig T. Nelson, Holly Hunter, Samuel L. Jack...",Brad Bird,"[secret identity, secret, hero, island, wretch...","[Action, Adventure, Animation, Family]"
1586,76093,4.416577,7480,10191,How to Train Your Dragon,7.5,4319,"[Jay Baruchel, Gerard Butler, Craig Ferguson, ...",Chris Sanders,"[flying, blacksmith, arena, island, night, shi...","[Fantasy, Adventure, Animation, Family]"
938,3030,4.411761,2428,11878,Yojimbo,8.0,334,"[Toshirō Mifune, Tatsuya Nakadai, Yôko Tsukasa...",Akira Kurosawa,"[japan, swordplay, samurai, sword, bodyguard, ...","[Drama, Thriller]"
880,2542,4.382018,2034,100,"Lock, Stock and Two Smoking Barrels",7.5,1671,"[Jason Flemyng, Dexter Fletcher, Nick Moran, J...",Guy Ritchie,"[ambush, alcohol, shotgun, tea, joint, machism...","[Comedy, Crime]"
101,7153,4.338706,5018,122,The Lord of the Rings: The Return of the King,8.1,8226,"[Elijah Wood, Ian McKellen, Viggo Mortensen, L...",Peter Jackson,"[elves, orcs, middle-earth (tolkien), based on...","[Adventure, Fantasy, Action]"
2089,475,4.334349,423,7984,In the Name of the Father,7.6,363,"[Daniel Day-Lewis, Pete Postlethwaite, Emma Th...",Jim Sheridan,"[bomb, prison, father son relationship, based ...",[Drama]
71,318,4.332277,284,278,The Shawshank Redemption,8.5,8358,"[Tim Robbins, Morgan Freeman, Bob Gunton, Clan...",Frank Darabont,"[prison, corruption, police brutality, prison ...","[Drama, Crime]"
293,1221,4.325678,973,240,The Godfather: Part II,8.3,3418,"[Al Pacino, Robert Duvall, Diane Keaton, Rober...",Francis Ford Coppola,"[italo-american, cuba, vororte, melancholy, pr...","[Drama, Crime]"
21,50,4.325109,48,629,The Usual Suspects,8.1,3334,"[Stephen Baldwin, Gabriel Byrne, Chazz Palmint...",Bryan Singer,"[law, relatives, theft, criminal, criminal mas...","[Drama, Crime, Thriller]"
77,1197,4.319519,950,2493,The Princess Bride,7.6,1518,"[Cary Elwes, Robin Wright, Mandy Patinkin, And...",Rob Reiner,"[swashbuckler, evil prince, reference to socra...","[Adventure, Family, Fantasy, Comedy, Romance]"


In [78]:
# Movies the selected user has actually rated, combined with their metadata
# and ordered from highest to lowest rating, for comparison with the
# recommendations.
df_user = pd.merge(
    df_small[ df_small['userId'] == user_id ],
    df_md_small,
    on = 'movieId',
).sort_values( by = 'rating', ascending = False )
df_user[ ['userId', 'title', 'rating', 'genres', 'vote_average', 'cast', 'director'] ]

Unnamed: 0,userId,title,rating,genres,vote_average,cast,director
41,50,Terminator 2: Judgment Day,5.0,"[Action, Thriller, Science Fiction]",7.7,"[Arnold Schwarzenegger, Linda Hamilton, Robert...",James Cameron
0,50,GoldenEye,4.0,"[Adventure, Action, Thriller]",6.6,"[Pierce Brosnan, Sean Bean, Izabella Scorupco,...",Martin Campbell
15,50,Outbreak,4.0,"[Action, Drama, Science Fiction, Thriller]",6.4,"[Dustin Hoffman, Rene Russo, Morgan Freeman, K...",Wolfgang Petersen
44,50,Independence Day,4.0,"[Action, Adventure, Science Fiction]",6.7,"[Will Smith, Bill Pullman, Jeff Goldblum, Mary...",Roland Emmerich
37,50,Schindler's List,4.0,"[Drama, History, War]",8.3,"[Liam Neeson, Ben Kingsley, Ralph Fiennes, Car...",Steven Spielberg
35,50,Jurassic Park,4.0,"[Adventure, Science Fiction]",7.6,"[Sam Neill, Laura Dern, Jeff Goldblum, Richard...",Steven Spielberg
31,50,Dave,4.0,[Comedy],6.3,"[Kevin Kline, Sigourney Weaver, Frank Langella...",Ivan Reitman
26,50,Maverick,4.0,"[Action, Adventure, Comedy, Drama, Western]",6.6,"[Mel Gibson, Jodie Foster, James Garner, Graha...",Richard Donner
25,50,The Mask,4.0,"[Romance, Comedy, Crime, Fantasy]",6.6,"[Jim Carrey, Cameron Diaz, Nancy Fish, Tim Bag...",Chuck Russell
24,50,Four Weddings and a Funeral,4.0,"[Comedy, Drama, Romance]",6.6,"[Hugh Grant, Andie MacDowell, James Fleet, Sim...",Mike Newell
