In [1]:
import ast
import random
from collections import defaultdict

import numpy as np
import pandas as pd
from surprise import SVD
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import cross_validate

In [2]:
def getprediction( predictions, userId ):
    """Collect the estimated ratings of a single user into a DataFrame.

    Parameters
    ----------
    predictions : iterable of 5-tuples
        Each element is unpacked as (uid, iid, true_r, est, details).
    userId
        Value compared with ``==`` against the uid of every prediction.

    Returns
    -------
    pandas.DataFrame
        Columns 'movieId' (cast to int) and 'prediction', one row per
        matching prediction, in the order they appear in `predictions`.
    """
    matching_rows = [
        (item_id, estimate)
        for owner, item_id, _true_rating, estimate, _details in predictions
        if owner == userId
    ]
    result = pd.DataFrame( matching_rows, columns = [ 'movieId', 'prediction'] )
    # Item ids may arrive as a non-integer dtype; normalise them to int.
    result['movieId'] = result['movieId'].astype(int)
    return result

In [3]:
# Load the two cleaned input tables: user ratings and per-movie metadata.
# Both paths are relative, so they resolve against the kernel's working directory.
df_small = pd.read_csv('../movielens_small/ratings_clean.csv')
df_md_small = pd.read_csv( '../movielens_small/metadata_clean.csv' )
# Tip: inspect the ratings with df_small.head() when checking the load.

In [4]:
def _parse_list_literal(value):
    """Evaluate a stringified Python literal, e.g. "['Drama', 'Crime']".

    Values that are not strings are returned unchanged. This keeps the cell
    idempotent: running it a second time on already-parsed columns is a no-op
    instead of a ValueError from ast.literal_eval, and non-string cells such
    as NaN pass through untouched.
    """
    return ast.literal_eval(value) if isinstance(value, str) else value


# These columns are run through ast.literal_eval, i.e. they are read from the
# CSV as text and have to be turned back into Python objects.
for _list_col in ('genres', 'cast', 'keywords'):
    df_md_small[_list_col] = df_md_small[_list_col].apply(_parse_list_literal)

In [5]:
# Reader() falls back to rating_scale=(1, 5), and Surprise uses that scale to
# clip its estimates. Derive the bounds from the observed ratings instead, so
# values outside the default range (e.g. half-star ratings below 1 — confirm
# against the data) are representable. For data already on 1-5 this is identical.
reader = Reader(
    rating_scale = ( float(df_small['rating'].min()), float(df_small['rating'].max()) )
)
# load_from_df expects the columns in the order user, item, rating.
rating_data = Dataset.load_from_df(df_small[['userId', 'movieId', 'rating']], reader)

In [13]:
# Seed both global RNGs before anything stochastic runs, so the fold split and
# the SVD factor initialisation repeat on Restart & Run All (this is the
# approach the Surprise FAQ recommends for reproducible experiments).
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

# Biased SVD with 60 latent factors, trained for 30 epochs.
algo = SVD( biased = True, n_factors = 60, n_epochs = 30 )
# Run 5-fold cross-validation and print results.
cross_validate(algo, rating_data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.0010  0.9990  0.9932  0.9899  0.9892  0.9944  0.0048  
MAE (testset)     0.7725  0.7726  0.7709  0.7673  0.7650  0.7697  0.0030  
Fit time          13.09   13.48   13.66   13.39   7.44    12.21   2.39    
Test time         0.34    0.35    0.27    0.19    0.13    0.26    0.09    


{'fit_time': (13.085323095321655,
  13.475642919540405,
  13.656957149505615,
  13.385633945465088,
  7.438598871231079),
 'test_mae': array([ 0.77245702,  0.77262057,  0.77087089,  0.76734679,  0.76502367]),
 'test_rmse': array([ 1.00100738,  0.99896549,  0.99316143,  0.98986894,  0.98919122]),
 'test_time': (0.335906982421875,
  0.35300397872924805,
  0.27235913276672363,
  0.19127702713012695,
  0.12679719924926758)}

In [6]:
# build trainset
trainset = rating_data.build_full_trainset()
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x205e43fb7b8>

In [7]:
# predict ratings for all pairs (u, i) that are not in the training set.
# NOTE(review): this scores the unrated items of every user, although only a
# single user is inspected afterwards — presumably the most expensive step of
# the notebook; confirm before running on a larger dataset.
testset = trainset.build_anti_testset()
predictions = algo.test(testset)

In [8]:
# Id of the user whose predictions and rating history are examined.
user_id = 50

In [9]:
# Predicted ratings for the selected user, enriched with the movie metadata
# (looked up by movieId) and ordered from highest to lowest estimate.
df_userp = (
    getprediction( predictions, user_id )
    .join( df_md_small.set_index('movieId'), on = 'movieId' )
    .sort_values( 'prediction', ascending = False )
)
# Show the ten best-scoring recommendations.
df_userp.head(10)

Unnamed: 0,movieId,prediction,index,tmdbId,title,vote_average,vote_count,cast,director,keywords,genres
71,318,4.037748,284,278,The Shawshank Redemption,8.5,8358,"[Tim Robbins, Morgan Freeman, Bob Gunton, Clan...",Frank Darabont,"[prison, corruption, police brutality, prison ...","[Drama, Crime]"
880,2542,3.77628,2034,100,"Lock, Stock and Two Smoking Barrels",7.5,1671,"[Jason Flemyng, Dexter Fletcher, Nick Moran, J...",Guy Ritchie,"[ambush, alcohol, shotgun, tea, joint, machism...","[Comedy, Crime]"
394,745,3.770731,620,532,A Close Shave,7.5,223,"[Peter Sallis, Anne Reid]",Nick Park,"[prison, inventor, loyalty, sheep, innocence, ...","[Family, Animation, Comedy]"
397,1148,3.760771,912,531,The Wrong Trousers,7.6,266,[Peter Sallis],Nick Park,"[robbery, inventor, penguin, telecontrol, surr...","[Animation, Comedy, Family]"
65,593,3.722161,525,274,The Silence of the Lambs,8.1,4549,"[Jodie Foster, Anthony Hopkins, Scott Glenn, T...",Jonathan Demme,"[based on novel, psychopath, horror, suspense,...","[Crime, Drama, Thriller]"
125,858,3.691987,692,238,The Godfather,8.5,6024,"[Marlon Brando, Al Pacino, James Caan, Richard...",Francis Ford Coppola,"[italy, love at first sight, loss of father, p...","[Drama, Crime]"
2089,475,3.626447,423,7984,In the Name of the Father,7.6,363,"[Daniel Day-Lewis, Pete Postlethwaite, Emma Th...",Jim Sheridan,"[bomb, prison, father son relationship, based ...",[Drama]
419,1704,3.624353,1348,489,Good Will Hunting,7.9,2880,"[Robin Williams, Matt Damon, Ben Affleck, Stel...",Gus Van Sant,"[baseball, boston, professor, m.i.t., harvard ...",[Drama]
735,1246,3.615909,996,207,Dead Poets Society,8.1,2786,"[Robin Williams, Ethan Hawke, Robert Sean Leon...",Peter Weir,"[individual, philosophy, poetry, shakespeare, ...",[Drama]
115,260,3.576888,232,11,Star Wars,8.1,6778,"[Mark Hamill, Harrison Ford, Carrie Fisher, Pe...",George Lucas,"[android, galaxy, hermit, death star, lightsab...","[Adventure, Action, Science Fiction]"


In [10]:
# Movies the selected user actually rated, matched to their metadata and
# ordered from the highest to the lowest rating, for comparison with the
# predicted list.
df_user = (
    df_small[ df_small['userId'] == user_id ]
    .merge( df_md_small, how = 'inner', on = 'movieId' )
    .sort_values( by = 'rating', ascending = False )
)
# Display only the columns that help judge the user's taste.
df_user.loc[ :, ['userId', 'title', 'rating', 'genres', 'vote_average', 'cast', 'director'] ]

Unnamed: 0,userId,title,rating,genres,vote_average,cast,director
41,50,Terminator 2: Judgment Day,5.0,"[Action, Thriller, Science Fiction]",7.7,"[Arnold Schwarzenegger, Linda Hamilton, Robert...",James Cameron
0,50,GoldenEye,4.0,"[Adventure, Action, Thriller]",6.6,"[Pierce Brosnan, Sean Bean, Izabella Scorupco,...",Martin Campbell
15,50,Outbreak,4.0,"[Action, Drama, Science Fiction, Thriller]",6.4,"[Dustin Hoffman, Rene Russo, Morgan Freeman, K...",Wolfgang Petersen
44,50,Independence Day,4.0,"[Action, Adventure, Science Fiction]",6.7,"[Will Smith, Bill Pullman, Jeff Goldblum, Mary...",Roland Emmerich
37,50,Schindler's List,4.0,"[Drama, History, War]",8.3,"[Liam Neeson, Ben Kingsley, Ralph Fiennes, Car...",Steven Spielberg
35,50,Jurassic Park,4.0,"[Adventure, Science Fiction]",7.6,"[Sam Neill, Laura Dern, Jeff Goldblum, Richard...",Steven Spielberg
31,50,Dave,4.0,[Comedy],6.3,"[Kevin Kline, Sigourney Weaver, Frank Langella...",Ivan Reitman
26,50,Maverick,4.0,"[Action, Adventure, Comedy, Drama, Western]",6.6,"[Mel Gibson, Jodie Foster, James Garner, Graha...",Richard Donner
25,50,The Mask,4.0,"[Romance, Comedy, Crime, Fantasy]",6.6,"[Jim Carrey, Cameron Diaz, Nancy Fish, Tim Bag...",Chuck Russell
24,50,Four Weddings and a Funeral,4.0,"[Comedy, Drama, Romance]",6.6,"[Hugh Grant, Andie MacDowell, James Fleet, Sim...",Mike Newell
