In [1]:
import pandas as pd
import numpy as np

from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

In [2]:
df = pd.read_csv('C:/Users/victo/Documents/Institute_of_Data/Capstone Project/data_final.csv')

In [3]:
df

Unnamed: 0,User_ID,Movie_ID,Movie_Title,Rating,Genres,Timestamp
0,1,2,Jumanji (1995),3.5,Adventure|Children|Fantasy,2/04/2005 23:53
1,1,29,"City of Lost Children, The (CitÃ© des enfants ...",3.5,Adventure|Drama|Fantasy|Mystery|Sci-Fi,2/04/2005 23:31
2,1,32,Twelve Monkeys (a.k.a. 12 Monkeys) (1995),3.5,Mystery|Sci-Fi|Thriller,2/04/2005 23:33
3,1,47,Seven (a.k.a. Se7en) (1995),3.5,Mystery|Thriller,2/04/2005 23:32
4,1,50,"Usual Suspects, The (1995)",3.5,Crime|Mystery|Thriller,2/04/2005 23:29
...,...,...,...,...,...,...
1048570,7120,168,First Knight (1995),5.0,Action|Drama|Romance,2/04/2007 19:44
1048571,7120,253,Interview with the Vampire: The Vampire Chroni...,4.0,Drama|Horror,2/04/2007 19:30
1048572,7120,260,Star Wars: Episode IV - A New Hope (1977),5.0,Action|Adventure|Sci-Fi,2/04/2007 19:27
1048573,7120,261,Little Women (1994),4.0,Drama,2/04/2007 19:49


In [4]:
reader = Reader()
data = Dataset.load_from_df(df[['User_ID', 'Movie_ID', 'Rating']], reader)

In [5]:
svd = SVD(verbose=True, n_epochs=10)
cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Evaluating RMSE, MAE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.8628  0.8628  0.8611  0.8622  0.0008  
MAE (testset)     0.6634  0.6645  0.6625  0.6635  0.0008  
Fit time          13.22   13.42   13.65   13.43   0.17    
Test time         2.45    2.45    2.45    2.45    0.00    


{'test_rmse': array([0.86278443, 0.86283333, 0.86107928]),
 'test_mae': array([0.66337794, 0.66451194, 0.66248064]),
 'fit_time': (13.22235631942749, 13.422930479049683, 13.6492018699646),
 'test_time': (2.4528629779815674, 2.45279598236084, 2.452867031097412)}

In [6]:
trainset = data.build_full_trainset()
svd.fit(trainset)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x13421525370>

In [7]:
svd.predict(uid=5, iid=80)

Prediction(uid=5, iid=80, r_ui=None, est=4.278168078400911, details={'was_impossible': False})

In [8]:
def collab_filter_recommendation(userID):
    
    #find number of movies that userID have already rated
    df_rated = df[df['User_ID'] == userID]
    print('User {0} has already rated {1} movies.'.format(userID, df_rated.shape[0]))
    
    #create new dataframe with movies that have not been rated by userID
    df_not_rated = df[df['User_ID'] != userID]
    
    
    # Estimated prediction using svd
    df_not_rated['Estimated Prediction'] = df_not_rated['Movie_ID'].apply(lambda x: svd.predict(userID, x).est)
    # Rank movies according to the predicted values
    df_not_rated = df_not_rated.sort_values('Estimated Prediction', ascending=False)
    
    #drop duplicated movies
    df_final = df_not_rated.drop_duplicates(subset=['Movie_Title'], keep='first')
    df_final = df_final.reset_index(drop=True)
    
    print('================================================')
    
    print('Top 10 recommended movies that User {0} will hopefully enjoy: '.format(userID))
    
    # Display top 10 recommended movies user may enjoy 
    return df_final[{'Movie_Title', 'Estimated Prediction', 'Genres'}].head(10)

In [9]:
collab_filter_recommendation(5)

User 5 has already rated 66 movies.
Top 10 recommended movies that User 5 will hopefully enjoy: 


Unnamed: 0,Movie_Title,Genres,Estimated Prediction
0,"Shawshank Redemption, The (1994)",Crime|Drama,5.0
1,"Lord of the Rings: The Return of the King, The...",Action|Adventure|Drama|Fantasy,5.0
2,"Matrix, The (1999)",Action|Sci-Fi|Thriller,5.0
3,Schindler's List (1993),Drama|War,5.0
4,"Lord of the Rings: The Two Towers, The (2002)",Adventure|Fantasy,5.0
5,Band of Brothers (2001),Action|Drama|War,5.0
6,Cosmos (1980),Documentary,5.0
7,"Lord of the Rings: The Fellowship of the Ring,...",Adventure|Fantasy,4.992455
8,Saving Private Ryan (1998),Action|Drama|War,4.97513
9,Stop Making Sense (1984),Documentary|Musical,4.96729


In [11]:
collab_filter_recommendation(540)

User 540 has already rated 172 movies.
Top 10 recommended movies that User 540 will hopefully enjoy: 


Unnamed: 0,Genres,Movie_Title,Estimated Prediction
0,Adventure|Comedy|Fantasy,Monty Python and the Holy Grail (1975),4.295091
1,Comedy|War,Dr. Strangelove or: How I Learned to Stop Worr...,4.281151
2,Crime|Drama,"Godfather, The (1972)",4.271102
3,Comedy|Drama|Romance,City Lights (1931),4.269132
4,Comedy|Crime|Drama|Thriller,Pulp Fiction (1994),4.251288
5,Drama,One Flew Over the Cuckoo's Nest (1975),4.226419
6,Drama,12 Angry Men (1957),4.215578
7,Crime|Mystery|Thriller,"Usual Suspects, The (1995)",4.197413
8,Crime|Drama,"Shawshank Redemption, The (1994)",4.190592
9,Drama|Film-Noir|Romance,Sunset Blvd. (a.k.a. Sunset Boulevard) (1950),4.186359


In [12]:
collab_filter_recommendation(710)

User 710 has already rated 958 movies.
Top 10 recommended movies that User 710 will hopefully enjoy: 


Unnamed: 0,Genres,Movie_Title,Estimated Prediction
0,Mystery|Thriller,Rear Window (1954),5.0
1,Crime|Film-Noir|Thriller,M (1931),4.988193
2,Drama,12 Angry Men (1957),4.98279
3,Drama|Mystery,Citizen Kane (1941),4.981735
4,Crime|Film-Noir|Mystery|Thriller,Chinatown (1974),4.981639
5,Crime|Drama,"Godfather, The (1972)",4.980816
6,Comedy|War,Dr. Strangelove or: How I Learned to Stop Worr...,4.975795
7,Crime|Drama|Mystery,Rashomon (RashÃ´mon) (1950),4.960415
8,Action|Adventure,Yojimbo (1961),4.956589
9,Drama|Romance,Casablanca (1942),4.948584
