In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import codecs
%matplotlib inline

from scipy.sparse.linalg import svds

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
data_path = "ml-100k/u.data"
item_path = "item.txt"

# Number of singular values / latent factors kept by the truncated SVD.
# Ideally k would be chosen by examining how the RMSE changes with k. Because
# the reconstruction RMSE is expected to decrease monotonically as k approaches
# the rank of R, a fixed k = 50 is used here as an approximation.
n_latent_factors = 50

# ---------------------------------------------------------------------------
# Load data
# ---------------------------------------------------------------------------
# Ratings: one row per (user, movie) pair, tab separated, no header row.
df_data = pd.read_csv(data_path, sep='\t',
                      names=['user_id', 'movie_id', 'rating', 'timestamp'])

# Movie metadata: pipe separated, no header row; the trailing columns are
# 0/1 genre flags.
# NOTE(review): the stock MovieLens u.item file is not UTF-8; item.txt is
# presumably a re-encoded copy -- confirm, otherwise pass an `encoding=`.
movie_columns = ['movie_id', 'movie_title', 'release date', 'video release date',
                 'IMDb URL', 'unknown', 'Action', 'Adventure', 'Animation',
                 'Childrens', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy',
                 'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi',
                 'Thriller', 'War', 'Western']
df_movie = pd.read_csv(item_path, sep='|', names=movie_columns)

# Make sure the join key is numeric so it matches df_data['movie_id'].
df_movie['movie_id'] = pd.to_numeric(df_movie['movie_id'])
df_data = df_data.sort_values(by=["user_id"], ascending=True)

# ---------------------------------------------------------------------------
# Build the rating matrix  R(user_id, movie_id) = rating
# ---------------------------------------------------------------------------
# Missing (user, movie) pairs are filled with 0.
R_df = df_data.pivot(index='user_id', columns='movie_id', values='rating').fillna(0)

# to numpy array (DataFrame.as_matrix() no longer exists in pandas >= 1.0)
R = R_df.to_numpy()

# Normalize by each user's mean. The mean is taken over every column of the
# row, so the zero-filled (unrated) entries are included in it.
user_ratings_mean = np.mean(R, axis=1)
R_demeaned = R - user_ratings_mean.reshape(-1, 1)

# ---------------------------------------------------------------------------
# Truncated singular value decomposition
# ---------------------------------------------------------------------------
U, sigma, Vt = svds(R_demeaned, k=n_latent_factors)

# svds returns the singular values as a 1-D array; expand to a diagonal matrix.
sigma_ = np.diag(sigma)

# Rank-k approximation of R, with the per-user mean added back.
all_user_predicted_ratings = np.dot(np.dot(U, sigma_), Vt) + user_ratings_mean.reshape(-1, 1)

# Columns keep the movie_id labels of R_df; rows get a default 0..n-1 index,
# i.e. row i is the i-th user of R_df.index (recommend_movies looks users up
# positionally as user_id - 1).
preds_df = pd.DataFrame(all_user_predicted_ratings, columns=R_df.columns)


def recommend_movies(predictions_df, user_id, df_movie, original_ratings_df, num_recommendations = 10):
    """Return the movies a user has rated and the top predicted unrated movies.

    Parameters
    ----------
    predictions_df : pandas.DataFrame
        Predicted ratings with one row per user and one column per movie_id.
        The row for ``user_id`` is looked up positionally at ``user_id - 1``.
    user_id : int
        1-based user id to produce recommendations for.
    df_movie : pandas.DataFrame
        Movie metadata; must contain a ``movie_id`` column.
    original_ratings_df : pandas.DataFrame
        Observed ratings; must contain ``user_id``, ``movie_id`` and ``rating``.
    num_recommendations : int, optional
        Maximum number of recommendations to return (default 10).

    Returns
    -------
    (user_full, recommendations) : tuple of pandas.DataFrame
        ``user_full`` holds the user's ratings joined with the movie metadata,
        sorted by rating in descending order. ``recommendations`` holds the
        ``df_movie`` rows of the movies the user has not rated, ordered by
        predicted rating in descending order; the predicted rating itself is
        not included in the returned columns.

    Raises
    ------
    IndexError
        If ``user_id`` does not map to a row of ``predictions_df``.
    """
    # Positional lookup: reject ids that would wrap around (0 or negative) or
    # run past the last row instead of silently returning another user's row.
    if not 1 <= user_id <= len(predictions_df):
        raise IndexError('user_id {0} is outside the range 1..{1} of predictions_df'
                         .format(user_id, len(predictions_df)))
    user_row_number = user_id - 1

    # Get and sort the user's predictions (use the argument, not a global).
    sorted_user_predictions = predictions_df.iloc[user_row_number].sort_values(ascending = False)

    # Get the user's data and merge in the movie information.
    user_data = original_ratings_df[original_ratings_df.user_id == (user_id)]
    user_full = (user_data.merge(df_movie, how = 'left', on = 'movie_id').
                 sort_values(['rating'], ascending = False)
                 )

    print('User {0} has already rated {1} movies.'.format(user_id, user_full.shape[0]))
    print('Recommending highest {0} predicted ratings movies not already rated.'.format(num_recommendations))

    # Two-column frame (movie_id, Predictions). Naming the Series and its index
    # explicitly removes any dependence on how predictions_df was labelled.
    predicted = (sorted_user_predictions.
                 rename('Predictions').
                 rename_axis('movie_id').
                 reset_index()
                 )

    # Recommend the highest predicted rating movies that the user hasn't seen yet.
    not_yet_rated = df_movie[~df_movie['movie_id'].isin(user_full['movie_id'])]
    recommendations = (not_yet_rated.
                       merge(predicted, how = 'left', on = 'movie_id').
                       sort_values('Predictions', ascending = False).
                       # ':-1' drops the trailing 'Predictions' column added by the merge
                       iloc[:num_recommendations, :-1]
                       )
    return user_full, recommendations

In [3]:
# Top-10 recommendations for user 100, plus the movies that user already rated.
already_rated, predictions = recommend_movies(
    predictions_df=preds_df,
    user_id=100,
    df_movie=df_movie,
    original_ratings_df=df_data,
    num_recommendations=10,
)

User 100 has already rated 59 movies.
Recommending highest 10 predicted ratings movies not already rated.


In [6]:
# First ten rows of the user's rated movies (sorted by rating, highest first),
# shown for comparison with the predictions.
already_rated.head(10)

Unnamed: 0,user_id,movie_id,rating,timestamp,movie_title,release date,video release date,IMDb URL,unknown,Action,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
30,100,313,5,891374706,Titanic (1997),01-Jan-1997,,http://us.imdb.com/M/title-exact?imdb-title-12...,0,1,...,0,0,0,0,0,1,0,0,0,0
57,100,316,5,891375313,As Good As It Gets (1997),23-Dec-1997,,http://us.imdb.com/Title?As+Good+As+It+Gets+(1...,0,0,...,0,0,0,0,0,0,0,0,0,0
50,100,315,5,891375557,Apt Pupil (1998),23-Oct-1998,,http://us.imdb.com/Title?Apt+Pupil+(1998),0,0,...,0,0,0,0,0,0,0,1,0,0
27,100,750,4,891375016,Amistad (1997),18-Dec-1997,,http://us.imdb.com/M/title-exact?imdb-title-11...,0,0,...,0,0,0,0,0,0,0,0,0,0
21,100,691,4,891375260,Dark City (1998),09-Jan-1998,,http://us.imdb.com/M/title-exact?imdb-title-11...,0,0,...,0,1,0,0,0,0,1,1,0,0
20,100,751,4,891374868,Tomorrow Never Dies (1997),01-Jan-1997,,http://us.imdb.com/M/title-exact?imdb-title-12...,0,1,...,0,0,0,0,0,1,0,1,0,0
19,100,355,4,891375313,Sphere (1998),13-Feb-1998,,http://us.imdb.com/M/title-exact?Sphere+(1998),0,0,...,0,0,0,0,0,0,1,1,0,0
37,100,879,4,891374946,"Peacemaker, The (1997)",01-Jan-1997,,http://us.imdb.com/M/title-exact?Peacemaker%2C...,0,1,...,0,0,0,0,0,0,0,1,1,0
36,100,272,4,891375629,Good Will Hunting (1997),01-Jan-1997,,http://us.imdb.com/M/title-exact?imdb-title-11...,0,0,...,0,0,0,0,0,0,0,0,0,0
40,100,258,4,891374675,Contact (1997),11-Jul-1997,,http://us.imdb.com/Title?Contact+(1997/I),0,0,...,0,0,0,0,0,0,1,0,0,0


In [7]:
# Display the recommendations returned by recommend_movies for user 100.
predictions

Unnamed: 0,movie_id,movie_title,release date,video release date,IMDb URL,unknown,Action,Adventure,Animation,Childrens,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
292,307,"Devil's Advocate, The (1997)",01-Jan-1997,,http://us.imdb.com/M/title-exact?Devil's+Advoc...,0,0,0,0,0,...,0,0,1,0,1,0,0,1,0,0
287,301,In & Out (1997),19-Sep-1997,,http://us.imdb.com/Title?In+%26+Out+(1997),0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
308,331,"Edge, The (1997)",26-Sep-1997,,http://us.imdb.com/M/title-exact?Edge%2C+The+(...,0,0,1,0,0,...,0,0,0,0,0,0,0,1,0,0
711,748,"Saint, The (1997)",14-Mar-1997,,http://us.imdb.com/M/title-exact?Saint%2C%20Th...,0,1,0,0,0,...,0,0,0,0,0,1,0,1,0,0
309,332,Kiss the Girls (1997),01-Jan-1997,,http://us.imdb.com/M/title-exact?Kiss+the+Girl...,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
317,343,Alien: Resurrection (1997),01-Jan-1997,,http://us.imdb.com/M/title-exact?Alien%3A+Resu...,0,1,0,0,0,...,0,0,1,0,0,0,1,0,0,0
244,245,"Devil's Own, The (1997)",26-Mar-1997,,http://us.imdb.com/M/title-exact?Devil%27s%20O...,0,1,0,0,0,...,0,0,0,0,0,0,0,1,1,0
296,312,Midnight in the Garden of Good and Evil (1997),01-Jan-1997,,http://us.imdb.com/M/title-exact?Midnight+in+t...,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
305,327,Cop Land (1997),01-Jan-1997,,http://us.imdb.com/M/title-exact?Cop+Land+(1997),0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
289,304,Fly Away Home (1996),13-Sep-1996,,http://us.imdb.com/M/title-exact?Fly%20Away%20...,0,0,1,0,1,...,0,0,0,0,0,0,0,0,0,0
