In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb

from pandas import Series, DataFrame
from matplotlib import rcParams

In [2]:
%matplotlib inline
rcParams['figure.figsize'] = 5, 4
sb.set_style('whitegrid')

In [3]:
# Directory holding the MovieLens-1M .dat files; change it here if the data lives elsewhere.
DATA_DIR = '/home/dl/recommend/ml-1m'

# Each .dat file has one record per line with fields separated by "::".
# The `with` blocks close every file handle as soon as it has been read
# (the bare open(...).readlines() form left all three handles open).
with open(DATA_DIR + '/ratings.dat', 'r') as ratings_file:
    ratings_list = [line.strip().split("::") for line in ratings_file]

# movies.dat is opened as ISO-8859-1, unlike the other two files which use the platform default.
with open(DATA_DIR + '/movies.dat', 'r', encoding = "ISO-8859-1") as movies_file:
    movies_list = [line.strip().split("::") for line in movies_file]

with open(DATA_DIR + '/users.dat', 'r') as users_file:
    users_list = [line.strip().split("::") for line in users_file]

In [4]:
# Convert each list of parsed rows into a NumPy array of strings
# (2-D as long as every record has the same number of "::"-separated fields).
ratings, movies, users = (
    np.array(rows) for rows in (ratings_list, movies_list, users_list)
)

In [5]:
# Ratings: all four fields are numeric strings, so the whole frame is cast to int.
ratings_df = pd.DataFrame(ratings, columns = ['UserID', 'MovieID', 'Rating', 'Timestamp'], dtype = int)
# Movies: Title and Genres stay as strings; only the key column is converted below.
movies_df = pd.DataFrame(movies, columns = ['MovieID', 'Title', 'Genres'])
# Convert the key column in one vectorised call (instead of element-wise via .apply)
# so it has a numeric dtype and can be merged with ratings_df['MovieID'].
movies_df['MovieID'] = pd.to_numeric(movies_df['MovieID'])

In [6]:
# Preview the first rows of the ratings table to sanity-check the parsed columns.
ratings_df.head()

Unnamed: 0,UserID,MovieID,Rating,Timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [7]:
# Reshape the long ratings table into a user x movie matrix:
# one row per UserID, one column per MovieID, cell value = Rating.
# User/movie pairs without a rating come out of the pivot as NaN and are filled with 0.
R_df = (
    ratings_df
    .pivot(index = 'UserID', columns = 'MovieID', values = 'Rating')
    .fillna(0)
)
R_df.head()

MovieID,1,2,3,4,5,6,7,8,9,10,...,3943,3944,3945,3946,3947,3948,3949,3950,3951,3952
UserID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
# DataFrame.as_matrix() was deprecated and later removed from pandas; .values yields the same
# 2-D NumPy array and is available in both old and current pandas versions.
R = R_df.values
# Mean rating per user (row). Unrated movies were filled with 0 and therefore count towards the mean.
user_ratings_mean = np.mean(R, axis = 1)
# Subtract each user's mean from their row; reshape(-1, 1) turns the means into a column
# vector so the subtraction broadcasts across all movie columns.
R_normalized = R - user_ratings_mean.reshape(-1, 1)

In [9]:
from scipy.sparse.linalg import svds

# Number of singular values/vectors requested from the truncated SVD.
n_latent_factors = 50

# Factorise the de-meaned rating matrix. `sigma` comes back as a 1-D vector of singular
# values; `V` is used as the right-hand factor without transposing in the reconstruction
# step, i.e. it already has one row per latent factor.
U, sigma, V = svds(R_normalized, k = n_latent_factors)

In [10]:
# Turn the 1-D vector of singular values into a diagonal matrix. The ndim guard keeps this
# cell idempotent: np.diag on an already-diagonal matrix would extract the vector again,
# and the matrix product below would then fail when the cell is run a second time.
if sigma.ndim == 1:
    sigma = np.diag(sigma)
# Rebuild the low-rank approximation of the rating matrix and add each user's mean back
# (it was subtracted before the decomposition).
all_user_predicted_ratings = np.dot(np.dot(U, sigma), V) + user_ratings_mean.reshape(-1, 1)

In [11]:
# Wrap the predicted-rating matrix in a DataFrame whose columns are the MovieIDs of R_df.
# No index is passed, so rows get a default 0-based RangeIndex: row i holds the
# predictions for the i-th user row of R_df.
pred_df = pd.DataFrame(
    data = all_user_predicted_ratings,
    columns = R_df.columns,
)
pred_df.head()

MovieID,1,2,3,4,5,6,7,8,9,10,...,3943,3944,3945,3946,3947,3948,3949,3950,3951,3952
0,4.288861,0.143055,-0.19508,-0.018843,0.012232,-0.176604,-0.07412,0.141358,-0.059553,-0.19595,...,0.027807,0.00164,0.026395,-0.022024,-0.085415,0.403529,0.105579,0.031912,0.05045,0.08891
1,0.744716,0.169659,0.335418,0.000758,0.022475,1.35305,0.051426,0.071258,0.161601,1.567246,...,-0.056502,-0.013733,-0.01058,0.062576,-0.016248,0.15579,-0.418737,-0.101102,-0.054098,-0.140188
2,1.818824,0.456136,0.090978,-0.043037,-0.025694,-0.158617,-0.131778,0.098977,0.030551,0.73547,...,0.040481,-0.005301,0.012832,0.029349,0.020866,0.121532,0.076205,0.012345,0.015148,-0.109956
3,0.408057,-0.07296,0.039642,0.089363,0.04195,0.237753,-0.049426,0.009467,0.045469,-0.11137,...,0.008571,-0.005425,-0.0085,-0.003417,-0.083982,0.094512,0.057557,-0.02605,0.014841,-0.034224
4,1.574272,0.021239,-0.0513,0.246884,-0.032406,1.552281,-0.19963,-0.01492,-0.060498,0.450512,...,0.110151,0.04601,0.006934,-0.01594,-0.05008,-0.052539,0.507189,0.03383,0.125706,0.199244


In [24]:
def recommend_movies(predictions_df, userID, movies_df, original_ratings_df, num_recommendations=5):
    """Recommend the highest-predicted movies that a user has not rated yet.

    Parameters
    ----------
    predictions_df : pandas.DataFrame
        Predicted ratings with one row per user and one column per MovieID.
        The user's row is looked up by position ``userID - 1``, so row 0 must
        belong to UserID 1. The column index must be named ``'MovieID'``.
    userID : int
        1-based user identifier.
    movies_df : pandas.DataFrame
        Movie metadata containing at least a ``'MovieID'`` column.
    original_ratings_df : pandas.DataFrame
        Observed ratings with at least ``'UserID'``, ``'MovieID'`` and ``'Rating'`` columns.
    num_recommendations : int, default 5
        Maximum number of movies to recommend.

    Returns
    -------
    user_full : pandas.DataFrame
        The user's existing ratings joined with the movie metadata, sorted by
        ``'Rating'`` in descending order.
    recommendations : pandas.DataFrame
        Rows of ``movies_df`` the user has not rated, ordered by predicted rating
        (highest first) and truncated to ``num_recommendations``. The predicted
        score itself is not included in the returned columns.

    Raises
    ------
    IndexError
        If ``userID`` is smaller than 1 or lies beyond the last row of ``predictions_df``.
    """
    # UserID starts at 1, while row positions start at 0.
    user_row_number = userID - 1
    if user_row_number < 0:
        # A negative position would silently wrap around to some other user's row.
        raise IndexError("userID must be >= 1, got {0}".format(userID))

    # Get and sort the user's predictions. Use the frame handed in by the caller,
    # not a notebook-global one.
    sorted_user_predictions = predictions_df.iloc[user_row_number].sort_values(ascending=False)

    # Get the user's data and merge in the movie information.
    user_data = original_ratings_df[original_ratings_df.UserID == userID]
    user_full = (
        user_data
        .merge(movies_df, how='left', left_on='MovieID', right_on='MovieID')
        .sort_values(['Rating'], ascending=False)
    )

    print("User {0} has already rated {1} movies.".format(userID, user_full.shape[0]))
    print("Recommending highest {0} predicted ratings movies not already rated.".format(num_recommendations))

    # Two-column frame (MovieID, Predictions). The value column is named after the row
    # label of the selected user, so rename it via the Series' own name instead of
    # assuming that label equals the row position.
    predictions_lookup = (
        pd.DataFrame(sorted_user_predictions)
        .reset_index()
        .rename(columns={sorted_user_predictions.name: 'Predictions'})
    )

    # Recommend the highest predicted rating movies that the user hasn't seen yet.
    unseen_movies = movies_df[~movies_df['MovieID'].isin(user_full['MovieID'])]
    recommendations = (
        unseen_movies
        .merge(predictions_lookup, how='left', left_on='MovieID', right_on='MovieID')
        .sort_values('Predictions', ascending=False)
        # Keep the top rows and drop the trailing 'Predictions' column.
        .iloc[:num_recommendations, :-1]
    )

    return user_full, recommendations

In [26]:
# Top 25 recommendations for user 773, plus the movies that user has already rated.
already_rated, predictions = recommend_movies(
    predictions_df = pred_df,
    userID = 773,
    movies_df = movies_df,
    original_ratings_df = ratings_df,
    num_recommendations = 25,
)

User 773 has already rated 49 movies.
Recommending highest 25 predicted ratings movies not already rated.


In [27]:
# Show up to 25 of the movies the user has already rated; recommend_movies returns them
# sorted by rating, highest first.
already_rated.head(25)

Unnamed: 0,UserID,MovieID,Rating,Timestamp,Title,Genres
1,773,589,5,975617233,Terminator 2: Judgment Day (1991),Action|Sci-Fi|Thriller
32,773,2571,5,975438546,"Matrix, The (1999)",Action|Sci-Fi|Thriller
40,773,2762,5,975441294,"Sixth Sense, The (1999)",Thriller
37,773,3554,5,975441454,Love and Basketball (2000),Drama|Romance
0,773,3793,4,975441350,X-Men (2000),Action|Sci-Fi
35,773,2580,4,975441077,Go (1999),Crime
25,773,2394,4,975441161,"Prince of Egypt, The (1998)",Animation|Musical
29,773,1911,4,975441030,Doctor Dolittle (1998),Comedy
30,773,2714,4,975441350,"Wood, The (1999)",Drama
33,773,163,4,975438520,Desperado (1995),Action|Romance|Thriller


In [28]:
# Display the recommended movies returned by recommend_movies (not yet rated by the user).
predictions

Unnamed: 0,MovieID,Title,Genres
1208,1240,"Terminator, The (1984)",Action|Sci-Fi|Thriller
3465,3578,Gladiator (2000),Action|Drama
3064,3175,Galaxy Quest (1999),Adventure|Comedy|Sci-Fi
2588,2683,Austin Powers: The Spy Who Shagged Me (1999),Comedy
1184,1214,Alien (1979),Action|Horror|Sci-Fi|Thriller
2608,2706,American Pie (1999),Comedy
2271,2355,"Bug's Life, A (1998)",Animation|Children's|Comedy
584,593,"Silence of the Lambs, The (1991)",Drama|Thriller
2971,3081,Sleepy Hollow (1999),Horror|Romance
2533,2628,Star Wars: Episode I - The Phantom Menace (1999),Action|Adventure|Fantasy|Sci-Fi
