In [15]:
# Source: https://github.com/rakeshranjan001/Movie-Recommendation-System-Collaborative-filtering-MovieLens-100k-/blob/master/Python%20Code.py

In [16]:
import numpy as np
import pandas as pd
from sklearn.metrics import pairwise_distances
from scipy.spatial.distance import cosine, correlation

#Loading movielens data

#User's data
users_cols = ['user_id', 'age', 'sex', 'occupation', 'zip_code']
users = pd.read_csv('ml-100k/u.user', sep='|', names=users_cols, parse_dates=True) 
#Ratings
rating_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']
ratings = pd.read_csv('ml-100k/u.data', sep='\t', names=rating_cols)
#Movies
movie_cols = ['movie_id', 'title', 'release_date', 'video_release_date', 'imdb_url']
movies = pd.read_csv('ml-100k/u.item', sep='|', names=movie_cols, usecols=range(5),encoding='latin-1')

In [17]:
#Merging movie data with their ratings
movie_ratings = pd.merge(movies, ratings)
#merging movie_ratings data with the User's dataframe
df = pd.merge(movie_ratings, users)

In [18]:
df.head()

Unnamed: 0,movie_id,title,release_date,video_release_date,imdb_url,user_id,rating,unix_timestamp,age,sex,occupation,zip_code
0,1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,308,4,887736532,60,M,retired,95076
1,4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,308,5,887737890,60,M,retired,95076
2,5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),308,4,887739608,60,M,retired,95076
3,7,Twelve Monkeys (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Twelve%20Monk...,308,4,887738847,60,M,retired,95076
4,8,Babe (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Babe%20(1995),308,5,887736696,60,M,retired,95076


In [19]:
#pre-processing
#dropping colums that aren't needed
df.drop(df.columns[[3,4,7]], axis=1, inplace=True)
ratings.drop( "unix_timestamp", inplace = True, axis = 1 ) 
movies.drop(movies.columns[[3,4]], inplace = True, axis = 1 )

In [20]:
df.head()

Unnamed: 0,movie_id,title,release_date,user_id,rating,age,sex,occupation,zip_code
0,1,Toy Story (1995),01-Jan-1995,308,4,60,M,retired,95076
1,4,Get Shorty (1995),01-Jan-1995,308,5,60,M,retired,95076
2,5,Copycat (1995),01-Jan-1995,308,4,60,M,retired,95076
3,7,Twelve Monkeys (1995),01-Jan-1995,308,4,60,M,retired,95076
4,8,Babe (1995),01-Jan-1995,308,5,60,M,retired,95076


In [21]:
#Pivot Table(This creates a matrix of users and movie_ratings)
ratings_matrix = ratings.pivot_table(index=['movie_id'],columns=['user_id'],values='rating').reset_index(drop=True)
ratings_matrix.fillna( 0, inplace = True )


In [23]:
#Cosine Similarity(Creates a cosine matrix of similaraties ..... which is the pairwise distances
# between two items )

movie_similarity = 1 - pairwise_distances( ratings_matrix.values, metric="cosine" )
np.fill_diagonal( movie_similarity, 0 ) 
ratings_matrix = pd.DataFrame( movie_similarity )

In [24]:
ratings_matrix

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1672,1673,1674,1675,1676,1677,1678,1679,1680,1681
0,0.000000,0.402382,0.330245,0.454938,0.286714,0.116344,0.620979,0.481114,0.496288,0.273935,...,0.035387,0.0,0.000000,0.000000,0.035387,0.0,0.0,0.0,0.047183,0.047183
1,0.402382,0.000000,0.273069,0.502571,0.318836,0.083563,0.383403,0.337002,0.255252,0.171082,...,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.078299,0.078299
2,0.330245,0.273069,0.000000,0.324866,0.212957,0.106722,0.372921,0.200794,0.273669,0.158104,...,0.000000,0.0,0.000000,0.000000,0.032292,0.0,0.0,0.0,0.000000,0.096875
3,0.454938,0.502571,0.324866,0.000000,0.334239,0.090308,0.489283,0.490236,0.419044,0.252561,...,0.000000,0.0,0.094022,0.094022,0.037609,0.0,0.0,0.0,0.056413,0.075218
4,0.286714,0.318836,0.212957,0.334239,0.000000,0.037299,0.334769,0.259161,0.272448,0.055453,...,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.094211
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1677,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.000000,0.000000,0.000000,0.0,1.0,1.0,0.000000,0.000000
1678,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.000000,0.000000,0.000000,1.0,0.0,1.0,0.000000,0.000000
1679,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.000000,0.000000,0.000000,1.0,1.0,0.0,0.000000,0.000000
1680,0.047183,0.078299,0.000000,0.056413,0.000000,0.000000,0.051498,0.082033,0.057360,0.000000,...,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000


In [48]:
movies.sample(10)

Unnamed: 0,movie_id,title,release_date,similarity
678,679,Conan the Barbarian (1981),01-Jan-1981,0.091899
1128,1129,Chungking Express (1994),16-Feb-1996,0.067402
949,950,Georgia (1995),01-Jan-1995,0.074231
349,350,Fallen (1998),16-Jan-1998,0.048087
1666,1667,"Next Step, The (1995)",13-Jun-1997,0.0
947,948,Booty Call (1997),28-Feb-1997,0.018525
1595,1596,Nemesis 2: Nebula (1995),01-Jan-1995,0.0
254,255,My Best Friend's Wedding (1997),20-Jun-1997,0.120875
1097,1098,Flirting With Disaster (1996),22-Mar-1996,0.288783
253,254,Batman & Robin (1997),20-Jun-1997,0.039369


In [53]:
#Recommender

try:
    #user_inp=input('Enter the reference movie title based on which recommendations are to be made: ')
    user_inp="Star Wars (1977)"
    inp=movies[movies['title']==user_inp].index.tolist()
    inp=inp[0]
    
    movies['similarity'] = ratings_matrix.iloc[inp]
    movies.columns = ['movie_id', 'title', 'release_date','similarity']
    movies.head(5)
    
except:
    print("Sorry, the movie is not in the database!")

In [54]:
print("Recommended movies based on your choice of ",user_inp ,": \n", movies.sort_values( ["similarity"], ascending = False )[1:10])

Recommended movies based on your choice of  Star Wars (1977) : 
      movie_id                                      title release_date  \
173       174             Raiders of the Lost Ark (1981)  01-Jan-1981   
171       172            Empire Strikes Back, The (1980)  01-Jan-1980   
0           1                           Toy Story (1995)  01-Jan-1995   
126       127                      Godfather, The (1972)  01-Jan-1972   
120       121              Independence Day (ID4) (1996)  03-Jul-1996   
209       210  Indiana Jones and the Last Crusade (1989)  01-Jan-1989   
99        100                               Fargo (1996)  14-Feb-1997   
97         98           Silence of the Lambs, The (1991)  01-Jan-1991   
221       222            Star Trek: First Contact (1996)  22-Nov-1996   

     similarity  
173    0.764885  
171    0.749819  
0      0.734572  
126    0.697332  
120    0.692837  
209    0.689343  
99     0.686533  
97     0.676428  
221    0.673975  
