## Item based Recommendation system using cosine metric

In [1]:
import pandas as pd
import numpy as np

In [3]:
users = pd.read_csv('u.user', sep='|', names=['user_id','age','gender','occupation','zip_code'])
ratings = pd.read_csv('u.data', sep='\t', names=['user_id','item_id','rating','timestamp'])
movies = pd.read_csv('u.item', sep='|', names=['movie_id','movie_title','release_date','video_release_date','IMDb URL','unknown','Action','Adventure','Animation','Children','Comedy','Crime','Documentary','Drama','Fantasy','Film-Noir','Horror','Musical','Mystery','Romance','Sci-Fi','Thriller','War','Western'], encoding='latin-1')
movies = movies.iloc[:,[0,1]]

In [4]:
users.head()

Unnamed: 0,user_id,age,gender,occupation,zip_code
0,1,24,M,technician,85711
1,2,53,F,other,94043
2,3,23,M,writer,32067
3,4,24,M,technician,43537
4,5,33,F,other,15213


In [5]:
ratings.head()

Unnamed: 0,user_id,item_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [6]:
movies.head()

Unnamed: 0,movie_id,movie_title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


Ratings dataframe looks like Association matrix

In [10]:
#build association matrix - we need pivot table

learningmatrix = ratings.pivot_table(index=['item_id'],columns=['user_id'],values='rating').reset_index(drop=True)
learningmatrix

user_id,1,2,3,4,5,6,7,8,9,10,...,934,935,936,937,938,939,940,941,942,943
0,5.0,4.0,,,4.0,4.0,,,,4.0,...,2.0,3.0,4.0,,4.0,,,5.0,,
1,3.0,,,,3.0,,,,,,...,4.0,,,,,,,,,5.0
2,4.0,,,,,,,,,,...,,,4.0,,,,,,,
3,3.0,,,,,,5.0,,,4.0,...,5.0,,,,,,2.0,,,
4,3.0,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1677,,,,,,,,,,,...,,,,,,,,,,
1678,,,,,,,,,,,...,,,,,,,,,,
1679,,,,,,,,,,,...,,,,,,,,,,
1680,,,,,,,,,,,...,,,,,,,,,,


In [12]:
# Treat NaN (NULL) values

learningmatrix.fillna(0,inplace=True)
learningmatrix.head()


user_id,1,2,3,4,5,6,7,8,9,10,...,934,935,936,937,938,939,940,941,942,943
0,5.0,4.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,4.0,...,2.0,3.0,4.0,0.0,4.0,0.0,0.0,5.0,0.0,0.0
1,3.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,...,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0
2,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,4.0,...,5.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0
4,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
## Figure out simialrity between items and come up with similarity matrix

from sklearn.metrics import pairwise_distances

movie_similarity = 1- pairwise_distances(learningmatrix,metric='cosine')

np.fill_diagonal(movie_similarity,0)

ratings_matrix = pd.DataFrame(movie_similarity)
ratings_matrix.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1672,1673,1674,1675,1676,1677,1678,1679,1680,1681
0,0.0,0.402382,0.330245,0.454938,0.286714,0.116344,0.620979,0.481114,0.496288,0.273935,...,0.035387,0.0,0.0,0.0,0.035387,0.0,0.0,0.0,0.047183,0.047183
1,0.402382,0.0,0.273069,0.502571,0.318836,0.083563,0.383403,0.337002,0.255252,0.171082,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.078299,0.078299
2,0.330245,0.273069,0.0,0.324866,0.212957,0.106722,0.372921,0.200794,0.273669,0.158104,...,0.0,0.0,0.0,0.0,0.032292,0.0,0.0,0.0,0.0,0.096875
3,0.454938,0.502571,0.324866,0.0,0.334239,0.090308,0.489283,0.490236,0.419044,0.252561,...,0.0,0.0,0.094022,0.094022,0.037609,0.0,0.0,0.0,0.056413,0.075218
4,0.286714,0.318836,0.212957,0.334239,0.0,0.037299,0.334769,0.259161,0.272448,0.055453,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.094211


In [16]:
try:
    user_inp=input('Enter the reference movie title based on which recommendations are to be made: ')
    inp=movies[movies['movie_title']==user_inp].index.tolist()
   
    inp=inp[0]
   
   
    movies['similarity'] = ratings_matrix.iloc[inp]
    #print(movies.head(5))
    print("Recommended movies based on your choice of ",user_inp ,": \n", movies.sort_values( ["similarity"], ascending = False )[1:10])
   
except:
    print("The movie name you have entered does not exist in the list, however, below are the top movies recommended in general")
    print(movies.head(10))

Enter the reference movie title based on which recommendations are to be made: Copycat (1995)
Recommended movies based on your choice of  Copycat (1995) : 
      movie_id                        movie_title  similarity
218       219  Nightmare on Elm Street, A (1984)    0.472725
53         54                    Outbreak (1995)    0.472399
233       234                        Jaws (1975)    0.450780
52         53        Natural Born Killers (1994)    0.445242
97         98   Silence of the Lambs, The (1991)    0.440996
671       672                    Candyman (1992)    0.435349
199       200                Shining, The (1980)    0.430292
664       665                     Alien 3 (1992)    0.425298
558       559  Interview with the Vampire (1994)    0.424652
