In [67]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

In [68]:
# One-off dataset download, kept commented out so re-running the notebook does not
# fetch the archive again.
# NOTE(review): unzip presumably extracts into a subdirectory, while the loading
# cell reads 'movies.csv' / 'ratings.csv' from the working directory — confirm the
# files are moved or the paths adjusted. The outputs further down only show userId
# values up to 610, which looks like a smaller MovieLens variant than ml-latest;
# verify which archive was actually used.
#!wget http://files.grouplens.org/datasets/movielens/ml-latest.zip
#!unzip ml-latest.zip

In [69]:
# Read only the columns the recommender uses from each CSV.
movies_df = pd.read_csv(
    'movies.csv',
    usecols=['movieId', 'title'],
)
ratings_df = pd.read_csv(
    'ratings.csv',
    usecols=['userId', 'movieId', 'rating'],
)

In [70]:
# Attach each rating to its movie title through the shared movieId key.
# The ratings frame is named `ratings_df` (see the loading cell); the previous
# `rating_df` spelling raised NameError on a fresh kernel.
df = pd.merge(ratings_df, movies_df, on='movieId')
movies_df.head()

Unnamed: 0,movieId,title
0,1,Toy Story (1995)
1,2,Jumanji (1995)
2,3,Grumpier Old Men (1995)
3,4,Waiting to Exhale (1995)
4,5,Father of the Bride Part II (1995)


In [71]:
# Preview the first rows of the ratings table (userId, movieId, rating).
ratings_df.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [72]:
# Preview the merged frame: one row per rating, with the movie title attached.
df.head()

Unnamed: 0,userId,movieId,rating,title
0,1,1,4.0,Toy Story (1995)
1,5,1,4.0,Toy Story (1995)
2,7,1,4.5,Toy Story (1995)
3,15,1,2.5,Toy Story (1995)
4,17,1,4.5,Toy Story (1995)


In [73]:
# Keep only complete records: a row with any missing value is discarded.
MovieRatings = df.dropna(axis='index', how='any')
MovieRatings.head()

Unnamed: 0,userId,movieId,rating,title
0,1,1,4.0,Toy Story (1995)
1,5,1,4.0,Toy Story (1995)
2,7,1,4.5,Toy Story (1995)
3,15,1,2.5,Toy Story (1995)
4,17,1,4.5,Toy Story (1995)


In [74]:
# Count how many ratings each title received; the result has one row per title
# with the columns 'title' and 'totalRatingCount'.
NoOfRatings = (
    MovieRatings
    .groupby('title')['rating']
    .count()
    .rename('totalRatingCount')
    .reset_index()
)
NoOfRatings.head()

Unnamed: 0,title,totalRatingCount
0,'71 (2014),1
1,'Hellboy': The Seeds of Creation (2004),1
2,'Round Midnight (1986),2
3,'Salem's Lot (2004),1
4,'Til There Was You (1997),2


In [75]:
# Distribution of the per-title rating counts; this informs the popularity
# threshold chosen a few cells below.
NoOfRatings['totalRatingCount'].describe()

count    9719.000000
mean       10.375141
std        22.406220
min         1.000000
25%         1.000000
50%         3.000000
75%         9.000000
max       329.000000
Name: totalRatingCount, dtype: float64

In [76]:
# Bring the per-title rating count back onto every individual rating row.
MovieRatings_NoOfRatings = pd.merge(MovieRatings, NoOfRatings, on='title')
MovieRatings_NoOfRatings.head()

Unnamed: 0,userId,movieId,rating,title,totalRatingCount
0,1,1,4.0,Toy Story (1995),215
1,5,1,4.0,Toy Story (1995),215
2,7,1,4.5,Toy Story (1995),215
3,15,1,2.5,Toy Story (1995),215
4,17,1,4.5,Toy Story (1995),215


In [77]:
# Minimum number of ratings a title needs to be kept for the recommender.
popularityThreshold = 50
# Select the rating rows whose title meets the threshold.
popularMovies = MovieRatings_NoOfRatings[
    MovieRatings_NoOfRatings['totalRatingCount'] >= popularityThreshold
]
popularMovies.head()

Unnamed: 0,userId,movieId,rating,title,totalRatingCount
0,1,1,4.0,Toy Story (1995),215
1,5,1,4.0,Toy Story (1995),215
2,7,1,4.5,Toy Story (1995),215
3,15,1,2.5,Toy Story (1995),215
4,17,1,4.5,Toy Story (1995),215


In [78]:
# (rows, columns) of the rating rows that survived the popularity filter.
popularMovies.shape

(41362, 5)

In [79]:
# Build the title x user rating matrix: one row per title, one column per userId.
# pivot_table aggregates repeated (title, userId) pairs with its default aggfunc;
# title/user pairs with no rating are filled with 0.
movieFeatures = (
    popularMovies
    .pivot_table(values='rating', index='title', columns='userId')
    .fillna(0)
)

In [80]:
# Preview the pivoted matrix (rows are titles, columns are userIds, 0 = no rating).
movieFeatures.head()

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10 Things I Hate About You (1999),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,3.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0
12 Angry Men (1957),0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,...,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2001: A Space Odyssey (1968),0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,...,0.0,0.0,5.0,0.0,0.0,5.0,0.0,3.0,0.0,4.5
28 Days Later (2002),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.5,0.0,5.0
300 (2007),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,...,0.0,0.0,0.0,0.0,3.0,0.0,0.0,5.0,0.0,4.0


In [81]:
# Convert the dense title x user array into SciPy's CSR sparse format; row order
# matches movieFeatures.index, which later cells rely on to map positions to titles.
movieFeatures_matrix = csr_matrix(movieFeatures.values)

In [82]:
# Brute-force nearest-neighbour search over the movie rating vectors.
# No metric is passed, so the estimator default applies (the repr printed after
# model.fit shows metric='minkowski', p=2, i.e. Euclidean distance).
# NOTE(review): with unrated entries stored as 0, Euclidean distance may favour
# titles that simply have few ratings; consider metric='cosine' — confirm against
# recommendation quality before changing, since it alters every result below.
model = NearestNeighbors(algorithm='brute')

In [83]:
# Fit the neighbour search on the sparse title x user matrix (one sample per title).
model.fit(movieFeatures_matrix)

NearestNeighbors(algorithm='brute', leaf_size=30, metric='minkowski',
         metric_params=None, n_jobs=None, n_neighbors=5, p=2, radius=1.0)

In [84]:
# (number of titles, number of users) in the pivoted matrix.
movieFeatures.shape

(450, 606)

In [86]:
# Rating vector of the title at row position 1, reshaped into a single-row 2-D
# array: the same expression is passed to model.kneighbors in the next cell.
movieFeatures.iloc[1,:].values.reshape(1,-1)

array([[0. , 0. , 0. , 5. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 4. , 0. , 5. , 0. , 0. , 0. , 0. , 3. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 2.5, 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 3. , 0. , 5. , 4. , 5. , 0. , 4.5,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 4. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 4. , 0. , 0. , 0. , 0. , 4.5,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 5. , 0. , 4.5, 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 5. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 5. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 4.5, 0. , 0. , 0. 

In [87]:
# Ask the fitted model for the 5 nearest titles to the title at row position 1.
# `distance` holds the distances and `suggestion` the row positions of the
# neighbours, each with one row per query vector.
distance, suggestion = model.kneighbors(movieFeatures.iloc[1,:].values.reshape(1,-1), n_neighbors=5)

In [88]:
# Row positions (into movieFeatures) of the neighbours found above.
suggestion

array([[  1,  86, 429, 439, 369]])

In [91]:
# Map the neighbour row positions back to titles and print one title per line.
print(*movieFeatures.index[suggestion[0]], sep='\n')

12 Angry Men (1957)
Charlie and the Chocolate Factory (2005)
War of the Worlds (2005)
Wild Wild West (1999)
Space Jam (1996)


In [92]:
# Row position of a given title inside movieFeatures.index (first match).
np.where(movieFeatures.index=="12 Angry Men (1957)")[0][0]

1

In [94]:
def suggest_recommendations_for(movieName, n_neighbors=5):
    """Print the titles of the movies nearest to ``movieName``.

    Looks up the row of ``movieName`` in the notebook-level ``movieFeatures``
    frame, queries the notebook-level fitted ``model`` with that row and prints
    one neighbouring title per line. The neighbours are printed exactly as
    ``model.kneighbors`` returns them, so the queried title itself can appear
    in the list.

    Parameters
    ----------
    movieName : str
        Title exactly as it appears in ``movieFeatures.index``.
    n_neighbors : int, optional
        Number of titles to print. Defaults to 5, the previously hard-coded value.

    Raises
    ------
    IndexError
        If ``movieName`` is not present in ``movieFeatures.index``.
    """
    matches = np.where(movieFeatures.index == movieName)[0]
    if matches.size == 0:
        # Same exception type as the old bare `[0][0]` lookup, but with a message
        # that names the missing title instead of "index 0 is out of bounds".
        raise IndexError(
            'Movie {!r} not found in movieFeatures.index'.format(movieName)
        )
    # This is a row position in the pivot table, not a MovieLens movieId.
    row_pos = matches[0]
    query = movieFeatures.iloc[row_pos, :].values.reshape(1, -1)
    # Only the neighbour positions are needed; the distances are discarded.
    _, suggestion = model.kneighbors(query, n_neighbors=n_neighbors)
    for i in suggestion[0]:
        print(movieFeatures.index[i])

In [95]:
# Example call: print the nearest titles for one movie from the popular set.
suggest_recommendations_for("Space Jam (1996)")

Space Jam (1996)
Wild Wild West (1999)
Hook (1991)
Day After Tomorrow, The (2004)
Last Action Hero (1993)
