# CA05-B - kNN Based Recommender Engine
### Vania Revelina

In [1]:
# import pandas package
import pandas as pd

# load the movie recommendation dataset from the course GitHub repository
# into a dataframe: movies
movies = pd.read_csv(
    'https://github.com/ArinB/CA05-kNN/raw/master/movies_recommendation_data.csv'
)

# preview the first 5 rows of the raw dataframe
movies.head()

Unnamed: 0,Movie ID,Movie Name,IMDB Rating,Biography,Drama,Thriller,Comedy,Crime,Mystery,History,Label
0,58,The Imitation Game,8.0,1,1,1,0,0,0,0,0
1,8,Ex Machina,7.7,0,1,0,0,0,1,0,0
2,46,A Beautiful Mind,8.2,1,1,0,0,0,0,0,0
3,62,Good Will Hunting,8.3,0,1,0,0,0,0,0,0
4,97,Forrest Gump,8.8,0,1,0,0,0,0,0,0


In [2]:
# index the dataframe by movie title and drop Movie ID and Label:
# they are not attributes of the movie, so they must not take part in the
# similarity search
#
# the result is rebound to `movies` instead of being mutated in place, and the
# guard makes this cell safe to re-run: once 'Movie Name' is the index there is
# nothing left to do (the in-place version raised a KeyError on a second run
# because the columns it referenced had already been consumed)
if movies.index.name != 'Movie Name':
    movies = movies.set_index('Movie Name').drop(columns=['Movie ID', 'Label'])

# look at first 5 rows
movies.head()

Unnamed: 0_level_0,IMDB Rating,Biography,Drama,Thriller,Comedy,Crime,Mystery,History
Movie Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
The Imitation Game,8.0,1,1,1,0,0,0,0
Ex Machina,7.7,0,1,0,0,0,1,0
A Beautiful Mind,8.2,1,1,0,0,0,0,0
Good Will Hunting,8.3,0,1,0,0,0,0,0
Forrest Gump,8.8,0,1,0,0,0,0,0


In [3]:
# import NearestNeighbors package for our model
from sklearn.neighbors import NearestNeighbors

# build the NearestNeighbors model: an unsupervised learner for neighbor searches,
# configured here for brute-force search with the cosine metric
# our goal is to find the 5 nearest neighbors (5 most similar movies) to 'The Post'
# fit() returns the fitted estimator, so construction and fitting are chained
# on the dataframe containing all the movies
mod = NearestNeighbors(algorithm='brute', metric='cosine').fit(movies)

# display the fitted model
mod

NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
                 metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                 radius=1.0)

In [4]:
# for easier readability and reusability, I'll create a function
# its arguments are the movie name and the movie's attribute values (as a nested list)
# attribute values in order of: IMDB Rating, Biography, Drama, Thriller, Comedy, Crime, Mystery, History

def get_similar_movies(movie_name, movie_attributes, n_neighbors=5):
    '''
    Print the movies in the fitted model that are most similar to a query movie.

    Relies on two notebook-level objects: `mod`, the fitted NearestNeighbors
    model, and `movies`, the dataframe it was fitted on (indexed by movie name).

    movie_name type: str
        title of the query movie; only used in the printed heading
    movie_attributes type: nested list
        one inner list of attribute values, in the order
        IMDB Rating, Biography, Drama, Thriller, Comedy, Crime, Mystery, History
        e.g. [[7.2, 1, 1, 0, 0, 0, 0, 1]]
    n_neighbors type: int (default 5)
        how many similar movies to print

    returns: None (the recommendations are printed, not returned)
    '''
    # ask the fitted model for the n_neighbors nearest rows to the query
    # only the row positions are needed for printing, so the distances are discarded
    _, indices = mod.kneighbors(movie_attributes, n_neighbors=n_neighbors)

    # print the names of the most similar movies, nearest first
    print('Similar movies to {}...\n'.format(movie_name))
    # indices holds one row per query; only the first query's neighbors are shown
    for title in movies.index[indices[0]]:
        print('  ', title)
        
# recommend movies similar to 'The Post'
# attribute values in order of: IMDB Rating, Biography, Drama, Thriller, Comedy, Crime, Mystery, History
get_similar_movies(
    movie_name='The Post',
    movie_attributes=[[7.2, 1, 1, 0, 0, 0, 0, 1]],
)

Similar movies to The Post...

   12 Years a Slave
   Hacksaw Ridge
   Queen of Katwe
   The Wind Rises
   A Beautiful Mind
