In [1]:
import numpy as np
import pandas as pd

from scipy.sparse import csr_matrix

import pickle

# machine-learning libraries
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import pairwise
from sklearn.metrics.pairwise import cosine_similarity


In [2]:
R_df = pd.read_csv("./data/movie_matrix.csv", index_col=0)
movies_title_df = pd.read_csv("./data/movie_title.csv", index_col=0)


In [3]:
R_df.columns = movies_title_df["title"]

In [4]:
R_df

title,Toy Story (1995),Grumpier Old Men (1995),Heat (1995),Seven (a.k.a. Se7en) (1995),"Usual Suspects, The (1995)",From Dusk Till Dawn (1996),Bottle Rocket (1996),Braveheart (1995),Rob Roy (1995),Desperado (1995),...,Freaky Friday (2003),Weekend at Bernie's (1989),"Darjeeling Limited, The (2007)","American Werewolf in London, An (1981)",eXistenZ (1999),Terms of Endearment (1983),Little Nicky (2000),Joe Dirt (2001),RoboCop 2 (1990),"Poseidon Adventure, The (1972)"
0,4.0,4.0,4.0,5.0,5.0,3.0,5.0,4.0,5.0,5.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4.0,0.0,0.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
605,2.5,0.0,0.0,3.0,4.5,4.0,0.0,3.5,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0
606,4.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,2.5,2.0,0.0,4.5,4.5,3.0,0.0,4.0,0.0,3.0,...,0.0,0.5,0.0,0.0,4.5,0.0,2.5,1.0,1.5,0.0
608,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:
def recommend_col(query, k=10):
    """
    Filters and recommends the top k movies for any given input query based on a trained NMF model. 
    Returns a list of k movie ids.
    """
    with open('./distance_recommender.pkl', 'rb') as file:
        loaded_model = pickle.load(file)
    
    # 1. construct new_user-item dataframe given the query
    new_user_df =  pd.DataFrame(query, columns=movies_title_df["title"], index=["new_user"]).fillna(0)

    # 2. scoring
    similarity_scores, neighbor_ids = loaded_model.kneighbors(
    new_user_df,
    n_neighbors=15,
    return_distance=True
    )  
        
    neighbors_df = pd.DataFrame(
    data = {'neighbor_id': neighbor_ids[0], 'similarity_score': similarity_scores[0]}
    )
    # 3. ranking
    neighborhood = R_df.iloc[neighbor_ids[0]]
    neighborhood_filtered = neighborhood.drop(query.keys(), axis=1)
    df_score = neighborhood_filtered.sum()
    df_score_ranked = df_score.sort_values(ascending=False).index.tolist()
    recommendations = df_score_ranked[:3]
    return recommendations #, df_score.sort_values(ascending=False)


In [24]:
query = {"Toy Story (1995)": 5,
         "Grumpier Old Men (1995)":2,
         "Heat (1995)":3.5}

In [25]:
recommend_col(query, k=10)

['Independence Day (a.k.a. ID4) (1996)',
 'Broken Arrow (1996)',
 'Twister (1996)']