In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn import model_selection
from sklearn.model_selection import train_test_split
import math
import tensorflow as tf
from scipy.sparse import csr_matrix
from fastpivot import pivot_table, pivot_sparse
from sklearn.metrics import pairwise_distances
from surprise import KNNWithMeans
from scipy.sparse import csr_matrix, issparse 
from scipy.stats import pearsonr

In [2]:
movies = pd.read_csv('../datasets/movielens_original/movies.csv')
ratings = pd.read_csv('../datasets/movielens_original/ratings.csv')
df = ratings.merge(movies, how = 'inner', on='movieId')
df.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,296,5.0,1147880044,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller
1,3,296,5.0,1439474476,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller
2,4,296,4.0,1573938898,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller
3,5,296,4.0,830786155,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller
4,7,296,4.0,835444730,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller


In [7]:
def create_X(df): 
    """ 
    Generates a sparse matrix from ratings dataframe. 
        
    Args: 
        df: pandas dataframe 
        
    Returns: 
        X: sparse matrix 
        user_mapper: dict that maps user id's to user indices 
        user_inv_mapper: dict that maps user indices to user id's 
        movie_mapper: dict that maps movie id's to movie indices 
        movie_inv_mapper: dict that maps movie indices to movie id's 
    """ 
    N = ratings['userId'].nunique() 
    M = ratings['movieId'].nunique() 

    user_mapper = dict(zip(np.unique(ratings["userId"]), list(range(N)))) 
    movie_mapper = dict(zip(np.unique(ratings["movieId"]), list(range(M)))) 
        
    user_inv_mapper = dict(zip(list(range(N)), np.unique(df["userId"]))) 
    movie_inv_mapper = dict(zip(list(range(M)), np.unique(df["movieId"]))) 
        
    user_index = [user_mapper[i] for i in df['userId']] 
    movie_index = [movie_mapper[i] for i in df['movieId']] 

    X = csr_matrix((ratings["rating"], (user_index, movie_index)), shape=(N, M)) 
        
    return X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper

In [8]:
X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper = create_X(ratings)
user_movie_matrix = pd.DataFrame.sparse.from_spmatrix(X)

In [142]:
sampled = user_movie_matrix.sample(n = 7000, axis = 0)
indexes = sampled.index

In [143]:
user_input = pd.read_csv('../datasets/input/ratings_custom_wilson.csv')

In [144]:
inputted = pd.DataFrame(np.zeros((1, sampled.shape[1])))

for n in user_input.movieId:
    temp = movie_mapper[n]
    inputted[temp] = user_input.iloc[0]['rating']

In [145]:
user_similarity_cosine = cosine_similarity(sampled, inputted)
user_similarity_cosine = pd.DataFrame(user_similarity_cosine, index = indexes+1)

In [146]:
n = 10
similar_users = user_similarity_cosine.sort_values(by = 0, ascending=False)[:n]
# print(f'The similar users for user are', similar_users)

In [149]:
movieId = {}
title = []
watched_movieId = list(user_input.movieId)

for n in range(3):
    user_n = df[df.userId == similar_users.index[n]][['movieId', 'title', 'rating']]
    user_n = user_n[user_n.rating >= 4]
    for i in user_n.index:
        if user_n.loc[i].movieId not in movieId.keys() and user_n.loc[i].movieId not in watched_movieId:
            movieId[user_n.loc[i].movieId] = user_n.loc[i].rating
            title.append(user_n.loc[i].title)
        elif user_n.loc[i].movieId in movieId.keys() and user_n.loc[i].movieId not in watched_movieId:
            movieId[user_n.loc[i].movieId] = (movieId[user_n.loc[i].movieId] + user_n.loc[i].rating) / 2

recommended = pd.DataFrame([movieId.keys(), title,movieId.values()]).T
recommended.columns = ['movieId', 'title', 'rating']
recommended.sort_values(by='rating', ascending= False)

Unnamed: 0,movieId,title,rating
2,527,Schindler's List (1993),5.0
7,110102,Captain America: The Winter Soldier (2014),5.0
17,91974,Underworld: Awakening (2012),4.5
8,115617,Big Hero 6 (2014),4.5
28,69757,(500) Days of Summer (2009),4.5
26,68954,Up (2009),4.5
16,79132,Inception (2010),4.5
12,6754,Underworld (2003),4.5
33,168206,Amor de mis amores (2014),4.5
5,7153,"Lord of the Rings: The Return of the King, The...",4.5
