# Importing Required Modules and loading CSV files into panda dataframes

In [2]:
import pandas as pd
import numpy as np
import pickle
loc = "./Datasets/Movie_Lens_100k(Small)/"
links = pd.read_csv(loc+"links.csv")
movies = pd.read_csv(loc+"movies.csv")
ratings = pd.read_csv(loc+"new_ratings.csv")
tags = pd.read_csv(loc+"tags.csv")

# Creating 'stars' Numpy 2-d array

This 2-d array will be fed into the matrix_factorization function. 
1. Number of rows = number of users 
2. Number of columns = number of movies
3. stars[user][movie] = rating given by the user

In [13]:
rows = ratings['userId'].max() #number of users starts from 1 and ends at 671, changing it to 0-670, so total = 671
columns = len(movies.index)  #number of movies 0 to 9124, so total = 9125
stars = np.zeros((rows,columns))
for r in ratings.index:
    ith= int(ratings.loc[r,'userId'])-1 
    jth = int(ratings.loc[r,'movieId'])
    rated = ratings.loc[r,'rating']
    stars[ith][jth] = rated

# Calculating ' Estimated Ranking ' 2-d Array R

In [59]:
from sklearn.decomposition import NMF
model = NMF(n_components=10,max_iter=1000)
P = model.fit_transform(stars)
Q = model.fit_transform(stars.T)
R = np.dot(P,Q.T)

In [71]:
print(np.argsort(-R[0])[:5])
print(movies.iloc[535]['title'])
print(ratings[['userId','movieId','rating']].head())

[ 744  733  951 1006  535]
Fargo (1996)
   userId  movieId  rating
0       1       30     2.5
1       1      833     3.0
2       1      859     3.0
3       1      906     2.0
4       1      931     4.0


# Pickling Rate matrix and Estimated Rating matrix

In [54]:
save_file = open("rating.pickle","wb")
pickle.dump(stars,save_file)
save_file.close()
save_file = open("estimated_rating.pickle","wb")
pickle.dump(R,save_file)
save_file.close()

# Loading Pickles

In [55]:
saved_file = open("rating.pickle","rb")
stars = pickle.load(saved_file)
saved_file = open("estimated_rating.pickle","rb")
R = pickle.load(saved_file)
saved_file.close()

# Returns recommendation based on user id

In [56]:
def recommend(user_id):
    for_user = np.argsort(-R[user_id])
    i ,j = 0,0
    recommendations = []
    while i<5:
        if stars[user_id][for_user[j]]==0.0:
            recommendations.append(movies.iloc[for_user[j]]['title'])
            i+=1
        j+=1
    return '\n'.join(recommendations)

In [64]:
print("Enter User_id: ")
print("Recommendations:\n"+recommend(int(input())-1))

Enter User_id: 
1
Recommendations:
Citizen Kane (1941)
Casablanca (1942)
One Flew Over the Cuckoo's Nest (1975)
Chinatown (1974)
Fargo (1996)


# Accuracy

In [67]:
msk = np.random.rand(len(ratings)) < 0.7
ratings_train = ratings[msk]
ratings_train = ratings_train[['userId','movieId','rating']]
ratings_test = ratings[~msk]
ratings_test = ratings_test[['userId','movieId','rating']]