# Importing required modules and loading the CSV files into pandas DataFrames

In [None]:
import pandas as pd
import numpy as np
import pickle
from mat_fac import matrix_factorization as MF
import scipy.sparse as sp
import nimfa
# Directory that holds the MovieLens CSV exports.
loc = "./Datasets/Movie_Lens_100k(Small)/"

# Read each table into its own DataFrame, in the same order as before.
links, movies, ratings, tags = (
    pd.read_csv(loc + csv_name)
    for csv_name in ("links.csv", "movies.csv", "new_ratings.csv", "tags.csv")
)

# Creating 'stars' Numpy 2-d array

This 2-d array holds every known rating; its training copy (`stars_train`) is what gets fed into the factorization algorithms below.
1. Number of rows = number of users 
2. Number of columns = number of movies
3. stars[user][movie] = rating given by the user

In [None]:
# userId starts at 1, so the largest id is also the number of users
# (671 in the original author's data); ids are shifted to 0-based rows below.
rows = ratings['userId'].max()
# One column per row of movies.csv (9125 in the original author's data).
columns = len(movies.index)
stars = np.zeros((rows, columns))
stars_train = np.zeros((rows, columns))
stars_test = np.zeros((rows, columns))

# splitting for training and testing: roughly 70% of the rating rows go to
# training. The mask is drawn from the unseeded global NumPy RNG, so the
# split differs from run to run.
msk = np.random.rand(len(ratings)) <= 0.7
ratings_train = ratings[msk]
ratings_test = ratings[~msk]


def _fill_stars(matrix, frame):
    """Write every rating in ``frame`` into ``matrix[userId - 1, movieId]``.

    ``matrix`` is modified in place. If a (user, movie) pair occurs more than
    once, the last occurrence wins, as it did with row-by-row assignment.
    """
    user_rows = frame['userId'].values.astype(int) - 1
    # NOTE(review): movieId is used directly as a column index, so it is
    # presumably already remapped to 0..columns-1 in new_ratings.csv - confirm.
    movie_cols = frame['movieId'].values.astype(int)
    matrix[user_rows, movie_cols] = frame['rating'].values


_fill_stars(stars, ratings)
_fill_stars(stars_train, ratings_train)
# The held-out matrix was previously allocated but left all zeros.
_fill_stars(stars_test, ratings_test)

# Calculating the 'Estimated Rating' 2-d array R

# Using NMF, Truncated SVD

`stars_train` must be converted to a sparse matrix before it can be used by the algorithms below.

In [None]:
# Sparse (list-of-lists format) copy of the training ratings; this is the
# input handed to both factorization cells below.
train = sp.lil_matrix(stars_train)

In [None]:
from scipy.sparse.linalg import svds

# Truncated SVD of the training matrix, keeping the 500 largest singular
# triplets, then rebuild the low-rank approximation Rt = U * diag(s) * Vt.
u, s, vt = svds(train, k=500)
s_diag_matrix = np.diag(s)
Rt = np.dot(np.dot(u, s_diag_matrix), vt)

# Min-max rescale every row (one row per user) onto the 0..5 range.
row_min = Rt.min(axis=1, keepdims=True)
row_span = Rt.max(axis=1, keepdims=True) - row_min
# A constant row (e.g. a user with no training ratings) has zero span; divide
# by 1 instead so that row becomes all zeros rather than NaN from 0/0.
row_span[row_span == 0] = 1.0
R1 = ((Rt - row_min) / row_span) * 5
print(R1)

In [None]:
# `nmf` is a project-local module that is not shown in this notebook.
# NOTE(review): presumably nmf.nmf returns two factor matrices with 500
# latent components whose product approximates `train` - confirm against
# the nmf module.
import nmf
P,Q  = nmf.nmf(train,500,max_iter=100)
# Estimated ratings from NMF: the product of the two returned factors.
R2 = np.dot(P,Q)
print(R2)

In [None]:
# Final estimate: element-wise average of the SVD-based estimate (R1) and the
# NMF-based estimate (R2).
R = (R1+R2)/2

# Pickling Rate matrix and Estimated Rating matrix

In [None]:
# Persist the estimated rating matrix. The `with` block closes the file even
# if pickling raises, so the handle can no longer leak.
with open("estimated_rating.pickle", "wb") as save_file:
    pickle.dump(R, save_file)

# Loading Pickles

In [None]:
# Reload the estimated rating matrix written by the pickling cell.
# NOTE: unpickling can execute arbitrary code - only load a pickle file that
# this notebook (or another trusted source) produced.
with open("estimated_rating.pickle", "rb") as saved_file:
    R = pickle.load(saved_file)

# Returns recommendation based on user id

In [None]:
def recommend(user_id, n=5):
    """Return the best-scoring movies the user has not rated yet.

    Reads the module-level ``R`` (estimated ratings), ``stars`` (known
    ratings) and ``movies`` (title table).

    Args:
        user_id: 0-based row index into ``R`` and ``stars``.
        n: Maximum number of titles to return. Defaults to 5, the value that
            used to be hard-coded.

    Returns:
        The titles joined by newlines, highest estimated rating first. Fewer
        than ``n`` titles come back when the user has fewer than ``n``
        unrated movies (previously this case raised ``IndexError``).
    """
    # Column indices ordered by descending estimated rating.
    ranked = np.argsort(-R[user_id])
    # Keep only movies without a known rating (0.0 marks "not rated").
    unseen = ranked[stars[user_id][ranked] == 0.0]
    # A column index is also the row position of the movie in `movies`.
    titles = movies.iloc[unseen[:max(n, 0)]]['title']
    return '\n'.join(titles)

In [None]:
# Ask for a 1-based user id on stdin and convert it to the 0-based row index
# that recommend() expects.
print("Enter User_id: ")
chosen_user = int(input()) - 1
suggestions = recommend(chosen_user)
print("Recommendations:\n" + suggestions)

# Accuracy

In [None]:
import math

# Score the estimates on the held-out split (ratings_test). These rows were
# not written into stars_train, so the error reflects unseen ratings rather
# than how well the training data was reproduced.
eval_rows = ratings_test['userId'].values.astype(int) - 1
eval_cols = ratings_test['movieId'].values.astype(int)
actual = ratings_test['rating'].values
abs_err = np.abs(actual - np.asarray(R)[eval_rows, eval_cols])

tot = abs_err.size
if tot:
    rmse = math.sqrt(float(np.mean(abs_err ** 2)))
    mae = float(np.mean(abs_err))
else:
    # Empty test split: nothing to score, and avoids dividing by zero.
    rmse = mae = float('nan')
print(rmse)
print(mae)