# Movies Recommender 

The npz file format is a compressed file format that is used to store multiple numpy arrays in a single file

In [1]:
import numpy as np
import pandas as pd

In [2]:
movie_titles = np.load('movie_titles.npz')
movie_recommend = np.load('movie_recommend.npz')

In [3]:
movie_recommend.files                        #use .files to see the arrays inside

['format', 'shape', 'data', 'row', 'col']

In [4]:
movie_titles.files                            #use .files to see the arrays inside

['titles']

In [5]:
titles = movie_titles['titles']               #extract all the arrays from the given files
print(titles.shape)

(3706,)


In [6]:
format = movie_recommend['format']            #extract all the arrays from the given files
print(format)

coo


In [7]:
shape = movie_recommend['shape']             #extract all the arrays from the given files
print(shape)

[6040 3706]


In [8]:
data = movie_recommend['data']             #extract all the arrays from the given files
print(data.shape)

(1000209,)


In [9]:
row = movie_recommend['row']               #extract all the arrays from the given files
print(row.shape)

(1000209,)


In [10]:
col = movie_recommend['col']              #extract all the arrays from the given files
print(col.shape)

(1000209,)


# COO matrix

Coordinate list format or COO format stores data as a list of tuple with three elements; row, column, value. The first element is row index, the second element is column index, and the third element is the value to be stored in the row and column

In [11]:
from scipy.sparse import coo_matrix
data_matrix = coo_matrix((data, (row, col)))

In [12]:
data_matrix.shape

(6040, 3706)

In [13]:
data_matrix.dtype

dtype('int8')

# Apply SVD

In [14]:
dataset = data_matrix.toarray()      #convert the coo matrix to an array 

In [15]:
dataset_mean = np.mean(dataset, axis=1)            #normalize the dataset substracting the mean of the rows
dataset_norm = dataset - dataset_mean[:, None]

In [16]:
# u, sigma, v = np.linalg.svd(dataset_norm, full_matrices=False)   #calculate the SVD
# s = np.diag(sigma)

In [17]:
# k=51                         #truncate at k
# u_t = u[:, :k]
# s_t = s[0:k, :k]
# v_t = v[:k, :]

# dataset_trunc = u_t @ s_t @ v_t

In [None]:
# dataset_trunc.shape

(6040, 3706)

# The function that recommends

In [63]:
# def recommendator(user_ratings, num_recomendaciones=5):
#     user_calc = dataset_trunc[user_ratings] @ v_t.T
#     user_sort = np.argsort(-user_calc, axis=1)
#     selection = [titles[user_sort[i, :num_recomendaciones]] for i in range(len(user_ratings))]
#     return selection

In [64]:
# recommendator([1,2,3])

[array(["Bug's Life, A (1998)", 'Princess Bride, The (1987)',
        'Airplane! (1980)', 'Bambi (1942)',
        'Wallace & Gromit: The Best of Aardman Animation (1996)'],
       dtype='<U75'),
 array(['Girl, Interrupted (1999)', 'E.T. the Extra-Terrestrial (1982)',
        'Rain Man (1988)', 'Saving Private Ryan (1998)',
        'Airplane! (1980)'], dtype='<U75'),
 array(['My Fair Lady (1964)', 'Tarzan (1999)',
        'Star Wars: Episode IV - A New Hope (1977)',
        'Girl, Interrupted (1999)', 'Princess Bride, The (1987)'],
       dtype='<U75')]

In [18]:
def recommendator(user_ratings, ratio):
    u, sigma, v = np.linalg.svd(dataset_norm, full_matrices=False)   #calculate the SVD
    s = np.diag(sigma)
    
    k=int(ratio * np.shape(s)[0]) #truncate at k
    
    u_t = u[:, :k]
    s_t = s[0:k, :k]
    v_t = v[:k, :]
    
    dataset_trunc = u_t @ s_t @ v_t
    
    user_calc = dataset_trunc[user_ratings] @ v_t.T
    user_sort = np.argsort(-user_calc, axis=1)
    num_rec = 5
    selection = [titles[user_sort[i, :num_rec]] for i in range(len(user_ratings))]
    return selection

In [19]:
recommendator([1,2,3],0.3)

[array(["Bug's Life, A (1998)", 'Princess Bride, The (1987)',
        'Airplane! (1980)', 'Bambi (1942)',
        'Wallace & Gromit: The Best of Aardman Animation (1996)'],
       dtype='<U75'),
 array(['Girl, Interrupted (1999)', 'E.T. the Extra-Terrestrial (1982)',
        'Rain Man (1988)', 'Courage Under Fire (1996)', 'Fargo (1996)'],
       dtype='<U75'),
 array(['My Fair Lady (1964)', 'Tarzan (1999)',
        'Star Wars: Episode IV - A New Hope (1977)',
        'Girl, Interrupted (1999)', 'Yojimbo (1961)'], dtype='<U75')]