## Workshop Surprise

In [1]:
import pandas as pd
import numpy as np

from surprise import SVD
from surprise import accuracy, Dataset
from surprise.model_selection import cross_validate, train_test_split

> **Para este workshop, vamos trabalhar com o dataset do Movielens.**
> * https://grouplens.org/datasets/movielens/100k/
> * https://files.grouplens.org/datasets/movielens/ml-100k-README.txt
> 
> Para fins didáticos, o Surprise faz o download do dataset (MovieLens) automaticamente.

In [2]:
data = Dataset.load_builtin('ml-100k')
data.ratings_file

Dataset ml-100k could not be found. Do you want to download it? [Y/n] 
Trying to download dataset from http://files.grouplens.org/datasets/movielens/ml-100k.zip...
Done! Dataset ml-100k has been saved to /root/.surprise_data/ml-100k


'/root/.surprise_data/ml-100k/ml-100k/u.data'

> **Atenção para o caminho acima. É onde encontra-se o dataset. Se você rodou via Docker, estará dentro do container. Caso contrário, o caminho será outro.**

In [3]:
data

<surprise.dataset.DatasetAutoFolds at 0x7f966406abd0>

In [4]:
data.ratings_file

'/root/.surprise_data/ml-100k/ml-100k/u.data'

> **De posse do dataset, vamos criar um DataFrame para visualizar os registros.**

In [5]:
ratings_df = pd.read_csv(data.ratings_file, sep="\t", names=["userId", "itemId", "rating", "timestamp"])
ratings_df

Unnamed: 0,userId,itemId,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596
...,...,...,...,...
99995,880,476,3,880175444
99996,716,204,5,879795543
99997,276,1090,1,874795795
99998,13,225,2,882399156


> **Como vimos em sala, Sistemas de Recomendação utilizam uma matriz R (Ratings). Vamos criá-la com base no DataFrame.**

In [6]:
ratings_matrix = ratings_df.pivot_table(index=['userId'], columns='itemId', values='rating')
ratings_matrix

itemId,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,3.0,4.0,3.0,3.0,5.0,4.0,1.0,5.0,3.0,...,,,,,,,,,,
2,4.0,,,,,,,,,2.0,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,3.0,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,,,,,,,,,5.0,,...,,,,,,,,,,
940,,,,2.0,,,4.0,5.0,3.0,,...,,,,,,,,,,
941,5.0,,,,,,4.0,,,,...,,,,,,,,,,
942,,,,,,,,,,,...,,,,,,,,,,


> # É hora de praticar alguns conceitos...

> Quantos e quais filmes o usuário "940" deu nota?

In [7]:
rated_items = [
    (x, y) for x, y 
    in ratings_matrix.loc[940].iteritems() 
    if y > 0
]
rated_items

[(4, 2.0),
 (7, 4.0),
 (8, 5.0),
 (9, 3.0),
 (12, 4.0),
 (14, 3.0),
 (47, 3.0),
 (50, 4.0),
 (56, 5.0),
 (66, 4.0),
 (69, 2.0),
 (70, 3.0),
 (82, 4.0),
 (89, 4.0),
 (95, 5.0),
 (96, 5.0),
 (98, 4.0),
 (100, 3.0),
 (116, 2.0),
 (137, 3.0),
 (147, 4.0),
 (150, 3.0),
 (151, 3.0),
 (153, 2.0),
 (161, 3.0),
 (164, 2.0),
 (168, 3.0),
 (170, 4.0),
 (171, 2.0),
 (172, 4.0),
 (173, 4.0),
 (174, 4.0),
 (176, 4.0),
 (181, 3.0),
 (183, 3.0),
 (191, 4.0),
 (193, 3.0),
 (194, 5.0),
 (200, 3.0),
 (204, 4.0),
 (205, 3.0),
 (209, 4.0),
 (213, 4.0),
 (215, 2.0),
 (216, 4.0),
 (238, 4.0),
 (258, 5.0),
 (259, 4.0),
 (264, 1.0),
 (269, 4.0),
 (271, 2.0),
 (272, 4.0),
 (285, 4.0),
 (286, 3.0),
 (289, 3.0),
 (294, 4.0),
 (300, 5.0),
 (301, 3.0),
 (302, 4.0),
 (310, 3.0),
 (313, 5.0),
 (315, 4.0),
 (316, 4.0),
 (317, 4.0),
 (319, 2.0),
 (321, 4.0),
 (343, 2.0),
 (347, 3.0),
 (354, 5.0),
 (355, 1.0),
 (357, 4.0),
 (358, 1.0),
 (382, 3.0),
 (420, 4.0),
 (427, 5.0),
 (430, 4.0),
 (436, 4.0),
 (471, 4.0),
 (474, 

In [8]:
len(rated_items)

107

> Quais filmes usuário 940 gostou mais (nota > 3.0)?

In [9]:
liked_items = [x for x in rated_items if x[1] > 3.0]
liked_items

[(7, 4.0),
 (8, 5.0),
 (12, 4.0),
 (50, 4.0),
 (56, 5.0),
 (66, 4.0),
 (82, 4.0),
 (89, 4.0),
 (95, 5.0),
 (96, 5.0),
 (98, 4.0),
 (147, 4.0),
 (170, 4.0),
 (172, 4.0),
 (173, 4.0),
 (174, 4.0),
 (176, 4.0),
 (191, 4.0),
 (194, 5.0),
 (204, 4.0),
 (209, 4.0),
 (213, 4.0),
 (216, 4.0),
 (238, 4.0),
 (258, 5.0),
 (259, 4.0),
 (269, 4.0),
 (272, 4.0),
 (285, 4.0),
 (294, 4.0),
 (300, 5.0),
 (302, 4.0),
 (313, 5.0),
 (315, 4.0),
 (316, 4.0),
 (317, 4.0),
 (321, 4.0),
 (354, 5.0),
 (357, 4.0),
 (420, 4.0),
 (427, 5.0),
 (430, 4.0),
 (436, 4.0),
 (471, 4.0),
 (482, 5.0),
 (508, 5.0),
 (516, 4.0),
 (521, 4.0),
 (628, 4.0),
 (651, 4.0),
 (655, 4.0),
 (657, 4.0),
 (678, 4.0),
 (692, 4.0),
 (709, 5.0),
 (855, 5.0),
 (1167, 4.0)]

> **Quais itens o usuário 940 não deu a nota?**

In [10]:
not_rated_items = [(x, y) for x, y in ratings_matrix.loc[940].iteritems() if np.isnan(y)]
not_rated_items

[(1, nan),
 (2, nan),
 (3, nan),
 (5, nan),
 (6, nan),
 (10, nan),
 (11, nan),
 (13, nan),
 (15, nan),
 (16, nan),
 (17, nan),
 (18, nan),
 (19, nan),
 (20, nan),
 (21, nan),
 (22, nan),
 (23, nan),
 (24, nan),
 (25, nan),
 (26, nan),
 (27, nan),
 (28, nan),
 (29, nan),
 (30, nan),
 (31, nan),
 (32, nan),
 (33, nan),
 (34, nan),
 (35, nan),
 (36, nan),
 (37, nan),
 (38, nan),
 (39, nan),
 (40, nan),
 (41, nan),
 (42, nan),
 (43, nan),
 (44, nan),
 (45, nan),
 (46, nan),
 (48, nan),
 (49, nan),
 (51, nan),
 (52, nan),
 (53, nan),
 (54, nan),
 (55, nan),
 (57, nan),
 (58, nan),
 (59, nan),
 (60, nan),
 (61, nan),
 (62, nan),
 (63, nan),
 (64, nan),
 (65, nan),
 (67, nan),
 (68, nan),
 (71, nan),
 (72, nan),
 (73, nan),
 (74, nan),
 (75, nan),
 (76, nan),
 (77, nan),
 (78, nan),
 (79, nan),
 (80, nan),
 (81, nan),
 (83, nan),
 (84, nan),
 (85, nan),
 (86, nan),
 (87, nan),
 (88, nan),
 (90, nan),
 (91, nan),
 (92, nan),
 (93, nan),
 (94, nan),
 (97, nan),
 (99, nan),
 (101, nan),
 (102, n

In [11]:
len(not_rated_items)

1575

> Qual a porcentagem de items em relação ao catálogo que tiveram notas (usuário 940)?

In [12]:
len(rated_items), len(not_rated_items), (len(rated_items) / len(not_rated_items) * 100)

(107, 1575, 6.7936507936507935)

> **Trabalhar com IDs é pouco semântico. Vamos carregar os dados dos filmes para poder visualizar os títulos**
>
> Para isso, o MovieLens nos fornece um arquivo (u.item) com informações sobre cada título (vide documentação).

In [14]:
items_df = pd.read_csv(
    "/root/.surprise_data/ml-100k/ml-100k/u.item",
    sep="|",
    index_col=["movieId"],
    names=[
        "movieId", "movieTitle", "releaseDate", "videoReleaseDate",
        "IMDbURL", "unknown", "Action", "Adventure", "Animation",
        "Childrens", "Comedy", "Crime", "Documentary", "Drama", "Fantasy",
        "FilmNoir", "Horror", "Musical", "Mystery", "Romance", "SciFi",
        "Thriller", "War", "Western",
    ],
    encoding = "ISO-8859-1"
)
items_df

Unnamed: 0_level_0,movieTitle,releaseDate,videoReleaseDate,IMDbURL,unknown,Action,Adventure,Animation,Childrens,Comedy,...,Fantasy,FilmNoir,Horror,Musical,Mystery,Romance,SciFi,Thriller,War,Western
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,1,...,0,0,0,0,0,0,0,0,0,0
2,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1678,Mat' i syn (1997),06-Feb-1998,,http://us.imdb.com/M/title-exact?Mat%27+i+syn+...,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1679,B. Monkey (1998),06-Feb-1998,,http://us.imdb.com/M/title-exact?B%2E+Monkey+(...,0,0,0,0,0,0,...,0,0,0,0,0,1,0,1,0,0
1680,Sliding Doors (1998),01-Jan-1998,,http://us.imdb.com/Title?Sliding+Doors+(1998),0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1681,You So Crazy (1994),01-Jan-1994,,http://us.imdb.com/M/title-exact?You%20So%20Cr...,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [15]:
items_df.loc[1]

movieTitle                                           Toy Story (1995)
releaseDate                                               01-Jan-1995
videoReleaseDate                                                  NaN
IMDbURL             http://us.imdb.com/M/title-exact?Toy%20Story%2...
unknown                                                             0
Action                                                              0
Adventure                                                           0
Animation                                                           1
Childrens                                                           1
Comedy                                                              1
Crime                                                               0
Documentary                                                         0
Drama                                                               0
Fantasy                                                             0
FilmNoir            

In [16]:
items_df.loc[1]["movieTitle"]

'Toy Story (1995)'

> **Vamos visualizar os itens que o usuário gostou (nota > 3.0) por ordem decrescente.**

In [17]:
sorted_rated_items = sorted(rated_items, key=lambda x: x[1], reverse=True)
[
    items_df.loc[idx]["movieTitle"] for (idx, _) in sorted_rated_items
]

['Babe (1995)',
 'Pulp Fiction (1994)',
 'Aladdin (1992)',
 'Terminator 2: Judgment Day (1991)',
 'Sting, The (1973)',
 'Contact (1997)',
 'Air Force One (1997)',
 'Titanic (1997)',
 'Wedding Singer, The (1998)',
 'To Kill a Mockingbird (1962)',
 'Some Like It Hot (1959)',
 'People vs. Larry Flynt, The (1996)',
 'Strictly Ballroom (1992)',
 'Diva (1981)',
 'Twelve Monkeys (1995)',
 'Usual Suspects, The (1995)',
 'Star Wars (1977)',
 'While You Were Sleeping (1995)',
 'Jurassic Park (1993)',
 'Blade Runner (1982)',
 'Silence of the Lambs, The (1991)',
 'Long Kiss Goodnight, The (1996)',
 'Cinema Paradiso (1988)',
 'Empire Strikes Back, The (1980)',
 'Princess Bride, The (1987)',
 'Raiders of the Lost Ark (1981)',
 'Aliens (1986)',
 'Amadeus (1984)',
 'Back to the Future (1985)',
 'This Is Spinal Tap (1984)',
 'Room with a View, A (1986)',
 'When Harry Met Sally... (1989)',
 'Raising Arizona (1987)',
 'George of the Jungle (1997)',
 'Full Monty, The (1997)',
 'Good Will Hunting (1997)',


> **Agora que já conhecemos os dados, vamos à construção do nosso sistema utilizando o Suprise.**

In [18]:
ratings_matrix

itemId,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,3.0,4.0,3.0,3.0,5.0,4.0,1.0,5.0,3.0,...,,,,,,,,,,
2,4.0,,,,,,,,,2.0,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,3.0,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,,,,,,,,,5.0,,...,,,,,,,,,,
940,,,,2.0,,,4.0,5.0,3.0,,...,,,,,,,,,,
941,5.0,,,,,,4.0,,,,...,,,,,,,,,,
942,,,,,,,,,,,...,,,,,,,,,,


In [19]:
ratings_matrix.index

Int64Index([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,
            ...
            934, 935, 936, 937, 938, 939, 940, 941, 942, 943],
           dtype='int64', name='userId', length=943)

> **Estamos utilizando um modelo de Machine Learning para fazer as predições, por isso vamos dividir nosso dataset em treinamento e teste**

In [20]:
trainset, testset = train_test_split(data, test_size=.25, random_state=10)

In [21]:
algo = SVD()
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f962a499110>

> **A título de curiosidade, vamos inspecionar esse modelo...**

> Fatores...

In [22]:
algo.n_factors

100

> Matrizes P e Q...

In [23]:
algo.pu

array([[ 0.12547168,  0.01871813,  0.13826191, ...,  0.07126381,
        -0.05251542, -0.09022294],
       [-0.15202205, -0.05911089, -0.20807719, ...,  0.05128999,
         0.06876354,  0.14965173],
       [ 0.10435187,  0.14068715, -0.12642896, ..., -0.03198519,
         0.07390525, -0.04497707],
       ...,
       [ 0.1501897 ,  0.00293355,  0.15919589, ...,  0.02979092,
        -0.12243937, -0.03040622],
       [ 0.05718826, -0.02246994,  0.22225794, ...,  0.00089818,
         0.02104031, -0.04011382],
       [-0.09503986, -0.07802488, -0.27231911, ..., -0.24416456,
         0.01198415,  0.07079724]])

In [24]:
algo.qi

array([[ 0.61183079, -0.05060239, -0.06700465, ...,  0.35646838,
        -0.06628762, -0.25433441],
       [ 0.27505764, -0.06631461,  0.08854153, ..., -0.13629928,
        -0.14272617, -0.10712251],
       [-0.00284414, -0.25942554,  0.0715583 , ...,  0.04940575,
         0.16883801, -0.13729804],
       ...,
       [ 0.22165395,  0.12001195,  0.06686413, ...,  0.09297735,
        -0.00743456, -0.02190067],
       [-0.03122684, -0.22743527,  0.00811472, ..., -0.00956075,
         0.00217829,  0.05379266],
       [-0.26609981, -0.09053117,  0.09688583, ..., -0.12597747,
         0.05277113, -0.00155975]])

> **Ao treinar nosso modelo, ele gera uma Matriz R^ (vide slides) densa.**

In [25]:
predictions = algo.test(testset)

In [26]:
predictions

[Prediction(uid='712', iid='191', r_ui=3.0, est=4.348352087655824, details={'was_impossible': False}),
 Prediction(uid='189', iid='423', r_ui=5.0, est=4.098051084350516, details={'was_impossible': False}),
 Prediction(uid='716', iid='205', r_ui=5.0, est=3.8032763885053837, details={'was_impossible': False}),
 Prediction(uid='860', iid='153', r_ui=4.0, est=3.58991855842232, details={'was_impossible': False}),
 Prediction(uid='234', iid='1133', r_ui=3.0, est=2.808668606946441, details={'was_impossible': False}),
 Prediction(uid='343', iid='55', r_ui=3.0, est=4.226458077768174, details={'was_impossible': False}),
 Prediction(uid='524', iid='742', r_ui=3.0, est=3.6651041532863298, details={'was_impossible': False}),
 Prediction(uid='204', iid='262', r_ui=4.0, est=3.963326722766703, details={'was_impossible': False}),
 Prediction(uid='184', iid='692', r_ui=4.0, est=3.55219263713042, details={'was_impossible': False}),
 Prediction(uid='493', iid='201', r_ui=5.0, est=3.3476823581882793, detai

> **Qual foi nosso erro quadrado médio?**

In [27]:
accuracy.rmse(predictions)

RMSE: 0.9354


0.9354324838533947

> Um usuário aleatório (196) daria qual nota para o filme 302?

In [28]:
uid = str(196)
iid = str(302)

algo.predict(uid, iid, r_ui=4, verbose=True)

user: 196        item: 302        r_ui = 4.00   est = 4.10   {'was_impossible': False}


Prediction(uid='196', iid='302', r_ui=4, est=4.098410460819761, details={'was_impossible': False})

> **Dado nosso usuário 940, quais predições o sistema fez para ele?**

In [29]:
predictions_user_940 = [p for p in predictions if p.uid == "940"]
predictions_user_940

[Prediction(uid='940', iid='70', r_ui=3.0, est=3.1223767245487872, details={'was_impossible': False}),
 Prediction(uid='940', iid='568', r_ui=3.0, est=3.679642068861964, details={'was_impossible': False}),
 Prediction(uid='940', iid='792', r_ui=2.0, est=3.331820138161263, details={'was_impossible': False}),
 Prediction(uid='940', iid='316', r_ui=4.0, est=3.899913296661543, details={'was_impossible': False}),
 Prediction(uid='940', iid='708', r_ui=3.0, est=3.2273442139045843, details={'was_impossible': False}),
 Prediction(uid='940', iid='692', r_ui=4.0, est=3.168782479555907, details={'was_impossible': False}),
 Prediction(uid='940', iid='482', r_ui=5.0, est=3.714559502725002, details={'was_impossible': False}),
 Prediction(uid='940', iid='508', r_ui=5.0, est=3.2323645069389526, details={'was_impossible': False}),
 Prediction(uid='940', iid='527', r_ui=3.0, est=3.6976330056510083, details={'was_impossible': False}),
 Prediction(uid='940', iid='313', r_ui=5.0, est=3.94876433994638, deta

> **OBS: Por que existe esse "est" na predição?**
>
> Resposta: https://github.com/NicolasHug/Surprise/blob/master/surprise/prediction_algorithms/matrix_factorization.pyx#L263

> **Vamos às recomendações! Dado que temos as predições, recomendar é uma questão de ordernar as predições e apresentar os títulos dos filmes.**

In [30]:
sorted_predictions_user_940 = sorted(predictions_user_940, key=lambda x: x.r_ui, reverse=True)
sorted_predictions_user_940

[Prediction(uid='940', iid='482', r_ui=5.0, est=3.714559502725002, details={'was_impossible': False}),
 Prediction(uid='940', iid='508', r_ui=5.0, est=3.2323645069389526, details={'was_impossible': False}),
 Prediction(uid='940', iid='313', r_ui=5.0, est=3.94876433994638, details={'was_impossible': False}),
 Prediction(uid='940', iid='709', r_ui=5.0, est=3.8203140645021874, details={'was_impossible': False}),
 Prediction(uid='940', iid='8', r_ui=5.0, est=3.3884069705300024, details={'was_impossible': False}),
 Prediction(uid='940', iid='316', r_ui=4.0, est=3.899913296661543, details={'was_impossible': False}),
 Prediction(uid='940', iid='692', r_ui=4.0, est=3.168782479555907, details={'was_impossible': False}),
 Prediction(uid='940', iid='147', r_ui=4.0, est=3.335790800341208, details={'was_impossible': False}),
 Prediction(uid='940', iid='191', r_ui=4.0, est=3.5614236293138637, details={'was_impossible': False}),
 Prediction(uid='940', iid='420', r_ui=4.0, est=3.0386935555615366, deta

In [31]:
best_predictions_user_940 = [p for p in sorted_predictions_user_940 if p.r_ui > 3.0]
best_predictions_user_940

[Prediction(uid='940', iid='482', r_ui=5.0, est=3.714559502725002, details={'was_impossible': False}),
 Prediction(uid='940', iid='508', r_ui=5.0, est=3.2323645069389526, details={'was_impossible': False}),
 Prediction(uid='940', iid='313', r_ui=5.0, est=3.94876433994638, details={'was_impossible': False}),
 Prediction(uid='940', iid='709', r_ui=5.0, est=3.8203140645021874, details={'was_impossible': False}),
 Prediction(uid='940', iid='8', r_ui=5.0, est=3.3884069705300024, details={'was_impossible': False}),
 Prediction(uid='940', iid='316', r_ui=4.0, est=3.899913296661543, details={'was_impossible': False}),
 Prediction(uid='940', iid='692', r_ui=4.0, est=3.168782479555907, details={'was_impossible': False}),
 Prediction(uid='940', iid='147', r_ui=4.0, est=3.335790800341208, details={'was_impossible': False}),
 Prediction(uid='940', iid='191', r_ui=4.0, est=3.5614236293138637, details={'was_impossible': False}),
 Prediction(uid='940', iid='420', r_ui=4.0, est=3.0386935555615366, deta

In [32]:
item_ids_user_940 = [
    p.iid for p in best_predictions_user_940
]
item_ids_user_940

['482',
 '508',
 '313',
 '709',
 '8',
 '316',
 '692',
 '147',
 '191',
 '420',
 '66',
 '259']

In [33]:
recommendations = [items_df.loc[int(i)]["movieTitle"] for i in item_ids_user_940]

In [34]:
recommendations

['Some Like It Hot (1959)',
 'People vs. Larry Flynt, The (1996)',
 'Titanic (1997)',
 'Strictly Ballroom (1992)',
 'Babe (1995)',
 'As Good As It Gets (1997)',
 'American President, The (1995)',
 'Long Kiss Goodnight, The (1996)',
 'Amadeus (1984)',
 'Alice in Wonderland (1951)',
 'While You Were Sleeping (1995)',
 'George of the Jungle (1997)']

> **Se quisermos, podemos definir um valor de N para os TOP-N filmes mais relevantes...**

In [35]:
recommendations[:5]

['Some Like It Hot (1959)',
 'People vs. Larry Flynt, The (1996)',
 'Titanic (1997)',
 'Strictly Ballroom (1992)',
 'Babe (1995)']

> **Por hoje é só, pessoal! Boa sorte.**