In [2]:
import pandas as pd

import surprise
from surprise.prediction_algorithms import *
import pandas as pd
import numpy as np
import datetime as dt

from surprise import Dataset
from surprise import Reader

from surprise.prediction_algorithms import knns
from surprise.similarities import cosine, msd, pearson
from surprise import accuracy
from surprise.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from surprise.model_selection import cross_validate

In [3]:
movies = pd.read_csv('./ml-latest-small/movies.csv')

In [4]:
link = pd.read_csv('./ml-latest-small/links.csv')

In [5]:
rating = pd.read_csv('./ml-latest-small/ratings.csv')

In [6]:
tags = pd.read_csv('./ml-latest-small/tags.csv')

In [7]:
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [8]:
movies.isna().sum()

movieId    0
title      0
genres     0
dtype: int64

In [9]:
movies.title.value_counts()

Emma (1996)                               2
Saturn 3 (1980)                           2
Confessions of a Dangerous Mind (2002)    2
Eros (2004)                               2
War of the Worlds (2005)                  2
                                         ..
Wonder Boys (2000)                        1
Rewrite, The (2014)                       1
42 Up (1998)                              1
Don Juan DeMarco (1995)                   1
Auntie Mame (1958)                        1
Name: title, Length: 9737, dtype: int64

In [10]:
#Check to see if there are any duplicate titles
movies.title.duplicated().sum()

5

In [11]:
#Drop the 5 duplicated movie titles
movies.drop_duplicates(subset='title', inplace=True)

In [12]:
#Sanity check to ensure all duplicates were dropped from title column

movies.title.duplicated().sum()

0

In [13]:
movies[movies.title.duplicated() == True]

Unnamed: 0,movieId,title,genres


In [14]:
#Check length of DataFrame

len(movies)

9737

In [15]:
# Split title column into two new columns: Title and year 

movies['Title'] = movies['title'].str.split('(', n=1, expand=True)[0]

movies['year_released'] = movies['title'].str.split('(', n=1, expand=True)[1]

In [16]:
# Drop original column title

movies.drop(columns='title', inplace=True)

In [17]:
# Sanity Check 
movies

Unnamed: 0,movieId,genres,Title,year_released
0,1,Adventure|Animation|Children|Comedy|Fantasy,Toy Story,1995)
1,2,Adventure|Children|Fantasy,Jumanji,1995)
2,3,Comedy|Romance,Grumpier Old Men,1995)
3,4,Comedy|Drama|Romance,Waiting to Exhale,1995)
4,5,Comedy,Father of the Bride Part II,1995)
...,...,...,...,...
9737,193581,Action|Animation|Comedy|Fantasy,Black Butler: Book of the Atlantic,2017)
9738,193583,Animation|Comedy|Fantasy,No Game No Life: Zero,2017)
9739,193585,Drama,Flint,2017)
9740,193587,Action|Animation,Bungo Stray Dogs: Dead Apple,2018)


In [18]:
# Format year_released column

movies['year_released'] = movies.year_released.str.replace(')', '')

In [19]:
# Sanity Check to ensure formatting was completed
movies

Unnamed: 0,movieId,genres,Title,year_released
0,1,Adventure|Animation|Children|Comedy|Fantasy,Toy Story,1995
1,2,Adventure|Children|Fantasy,Jumanji,1995
2,3,Comedy|Romance,Grumpier Old Men,1995
3,4,Comedy|Drama|Romance,Waiting to Exhale,1995
4,5,Comedy,Father of the Bride Part II,1995
...,...,...,...,...
9737,193581,Action|Animation|Comedy|Fantasy,Black Butler: Book of the Atlantic,2017
9738,193583,Animation|Comedy|Fantasy,No Game No Life: Zero,2017
9739,193585,Drama,Flint,2017
9740,193587,Action|Animation,Bungo Stray Dogs: Dead Apple,2018


In [20]:
link

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0
2,3,113228,15602.0
3,4,114885,31357.0
4,5,113041,11862.0
...,...,...,...
9737,193581,5476944,432131.0
9738,193583,5914996,445030.0
9739,193585,6397426,479308.0
9740,193587,8391976,483455.0


In [21]:
rating

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [22]:
#converted timestamp to datetime
rating['timestamp'] = pd.to_datetime(rating['timestamp'], unit='s')

In [23]:
rating.rating.value_counts(normalize=True)

4.0    0.265957
3.0    0.198808
5.0    0.131015
3.5    0.130271
4.5    0.084801
2.0    0.074884
2.5    0.055040
1.0    0.027877
1.5    0.017762
0.5    0.013586
Name: rating, dtype: float64

In [24]:
rating.duplicated().sum()

0

In [25]:
rating.isna().sum()

userId       0
movieId      0
rating       0
timestamp    0
dtype: int64

In [26]:
tags

Unnamed: 0,userId,movieId,tag,timestamp
0,2,60756,funny,1445714994
1,2,60756,Highly quotable,1445714996
2,2,60756,will ferrell,1445714992
3,2,89774,Boxing story,1445715207
4,2,89774,MMA,1445715200
...,...,...,...,...
3678,606,7382,for katie,1171234019
3679,606,7936,austere,1173392334
3680,610,3265,gun fu,1493843984
3681,610,3265,heroic bloodshed,1493843978


In [27]:
movie_rating = movies.join(rating, on='movieId', lsuffix='movie_', rsuffix='rating_', how='outer')

In [28]:
movie_rating

Unnamed: 0,movieId,movieIdmovie_,genres,Title,year_released,userId,movieIdrating_,rating,timestamp
0.0,1,1.0,Adventure|Animation|Children|Comedy|Fantasy,Toy Story,1995,1.0,3.0,4.0,2000-07-30 18:20:47
1.0,2,2.0,Adventure|Children|Fantasy,Jumanji,1995,1.0,6.0,4.0,2000-07-30 18:37:04
2.0,3,3.0,Comedy|Romance,Grumpier Old Men,1995,1.0,47.0,5.0,2000-07-30 19:03:35
3.0,4,4.0,Comedy|Drama|Romance,Waiting to Exhale,1995,1.0,50.0,5.0,2000-07-30 18:48:51
4.0,5,5.0,Comedy,Father of the Bride Part II,1995,1.0,70.0,3.0,2000-07-30 18:40:00
...,...,...,...,...,...,...,...,...,...
,100831,,,,,610.0,166534.0,4.0,2017-05-03 21:53:22
,100832,,,,,610.0,168248.0,5.0,2017-05-03 22:21:31
,100833,,,,,610.0,168250.0,5.0,2017-05-08 19:50:47
,100834,,,,,610.0,168252.0,5.0,2017-05-03 21:19:12


In [29]:
movie_rating.dropna(inplace=True)

In [30]:
movie_rating['userId'].nunique()

397

In [31]:
stats = movie_rating[['rating', 'timestamp']].describe()
stats

Unnamed: 0,rating
count,8110.0
mean,3.461467
std,1.094116
min,0.5
25%,3.0
50%,3.5
75%,4.0
max,5.0


## Split

In [32]:
train, test = train_test_split(movie_rating)

In [33]:
reader = Reader(rating_scale=(1, 5))
train_data = Dataset.load_from_df(train[['userId', 'movieId', 'rating']], reader)

## Tuning

In [34]:
param_grid = {'k':[10, 50, 100],'min_k': [1, 5, 10]}
base_model = GridSearchCV(KNNBasic,param_grid=param_grid,joblib_verbose=5)
base_model.fit(train_data)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s


Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computi

[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.4s finished


In [35]:
base_model.best_params

{'rmse': {'k': 10, 'min_k': 1}, 'mae': {'k': 10, 'min_k': 1}}

In [36]:
base_model.best_estimator

{'rmse': <surprise.prediction_algorithms.knns.KNNBasic at 0x7fcb386e8a30>,
 'mae': <surprise.prediction_algorithms.knns.KNNBasic at 0x7fcb3899fd00>}

In [37]:
# param_grid = {'n_factors':[20, 100],'n_epochs': [5, 10], 'lr_all': [0.002, 0.005],
#               'reg_all': [0.4, 0.6]}
# gs_model = GridSearchCV(SVD,param_grid=param_grid,n_jobs = -1,joblib_verbose=5)
# gs_model.fit(jokes)

## Cross Validation

In [38]:
#instantiate KNN model 
model1=KNNBasic()

In [39]:
#instaniatite and perform cross val with training data
cross_validate(model1, train_data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.0782  1.1302  1.1158  1.0795  1.0583  1.0924  0.0265  
MAE (testset)     0.8674  0.9205  0.8972  0.8693  0.8642  0.8837  0.0219  
Fit time          0.01    0.01    0.00    0.00    0.00    0.00    0.00    
Test time         0.01    0.01    0.01    0.01    0.01    0.01    0.00    


{'test_rmse': array([1.07820267, 1.13020196, 1.11583654, 1.07954275, 1.05831136]),
 'test_mae': array([0.86738649, 0.92054029, 0.89717133, 0.8692828 , 0.86420325]),
 'fit_time': (0.005239963531494141,
  0.006649017333984375,
  0.0030679702758789062,
  0.004076719284057617,
  0.0026178359985351562),
 'test_time': (0.007119178771972656,
  0.00750422477722168,
  0.005301952362060547,
  0.005582094192504883,
  0.0057048797607421875)}

In [40]:
#use model to predict
model1.predict(uid=10, iid = 20)
#this model just predicted what user 10 would predict movie 20

Prediction(uid=10, iid=20, r_ui=None, est=3.458898479243732, details={'was_impossible': True, 'reason': 'User and/or item is unknown.'})

looking at the output above we can determine that our model predicted that use 10 would rate movie 20 a 3.5 give or take our RSME which is 1.09.

## Models

In [41]:
model2 = SVD()

In [42]:
model2

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7fcb389e4a30>

In [43]:
cross_validate(model2, train_data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9737  0.9931  1.0096  1.0146  1.0044  0.9991  0.0146  
MAE (testset)     0.7746  0.7866  0.8080  0.8028  0.8083  0.7961  0.0133  
Fit time          0.27    0.26    0.25    0.24    0.26    0.26    0.01    
Test time         0.00    0.00    0.00    0.00    0.00    0.00    0.00    


{'test_rmse': array([0.97370863, 0.99313281, 1.00962809, 1.01461289, 1.00444494]),
 'test_mae': array([0.77464594, 0.78660107, 0.80802593, 0.80284727, 0.80833984]),
 'fit_time': (0.27146077156066895,
  0.25719618797302246,
  0.25011777877807617,
  0.2410440444946289,
  0.2602722644805908),
 'test_time': (0.004882335662841797,
  0.0038399696350097656,
  0.003962278366088867,
  0.004135847091674805,
  0.004010915756225586)}

In [44]:
param_grid = {'n_factors':[10,20,50],'n_epochs': [5, 10, 15], 'lr_all': [0.002,0.005,0.01],
             'reg_all': [0.2,0.4,0.6]}
gs_model2 = GridSearchCV(SVD,param_grid=param_grid,joblib_verbose=5)
gs_model2.fit(train_data)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 405 out of 405 | elapsed:   21.8s finished


In [45]:
gs_model2.best_params

{'rmse': {'n_factors': 20, 'n_epochs': 15, 'lr_all': 0.01, 'reg_all': 0.2},
 'mae': {'n_factors': 10, 'n_epochs': 15, 'lr_all': 0.01, 'reg_all': 0.2}}

In [46]:
cross_validate(SVD(n_factors=10,n_epochs=15,lr_all=0.01,reg_all=0.2), train_data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9918  1.0195  0.9908  0.9824  1.0183  1.0005  0.0153  
MAE (testset)     0.7741  0.8212  0.7805  0.7871  0.8069  0.7940  0.0175  
Fit time          0.06    0.05    0.05    0.05    0.05    0.05    0.00    
Test time         0.00    0.00    0.00    0.00    0.00    0.00    0.00    


{'test_rmse': array([0.99176513, 1.01954295, 0.99076189, 0.98242811, 1.01825182]),
 'test_mae': array([0.77413775, 0.82119718, 0.78053488, 0.78710212, 0.80685243]),
 'fit_time': (0.056549787521362305,
  0.04671001434326172,
  0.04534602165222168,
  0.04874014854431152,
  0.04575800895690918),
 'test_time': (0.004051923751831055,
  0.004090785980224609,
  0.0039098262786865234,
  0.00394892692565918,
  0.0040471553802490234)}

In [47]:
param_grid = {'n_factors':[5,10,15],'n_epochs': [15, 30, 45], 'lr_all': [0.01,0.05,0.1],
             'reg_all': [0.1,0.2,0.3]}
gs_model2 = GridSearchCV(SVD,param_grid=param_grid,joblib_verbose=5)
gs_model2.fit(train_data)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 405 out of 405 | elapsed:   38.0s finished


In [48]:
gs_model2.best_params

{'rmse': {'n_factors': 15, 'n_epochs': 30, 'lr_all': 0.01, 'reg_all': 0.1},
 'mae': {'n_factors': 15, 'n_epochs': 45, 'lr_all': 0.01, 'reg_all': 0.1}}

In [49]:
cross_validate(SVD(n_factors=5,n_epochs=15,lr_all=0.01,reg_all=0.1), train_data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9970  0.9961  0.9765  1.0183  0.9995  0.9975  0.0133  
MAE (testset)     0.7921  0.7913  0.7748  0.8028  0.8000  0.7922  0.0098  
Fit time          0.05    0.05    0.04    0.05    0.05    0.05    0.00    
Test time         0.01    0.00    0.01    0.00    0.01    0.01    0.00    


{'test_rmse': array([0.99702742, 0.99610017, 0.97647608, 1.01825412, 0.99951792]),
 'test_mae': array([0.7920753 , 0.79132832, 0.77476382, 0.80278755, 0.80003678]),
 'fit_time': (0.050071001052856445,
  0.04630398750305176,
  0.04473280906677246,
  0.04712510108947754,
  0.04731440544128418),
 'test_time': (0.006273031234741211,
  0.004469871520996094,
  0.009456872940063477,
  0.004004955291748047,
  0.006078004837036133)}

In [50]:
model3 = NMF()

In [51]:
cross_validate(model3, train_data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm NMF on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.0775  1.1308  1.1055  1.0457  1.1034  1.0926  0.0289  
MAE (testset)     0.8537  0.9154  0.8985  0.8538  0.8967  0.8837  0.0252  
Fit time          0.55    0.48    0.49    0.47    0.48    0.49    0.03    
Test time         0.04    0.00    0.00    0.00    0.00    0.01    0.01    


{'test_rmse': array([1.07749653, 1.13083743, 1.10548444, 1.04566033, 1.10335505]),
 'test_mae': array([0.85373777, 0.91542249, 0.89854059, 0.85382646, 0.89672584]),
 'fit_time': (0.549659013748169,
  0.4763298034667969,
  0.49487805366516113,
  0.4738168716430664,
  0.47543883323669434),
 'test_time': (0.03679323196411133,
  0.004319906234741211,
  0.004442930221557617,
  0.004236936569213867,
  0.004230022430419922)}

In [52]:
param_grid = {'n_factors':[10,15,20],'n_epochs': [25, 50, 75]}
gs_model3 = GridSearchCV(NMF,param_grid=param_grid,joblib_verbose=5)
gs_model3.fit(train_data)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:   21.0s finished


In [53]:
gs_model3.best_params

{'rmse': {'n_factors': 10, 'n_epochs': 25},
 'mae': {'n_factors': 10, 'n_epochs': 25}}

In [54]:
cross_validate(NMF(n_factors=10,n_epochs=25), train_data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm NMF on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.0892  1.1202  1.0710  1.0877  1.0946  1.0926  0.0159  
MAE (testset)     0.8898  0.9039  0.8612  0.8771  0.8858  0.8835  0.0141  
Fit time          0.22    0.21    0.18    0.20    0.20    0.20    0.01    
Test time         0.00    0.00    0.00    0.04    0.00    0.01    0.02    


{'test_rmse': array([1.08923839, 1.12019333, 1.07104034, 1.08774778, 1.09463062]),
 'test_mae': array([0.88976845, 0.90387032, 0.86117201, 0.87706505, 0.88577309]),
 'fit_time': (0.22133898735046387,
  0.2052290439605713,
  0.18076586723327637,
  0.19998526573181152,
  0.2022688388824463),
 'test_time': (0.004374980926513672,
  0.004210948944091797,
  0.004118919372558594,
  0.043193817138671875,
  0.0043299198150634766)}

In [55]:
param_grid = {'n_factors':[3,5,10],'n_epochs': [15, 20, 25]}
gs_model3 = GridSearchCV(NMF,param_grid=param_grid,joblib_verbose=5)
gs_model3.fit(train_data)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    5.4s finished


In [56]:
gs_model3.best_params

{'rmse': {'n_factors': 3, 'n_epochs': 15},
 'mae': {'n_factors': 3, 'n_epochs': 15}}

In [63]:
movie_ids = list(train['movieId'].unique())

In [66]:
model3.predict(uid=10, iid=movie_ids)


TypeError: unhashable type: 'list'

In [67]:
train.head()

Unnamed: 0,movieId,movieIdmovie_,genres,Title,year_released,userId,movieIdrating_,rating,timestamp
3032.0,4056,4056.0,Crime|Drama|Mystery|Thriller,"Pledge, The",2001,26.0,457.0,4.0,1996-07-09 22:12:59
7843.0,93443,93443.0,Comedy|Drama,Goon,2011,599.0,2530.0,3.0,2018-02-20 16:38:58
3613.0,4962,4962.0,Adventure|Western,Texas Rangers,2001,32.0,314.0,4.0,1997-02-23 22:30:02
999.0,1301,1301.0,Drama|Sci-Fi,Forbidden Planet,1956,11.0,1518.0,4.0,1998-08-03 14:27:11
7259.0,74508,74508.0,Drama|Romance,Persuasion,2007,474.0,5875.0,2.5,2003-06-09 15:34:53


In [74]:
train['est_rating_user10'] = train['movieId'].apply(lambda x: model3.predict(train.userId[10], x).est)
train.sort_values(by='est_rating', ascending=False, inplace=True)

In [76]:
train.sort_values(by='est_rating_user10', ascending=False, inplace=True)

In [77]:
train

Unnamed: 0,movieId,movieIdmovie_,genres,Title,year_released,userId,movieIdrating_,rating,timestamp,est_rating,est_rating_user10
5506.0,26422,26422.0,Comedy|Drama|Musical,Hair,1979,182.0,2010.0,5.0,2003-06-05 02:25:10,5.0,5.0
1791.0,2390,2390.0,Comedy,Little Voice,1998,19.0,551.0,5.0,2000-08-08 03:42:43,5.0,5.0
5670.0,27664,27664.0,Drama,"Brown Bunny, The",2003,187.0,4640.0,5.0,2006-10-26 08:12:29,5.0,5.0
5653.0,27513,27513.0,Drama,Dog Days,Hundstage (2001,186.0,4813.0,5.0,2002-09-03 19:04:31,5.0,5.0
614.0,779,779.0,Drama|Romance,'Til There Was You,1997,6.0,500.0,5.0,1996-10-17 11:55:54,5.0,5.0
...,...,...,...,...,...,...,...,...,...,...,...
1044.0,1356,1356.0,Action|Adventure|Sci-Fi|Thriller,Star Trek: First Contact,1996,13.0,305.0,1.0,2001-04-16 21:36:08,1.0,1.0
6943.0,65230,65230.0,Comedy|Drama,Marley & Me,2008,418.0,1573.0,0.5,2016-04-28 18:35:28,1.0,1.0
5518.0,26523,26523.0,Horror|Thriller,"Silent Night, Deadly Night",1984,182.0,2720.0,0.5,2003-09-11 12:52:57,1.0,1.0
230.0,267,267.0,Comedy,Major Payne,1995,3.0,914.0,0.5,2011-05-27 02:32:47,1.0,1.0


In [78]:
user = input('userId: ')

userId: 20


In [79]:
train['est_rating_user10'] = train['movieId'].apply(lambda x: model3.predict(user, x).est)
train.sort_values(by='est_rating', ascending=False, inplace=True)

In [80]:
train

Unnamed: 0,movieId,movieIdmovie_,genres,Title,year_released,userId,movieIdrating_,rating,timestamp,est_rating,est_rating_user10
5506.0,26422,26422.0,Comedy|Drama|Musical,Hair,1979,182.0,2010.0,5.0,2003-06-05 02:25:10,5.0,3.467119
251.0,290,290.0,Crime|Drama,Once Were Warriors,1994,3.0,5746.0,5.0,2011-05-27 02:35:08,5.0,3.467119
2327.0,3082,3082.0,Action|Adventure|Thriller,"World Is Not Enough, The",1999,20.0,3034.0,5.0,2003-05-27 12:21:39,5.0,3.467119
2394.0,3176,3176.0,Drama|Mystery|Thriller,"Talented Mr. Ripley, The",1999,20.0,4995.0,5.0,2003-05-27 12:05:01,5.0,3.467119
258.0,298,298.0,Drama,Pushing Hands,Tui shou (1992,3.0,70946.0,5.0,2011-05-27 02:36:55,5.0,3.467119
...,...,...,...,...,...,...,...,...,...,...,...
2424.0,3224,3224.0,Drama,Woman in the Dunes,Suna no onna (1964,21.0,44.0,1.0,2013-08-18 10:49:29,1.0,3.467119
6559.0,55020,55020.0,Comedy,"Ten, The",2007,365.0,56949.0,0.5,2017-03-04 02:33:00,1.0,3.467119
723.0,943,943.0,Drama|Fantasy|Romance,"Ghost and Mrs. Muir, The",1947,7.0,4643.0,1.0,2005-01-25 06:49:09,1.0,3.467119
2750.0,3688,3688.0,Comedy,Porky's,1982,22.0,3949.0,0.5,2010-03-16 07:58:37,1.0,3.467119
