In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.decomposition import NMF

In [2]:
# Load the raw ratings table, using movieId as the index.
R = pd.read_csv(
    'data/ratings.csv',
    index_col='movieId',
)

In [3]:
# Display the raw ratings frame (pandas truncates the middle rows).
R

Unnamed: 0_level_0,userId,rating,timestamp
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1,4.0,964982703
3,1,4.0,964981247
6,1,4.0,964982224
47,1,5.0,964983815
50,1,5.0,964982931
...,...,...,...
166534,610,4.0,1493848402
168248,610,5.0,1493850091
168250,610,5.0,1494273047
168252,610,5.0,1493846352


In [4]:
# Keep only the columns needed for the user-item matrix; the timestamp is dropped.
R = R.loc[:, ['userId', 'rating']]
R.head()

Unnamed: 0_level_0,userId,rating
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1,4.0
3,1,4.0
6,1,4.0
47,1,5.0
50,1,5.0


In [5]:
# Load the movie metadata (title, genres), indexed by movieId.
movies = pd.read_csv(
    'data/movies.csv',
    index_col='movieId',
)
movies

Unnamed: 0_level_0,title,genres
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,Jumanji (1995),Adventure|Children|Fantasy
3,Grumpier Old Men (1995),Comedy|Romance
4,Waiting to Exhale (1995),Comedy|Drama|Romance
5,Father of the Bride Part II (1995),Comedy
...,...,...
193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
193585,Flint (2017),Drama
193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [6]:
# (number of movies, number of metadata columns)
movies.shape

(9742, 2)

In [7]:
# Attach each rating's movie title by matching the movieId index against
# `movies`; the inner join keeps only ratings whose movieId has a title.
# NOTE(review): R is rebound here, so re-running this cell on its own would try
# to join `title` onto a frame that already has it — prefer Restart & Run All.
R = R.join(movies[['title']], on='movieId', how='inner')
R

Unnamed: 0_level_0,userId,rating,title
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1,4.0,Toy Story (1995)
1,5,4.0,Toy Story (1995)
1,7,4.5,Toy Story (1995)
1,15,2.5,Toy Story (1995)
1,17,4.5,Toy Story (1995)
...,...,...,...
160341,610,2.5,Bloodmoon (1997)
160527,610,4.5,Sympathy for the Underdog (1971)
160836,610,3.0,Hazard (2005)
163937,610,3.5,Blair Witch (2016)


In [8]:
# One row per rating: (number of ratings, [userId, rating, title]).
R.shape

(100836, 3)

### 1) Transform the data into the right format

In [9]:
# Reshape the long ratings table into a wide user x title matrix.
# Cells are NaN where a user has not rated a title; if several rows share the
# same (userId, title) pair they are averaged (pivot_table's default).
R = R.pivot_table(
    values='rating',
    index='userId',
    columns='title',
)
R.head()

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,4.0,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,


In [10]:
# (number of users, number of distinct titles)
R.shape

(610, 9719)

### 2) Handle missing data

In [11]:
# Per-title mean of the observed ratings (NaN entries are skipped by default).
average_movie_rating = R.mean(axis=0)
average_movie_rating

title
'71 (2014)                                   4.000000
'Hellboy': The Seeds of Creation (2004)      4.000000
'Round Midnight (1986)                       3.500000
'Salem's Lot (2004)                          5.000000
'Til There Was You (1997)                    4.000000
                                               ...   
eXistenZ (1999)                              3.863636
xXx (2002)                                   2.770833
xXx: State of the Union (2005)               2.000000
¡Three Amigos! (1986)                        3.134615
À nous la liberté (Freedom for Us) (1931)    1.000000
Length: 9719, dtype: float64

In [12]:
# Replace every missing rating with that title's mean rating, so that the
# matrix handed to NMF contains no NaN values.
R_imputed = R.fillna(value=average_movie_rating)
R_imputed.head()

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,4.0,3.5,5.0,4.0,1.5,3.176471,3.0,3.666667,3.285714,...,1.5,4.0625,3.666667,3.0,3.0,3.863636,2.770833,2.0,4.0,1.0
2,4.0,4.0,3.5,5.0,4.0,1.5,3.176471,3.0,3.666667,3.285714,...,1.5,4.0625,3.666667,3.0,3.0,3.863636,2.770833,2.0,3.134615,1.0
3,4.0,4.0,3.5,5.0,4.0,1.5,3.176471,3.0,3.666667,3.285714,...,1.5,4.0625,3.666667,3.0,3.0,3.863636,2.770833,2.0,3.134615,1.0
4,4.0,4.0,3.5,5.0,4.0,1.5,3.176471,3.0,3.666667,3.285714,...,1.5,4.0625,3.666667,3.0,3.0,3.863636,2.770833,2.0,3.134615,1.0
5,4.0,4.0,3.5,5.0,4.0,1.5,3.176471,3.0,3.666667,3.285714,...,1.5,4.0625,3.666667,3.0,3.0,3.863636,2.770833,2.0,3.134615,1.0


### 3) Train NMF

In [13]:
# Instantiate the NMF model.
# n_components: number of latent features shared by the users and the movies.
# random_state: fixed seed so that Restart & Run All reproduces the same
# factorization (and therefore the same recommendations).
nmf = NMF(n_components=100, max_iter=1000, random_state=42)

In [14]:
# As usual with scikit-learn estimators, we fit the model on the data.
# The imputed matrix is used because it contains no missing values.
nmf.fit(R_imputed)



NMF(max_iter=1000, n_components=100)

### Inspect the sub-matrices and the reconstruction error

In [15]:
# Reminder of the user x title dimensions before looking at the factors.
R.shape

(610, 9719)

In [16]:
# Extract the movie-feature matrix
Q = nmf.components_
Q.shape # expected: (n_components, number of titles) = (100, 9719)

(100, 9719)

In [17]:
# Label the movie-feature matrix: one column per title, one row per latent feature.
pd.DataFrame(data=Q, columns=R.columns)

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
0,1.078061,1.078061,0.943304,1.347577,1.068553,0.404273,0.634252,0.808546,0.527215,0.876949,...,0.404273,1.037756,0.959472,0.808546,0.808546,0.801143,0.460784,0.505164,0.471983,0.269515
1,0.000000,0.000000,0.000000,0.000000,0.628751,0.000000,7.019312,0.000000,20.282939,0.380713,...,0.000000,1.552893,1.171589,0.000000,0.000000,6.061217,11.424785,1.585780,16.444274,0.000000
2,0.000000,0.000000,0.000000,0.000000,4.050755,0.000000,61.284626,0.000000,120.540535,1.995799,...,0.000000,18.182710,8.632177,0.000000,0.000000,72.114588,62.543255,4.029954,26.200324,0.000000
3,0.084897,0.084897,0.074285,0.106122,0.082447,0.031836,0.055625,0.063673,0.040525,0.066776,...,0.031836,0.095315,0.075963,0.063673,0.063673,0.122658,0.070033,0.042709,0.138915,0.021224
4,0.082081,0.082081,0.071821,0.102601,0.082467,0.030780,0.000000,0.061561,0.000000,0.065357,...,0.030780,0.066408,0.071852,0.061561,0.061561,0.005410,0.003083,0.036701,0.017362,0.020520
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.007554,0.007554,0.006609,0.009442,0.027597,0.002833,0.159284,0.005665,0.200021,0.015769,...,0.002833,0.020716,0.000000,0.005665,0.005665,0.030150,0.106678,0.021777,0.223720,0.001888
96,0.005719,0.005719,0.005004,0.007149,0.000000,0.002145,0.004783,0.004289,0.193849,0.006815,...,0.002145,0.022345,0.008858,0.004289,0.004289,0.000000,0.045705,0.000183,0.144078,0.001430
97,0.003647,0.003647,0.003191,0.004559,0.011350,0.001368,0.029830,0.002735,0.301437,0.003792,...,0.001368,0.034022,0.013358,0.002735,0.002735,0.028760,0.097392,0.015615,0.210616,0.000912
98,0.017194,0.017194,0.015045,0.021492,0.016422,0.006448,0.034822,0.012895,0.029993,0.012335,...,0.006448,0.011370,0.015812,0.012895,0.012895,0.086132,0.027122,0.024883,0.001867,0.004298


In [18]:
# Extract the user-feature matrix
P = nmf.transform(R_imputed)
P.shape # expected: (number of users, n_components) = (610, 100)

(610, 100)

In [19]:
# Label the user-feature matrix: one row per userId, one column per latent feature.
pd.DataFrame(data=P, index=R.index)

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,3.070987,0.000000,0.000000,0.638792,2.669484,0.384737,0.252755,0.000000,0.175350,0.000000,...,1.003540,0.063513,0.947701,0.138882,0.000000,0.000287,0.174870,0.398308,0.000000,0.006512
2,3.252141,0.028367,0.000778,0.479843,1.754098,0.057336,0.082457,0.036038,0.139438,0.037730,...,0.077026,0.015444,0.099457,0.041640,0.035663,0.083261,0.107483,0.107831,0.039256,0.000000
3,3.068443,0.000000,0.000000,0.325579,3.596195,0.000000,0.382947,0.000000,0.073373,0.188761,...,0.000000,0.003830,0.000000,0.000000,0.153371,0.644205,0.663745,0.769694,0.486015,0.000000
4,2.331873,0.000000,0.000000,0.000000,12.277354,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
5,3.106016,0.011107,0.000208,0.353053,2.726462,0.020439,0.095923,0.075160,0.224178,0.030541,...,0.172960,0.008541,0.098027,0.097913,0.057063,0.109345,0.202092,0.121228,0.064138,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.472932,0.000000,0.000000,0.000000,6.046868,0.000000,0.149145,0.000000,0.000000,0.005512,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
607,3.127882,0.000000,0.000419,0.014905,3.568728,0.000000,0.004342,0.017533,0.135871,0.092157,...,1.589698,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.345892,0.129209,0.000000
608,1.519501,0.000000,0.000000,0.000000,19.631860,0.974387,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
609,3.221953,0.023134,0.000672,0.433049,2.068310,0.045648,0.106185,0.105148,0.143802,0.050908,...,0.068287,0.056289,0.082718,0.104646,0.028289,0.058472,0.166648,0.154444,0.086767,0.112771


In [20]:
# Look at the reconstruction error of the fitted model, rounded to 2 decimals
round(nmf.reconstruction_err_, 2)
# The absolute value is hard to judge on its own; interpret it relative to the
# error of other models (e.g. a different n_components) fitted on the same data

169.69

### 4) Reconstruct the original matrix

In [21]:
# The matrix the model was trained on (missing ratings filled with title means).
R_imputed

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,4.0,3.5,5.0,4.0,1.5,3.176471,3.0,3.666667,3.285714,...,1.5,4.0625,3.666667,3.0,3.0,3.863636,2.770833,2.0,4.000000,1.0
2,4.0,4.0,3.5,5.0,4.0,1.5,3.176471,3.0,3.666667,3.285714,...,1.5,4.0625,3.666667,3.0,3.0,3.863636,2.770833,2.0,3.134615,1.0
3,4.0,4.0,3.5,5.0,4.0,1.5,3.176471,3.0,3.666667,3.285714,...,1.5,4.0625,3.666667,3.0,3.0,3.863636,2.770833,2.0,3.134615,1.0
4,4.0,4.0,3.5,5.0,4.0,1.5,3.176471,3.0,3.666667,3.285714,...,1.5,4.0625,3.666667,3.0,3.0,3.863636,2.770833,2.0,3.134615,1.0
5,4.0,4.0,3.5,5.0,4.0,1.5,3.176471,3.0,3.666667,3.285714,...,1.5,4.0625,3.666667,3.0,3.0,3.863636,2.770833,2.0,3.134615,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,4.0,4.0,3.5,5.0,4.0,1.5,3.176471,3.0,3.666667,3.285714,...,1.5,4.0625,3.666667,3.0,3.0,3.863636,2.770833,2.0,3.134615,1.0
607,4.0,4.0,3.5,5.0,4.0,1.5,3.176471,3.0,3.666667,3.285714,...,1.5,4.0625,3.666667,3.0,3.0,3.863636,2.770833,2.0,3.134615,1.0
608,4.0,4.0,3.5,5.0,4.0,1.5,3.176471,3.0,3.666667,3.285714,...,1.5,4.0625,3.666667,3.0,3.0,4.500000,3.500000,2.0,3.134615,1.0
609,4.0,4.0,3.5,5.0,4.0,1.5,3.176471,3.0,3.666667,3.285714,...,1.5,4.0625,3.666667,3.0,3.0,3.863636,2.770833,2.0,3.134615,1.0


In [22]:
# Look at the original matrix R (NaN where a user has not rated a title)
R.head()

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,4.0,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,


In [23]:
# Calculate R_hat: the product of the user-feature and movie-feature matrices,
# labelled with the same users and titles as R.
R_hat = pd.DataFrame(P @ Q, index=R.index, columns=R.columns)
R_hat

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,3.999612,3.999612,3.499661,4.999515,3.972676,1.499855,3.200149,2.999709,3.622082,3.293968,...,1.499855,4.092822,3.631750,2.999709,2.999709,3.933817,2.530585,1.983879,3.742606,0.999903
2,3.999667,3.999667,3.499709,4.999584,4.000837,1.499875,3.174143,2.999750,3.672796,3.286025,...,1.499875,4.071372,3.661540,2.999750,2.999750,3.862091,2.775577,1.991758,3.143004,0.999917
3,3.998837,3.998837,3.498982,4.998546,3.973084,1.499564,3.226529,2.999128,3.579653,3.287143,...,1.499564,4.054152,3.625731,2.999128,2.999128,3.961006,2.705132,1.994633,3.176334,0.999709
4,3.999974,3.999974,3.499977,4.999967,4.053819,1.499990,3.262415,2.999980,3.918242,3.290123,...,1.499990,4.057808,3.680273,2.999980,2.999980,3.806451,2.745934,1.997892,3.141151,0.999993
5,4.000227,4.000227,3.500199,5.000284,3.994784,1.500085,3.149235,3.000171,3.654708,3.279842,...,1.500085,4.070930,3.662434,3.000171,3.000171,3.870559,2.745594,1.997350,3.206091,1.000057
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,4.000339,4.000339,3.500296,5.000423,3.987145,1.500127,3.158742,3.000254,3.668355,3.286884,...,1.500127,4.059129,3.650994,3.000254,3.000254,3.871506,2.761761,2.002023,3.126797,1.000085
607,3.999668,3.999668,3.499709,4.999585,4.010094,1.499875,3.158625,2.999751,3.547285,3.285575,...,1.499875,4.041678,3.683555,2.999751,2.999751,3.812050,2.749152,2.012918,3.303630,0.999917
608,4.000364,4.000364,3.500319,5.000455,4.009667,1.500137,3.158516,3.000273,3.635699,3.282355,...,1.500137,4.068648,3.665306,3.000273,3.000273,4.495677,3.465108,1.989559,3.117231,1.000091
609,3.999352,3.999352,3.499433,4.999190,3.999787,1.499757,3.186208,2.999514,3.709822,3.285237,...,1.499757,4.074832,3.662767,2.999514,2.999514,3.875180,2.764891,1.997423,3.159614,0.999838


### 5) Make a prediction based on new user input

In [24]:
# Ratings supplied by a new user, keyed by movie title.
# This mirrors the JSON payload we will have to work with in the end.
new_user_input = {
    'Toy Story (1995)': 4,
    'Jumanji (1995)': 5,
}
new_user_input

{'Toy Story (1995)': 4, 'Jumanji (1995)': 5}

In [25]:
# Convert it to a pd.DataFrame with one row (userId 611) and the same title
# columns as the training matrix R; titles the user has not rated become NaN.
# Titles that do not appear in R.columns would be silently discarded by the
# constructor, so fail loudly instead of ignoring part of the user's input.
unknown_titles = sorted(set(new_user_input) - set(R.columns))
assert not unknown_titles, f"Titles not in the training data: {unknown_titles}"
new_user = pd.DataFrame(new_user_input, columns=R.columns, index=[611])
new_user

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
611,,,,,,,,,,,...,,,,,,,,,,


In [26]:
# Fill the titles the new user has not rated with the per-title mean ratings,
# the same imputation that was applied to the training matrix.
new_user = new_user.fillna(value=average_movie_rating)
new_user

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
611,4.0,4.0,3.5,5.0,4.0,1.5,3.176471,3.0,3.666667,3.285714,...,1.5,4.0625,3.666667,3.0,3.0,3.863636,2.770833,2.0,3.134615,1.0


In [27]:
# Prediction step 1 - generate user_P: the new user's row of latent-feature
# weights, computed by the already fitted model
user_P = nmf.transform(new_user)
user_P



array([[3.27144682e+00, 2.79297363e-02, 8.12129915e-04, 5.07796607e-01,
        1.63487800e+00, 5.97845666e-02, 6.84847890e-02, 5.89178493e-02,
        9.14724319e-02, 4.46929990e-02, 2.74274978e-02, 7.77109911e-03,
        3.68204793e-02, 6.81934503e-02, 6.83825568e-03, 3.92814580e-02,
        1.14755213e-02, 1.55791652e-02, 2.72842945e-03, 6.12155221e-03,
        2.06192909e-02, 1.60294713e-02, 2.28746451e-02, 2.46772356e-02,
        1.72136702e-02, 4.74205317e-03, 1.04915672e-02, 2.69564073e-02,
        1.88519804e-03, 1.26577620e-02, 1.48055820e-02, 3.39365359e-02,
        1.64813278e-03, 4.17225943e-03, 1.15361292e-02, 1.29707619e-02,
        8.08909043e-02, 2.02070094e-02, 1.78830995e-02, 1.20448480e-02,
        3.06678683e-02, 3.69752985e-02, 5.09332237e-02, 1.12485841e-01,
        2.86148543e-02, 2.06449553e-02, 4.47188886e-02, 1.96082368e-02,
        3.41987931e-02, 2.95087549e-02, 6.36734917e-03, 7.46254771e-02,
        1.57110405e-02, 2.78716124e-02, 2.15422273e-02, 4.732567

In [28]:
# Prediction step 2 - reconstruct the ratings row for this new user only by
# multiplying the user's feature weights with the movie-feature matrix.
user_R = pd.DataFrame(user_P @ Q, index=new_user.index, columns=R.columns)
user_R

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
611,4.000061,4.000061,3.500054,5.000077,4.000442,1.500023,3.179511,3.000046,3.686597,3.291059,...,1.500023,4.062973,3.664247,3.000046,3.000046,3.859583,2.779019,2.002239,3.136266,1.000015


In [29]:
# The reconstructed row holds a predicted rating for every title.
# Remove the titles the user has already rated so they are not recommended again.
recommendation = user_R.drop(columns=list(new_user_input))

In [30]:
# Predicted ratings for the titles the new user has not rated yet.
recommendation

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
611,4.000061,4.000061,3.500054,5.000077,4.000442,1.500023,3.179511,3.000046,3.686597,3.291059,...,1.500023,4.062973,3.664247,3.000046,3.000046,3.859583,2.779019,2.002239,3.136266,1.000015


In [31]:
# Sort recommendations
recommendation.sort_values(by=611, axis=1, ascending=False)

title,"Awfully Big Adventure, An (1995)",Paper Birds (Pájaros de papel) (2010),Wow! A Talking Fish! (1983),Spellbound (2011),Little Dieter Needs to Fly (1997),Mephisto (1981),Bossa Nova (2000),Willy/Milly (1986),Death Note: Desu nôto (2006–2007),Tales of Manhattan (1942),...,Derailed (2002),Dead of Night (1945),Daddy Day Camp (2007),Cyborg (1989),"Crow, The: Wicked Prayer (2005)",Tooth Fairy 2 (2012),Yongary: Monster from the Deep (1967),"Collector, The (1965)","General Died at Dawn, The (1936)",3 Ninjas Knuckle Up (1995)
611,5.000077,5.000077,5.000077,5.000077,5.000077,5.000077,5.000077,5.000077,5.000077,5.000077,...,0.500008,0.500008,0.500008,0.500008,0.500008,0.500008,0.500008,0.500008,0.500008,0.500008


In [32]:
# Same ranking, reshaped into a Series indexed by (title, userId) so that the
# result reads as a vertical list of titles with their predicted rating.
recommendation.sort_values(by=611, axis=1, ascending=False).unstack()

title                                     
Awfully Big Adventure, An (1995)       611    5.000077
Paper Birds (Pájaros de papel) (2010)  611    5.000077
Wow! A Talking Fish! (1983)            611    5.000077
Spellbound (2011)                      611    5.000077
Little Dieter Needs to Fly (1997)      611    5.000077
                                                ...   
Tooth Fairy 2 (2012)                   611    0.500008
Yongary: Monster from the Deep (1967)  611    0.500008
Collector, The (1965)                  611    0.500008
General Died at Dawn, The (1936)       611    0.500008
3 Ninjas Knuckle Up (1995)             611    0.500008
Length: 9717, dtype: float64