# Latent Factor Models: Matrix Factorization

## Example

In [153]:
# Toy 5x4 matrix used as input for the factorization example in this section.
# NOTE(review): presumably rows are users, columns are items and 0 marks a
# missing rating -- confirm against the accompanying text.
R = [
    [5, 3, 0, 1],
    [4, 0, 0, 1],
    [1, 1, 0, 5],
    [1, 0, 0, 4],
    [0, 1, 5, 4],
]

In [154]:
from sklearn.decomposition import NMF

In [155]:
# Rank-2 non-negative matrix factorization with a small regularization term.
# The legacy `alpha=` / `beta=` keywords are no longer accepted by current
# scikit-learn (the call raises TypeError): `beta` was removed and `alpha` was
# split into `alpha_W` / `alpha_H`.  `alpha_H` defaults to 'same', so it reuses
# the `alpha_W` value given here.
# The recorded outputs below were produced with the legacy arguments, so a
# re-run may print slightly different factors.
model = NMF(n_components=2, alpha_W=0.001, init='random', max_iter=2000, random_state=0)

In [156]:
# Fit the model on R and keep the transformed data it returns; the recorded
# output below has one row per row of R and one column per component.
nP = model.fit_transform(R)
nP

array([[ 0.        ,  1.8759484 ],
       [ 0.        ,  1.25078268],
       [ 1.41676577,  0.46869391],
       [ 1.11011537,  0.35030503],
       [ 2.06394737,  0.        ]])

In [157]:
# Second factor: take the fitted components and flip them so that each row of
# nQ corresponds to one column of R.
nQ = model.components_.transpose()
nQ

array([[ 0.        ,  2.80114155],
       [ 0.31493492,  1.06226506],
       [ 1.3758805 ,  0.        ],
       [ 2.52829136,  0.77562929]])

In [158]:
import numpy as np

# Rebuild an approximation of the input by multiplying the two factors back
# together (nP times the transpose of nQ).
nR = nP.dot(nQ.T)
nR

array([[ 5.254797  ,  1.99275443,  0.        ,  1.45504052],
       [ 3.50361933,  1.32866273,  0.        ,  0.97014368],
       [ 1.31287799,  0.94406618,  1.9493004 ,  3.94552939],
       [ 0.98125398,  0.72173089,  1.52738609,  3.07840195],
       [ 0.        ,  0.65000909,  2.83974494,  5.21826031]])

## Try Wine Ratings Data

In [159]:
import pandas as pd

In [160]:
# Load the wine reviews from a path relative to the notebook's working directory.
# NOTE(review): the loaded frame shows an `Unnamed: 0` column in the output
# below -- presumably an index that was saved into the CSV; passing
# `index_col=0` would absorb it. Confirm before changing.
wine_ratings = pd.read_csv('data/reviews.csv')

In [161]:
# Show the first rows to check which columns were loaded.
wine_ratings.head()

Unnamed: 0,id,username,wine,rating,comment
0,0,jadianes,Manzanilla La Gitana,4,Beautiful Manzanilla. Great price.
1,1,jadianes,Pol Roger Rose 1998,3,Classy Rose. Not great.
2,2,jadianes,Molino Real 2002,4,This can be great with time.
3,3,jadianes,Le Grappin Bagnum Rose 2013,2,Drinkable...
4,4,jadianes,La Bota de Amontillado 1,5,A treasure of a wine


In [162]:
# Reshape the long review table into a username x wine matrix of ratings.
# The pivot() arguments are passed by keyword: pandas 2.0 made them
# keyword-only, so the positional form raises TypeError there.  The keyword
# form behaves the same on older pandas.
# User/wine pairs without a review come out as NaN and are filled with 0.
# Note that the NMF fit further down receives those zeros as ordinary values,
# i.e. as if they were observed ratings rather than missing entries.
wine_ratings_pivoted = (
    wine_ratings
    .pivot(index='username', columns='wine', values='rating')
    .fillna(0)
)

In [163]:
# Display the full pivoted matrix (small enough here to show in its entirety).
wine_ratings_pivoted

wine,Chateau Latour 1982,JL Chave Hermitage 2001,La Bota de Amontillado 1,Le Grappin Bagnum Rose 2013,Manzanilla La Gitana,Molino Real 2002,Pol Roger Rose 1998,Raveneu Le Clos 1996,Rosseau Chambertin 2001,Vega Sicilia Unico 1989,Viña Tondonia Blanco Reserva 1981
username,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
carlos,0.0,5.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0
jadianes,0.0,0.0,5.0,2.0,4.0,4.0,3.0,0.0,0.0,0.0,0.0
john,0.0,4.0,2.0,3.0,2.0,2.0,4.0,0.0,0.0,0.0,0.0
lluis,4.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,4.0,0.0,5.0
mari,0.0,0.0,0.0,0.0,3.0,5.0,2.0,5.0,0.0,0.0,0.0
pepe,0.0,0.0,5.0,0.0,4.0,0.0,2.0,0.0,4.0,4.0,0.0
teus,0.0,0.0,5.0,0.0,4.0,5.0,0.0,0.0,0.0,0.0,4.0
yasset,0.0,0.0,0.0,0.0,4.0,1.0,2.0,4.0,0.0,5.0,0.0


In [164]:
from sklearn.decomposition import NMF

In [165]:
# Rank-2 non-negative matrix factorization for the wine ratings matrix.
# The legacy `alpha=` / `beta=` keywords are no longer accepted by current
# scikit-learn (the call raises TypeError): `beta` was removed and `alpha` was
# split into `alpha_W` / `alpha_H`.  `alpha_H` defaults to 'same', so it reuses
# the `alpha_W` value given here.
# The recorded outputs below were produced with the legacy arguments, so a
# re-run may print slightly different factors.
# NOTE: this rebinds `model`, replacing the estimator fitted on the toy matrix.
model = NMF(n_components=2, alpha_W=0.001, init='random', max_iter=2000, random_state=0)

In [166]:
# Fit the model on the pivoted ratings and keep the transformed data it
# returns; the recorded output below has one row per user and one column per
# component.
nP = model.fit_transform(wine_ratings_pivoted)
nP

array([[ 1.24516062,  0.        ],
       [ 1.54143407,  0.24982783],
       [ 1.23788213,  0.        ],
       [ 0.        ,  1.99782926],
       [ 1.38679969,  0.        ],
       [ 0.48926225,  1.5934595 ],
       [ 1.17777229,  0.90391384],
       [ 0.81387229,  0.54025351]])

In [167]:
# Second factor: take the fitted components and flip them so that each row of
# nQ corresponds to one wine column of the pivoted matrix.
nQ = model.components_.transpose()
nQ

array([[ 0.        ,  1.03745842],
       [ 1.15564732,  0.        ],
       [ 1.57291057,  1.23823875],
       [ 0.70270563,  0.        ],
       [ 2.32997139,  1.93727486],
       [ 2.81835122,  0.        ],
       [ 1.51105048,  0.12773221],
       [ 1.0535121 ,  0.        ],
       [ 0.        ,  1.86493052],
       [ 0.32945149,  1.06400437],
       [ 0.        ,  1.766219  ]])

In [168]:
import numpy as np

# Rebuild an approximation of the ratings matrix by multiplying the two
# factors back together (nP times the transpose of nQ).
nR = nP.dot(nQ.T)
nR

array([[ 0.        ,  1.43896653,  1.95852629,  0.87498137,  2.90118862,
         3.50929994,  1.88150055,  1.31179177,  0.        ,  0.41022002,
         0.        ],
       [ 0.25918598,  1.78135416,  2.73388444,  1.0831744 ,  4.07548246,
         4.34430259,  2.36109576,  1.62391944,  0.46591154,  0.77364564,
         0.44125065],
       [ 0.        ,  1.43055517,  1.94707789,  0.86986674,  2.88422996,
         3.48878662,  1.8705024 ,  1.3041238 ,  0.        ,  0.40782211,
         0.        ],
       [ 2.07266478,  0.        ,  2.4737896 ,  0.        ,  3.87034439,
         0.        ,  0.25518715,  0.        ,  3.72581276,  2.12569905,
         3.528604  ],
       [ 0.        ,  1.60265135,  2.18131189,  0.97451195,  3.23120361,
         3.90848859,  2.09552434,  1.46101025,  0.        ,  0.45688322,
         0.        ],
       [ 1.65314798,  0.56541461,  2.74264907,  0.34380734,  4.22693608,
         1.37891286,  0.94283607,  0.5154437 ,  2.97169126,  1.85663605,
         2.814