In [41]:
%matplotlib inline

import scipy as sci
import pandas as pd
import numpy as np
import matplotlib as mp
import matplotlib.pylab as plt

userheader = ['user_id', 'gender', 'age', 'occupation', 'zipcode'] 
movieheader  = ['movie_id', 'title', 'genres']
ratingheader = ['user_id', 'movie_id', 'rating','timestamp']

users_all  = pd.read_table(u'data/users.dat', sep='::', header=None, names=userheader, engine = 'python')
movies  = pd.read_table(u'data/movies.dat', sep='::', header=None, names=movieheader, engine = 'python')
ratings_all = pd.read_table(u'data/ratings.dat', sep='::', header=None, names=ratingheader, engine = 'python')

n = 20 #number of users to find SGD for
users = users_all.loc[(users_all.user_id <= n)]
ratings = ratings_all.loc[(ratings_all.user_id <= n)]
R = np.matrix(ratings)

Nk = 2
Nr = n 
Nc = (np.matrix(movies.movie_id).T)[-1,0]
print 'Row count: {}, Column Count: {}'.format(Nr, Nc)

R

Row count: 20, Column Count: 3952


matrix([[         1,       1193,          5,  978300760],
        [         1,        661,          3,  978302109],
        [         1,        914,          3,  978301968],
        ..., 
        [        20,        110,          5,  978143508],
        [        20,       2028,          4,  978143106],
        [        20,       1240,          4, 1009669181]], dtype=int64)

#### User x Ratings Matrix:

In [42]:
Y = np.matrix(np.zeros((Nr,Nc)))
N = R.shape[0]
for k in range(N):
    i = R[k,0]
    j = R[k,1]
    Y[i-1,j-1] = R[i,2]
Y    


matrix([[ 3.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        ..., 
        [ 5.,  5.,  0., ...,  0.,  0.,  0.],
        [ 4.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

#### Mask Matrix:

In [43]:
M = Y.copy()
M[M>1] = 1
M

matrix([[ 1.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        ..., 
        [ 1.,  1.,  0., ...,  0.,  0.,  0.],
        [ 1.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

In [47]:
Ys = np.mat(Y)
Ysize = Ys.shape[0]


A = np.mat(np.random.rand(Nr, Nk))
B = np.mat(np.random.rand(Nk, Nc))


EPOCH = 100
Eta = 0.1
eta = Eta

for i in range(EPOCH):

    E = np.array(M)*np.array(Y - A*B)
    Err = np.sum(E*E)/np.sum(np.array(M))
    #print(Err)

    for k in range(Ysize):
              
        u = Ys[k,0] - 1
        m = Ys[k,1] - 1
        
        err = Ys[k,2] - A[u,:]*B[:,m]
        
        temp_A = A[u,:] + eta*err[0,0]*B[:,m].T
        B[:,m]   = B[:,m] + eta*err[0,0]*A[u,:].T
        A[u,:]   = temp_A
    
    eta = Eta*1./(i+1)


result = A*B
print result


[[ 0.54364425  0.31433232  0.14092803 ...,  0.07366911  0.60907579
   0.12888608]
 [ 0.42866623  0.23479491  0.29339468 ...,  0.13265168  0.51666792
  -0.14334124]
 [ 0.99319179  0.56688466  0.36039354 ...,  0.17669342  1.13328976
   0.09712894]
 ..., 
 [ 0.77577784  0.45300513  0.13892555 ...,  0.07968991  0.85672837
   0.26748539]
 [ 0.45113246  0.25916817  0.14031796 ...,  0.07069363  0.51009798
   0.0755428 ]
 [ 0.15352263  0.08648196  0.07168028 ...,  0.03384633  0.17836874
  -0.0064528 ]]




In [51]:
for i in range (20):
    
    userResult = [(0,0)]
    for m in range (Nc):
        movieTuple = (result[i,m],m)
        userResult.append(movieTuple)
        
    userResult = sorted(userResult, key=lambda tup: tup[0], reverse=True)     
    
    print("Movie for user " , i , " : ")
    for n in range(10):
        print (userResult[n])

('Movie for user ', 0, ' : ')
(0.81766601576455966, 2931)
(0.81497843257057978, 780)
(0.81361399464638073, 2498)
(0.80902749724478951, 179)
(0.80862313380509754, 2433)
(0.80720316988792074, 3722)
(0.80694159119382369, 503)
(0.8054380526391689, 2160)
(0.80529974836239437, 1020)
(0.80083598047332472, 567)
('Movie for user ', 1, ' : ')
(0.70327568070987301, 3722)
(0.69997068201194668, 567)
(0.69953783881305531, 170)
(0.69949799537496549, 1902)
(0.69803546212928336, 529)
(0.69731105752037603, 2498)
(0.69710293902352372, 179)
(0.69709115802050348, 135)
(0.69707785625375684, 3437)
(0.69668208152497069, 3882)
('Movie for user ', 2, ' : ')
(1.5178984529404977, 2498)
(1.515294278446556, 2931)
(1.5142819784690231, 780)
(1.5124092390602621, 3722)
(1.5114440289493301, 179)
(1.5043391956169083, 2433)
(1.5030039211873434, 2160)
(1.5020577648532214, 1020)
(1.5017457122155533, 567)
(1.496042766326982, 1526)
('Movie for user ', 3, ' : ')
(0.58780463382434389, 2931)
(0.58604145238979588, 780)
(0.5853220