In [None]:
# 행렬분해로 평점 데이터(Full Matrix) 채우기

import numpy as np

# User x movie rating matrix; np.nan marks a rating that has not been observed.
# `np.nan` is used instead of the `np.NaN` alias, which was removed in NumPy 2.0.
R = np.array([[4, np.nan, 4, 2, 5],
              [np.nan, 4, np.nan, 1, 2],
              [np.nan, 1, 5, 1, np.nan],
              [1, 5, np.nan, np.nan, 3]])

# Accuracy metric: root-mean-square error (RMSE)
def RMSE(y_true, y_pred):
  """Return the root-mean-square error between two array-likes.

  Both arguments are converted to NumPy arrays, subtracted element-wise,
  and the square root of the mean squared difference is returned.
  """
  residuals = np.asarray(y_true) - np.asarray(y_pred)
  mean_squared = np.mean(np.square(residuals))
  return np.sqrt(mean_squared)


In [None]:
num_users, num_movies = R.shape   # R.shape is (rows, columns) = (users, movies); (4, 5) here
K = 3  # number of latent factors

# Seeded generator so that Restart & Run All reproduces the same initial
# factors (and therefore the same training trajectory) every time.
SEED = 42
rng = np.random.default_rng(SEED)

# Initialise both factor matrices with standard-normal random values.
P = rng.normal(size=(num_users, K))    # user factors,  shape (num_users, K)
Q = rng.normal(size=(K, num_movies))   # movie factors, shape (K, num_movies)

print("P", P)
print("Q", Q)

# Before training, P x Q is just noise; compare it against R.
print("PxQ", np.dot(P, Q))
print("R", R)

# Predicted rating of the third user (index 2) for the second movie (index 1).
print("user_3-movie_2", np.dot(P[2, :], Q[:, 1]))

P [[ 1.84894671  1.14504517 -2.10025223]
 [ 0.13803113 -0.25617012 -0.34600858]
 [ 1.40907631 -0.82637433  1.67461942]
 [ 0.57279853  0.72934767  0.95350536]]
Q [[-0.03477965  1.49299407 -0.17056543  0.52393427 -0.44699448]
 [ 0.64701892  0.21145197 -0.22304727  0.11930783  0.41766169]
 [-0.56840224 -0.15106505 -0.89750905 -0.18942495  0.42843632]]
PxQ [[ 1.87034825  3.31986324  1.31422979  1.50317956 -1.24805182]
 [ 0.02612446  0.20418178  0.34414054  0.10729879 -0.31693424]
 [-1.53554445  1.67602762 -1.55900526  0.32245575 -0.25752646]
 [-0.08999457  0.86536548 -1.11615832  0.20650797  0.45709912]]
R [[ 4. nan  4.  2.  5.]
 [nan  4. nan  1.  2.]
 [nan  1.  5.  1. nan]
 [ 1.  5. nan nan  3.]]
user_3-movie_2 1.676027624241433


In [None]:
def mf_rmse(R, PQ):
  """RMSE between R and PQ, measured only where R holds a rating.

  Entries of R that are NaN are treated as unobserved and excluded; the
  remaining (R[i, j], PQ[i, j]) pairs are handed to RMSE.
  """
  # np.nonzero yields the observed positions in row-major order, i.e. the
  # same order a nested "for i / for j" scan over R would visit them.
  user_idx, movie_idx = np.nonzero(~np.isnan(R))

  observed = R[user_idx, movie_idx]
  predicted = PQ[user_idx, movie_idx]

  return RMSE(observed, predicted)


# RMSE of the current P x Q reconstruction on the observed ratings
print("RMSE:", mf_rmse(R, P @ Q))


RMSE: 3.285087062566014


In [None]:
learning_rate = 0.005  # SGD step size
r_rate = 0.005         # L2 regularisation strength


for epoch in range(500):  # number of training passes over the observed ratings
  for i in range(num_users):
    for j in range(num_movies):

      true_r = R[i, j]
      if np.isnan(true_r):
        continue  # unobserved rating: nothing to learn from

      # Snapshot both factor vectors first. P[i, :] and Q[:, j] are views,
      # so without the copies the Q update below would read the
      # already-updated user vector instead of the one that produced `error`.
      p_i = P[i, :].copy()
      q_j = Q[:, j].copy()

      pred_r = np.dot(p_i, q_j)  # predicted rating
      error = true_r - pred_r    # prediction error e

      # SGD step on the regularised squared error, applied to both factors
      # from the same pre-update values.
      P[i, :] = p_i + learning_rate * (error * q_j - r_rate * p_i)
      Q[:, j] = q_j + learning_rate * (error * p_i - r_rate * q_j)

  print("RMSE : ", epoch, mf_rmse(R, np.dot(P, Q)))





RMSE :  0 3.191105853912705
RMSE :  1 3.1007452373010995
RMSE :  2 3.0133183239024603
RMSE :  3 2.9282783283470506
RMSE :  4 2.8451943902981207
RMSE :  5 2.763732650547518
RMSE :  6 2.683641104256877
RMSE :  7 2.6047371760804494
RMSE :  8 2.526897260533645
RMSE :  9 2.450047687308397
RMSE :  10 2.3741567313001943
RMSE :  11 2.299227407427839
RMSE :  12 2.2252908814290047
RMSE :  13 2.152400396463741
RMSE :  14 2.0806256659905364
RMSE :  15 2.0100477190642123
RMSE :  16 1.9407542072725044
RMSE :  17 1.8728351949677293
RMSE :  18 1.806379458133989
RMSE :  19 1.7414713140133582
RMSE :  20 1.678187995341523
RMSE :  21 1.6165975715031295
RMSE :  22 1.5567574057684657
RMSE :  23 1.498713124468887
RMSE :  24 1.442498061694629
RMSE :  25 1.3881331327391084
RMSE :  26 1.3356270816421814
RMSE :  27 1.284977043083548
RMSE :  28 1.236169356575152
RMSE :  29 1.189180571223146
RMSE :  30 1.1439785819520134
RMSE :  31 1.1005238425864143
RMSE :  32 1.0587706071037202
RMSE :  33 1.018668157228171
RMSE 

In [None]:
# Original ratings (NaN = unobserved) next to the filled-in reconstruction
print("R", R)
print("PQ", P @ Q)

R [[ 4. nan  4.  2.  5.]
 [nan  4. nan  1.  2.]
 [nan  1.  5.  1. nan]
 [ 1.  5. nan nan  3.]]
PQ [[ 3.9937884   7.89835716  3.9969093   1.96144776  4.98264385]
 [ 1.16680552  3.95022951  2.92082969  1.08465121  2.04290069]
 [-1.82321004  1.00798375  4.99104699  0.9736735  -1.18794707]
 [ 1.00402171  5.00420904  2.04812696  0.93561362  2.97930923]]
