# Implementing the Gradient Descent Method

1. The focus is on implementing the stochastic gradient descent (SGD) method.

In [1]:
import numpy as np

In [2]:
# User-by-item rating matrix (6 users x 5 items).
# A 0 entry is NOT a rating of zero: it means the user has not rated that item,
# so those entries must be left out of the error computation.
R = np.array(
    [
        [4, 0, 1, 0, 5],
        [1, 2, 1, 3, 5],
        [4, 5, 3, 1, 0],
        [2, 3, 0, 2, 5],
        [5, 1, 4, 0, 0],
        [0, 3, 2, 4, 1],
    ]
)
R.shape

(6, 5)

In [3]:
"""_summary_
Input
R      the m*n rating matrix
K      dimension of the latent feature vectors. Note that this dimension is chosen
       by hand, although in principle it should not have to be.
steps/max_iter  maximum number of iterations
alpha  step size (learning rate)
lamda  regularization coefficient

Output
The factors P and Q obtained from the decomposition
P  user feature matrix, m*k
Q  item feature matrix, k*n
"""

# Assign the hyperparameters
K=2
max_iter = 5000 # a large iteration count goes together with a fairly small step size
alpha = 0.0002
lamda = 0.004

def grad(R, K=2, max_iter=5000, alpha=0.001, lamda=0.002, cost_threshold=0.0001):
    """Factorize a rating matrix as ``R ~ P @ Q`` with stochastic gradient descent.

    Only strictly positive entries of ``R`` are treated as observed ratings;
    every other entry is considered "not rated" and contributes neither to the
    updates nor to the cost.

    For each observed entry ``(u, i)`` the minimized loss is::

        e_ui ** 2 + lamda * (||P[u, :]|| ** 2 + ||Q[:, i]|| ** 2)
        with e_ui = P[u, :] . Q[:, i] - R[u, i]

    and the returned cost is the sum of that expression over all observed
    entries.

    Args:
        R: 2-D array-like of shape (m, n) holding the ratings.
        K: number of latent features.
        max_iter: maximum number of full passes over the observed entries.
        alpha: step size (learning rate).
        lamda: L2 regularization coefficient.
        cost_threshold: stop early once the cost drops below this value.

    Returns:
        A tuple ``(P, Q, cost)`` where ``P`` has shape (m, K), ``Q`` has shape
        (K, n) and ``cost`` is the regularized cost of the returned factors.
        With ``max_iter == 0`` the randomly initialized factors and their cost
        are returned.

    Raises:
        ValueError: if ``R`` is not two-dimensional.
    """
    R = np.asarray(R, dtype=float)
    if R.ndim != 2:
        raise ValueError("R must be a 2-D rating matrix, got ndim={}".format(R.ndim))
    m, n = R.shape

    observed = R > 0
    # (u, i) index pairs of the observed ratings, in row-major order.
    rated = np.argwhere(observed)

    P = np.random.rand(m, K)
    Q = np.random.rand(K, n)

    def _cost():
        # Squared error on observed entries plus the L2 penalty of the factor
        # vectors involved, counted once per observed entry.
        err = (P @ Q - R)[observed]
        penalty = observed * ((P ** 2).sum(axis=1)[:, None] + (Q ** 2).sum(axis=0)[None, :])
        return float((err ** 2).sum() + lamda * penalty.sum())

    # Defined up front so the function also returns cleanly when max_iter == 0.
    cost = _cost()

    for _ in range(max_iter):
        for u, i in rated:
            eui = P[u, :] @ Q[:, i] - R[u, i]
            # Keep the pre-update user vector: both gradients must be evaluated
            # at the same point (the one eui was computed for).
            p_old = P[u, :].copy()
            # Gradient of the per-entry loss; the penalty term is ADDED so that
            # the descent step shrinks the factors instead of inflating them.
            P[u, :] -= alpha * 2 * (eui * Q[:, i] + lamda * p_old)
            Q[:, i] -= alpha * 2 * (eui * p_old + lamda * Q[:, i])

        cost = _cost()
        # Stop as soon as the cost falls below the requested threshold.
        if cost < cost_threshold:
            break
    return P, Q, cost

In [4]:
# Factorize R with the configured hyperparameters, rebuild the dense prediction
# matrix from the two factors, and print everything side by side.
report_template = "origin R is {}, \n\n predict Matrix is {}, \n\n  User matrix is {}, \n\n Item matrix is {}, \n\n Cost is {}\n\n"
P, Q, cost = grad(R, K, max_iter, alpha, lamda)
predictR = P.dot(Q)
print(report_template.format(R, predictR, P, Q, cost))

origin R is [[4 0 1 0 5]
 [1 2 1 3 5]
 [4 5 3 1 0]
 [2 3 0 2 5]
 [5 1 4 0 0]
 [0 3 2 4 1]], 

 predict Matrix is [[ 3.66513682 -0.18662445  1.34042208 -2.76069851  5.07189276]
 [ 1.0135124   2.90327232  1.83106037  2.29530703  4.44340054]
 [ 4.0794778   4.14886755  3.64512258  1.43689195 10.12865476]
 [ 1.5911233   2.88009496  2.04538652  1.86668497  5.24912589]
 [ 5.35723688  1.57665425  2.87331269 -2.12081265  9.31672246]
 [-1.3403608   3.11865969  1.01741102  4.16720774  1.28437307]], 

  User matrix is [[ 1.70043583 -0.81309124]
 [ 0.89391929  1.37712108]
 [ 2.51736216  1.45687872]
 [ 1.16279506  1.25236081]
 [ 2.75067542 -0.18581685]
 [-0.18445913  1.95110671]], 

 Item matrix is [[ 1.91342059  0.68554365  1.08675081 -0.63076237  3.45359251]
 [-0.50607837  1.66321732  0.62419555  2.07618468  0.98478455]], 

 Cost is 7.85094279115375




In [5]:
# Judging from the result above, the gap between the predicted rating matrix and
# the actual ratings is fairly large.
# The latent dimension may simply be too low, so raise it and look at the result again.
K = 5
report_template = "origin R is {}, \n\n predict Matrix is {}, \n\n  User matrix is {}, \n\n Item matrix is {}, \n\n Cost is {}\n\n"
P, Q, cost = grad(R, K, max_iter, alpha, lamda)
predictR = P.dot(Q)
print(report_template.format(R, predictR, P, Q, cost))

origin R is [[4 0 1 0 5]
 [1 2 1 3 5]
 [4 5 3 1 0]
 [2 3 0 2 5]
 [5 1 4 0 0]
 [0 3 2 4 1]], 

 predict Matrix is [[3.94589918 4.47823057 1.09919227 3.62914162 5.00114935]
 [1.12420292 1.93650272 0.89602294 2.94806129 5.04755585]
 [4.06402754 4.99036173 2.96663292 0.9717752  3.64433156]
 [1.91599919 3.06972956 1.0028607  2.06891272 4.97606825]
 [4.99175272 1.00394651 4.03164376 5.58035793 4.52508018]
 [4.92718265 3.00901852 1.96494956 4.02083656 0.99979961]], 

  User matrix is [[ 0.26436002  0.6627263   1.4868938   1.31890862  0.91345764]
 [ 0.66165111  0.58992405  1.75082859 -0.05837241 -0.02501487]
 [ 1.44600016 -0.38340638  0.34524111  1.04670736  1.61505907]
 [ 0.72325549  0.13132189  1.46916957  0.68268247  0.26238478]
 [ 1.60170922  2.19796447  0.32701576  0.49657226  0.18096915]
 [-0.02103907  1.38825056 -0.02185278  0.78085771  1.60284281]], 

 Item matrix is [[ 0.76272816  0.40509458  1.55232354  0.2313212   1.5738888 ]
 [ 1.31746201 -0.38140755  0.67661845  2.1374201   0.3951

## 使用tensorflow来实现特征值分解

In [14]:

import tensorflow as tf
# Square (5 x 5) rating matrix used as input for the eigendecomposition.
R_square = np.array(
    [
        [4, 0, 1, 0, 5],
        [1, 2, 1, 3, 5],
        [4, 5, 3, 1, 0],
        [2, 3, 0, 2, 5],
        [5, 1, 4, 0, 0],
    ]
)

In [15]:

# TensorFlow's eig only works on square matrices, which is why the 5 x 5
# R_square is used here. The Python `float` dtype corresponds to float32 in TensorFlow.
R_tf = tf.convert_to_tensor(R_square, dtype=tf.float32)
# M receives the eigenvalues and N the eigenvectors (complex-valued results).
M, N = tf.linalg.eig(R_tf)


tf.Tensor(
[ 0.5489885-1.8442735e-08j -1.7649044-1.9651887e+00j
 -1.7649044+1.9651892e+00j  2.9616973-3.0399860e-08j
 11.019123 +6.4537744e-09j], shape=(5,), dtype=complex64) tf.Tensor(
[[-0.50880915+0.00434304j  0.28622308-0.30314255j -0.38530356+0.15924889j
  -0.56747574+0.20958522j  0.35919613-0.00087918j]
 [ 0.20886296-0.00178275j  0.05577823-0.29754078j -0.29786214-0.05403553j
   0.42365906-0.15646952j  0.4714589 -0.00115396j]
 [ 0.61511016-0.0052503j  -0.1284342 +0.6106717j   0.61630976+0.09786545j
   0.6015964 -0.2221869j   0.530147  -0.0012976j ]
 [-0.5167199 +0.00441057j  0.38827828+0.04287843j -0.09845588+0.37802777j
   0.12856518-0.04748289j  0.4572356 -0.00111915j]
 [ 0.22815934-0.00194746j -0.42346954+0.11488671j  0.25839487-0.35462296j
  -0.00247677+0.00091478j  0.39821923-0.00097469j]], shape=(5, 5), dtype=complex64)


In [16]:
# Show M, the first value returned by tf.eig(R_tf) (the eigenvalues).
print(M)

tf.Tensor(
[ 0.5489885-1.8442735e-08j -1.7649044-1.9651887e+00j
 -1.7649044+1.9651892e+00j  2.9616973-3.0399860e-08j
 11.019123 +6.4537744e-09j], shape=(5,), dtype=complex64)


In [17]:
# Show N, the second value returned by tf.eig(R_tf) (the eigenvectors).
print(N)

tf.Tensor(
[[-0.50880915+0.00434304j  0.28622308-0.30314255j -0.38530356+0.15924889j
  -0.56747574+0.20958522j  0.35919613-0.00087918j]
 [ 0.20886296-0.00178275j  0.05577823-0.29754078j -0.29786214-0.05403553j
   0.42365906-0.15646952j  0.4714589 -0.00115396j]
 [ 0.61511016-0.0052503j  -0.1284342 +0.6106717j   0.61630976+0.09786545j
   0.6015964 -0.2221869j   0.530147  -0.0012976j ]
 [-0.5167199 +0.00441057j  0.38827828+0.04287843j -0.09845588+0.37802777j
   0.12856518-0.04748289j  0.4572356 -0.00111915j]
 [ 0.22815934-0.00194746j -0.42346954+0.11488671j  0.25839487-0.35462296j
  -0.00247677+0.00091478j  0.39821923-0.00097469j]], shape=(5, 5), dtype=complex64)


In [10]:
# Minimal TensorFlow sanity check: add 1 to every element of a 2 x 2 integer tensor.
x = tf.constant([[1, 2], [3, 4]])
y = x + 1
print(y)

tf.Tensor(
[[2 3]
 [4 5]], shape=(2, 2), dtype=int32)
