# 利用Python实现SVD

In [1]:
import numpy as np

## 利用linalg线性代数工具箱进行SVD处理

In [2]:
# Factor a small 2x2 example whose second row is 7x the first (rank 1),
# so only one singular value is non-zero.
U, Sigma, VT = np.linalg.svd(
    [[1, 1],
     [7, 7]]
)

In [3]:
# Show U, the first array returned by np.linalg.svd.
U

array([[-0.14142136, -0.98994949],
       [-0.98994949,  0.14142136]])

In [4]:
# Show Sigma: np.linalg.svd returns the singular values as a 1-D array,
# not as a full diagonal matrix.
Sigma

array([10.,  0.])

Sigma 仅返回对角元素（即奇异值），而不是完整的对角矩阵。

In [5]:
# Show VT, the third array returned by np.linalg.svd.
VT

array([[-0.70710678, -0.70710678],
       [-0.70710678,  0.70710678]])

### 在一个更大的数据集上进行更多的分解

In [6]:
def loadExData():
    """Return the example data matrix used by the SVD demos.

    Returns:
        list[list[int]]: a freshly built nested list with 7 rows and
        5 columns; every call constructs a new list object.
    """
    rows = [
        [1, 1, 1, 0, 0],
        [2, 2, 2, 0, 0],
        [1, 1, 1, 0, 0],
        [5, 5, 5, 0, 0],
        [1, 1, 0, 2, 2],
        [0, 0, 0, 3, 3],
        [0, 0, 0, 1, 1],
    ]
    return rows

In [7]:
# Build the 7x5 example matrix as a plain nested list.
Data = loadExData()

In [8]:
# Decompose the larger example matrix. This rebinds U, Sigma and VT, so the
# results of the earlier 2x2 example are no longer available afterwards.
U, Sigma, VT = np.linalg.svd(Data)

In [9]:
# Show the singular values of Data. In the output below the last two are
# effectively zero (about 1e-15 and 1e-31).
Sigma

array([9.72140007e+00, 5.29397912e+00, 6.84226362e-01, 1.50962387e-15,
       1.15387192e-31])

### 重构原始矩阵

In [10]:
# Keep only the three largest singular values, laid out as a 3x3 diagonal
# matrix. np.asmatrix is used because np.mat was removed in NumPy 2.0; the
# np.matrix type is kept on purpose so that `*` applied to Sig3 in later
# cells still means matrix multiplication.
Sig3 = np.asmatrix(np.diag(Sigma[:3]))

In [12]:
# Rebuild an approximation of Data from the first three singular triplets:
# (7x3) * (3x3) * (3x5). Because Sig3 is an np.matrix, `*` performs matrix
# multiplication here (not element-wise), and the result is an np.matrix.
U[:,:3] * Sig3 * VT[:3, :]

matrix([[ 1.00000000e+00,  1.00000000e+00,  1.00000000e+00,
         -2.84366098e-16, -2.94015497e-16],
        [ 2.00000000e+00,  2.00000000e+00,  2.00000000e+00,
          4.47489534e-16,  4.28190736e-16],
        [ 1.00000000e+00,  1.00000000e+00,  1.00000000e+00,
          3.09573758e-16,  2.99924358e-16],
        [ 5.00000000e+00,  5.00000000e+00,  5.00000000e+00,
         -1.47703573e-16, -1.95842150e-16],
        [ 1.00000000e+00,  1.00000000e+00, -5.70229711e-16,
          2.00000000e+00,  2.00000000e+00],
        [-7.49390630e-17,  9.96896569e-16, -1.34350906e-15,
          3.00000000e+00,  3.00000000e+00],
        [-8.18314124e-17,  2.75447132e-16, -3.13743829e-16,
          1.00000000e+00,  1.00000000e+00]])

## 基于协同过滤的推荐引擎

### 相似度计算

In [20]:
# Euclidean-distance similarity
def ecludSim(inA, inB):
    """Map the Euclidean distance between two vectors to a similarity score.

    Args:
        inA, inB: NumPy arrays/matrices of the same shape (they must support
            ``inA - inB``).

    Returns:
        ``1 / (1 + distance)``: 1.0 when the inputs are identical, and
        approaching 0 as the distance grows.
    """
    distance = np.linalg.norm(inA - inB)
    return 1.0 / (1.0 + distance)

# Pearson-correlation similarity
def pearsSim(inA, inB):
    """Return the Pearson correlation of two vectors rescaled to [0, 1].

    Args:
        inA, inB: equally sized vectors (1-D arrays or column matrices).

    Returns:
        ``0.5 + 0.5 * r`` where ``r`` is the Pearson correlation coefficient,
        or 1.0 when ``inA`` has fewer than three entries.
    """
    # Too few samples: the original design treats the vectors as fully similar.
    if len(inA) < 3:
        return 1.0
    # rowvar=0 makes corrcoef treat each column as a variable, so the two
    # column vectors yield a 2x2 matrix whose [0][1] entry is r(inA, inB).
    correlation = np.corrcoef(inA, inB, rowvar=0)[0][1]
    return 0.5 + 0.5 * correlation

# Cosine similarity
def cosSim(inA, inB):
    """Return the cosine of the angle between two vectors rescaled to [0, 1].

    Args:
        inA, inB: vectors with the same number of elements. Column matrices
            (as before), row vectors and plain 1-D arrays are all accepted;
            the inputs are flattened before use.

    Returns:
        ``0.5 + 0.5 * cos(theta)``: 1.0 for the same direction, 0.5 for
        orthogonal vectors, 0.0 for opposite directions. If either vector
        has zero norm the division is 0/0 and the result is NaN.

    Raises:
        ValueError: if the flattened inputs differ in length.
    """
    # Flatten to 1-D float arrays so the inner product is a true scalar.
    # The previous float(inA.T * inB) converted a 1x1 matrix to a Python
    # float, which is deprecated since NumPy 1.25, and it failed outright
    # for 1-D arrays and row vectors.
    vecA = np.asarray(inA, dtype=float).ravel()
    vecB = np.asarray(inB, dtype=float).ravel()
    num = np.dot(vecA, vecB)
    denom = np.linalg.norm(vecA) * np.linalg.norm(vecB)
    return 0.5 + 0.5 * (num / denom)

测试欧氏距离

In [21]:
# Wrap the example data in an np.matrix so that column slices such as
# myMat[:, 0] stay 2-D column vectors. np.asmatrix is used because np.mat
# was removed in NumPy 2.0.
myMat = np.asmatrix(loadExData())

In [22]:
# Euclidean-distance similarity between column 0 and column 4 of myMat.
ecludSim(myMat[:, 0], myMat[:, 4])

0.13367660240019172

In [23]:
# Column 0 against itself: the distance is 0, so the similarity is 1.0.
ecludSim(myMat[:, 0], myMat[:, 0])

1.0

测试余弦相似度

In [24]:
# Cosine similarity between column 0 and column 4 of myMat.
cosSim(myMat[:, 0], myMat[:, 4])

0.5472455591261534

In [26]:
# Column 0 against itself: the result is 1.0 up to floating-point rounding.
cosSim(myMat[:, 0], myMat[:, 0])

0.9999999999999999

测试皮尔逊相关系数

In [27]:
# Pearson-correlation similarity between column 0 and column 4 of myMat.
pearsSim(myMat[:, 0], myMat[:, 4])

0.23768619407595815

In [28]:
# Column 0 against itself: the correlation is 1, so the similarity is 1.0.
pearsSim(myMat[:, 0], myMat[:, 0])

1.0