In [2]:
# 非方阵的奇异值分解

# Singular-value decomposition
from numpy import array
from scipy.linalg import svd
# Build a 3 x 2 (tall, non-square) input matrix.
A = array([[1, 2], [3, 4], [5, 6]])
print(A)

# Factor A; svd hands back three arrays: U, the 1-D vector of singular
# values s, and VT. Show them one per line, in that order.
U, s, VT = svd(A)
print("SVD result: ")
print(U, s, VT, sep="\n")

[[1 2]
 [3 4]
 [5 6]]
SVD result: 
[[-0.2298477   0.88346102  0.40824829]
 [-0.52474482  0.24078249 -0.81649658]
 [-0.81964194 -0.40189603  0.40824829]]
[9.52551809 0.51430058]
[[-0.61962948 -0.78489445]
 [-0.78489445  0.61962948]]


In [5]:
# SVD 的还原  ==> 行数 大于 列数
# 注意：svd 返回的 s 是长度为 n 的一维向量，不是矩阵；还原时要构造 m*n 的 Sigma，其前 n 行是对角阵 diag(s)，后面的行都是零；
# U，VT 都是方阵；

from numpy import array
from numpy import diag
from numpy import dot
from numpy import zeros
from scipy.linalg import svd
# define a 3 x 2 (tall) matrix
A = array([[1, 2], [3, 4], [5, 6]])
print(A)
# Singular-value decomposition; the singular values come back as the 1-D
# vector s, not as a matrix.
U, s, VT = svd(A)


# Sigma must have the same shape as A so that U . Sigma . VT is defined.
# Only its leading k x k block is non-zero, where k = min(m, n) is the number
# of singular values. Sizing the block from min(A.shape) instead of from the
# column count keeps this cell correct for wide matrices (m < n) as well.
# create m x n Sigma matrix
Sigma = zeros(A.shape)
k = min(A.shape)
# populate the leading k x k block of Sigma with the singular values
Sigma[:k, :k] = diag(s)


# reconstruct matrix: U . Sigma . VT should reproduce A up to rounding
B = U.dot(Sigma.dot(VT))
print(B)

[[1 2]
 [3 4]
 [5 6]]
[[1. 2.]
 [3. 4.]
 [5. 6.]]


In [6]:
# 对于方阵，SVD 还原很容易，因为 diag(s) 本身就是与 A 同形状的对角方阵，不需要补零

# Reconstruct SVD
from numpy import array
from numpy import diag
from numpy import dot
from scipy.linalg import svd

# Square 3 x 3 input.
A = array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(A)

# Decompose; s is the 1-D vector of singular values.
U, s, VT = svd(A)
# For a square input diag(s) already has A's shape, so no zero padding is needed.
Sigma = diag(s)
# Rebuild A as U . (Sigma . VT), evaluating the right-hand product first.
B = dot(U, dot(Sigma, VT))
print(B)

[[1 2 3]
 [4 5 6]
 [7 8 9]]
[[1. 2. 3.]
 [4. 5. 6.]
 [7. 8. 9.]]


In [7]:
# pseudoinverse 伪逆：由于 A 不是方阵，所以逆矩阵没有意义，但是可以定义伪逆（pseudoinverse）
# 通过 SVD 分解 A = U * Sigma * VT 后，记 D 为把 Sigma 对角线上的奇异值取倒数得到的 m*n 矩阵，则 (V * DT * UT) * (U * Sigma * VT) = I（A 列满秩时） ==> A 的 pseudoinverse 为 V * DT * UT；


# Pseudoinverse
from numpy import array
from numpy.linalg import pinv
# 4 x 2 input: it is not square, so it has no ordinary inverse.
A = array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6], [0.7, 0.8]])
print(A)

# Let NumPy compute the pseudoinverse directly; for this A the result is 2 x 4.
B = pinv(A)
print(B)

[[0.1 0.2]
 [0.3 0.4]
 [0.5 0.6]
 [0.7 0.8]]
[[-1.00000000e+01 -5.00000000e+00  1.42385628e-14  5.00000000e+00]
 [ 8.50000000e+00  4.50000000e+00  5.00000000e-01 -3.50000000e+00]]


In [8]:
# 手工计算 pseudoinverse

# Pseudoinverse via SVD
from numpy import array
from numpy.linalg import svd
from numpy import zeros
from numpy import diag
# define a 4 x 2 matrix
A = array([
    [0.1, 0.2],
    [0.3, 0.4],
    [0.5, 0.6],
    [0.7, 0.8]])

print("input: ", A)

# calculate svd; s is the 1-D vector of singular values
U, s, VT = svd(A)

# Reciprocals of the singular values. Values at or below the cutoff are
# treated as exactly zero and left at 0 instead of being inverted: a plain
# 1.0 / s divides by zero (or by rounding noise) when A is rank deficient.
# The relative cutoff 1e-15 is the documented default `rcond` of
# numpy.linalg.pinv.
cutoff = 1e-15 * s.max()
keep = s > cutoff
d = zeros(s.shape)
d[keep] = 1.0 / s[keep]

# create m x n D matrix (same shape as A)
D = zeros(A.shape)
# populate the leading k x k block, k = number of singular values, so the
# construction does not depend on A having more rows than columns
k = s.shape[0]
D[:k, :k] = diag(d)

# calculate pseudoinverse: V . D^T . U^T
B = VT.T.dot(D.T).dot(U.T)

print("psedoinverse: ", B)

input:  [[0.1 0.2]
 [0.3 0.4]
 [0.5 0.6]
 [0.7 0.8]]
psedoinverse:  [[-1.00000000e+01 -5.00000000e+00  1.42578328e-14  5.00000000e+00]
 [ 8.50000000e+00  4.50000000e+00  5.00000000e-01 -3.50000000e+00]]


In [10]:
# SVD for Dimensionality Reduction

# Data with a large number of features, such as more features (columns) than observations (rows) may be reduced to a smaller subset of features that are most relevant to the prediction problem.

# In natural language processing, this approach can be used on matrices of word occurrences or word frequencies in documents and is called Latent Semantic Analysis or Latent Semantic Indexing.

# 通过 SVD 对数据进行压缩，取 top k largest singular values，也即：
# Sigma 只保留前两列，那么，根据矩阵乘法要求，VT 只保留前两行；
# U 还是方阵，Sigma 的行数保持不变；

# 列 大于 行 的情况: 3行， 10列 

from numpy import array
from numpy import diag
from numpy import zeros
from scipy.linalg import svd

# define a 3 x 10 (wide) matrix. Its rows are evenly spaced
# (row2 - row1 == row3 - row2), so it has rank 2.
A = array([
    [1,2,3,4,5,6,7,8,9,10],
    [11,12,13,14,15,16,17,18,19,20],
    [21,22,23,24,25,26,27,28,29,30]])
print("input:", A)

# Singular-value decomposition; s is the 1-D vector of singular values
U, s, VT = svd(A)

# create m x n Sigma matrix
Sigma = zeros(A.shape)
# populate the leading k x k block with the singular values, where
# k = min(m, n) is how many singular values there are. Using min(A.shape)
# rather than the row count keeps this correct for tall inputs too.
k = min(A.shape)
Sigma[:k, :k] = diag(s)

# keep only the n_elements largest singular values: the first n_elements
# columns of Sigma and, to keep the product defined, the matching rows of VT
n_elements = 2
Sigma = Sigma[:, :n_elements]
VT = VT[:n_elements, :]

# reconstruct from the truncated factors; because A has rank 2, two
# components reproduce it up to rounding
B = U.dot(Sigma.dot(VT))
print("output: ", B)


# transform: This is a dense summary of the matrix or a projection.
T = U.dot(Sigma)
print("output2: ", T)

# Same projection computed from A itself: A = U . Sigma . VT and the rows of
# VT are orthonormal, so A . VT^T (with the truncated VT) equals U . Sigma
# (with the truncated Sigma).
T = A.dot(VT.T)
print("output3: ", T)


input: [[ 1  2  3  4  5  6  7  8  9 10]
 [11 12 13 14 15 16 17 18 19 20]
 [21 22 23 24 25 26 27 28 29 30]]
output:  [[ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10.]
 [11. 12. 13. 14. 15. 16. 17. 18. 19. 20.]
 [21. 22. 23. 24. 25. 26. 27. 28. 29. 30.]]
output2:  [[-18.52157747   6.47697214]
 [-49.81310011   1.91182038]
 [-81.10462276  -2.65333138]]
output3:  [[-18.52157747   6.47697214]
 [-49.81310011   1.91182038]
 [-81.10462276  -2.65333138]]


In [11]:
from numpy import array
from sklearn.decomposition import TruncatedSVD
# define a 3 x 10 array
A = array([
    [1,2,3,4,5,6,7,8,9,10],
    [11,12,13,14,15,16,17,18,19,20],
    [21,22,23,24,25,26,27,28,29,30]])
print("input: ", A)

# svd: keep the top-k largest singular values of A; k = 2 here.
# random_state pins the seed of TruncatedSVD's default randomized solver so
# that Restart & Run All reproduces the same numbers.
# NOTE: this assignment rebinds the name `svd`, which the earlier cells bind
# to the scipy / numpy svd function; those cells re-import it before use.
svd = TruncatedSVD(n_components=2, random_state=0)
svd.fit(A)
# project A onto the two fitted components
result = svd.transform(A)
print("out: ", result)

input:  [[ 1  2  3  4  5  6  7  8  9 10]
 [11 12 13 14 15 16 17 18 19 20]
 [21 22 23 24 25 26 27 28 29 30]]
out:  [[18.52157747  6.47697214]
 [49.81310011  1.91182038]
 [81.10462276 -2.65333138]]
