In [13]:
import numpy as np
# 生成数据
data = np.random.rand(3,5)*5 + 10
data

array([[14.49412114, 12.37059832, 13.11980341, 10.76157212, 14.245108  ],
       [13.22830011, 12.01749795, 11.02090057, 10.64297338, 13.79575407],
       [11.04347467, 14.39160388, 10.15996402, 10.37777733, 10.63542052]])

In [14]:
# 中心化
data_mean = np.mean(data,1)
X = data - data_mean.reshape(3,1)
# 计算协方差矩阵
cov = np.dot(X , X.T) /(X.shape[1]-1)
cov

array([[ 2.30093379,  1.7454179 , -0.30716866],
       [ 1.7454179 ,  1.85859795,  0.22450088],
       [-0.30716866,  0.22450088,  3.0533298 ]])

In [15]:
# 求特征向量和特征值
val , vec = np.linalg.eig(cov)
val , vec

(array([0.27075376, 3.84804291, 3.09406487]),
 array([[-0.65762722,  0.75332049, -0.00588872],
        [ 0.74161235,  0.64874089,  0.17072312],
        [-0.13242948, -0.10790503,  0.98530145]]))

In [16]:
# 按贡献排序
contri = val/np.sum(val)
index = np.argsort(contri)[::-1]

In [17]:
# 输出投影矩阵
W = vec[:,index]
print('投影矩阵：\n' + str(W))

投影矩阵：
[[ 0.75332049 -0.00588872 -0.65762722]
 [ 0.64874089  0.17072312  0.74161235]
 [-0.10790503  0.98530145 -0.13242948]]


# 验证

In [18]:
from sklearn import decomposition
pca = decomposition.PCA(n_components=3)
pca.fit(data.T)

PCA(copy=True, iterated_power='auto', n_components=3, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)

In [19]:
# 验证协方差矩阵
print('计算得到的协方差矩：\n' + str(cov))
print('PCA库函数计算协方差矩阵：\n' + str(pca.get_covariance()))


计算得到的协方差矩：
[[ 2.30093379  1.7454179  -0.30716866]
 [ 1.7454179   1.85859795  0.22450088]
 [-0.30716866  0.22450088  3.0533298 ]]
PCA库函数计算协方差矩阵：
[[ 2.30093379  1.7454179  -0.30716866]
 [ 1.7454179   1.85859795  0.22450088]
 [-0.30716866  0.22450088  3.0533298 ]]


In [20]:
# 验证特征值
print('计算得到的特征值：\n' + str(val[index]))
print('PCA库函数计算特征值：\n' + str(pca.explained_variance_))



计算得到的特征值：
[3.84804291 3.09406487 0.27075376]
PCA库函数计算特征值：
[3.84804291 3.09406487 0.27075376]


In [21]:
# 验证贡献率
print('计算得到的贡献率：\n' + str(contri[index]))
print('PCA库函数计算贡献率：\n' + str(pca.explained_variance_ratio_))

计算得到的贡献率：
[0.5334974  0.42896496 0.03753763]
PCA库函数计算贡献率：
[0.5334974  0.42896496 0.03753763]


In [22]:
# 验证投影矩阵
print('计算得到的投影矩阵：\n' + str(pca.components_.T))
print('PCA库函数计算投影矩阵：\n' + str(W))

计算得到的投影矩阵：
[[-0.75332049 -0.00588872  0.65762722]
 [-0.64874089  0.17072312 -0.74161235]
 [ 0.10790503  0.98530145  0.13242948]]
PCA库函数计算投影矩阵：
[[ 0.75332049 -0.00588872 -0.65762722]
 [ 0.64874089  0.17072312  0.74161235]
 [-0.10790503  0.98530145 -0.13242948]]


可以看到两者的计算结果相同