# 示例：基于SVD的图像压缩

In [1]:
import numpy as np
from numpy import linalg as la

## 图像压缩函数

In [2]:
# Print a matrix as rows of 0/1 characters.
def printMat(inMat, thresh=0.8):
    """Print a 2-D matrix as text, one line per row, one digit per entry.

    Each entry is converted to float and compared against ``thresh``:
    entries strictly greater than ``thresh`` print as ``1`` (dark), all
    others print as ``0`` (light).

    Args:
        inMat: 2-D object exposing ``.shape`` and ``[i, k]`` indexing
            (e.g. ``numpy.ndarray`` or ``numpy.matrix``).
        thresh: cut-off separating "1" entries from "0" entries.
    """
    # Use the matrix's own dimensions instead of assuming a 32x32 image.
    numRows, numCols = inMat.shape
    for i in range(numRows):
        # The matrix may hold floats (e.g. after SVD reconstruction), so
        # each value is binarised against the threshold before printing.
        print(''.join('1' if float(inMat[i, k]) > thresh else '0'
                      for k in range(numCols)))

# Image compression via truncated SVD.
# numSV: number of singular values to keep, thresh: binarisation threshold
def imgCompress(numSV=3, thresh=0.8, filename='0_5.txt'):
    """Reconstruct a text-encoded image from its leading singular values.

    The file is read as one image row per line and one digit per pixel.
    The original image and its rank-``numSV`` reconstruction are both
    printed through ``printMat``.

    Args:
        numSV: number of leading singular values kept in the
            reconstruction. Values larger than the number of singular
            values available are clamped to that number.
        thresh: threshold forwarded to ``printMat`` for both printouts.
        filename: path of the text image to read; defaults to the file
            this function originally had hard-coded.

    Raises:
        ValueError: if ``numSV`` is negative or the file has no image rows.
    """
    if numSV < 0:
        raise ValueError("numSV must be non-negative, got %r" % (numSV,))

    # The context manager guarantees the file handle is closed; blank
    # lines (such as a trailing newline at end of file) are skipped.
    with open(filename) as imgFile:
        pixelRows = [[int(ch) for ch in line.strip()]
                     for line in imgFile if line.strip()]
    if not pixelRows:
        raise ValueError("no image rows found in %r" % (filename,))

    # Image matrix as a plain ndarray; matrix products below use `@`.
    imgArr = np.array(pixelRows)
    print("****original matrix******")
    # Print the original image matrix.
    printMat(imgArr, thresh)

    # SVD: Sigma comes back as a 1-D array of singular values.
    U, Sigma, VT = la.svd(imgArr)
    # Never ask for more singular values than the decomposition produced.
    numSV = min(numSV, len(Sigma))
    # Diagonal matrix holding only the retained singular values.
    SigRecon = np.diag(Sigma[:numSV])
    # Rebuild the image from the truncated factors.
    reconMat = U[:, :numSV] @ SigRecon @ VT[:numSV, :]
    print("****reconstructed matrix using %d singular values******" % numSV)
    # Print the reconstructed (compressed) image matrix.
    printMat(reconMat, thresh)

In [3]:
# Run the demo with numSV=2, i.e. rebuild the image from two singular values.
imgCompress(2)

****original matrix******
00000000000000110000000000000000
00000000000011111100000000000000
00000000000111111110000000000000
00000000001111111111000000000000
00000000111111111111100000000000
00000001111111111111110000000000
00000000111111111111111000000000
00000000111111100001111100000000
00000001111111000001111100000000
00000011111100000000111100000000
00000011111100000000111110000000
00000011111100000000011110000000
00000011111100000000011110000000
00000001111110000000001111000000
00000011111110000000001111000000
00000011111100000000001111000000
00000001111100000000001111000000
00000011111100000000001111000000
00000001111100000000001111000000
00000001111100000000011111000000
00000000111110000000001111100000
00000000111110000000001111100000
00000000111110000000001111100000
00000000111110000000011111000000
00000000111110000000111111000000
00000000111111000001111110000000
00000000011111111111111110000000
00000000001111111111111110000000
00000000001111111111111110000000
00000000000111111

# 总结
&emsp;&emsp;SVD可以用来逼近矩阵并从中提取重要特征。通过保留矩阵80%-90%的能量（即奇异值平方和的80%-90%），就可以得到重要的特征并去掉噪声。在大规模数据集上，可以通过离线方式来进行SVD分解和相似度计算。