* 均值移除
$ \frac{原始样本-样本均值}{样本标准差} $

In [None]:
# 均值移除
from __future__ import unicode_literals
import numpy as np
raw_samples = np.array([
    [3, -1.5, 2, -5.4],
    [0, 4, -0.3, 2.1],
    [1, 3.3, -1.9, -4.3]
])
print(raw_samples)

In [4]:
# 求每一列的平均值
print(raw_samples.mean(axis=0))

[ 1.33333333  1.93333333 -0.06666667 -2.53333333]


In [5]:
# 求每一列的标准差
print(raw_samples.std(axis=0))

[1.24721913 2.44449495 1.60069429 3.30689515]


In [10]:
std_samples = raw_samples.copy()
for col in std_samples.T:
    col_mean = col.mean()
    col_std = col.std()
    col -= col_mean
    col /= col_std
    
print('均值移除后的矩阵：\n',std_samples, end = '\n')
print('均值移除后的矩阵的均值\n', std_samples.mean(axis=0), end = '\n')
print('均值移除后的矩阵的标准差\n', std_samples.std(axis=0), end = '\n')

均值移除后的矩阵：
 [[ 1.33630621 -1.40451644  1.29110641 -0.86687558]
 [-1.06904497  0.84543708 -0.14577008  1.40111286]
 [-0.26726124  0.55907936 -1.14533633 -0.53423728]]
均值移除后的矩阵的均值
 [ 5.55111512e-17 -1.11022302e-16 -7.40148683e-17 -7.40148683e-17]
均值移除后的矩阵的标准差
 [1. 1. 1. 1.]


# 使用sklearn

In [7]:
import sklearn.preprocessing as sp

In [11]:
std_samples = sp.scale(raw_samples)    # 返回均值移除后的样本矩阵
print('sklearn-均值移除后的矩阵：\n',std_samples, end = '\n')
print('sklearn-均值移除后的矩阵的均值\n', std_samples.mean(axis=0), end = '\n')
print('sklearn-均值移除后的矩阵的标准差\n', std_samples.std(axis=0), end = '\n')

sklearn-均值移除后的矩阵：
 [[ 1.33630621 -1.40451644  1.29110641 -0.86687558]
 [-1.06904497  0.84543708 -0.14577008  1.40111286]
 [-0.26726124  0.55907936 -1.14533633 -0.53423728]]
sklearn-均值移除后的矩阵的均值
 [ 5.55111512e-17 -1.11022302e-16 -7.40148683e-17 -7.40148683e-17]
sklearn-均值移除后的矩阵的标准差
 [1. 1. 1. 1.]


# 数值缩放

In [18]:
mms_samples = raw_samples.copy()
for col in mms_samples.T:
    col_min = col.min()
    col_max = col.max()
    a = np.array([
        [col_min, 1], 
        [col_max, 0]
    ])*-
    b = np.array([0, 1])
    x = np.linalg.solve(a, b)   # 解线性方程组
    col *= x[0]
    col += x[1]
print(mms_samples)

[[1.         0.         1.95       0.        ]
 [0.         1.375      0.8        3.57142857]
 [0.33333333 1.2        0.         0.52380952]]


* 使用sklearn

In [15]:
mms = sp.MinMaxScaler(feature_range=(0, 1))
mms_samples = mms.fit_transform(raw_samples)
print(mms_samples)

[[1.         0.         1.         0.        ]
 [0.         1.         0.41025641 1.        ]
 [0.33333333 0.87272727 0.         0.14666667]]
