## 1-1-1. 數值型數據

In [1]:
from sklearn.preprocessing import StandardScaler
import numpy as np

# Toy training matrix: three samples (rows) by three features (columns).
X_train = np.array([
    [1.0, -1.0, 2.0],
    [2.0, 0.0, 0.0],
    [0.0, 1.0, -1.0],
])

# Create the standardization scaler, then fit it to the training data.
scaler = StandardScaler()
scaler.fit(X_train)

# Show the per-feature means learned by the fit.
scaler.mean_

array([1.        , 0.        , 0.33333333])

In [2]:
scaler.scale_   # per-feature standard deviation learned by the fit

array([0.81649658, 0.81649658, 1.24721913])

In [3]:
X_train_std = scaler.transform(X_train)    # apply the fitted standardization to the training data
X_train_std

array([[ 0.        , -1.22474487,  1.33630621],
       [ 1.22474487,  0.        , -0.26726124],
       [-1.22474487,  1.22474487, -1.06904497]])

In [4]:
# Sanity check on the standardized data. No axis is given, so both statistics
# are taken over the whole array rather than per feature; pass axis=0 to
# inspect each column separately.
print("Mean:", np.mean(X_train_std))
print("Standard deviation:", np.std(X_train_std))

Mean: 4.9343245538895844e-17
Standard deviation: 1.0


In [5]:
# A single additional sample, given as a 1 x 3 nested list.
X_test = [[-1.0, 1.0, 0.0]]
# Transform it with the scaler that was already fitted on X_train (no refit).
scaler.transform(X_test)

array([[-2.44948974,  1.22474487, -0.26726124]])

In [6]:
from sklearn.preprocessing import MinMaxScaler

# Toy training matrix: three samples (rows) by three features (columns).
X_train = np.array([
    [1.0, -1.0, 2.0],
    [2.0, 0.0, 0.0],
    [0.0, 1.0, -1.0],
])

# Create the min-max scaler (default settings).
min_max_scaler = MinMaxScaler()

# Fit and transform in a single call, then display the rescaled data.
X_train_minmax = min_max_scaler.fit_transform(X_train)
X_train_minmax

array([[0.5       , 0.        , 1.        ],
       [1.        , 0.5       , 0.33333333],
       [0.        , 1.        , 0.        ]])

In [7]:
# Change the target range to [1, 10]. feature_range is documented as a
# (min, max) tuple, and scikit-learn releases that validate parameter types
# reject a list here, so pass a tuple.
X_train_minmax = MinMaxScaler(feature_range=(1, 10)).fit_transform(X_train)
X_train_minmax

array([[ 5.5,  1. , 10. ],
       [10. ,  5.5,  4. ],
       [ 1. , 10. ,  1. ]])

In [8]:
from sklearn.preprocessing import RobustScaler

# New toy matrix for the robust-scaling demo: three samples by three features.
X_train = np.array([
    [1.0, -2.0, 2.0],
    [-2.0, 1.0, 3.0],
    [4.0, 1.0, -2.0],
])

# Create the robust scaler and fit it. With its defaults, RobustScaler centers
# each feature on its median and scales by its interquartile range.
scale = RobustScaler()
scale.fit(X_train)

# Display the rescaled training data.
scale.transform(X_train)

array([[ 0. , -2. ,  0. ],
       [-1. ,  0. ,  0.4],
       [ 1. ,  0. , -1.6]])

In [9]:
from sklearn.preprocessing import Normalizer

# Toy training matrix: three samples (rows) by three features (columns).
X_train = np.array([
    [1.0, -1.0, 2.0],
    [2.0, 0.0, 0.0],
    [0.0, 1.0, -1.0],
])

# Create the L2 normalizer: each sample (row) is rescaled to unit Euclidean length.
norm = Normalizer(norm='l2')
norm.fit(X_train)

# Display the normalized rows.
norm.transform(X_train)

array([[ 0.40824829, -0.40824829,  0.81649658],
       [ 1.        ,  0.        ,  0.        ],
       [ 0.        ,  0.70710678, -0.70710678]])

In [10]:
# L1 normalization rescales each sample (row) so that the sum of the absolute
# values of its features equals 1.
norm = Normalizer(norm='l1')
norm.fit(X_train)
norm.transform(X_train)

array([[ 0.25, -0.25,  0.5 ],
       [ 1.  ,  0.  ,  0.  ],
       [ 0.  ,  0.5 , -0.5 ]])