In [1]:
from sklearn.pipeline import Pipeline
from sklearn.pipeline import FeatureUnion
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelBinarizer
from sklearn.decomposition import PCA
from sklearn.datasets import load_digits
# Load the digit-image dataset.
digits = load_digits()

from sklearn.base import BaseEstimator, TransformerMixin
# Custom transformer.
class Data_Value_Selector(BaseEstimator, TransformerMixin):
    """Transformer that extracts a single entry from its input by key.

    ``Data_or_Value`` is the key used to index the input in ``transform``
    (for example ``"data"`` or ``"target"``).
    """

    def __init__(self, Data_or_Value):
        self.Data_or_Value = Data_or_Value

    def fit(self, X, y=None):
        """Learn nothing; return this instance so calls can be chained."""
        return self

    def transform(self, X):
        """Return ``X`` indexed by the configured key."""
        selected_key = self.Data_or_Value
        return X[selected_key]

from sklearn.base import TransformerMixin     # Mixin that adds fit_transform to a class defining fit and transform
class MyLabelBinarizer(TransformerMixin):
    """Wrapper around ``LabelBinarizer`` whose methods accept an extra ``y``.

    All constructor arguments are forwarded unchanged to ``LabelBinarizer``;
    the wrapped instance is stored as ``self.encoder``.
    """

    def __init__(self, *args, **kwargs):
        wrapped = LabelBinarizer(*args, **kwargs)
        self.encoder = wrapped

    def fit(self, x, y=0):
        """Fit the wrapped encoder on ``x``; ``y`` is accepted but ignored."""
        self.encoder.fit(x)
        return self

    def transform(self, x, y=0):
        """Return the wrapped encoder's transform of ``x``; ``y`` is ignored."""
        binarized = self.encoder.transform(x)
        return binarized

    
# Feature branch: select the "data" entry, min-max scale it, then reduce
# dimensionality with PCA configured with n_components=0.95.
data_pipeline = Pipeline([
    ("selector", Data_Value_Selector("data")),
    ("mms_scaler", MinMaxScaler()),
    ("reduce_dim", PCA(n_components=0.95)),
])

# Label branch: select the "target" entry and binarize the labels.
target_pipeline = Pipeline([
    ("selector", Data_Value_Selector("target")),
    ("LB", MyLabelBinarizer()),
])


# Combine both branches; the output holds the feature-branch columns first,
# followed by the label-branch columns.
full_pipeline = FeatureUnion(transformer_list=[
    ("data_pipeline", data_pipeline),
    ("target_pipeline", target_pipeline),
])

# Fit the combined pipeline and transform the dataset.
digits_preprocessed = full_pipeline.fit_transform(digits)

# With a fractional n_components, PCA chooses the number of components from
# the data, so read the feature/label split column from the fitted PCA step
# rather than hard-coding it.
fitted_data_pipeline = dict(full_pipeline.transformer_list)["data_pipeline"]
n_data_columns = fitted_data_pipeline.named_steps["reduce_dim"].n_components_

# Print the processed features and labels.
print("data")
data = digits_preprocessed[:, :n_data_columns]
print(data)
print("\ntarget")
target = digits_preprocessed[:, n_data_columns:]
print(target)


data
[[ 6.11373907e-02  1.37811679e+00 -5.38755428e-01 ... -3.98974797e-02
  -8.94987928e-02 -1.46056457e-01]
 [ 3.75737245e-01 -1.35466438e+00  2.26963701e-01 ... -7.16229078e-02
   1.03928293e-01 -4.78094629e-02]
 [ 3.70527050e-01 -6.75975417e-01  1.68658225e-01 ...  1.44700236e-01
   2.34921622e-02  8.47645586e-05]
 ...
 [ 6.13133843e-01 -5.21929774e-01  3.53804168e-01 ...  1.91234380e-02
   3.30240950e-01  2.26333300e-01]
 [-2.30329063e-01  8.25274547e-01 -5.98480322e-01 ...  8.82088443e-02
  -5.33289972e-02  8.52922499e-02]
 [-2.29021935e-02  3.48186674e-01  7.03695228e-01 ...  3.41370920e-02
   1.93202541e-01  3.69251250e-01]]

target
[[1. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 1. 0.]]
