In [1]:
from sklearn.datasets import load_iris

iris = load_iris()
X, y = iris.data, iris.target
X[:2, :]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2]])

In [2]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size=0.2)
X_train.shape

(120, 4)

In [3]:
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression

# 產生包含標準化與 PCA 的預處理
preprocess = FeatureUnion([('std', StandardScaler()), 
                           ('pca', PCA())])
# 產生管線
pipe = Pipeline([('preprocess', preprocess), 
                 ('clf', LogisticRegression())])

In [4]:
# 產生候選參數，進行網格搜尋
import numpy as np
from sklearn.model_selection import GridSearchCV

params = [{'preprocess__pca__n_components': [1, 2, 3], 
           'clf__penalty': ['l1', 'l2'], 
           'clf__C': np.logspace(0, 4, 10)}]
model = GridSearchCV(pipe, params, cv=5, verbose=1, scoring='accuracy', n_jobs=-1)
best_model = model.fit(X_train, y_train)

Fitting 5 folds for each of 60 candidates, totalling 300 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:    1.5s finished


In [5]:
model.best_estimator_

Pipeline(memory=None,
         steps=[('preprocess',
                 FeatureUnion(n_jobs=None,
                              transformer_list=[('std',
                                                 StandardScaler(copy=True,
                                                                with_mean=True,
                                                                with_std=True)),
                                                ('pca',
                                                 PCA(copy=True,
                                                     iterated_power='auto',
                                                     n_components=3,
                                                     random_state=None,
                                                     svd_solver='auto', tol=0.0,
                                                     whiten=False))],
                              transformer_weights=None, verbose=False)),
                ('clf',
                 LogisticRe

In [6]:
print(model.best_params_)
print(model.best_score_)

{'clf__C': 2.7825594022071245, 'clf__penalty': 'l2', 'preprocess__pca__n_components': 3}
0.975


In [7]:
from sklearn.metrics import accuracy_score

y_pred = model.predict(X_test)
print('預測準確率 =', accuracy_score(y_test, y_pred))

預測準確率 = 1.0
