In [None]:
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
from sklearn.datasets import load_breast_cancer

# Load scikit-learn's bundled breast-cancer dataset and pull out the
# feature matrix and the label vector in one step.
data = load_breast_cancer()
X, y = data.data, data.target

In [None]:
from sklearn.model_selection import ShuffleSplit

# One shuffled 80/20 train/test partition; the fixed seed makes it repeatable.
ss = ShuffleSplit(n_splits=1, train_size=0.8, test_size=0.2, random_state=0)

# split() yields (train, test) index pairs; with n_splits=1 there is exactly
# one pair, so it can be unpacked directly.
[(train_index, test_index)] = ss.split(X, y)

X_train, y_train = X[train_index], y[train_index]
X_test, y_test = X[test_index], y[test_index]

In [None]:
from sklearn.decomposition import PCA

# Learn the whitening PCA projection from the training split only, then
# apply that same fitted projection to both splits.
pca = PCA(whiten=True).fit(X_train)
X_train_pca, X_test_pca = pca.transform(X_train), pca.transform(X_test)

In [None]:
from sklearn.linear_model import LogisticRegression

# This classifier is fitted on the raw, unscaled features further down.
# With the default iteration cap (max_iter=100) the solver typically stops
# before converging on such data and emits a ConvergenceWarning, which makes
# the raw-vs-PCA comparison unreliable. A generous cap lets it converge.
clf = LogisticRegression(max_iter=10000)

In [None]:
# Baseline: train on the raw features and report the score on the held-out
# split (fit() returns the estimator itself, so the calls can be chained).
clf.fit(X_train, y_train).score(X_test, y_test)

In [None]:
# Same classifier, retrained on the PCA-whitened features and scored on the
# PCA-transformed held-out split.
clf.fit(X_train_pca, y_train).score(X_test_pca, y_test)

In [None]:
from sklearn.pipeline import Pipeline

# Chain whitening PCA and logistic regression into a single estimator so the
# two steps are fitted and applied together.
estimators = [
    ('pca', PCA(whiten=True)),
    ('clf', LogisticRegression()),
]
pipe = Pipeline(steps=estimators)

In [None]:
# Fit every pipeline step on the training split.
pipe.fit(X=X_train, y=y_train)

In [None]:
# Score the fitted pipeline on the held-out split.
pipe.score(X=X_test, y=y_test)

In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC

# Second pipeline: min-max scale the features, then classify with an
# RBF-kernel SVM using a very large C.
estimators = [
    ('mms', MinMaxScaler()),
    ('clf', SVC(kernel='rbf', C=1e10)),
]
pipe = Pipeline(steps=estimators)

In [None]:
# Fit the scaler + SVM pipeline on the training split.
pipe.fit(X=X_train, y=y_train)

In [None]:
# Score the scaler + SVM pipeline on the held-out split.
pipe.score(X=X_test, y=y_test)

In [None]:
# Rebuild the PCA + logistic-regression pipeline; it is the estimator that
# the hyper-parameter search tunes.
estimators = [
    ('pca', PCA(whiten=True)),
    ('clf', LogisticRegression()),
]
pipe = Pipeline(steps=estimators)

In [None]:
from sklearn.model_selection import GridSearchCV

# Pipeline hyper-parameters are addressed as '<step>__<parameter>', so
# 'clf__C' targets the C parameter of the 'clf' step.
param = {
    'clf__C': [1e-5, 1e-3, 1e-2, 1, 1e2, 1e5, 1e10],
}

# Exhaustively cross-validate every candidate C on the training split.
gs = GridSearchCV(estimator=pipe, param_grid=param)
gs.fit(X_train, y_train)

In [None]:
# Best parameter setting, its cross-validation score, and the corresponding
# fitted pipeline.
(
    gs.best_params_,
    gs.best_score_,
    gs.best_estimator_,
)

In [None]:
# Score the search's best estimator on the held-out split.
gs.score(X=X_test, y=y_test)

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC

# Candidate values for the SVM regularisation strength.
C_range = [1e-3, 1e-2, 1, 1e2, 1e3]

# Search space spanning both pipeline steps; keys use the
# '<step>__<parameter>' convention.
param = {'clf__C': C_range,
         'clf__kernel': ['linear', 'rbf'],
         'pca__whiten': [True, False],
         'pca__n_components': [30, 20, 10]}

estimators = [('pca', PCA()),
              ('clf', SVC())]

pipe = Pipeline(estimators)

# Randomized search only evaluates a random subset of the combinations above.
# Without a fixed random_state that subset (and therefore best_params_,
# best_score_ and the test score reported below) changes on every run, so
# the search is seeded to keep the notebook reproducible.
gs = RandomizedSearchCV(pipe, param, n_jobs=-1, verbose=2, random_state=0)
gs.fit(X_train, y_train)

In [None]:
# Best sampled parameter setting, its cross-validation score, and the
# corresponding fitted pipeline.
(
    gs.best_params_,
    gs.best_score_,
    gs.best_estimator_,
)

In [None]:
# Score the best estimator found by the search on the held-out split.
gs.score(X=X_test, y=y_test)

In [None]:
# Display the raw cross-validation results recorded on the fitted search
# object `gs`.
gs.cv_results_