In [50]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import display

import seaborn as sns

sns.set_style("whitegrid")

%matplotlib inline

from sklearn.datasets import load_breast_cancer
from sklearn.datasets import load_boston

from sklearn.model_selection import train_test_split

from sklearn.pipeline import make_pipeline

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

from sklearn.preprocessing import PolynomialFeatures

from sklearn.model_selection import GridSearchCV

from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestClassifier

In [24]:
# Load the Boston housing regression dataset bundled with scikit-learn;
# the returned object exposes the feature matrix as `.data` and the
# regression target as `.target` (both are used by the split below).
# NOTE(review): `load_boston` has been removed from recent scikit-learn
# releases — confirm the installed version still provides it before re-running.
boston = load_boston()

In [25]:
# Hold out a test partition for the final evaluation; the fixed seed makes
# the shuffle (and therefore every score below) repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    boston.data,
    boston.target,
    random_state=0,
)

In [26]:
# Chain: standardise features -> expand to polynomial terms -> ridge regression.
# make_pipeline names each step after its lowercased class name
# ('standardscaler', 'polynomialfeatures', 'ridge'); the grid-search keys
# rely on those names.
pipe = make_pipeline(
    StandardScaler(),
    PolynomialFeatures(),
    Ridge(),
)

In [45]:
# Hyper-parameter search space, keyed as '<pipeline step>__<parameter>'.
param_grid = {
    # Polynomial expansion degrees 1 through 5 (upper bound of range is exclusive).
    'polynomialfeatures__degree': list(range(1, 6)),
    # Ridge regularisation strengths on a log scale.
    'ridge__alpha': [
        0.001, 0.01, 0.1,
        1, 10, 100,
    ],
}

In [46]:
# Exhaustive search over every (degree, alpha) combination in param_grid,
# scored with 5-fold cross-validation, run in a single process with
# progress messages enabled.
grid = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    cv=5,
    n_jobs=1,
    verbose=1,
)

In [52]:
%time
grid.fit(X_train, y_train)

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.77 µs
Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:   12.3s finished


GridSearchCV(cv=5, error_score='raise',
       estimator=Pipeline(memory=None,
     steps=[('standardscaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('polynomialfeatures', PolynomialFeatures(degree=2, include_bias=True, interaction_only=False)), ('ridge', Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001))]),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'polynomialfeatures__degree': [1, 2, 3, 4, 5], 'ridge__alpha': [0.001, 0.01, 0.1, 1, 10, 100]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=1)

In [48]:
# Mean cross-validated score of each of the 30 candidates.
# Candidates appear to be ordered with `ridge__alpha` varying fastest within
# each `polynomialfeatures__degree` (6 alphas per degree): the reported best
# candidate (degree=2, alpha=10) corresponds to index 10 of this array.
# Large negative values indicate candidates that score far worse than the rest.
grid.cv_results_['mean_test_score']

array([  7.21693532e-01,   7.21696998e-01,   7.21731264e-01,
         7.22037796e-01,   7.23071429e-01,   7.07721505e-01,
         7.38643533e-01,   7.37567941e-01,   7.44131805e-01,
         7.76821372e-01,   8.21037777e-01,   8.06300643e-01,
        -1.81613964e+02,  -4.92238780e+01,  -3.07615973e+00,
         2.76564395e-01,   6.33627107e-01,   5.87806623e-01,
        -2.84980362e+02,  -2.79733810e+02,  -2.17132538e+02,
        -1.14940302e+01,  -5.59830964e-01,  -2.85967675e+00,
        -2.50364265e+04,  -2.50402522e+04,  -2.48536853e+04,
        -2.11498632e+04,  -6.14718719e+03,  -6.74731419e+02])

In [49]:
# Summarise the search: the winning hyper-parameters, their mean
# cross-validated score, and the score on the held-out test split.
# No `scoring` was passed to GridSearchCV, so the estimator's default scorer
# is used (presumably R^2 for Ridge — confirm).
print("Best params:\n{}\n".format(grid.best_params_))
print("Best validate score: {:.2f}".format(grid.best_score_))
# grid.score delegates to the estimator refit with the best parameters.
print("Test score: {:.2f}".format(grid.score(X_test, y_test)))

Best params:
{'polynomialfeatures__degree': 2, 'ridge__alpha': 10}

Best validate score: 0.82
Test score: 0.77
