Grid search with cross-validation
---

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Build the estimator: standardize the features first, then fit a
# one-vs-rest logistic regression with the liblinear solver.
# NOTE(review): recent scikit-learn releases deprecate the `multi_class`
# argument - confirm against the installed version.
pipe = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('logreg', LogisticRegression(solver='liblinear', multi_class='ovr')),
    ]
)

In [None]:
from sklearn.model_selection import GridSearchCV
import numpy as np

# Search space: ten log-spaced values of C between 1e-4 and 1e4, routed to
# the 'logreg' step of the pipeline through the "step__param" naming scheme
grid = dict(logreg__C=np.logspace(-4, 4, num=10))

# Grid search over the pipeline with 5-fold cross-validation; training
# scores are recorded as well so they can be compared to the test scores
grid_cv = GridSearchCV(
    estimator=pipe,
    param_grid=grid,
    cv=5,
    return_train_score=True,
)

In [None]:
from sklearn import datasets

# Load the iris data set shipped with scikit-learn
iris = datasets.load_iris()

# Separate the feature matrix (X) from the target vector (y)
X, y = iris['data'], iris['target']

# Run the grid search on the full data set
grid_cv.fit(X, y)

# List the entries stored in "cv_results_"
grid_cv.cv_results_.keys()

In [None]:
# Standard deviation of the test scores across the cross-validation folds.
# The "std_test_score" entry is read here so that the displayed values
# match this description ("mean_test_score" holds the fold averages instead).
grid_cv.cv_results_['std_test_score']

In [None]:
import pandas as pd

# Turn the "cv_results_" dictionary into a table
cv_results = pd.DataFrame(grid_cv.cv_results_)

# Keep the score summaries and the tested C values, best mean test score first
cols = [
    'mean_test_score',
    'std_test_score',
    'mean_train_score',
    'std_train_score',
    'param_logreg__C',
]
cv_results.loc[:, cols].sort_values(by='mean_test_score', ascending=False)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

# Plot the mean test score against the tested C values (log-scaled x-axis).
# The curve carries a label so that plt.legend() has an entry to draw;
# without a labeled artist the legend is empty and matplotlib emits a warning.
plt.semilogx(
    cv_results['param_logreg__C'],
    cv_results['mean_test_score'],
    label='test curve',
)
# Axis labels so the figure can be read on its own
plt.xlabel('C')
plt.ylabel('mean test score')
plt.legend()
plt.show()

In [None]:
# Option 1: predict with the estimator exposed by the "best_estimator_"
# attribute (only the last expression of the cell is displayed)
best_pipe = grid_cv.best_estimator_
best_pipe.predict(X)

# Option 2: call predict on the grid search object itself
grid_cv.predict(X)

In [None]:
from sklearn.linear_model import LogisticRegressionCV

# Logistic regression with built-in cross-validation over three C values
# (0.1, 1, 10), using 5 folds, one-vs-rest and the liblinear solver
logreg_cv = LogisticRegressionCV(
    Cs=[0.1, 1, 10],
    cv=5,
    solver='liblinear',
    multi_class='ovr',
)

In [None]:
from sklearn.preprocessing import scale

# Standardize the features, then fit the estimator; the trailing semicolon
# suppresses the cell output.
# NOTE: scale() is applied to the whole data set before the estimator's
# internal cross-validation, unlike `pipe`, where the scaler is a pipeline step.
X_rescaled = scale(X)
logreg_cv.fit(X_rescaled, y);

In [None]:
# Cross-validation scores stored under key 0 of "scores_", i.e. for the
# first iris class (setosa, target label 0)
logreg_cv.scores_[0]

In [None]:
# Cross-validation scores stored under key 1 of "scores_", i.e. for the
# second iris class (versicolor, target label 1)
logreg_cv.scores_[1]

In [None]:
# Average the versicolor (key 1) scores over the folds (axis 0),
# leaving one mean score per tested C value
np.mean(logreg_cv.scores_[1], axis=0)

In [None]:
# Print the "C_" attribute of the fitted estimator
# (presumably the C value(s) selected by cross-validation - confirm in the scikit-learn docs)
print(logreg_cv.C_)