In [24]:
# Cross validation
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

# Load the iris data and create the classifier that the following cells reuse.
iris = load_iris()
logreg = LogisticRegression(multi_class='ovr', solver='lbfgs')

# cv=5 produces five scores, one per held-out fold.
scores = cross_val_score(logreg, iris.data, iris.target, cv=5)

# Report the per-fold scores followed by their mean.
print(
    "Cross-validation scores: {}".format(scores),
    "Average cross-validation score: {:.3f}".format(scores.mean()),
    sep="\n",
)

Cross-validation scores: [0.86666667 0.96666667 0.93333333 0.93333333 1.        ]
Average cross-validation score: 0.940


In [36]:
# KFold with its default settings (no shuffle argument is passed), tried for
# several fold counts.
# NOTE(review): the recorded run scores 0.0 on every fold for n_splits=3 --
# presumably because the iris samples are ordered by class, so each unshuffled
# fold holds a class that is missing from its training part; confirm.
from sklearn.model_selection import KFold

banner = "*****************n_splits = [{}]*****************\n"
for n in (3, 5, 7):
    kfold = KFold(n_splits=n)
    scores = cross_val_score(logreg, iris.data, iris.target, cv=kfold)
    print(
        banner.format(n),
        "Cross-validation scores:\n{}".format(scores),
        "Average: {:.2f}\n".format(scores.mean()),
        sep="\n",
    )

*****************n_splits = [3]*****************

Cross-validation scores:
[0. 0. 0.]
Average: 0.00

*****************n_splits = [5]*****************

Cross-validation scores:
[1.         0.9        0.5        0.93333333 0.63333333]
Average: 0.79

*****************n_splits = [7]*****************

Cross-validation scores:
[1.         1.         0.81818182 0.52380952 0.95238095 0.80952381
 0.85714286]
Average: 0.85



In [39]:
# KFold with shuffling. This is plain KFold with shuffle=True, not
# StratifiedKFold: the samples are shuffled before being cut into folds, and
# random_state=0 keeps that shuffle the same on every run.
from sklearn.model_selection import KFold

banner = "*****************n_splits = [{}]*****************\n"
for n in (3, 5, 7):
    kfold = KFold(n_splits=n, shuffle=True, random_state=0)
    scores = cross_val_score(logreg, iris.data, iris.target, cv=kfold)
    print(
        banner.format(n),
        "Cross-validation scores:\n{}".format(scores),
        "Average: {:.2f}\n".format(scores.mean()),
        sep="\n",
    )

*****************n_splits = [3]*****************

Cross-validation scores:
[0.96 0.86 0.96]
Average: 0.93

*****************n_splits = [5]*****************

Cross-validation scores:
[0.96666667 0.83333333 0.93333333 1.         0.93333333]
Average: 0.93

*****************n_splits = [7]*****************

Cross-validation scores:
[0.95454545 0.95454545 0.77272727 0.95238095 1.         1.
 0.9047619 ]
Average: 0.93



In [41]:
# Leave-one-out cross-validation: the recorded run performs 150 iterations,
# i.e. one per iris sample.
from sklearn.model_selection import LeaveOneOut

loo = LeaveOneOut()
scores = cross_val_score(logreg, iris.data, iris.target, cv=loo)

# One score per iteration; report how many there were and their mean.
print(
    "Number of cv iterations: {}".format(len(scores)),
    "Mean accuracy: {:.2f}".format(scores.mean()),
    sep="\n",
)

Number of cv iterations: 150
Mean accuracy: 0.94


In [47]:
# Shuffle-split cross-validation: 10 independent random splits, each using
# half of the data for training and the other half for testing.
from sklearn.model_selection import ShuffleSplit

# random_state pins the random splits so that re-running the cell reproduces
# the same scores (consistent with the seeded KFold / train_test_split cells).
# Without it the printed mean changes from run to run.
shuffle_split = ShuffleSplit(test_size=.5, train_size=.5, n_splits=10, random_state=0)
scores = cross_val_score(logreg, iris.data, iris.target, cv=shuffle_split)
print("Number of cv iterations: {}".format(len(scores)))
print("Mean accuracy: {:.2f}".format(scores.mean()))

Number of cv iterations: 10
Mean accuracy: 0.92


In [59]:
from sklearn.datasets import make_blobs
from sklearn.model_selection import GroupKFold

# Grouped cross-validation on a small synthetic dataset.
# Note: this rebinds the notebook-level `logreg` to a default LogisticRegression.
logreg = LogisticRegression()
X, y = make_blobs(n_samples=12, random_state=0)

# One group label per sample (12 labels for 12 samples); GroupKFold uses them
# to decide which samples are kept together when splitting.
groups = [0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3]

# `groups` must be passed by keyword: it is a keyword-only parameter of
# cross_val_score in current scikit-learn, and the positional form raises
# TypeError there. The keyword form works on older releases as well.
scores = cross_val_score(logreg, X, y, groups=groups, cv=GroupKFold(n_splits=3))
print("Cross-validation scores:\n{}".format(scores))

Cross-validation scores:
[0.75       0.8        0.66666667]




In [70]:
# Grid search
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

# Split iris into a training part and a held-out test part; random_state=0
# keeps the partition fixed between runs.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, random_state=0
)
print("Size of training set: {}  size of test set: {}".format(len(X_train), len(X_test)))

# Both hyperparameters are searched over the same six values.
param_values = [0.001, 0.01, 0.1, 1, 10, 100]

# Manual grid search: fit one SVC per (gamma, C) pair and remember the first
# pair that reaches the highest score.
# NOTE(review): each candidate is scored on (X_test, y_test) and that same
# score picks the winner, so "Best score" is measured on data that was used
# for model selection and is likely optimistic as an estimate for new data.
best_score = 0
for gamma in param_values:
    for C in param_values:
        svm = SVC(gamma=gamma, C=C).fit(X_train, y_train)
        score = svm.score(X_test, y_test)
        # Strict '>' means ties keep the earlier combination.
        if score > best_score:
            best_score, best_parameters = score, {'C': C, 'gamma': gamma}

print("Best score: {:.2f}".format(best_score))
print("Best parameters: {}".format(best_parameters))

Size of training set: 112  size of test set: 38
Best score: 0.97
Best parameters: {'C': 100, 'gamma': 0.001}
