# Cross-validation on a model

In [109]:
# Load (or reload) the autoreload extension; mode 2 re-imports edited modules
# so changes to imported code are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [137]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import (cross_val_score, KFold,
                                     LeaveOneOut, ShuffleSplit, 
                                    )


### K-fold cross-validation

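Performing k-fold cross-validation with scikit-learn's `cross_val_score`. Note that when `cv` is an integer and the estimator is a classifier, `cross_val_score` uses stratified k-fold splits, so each fold keeps roughly the same class proportions as the full dataset.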

In [111]:
# Load the iris dataset; the returned object exposes `data`, `target` and
# `feature_names`, which the cells below use.
iris = load_iris()

In [112]:
# Tabular view of the feature matrix, one named column per measurement.
pd.DataFrame(data=iris['data'], columns=iris['feature_names'])

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [113]:
# Feature matrix and integer class labels of the iris dataset.
iris = load_iris()
X, y = iris['data'], iris['target']

# max_iter=1000 presumably gives the solver enough iterations to converge
# on this data -- TODO confirm against the scikit-learn default.
log_reg = LogisticRegression(max_iter=1000)
# One accuracy score per fold (five folds).
score = cross_val_score(log_reg, X, y, cv=5)

# .tolist() converts the scores to plain Python floats, so the printed list
# stays readable on NumPy >= 2, where list(score) would render every entry
# as np.float64(...).
print(f'Cross validation score : {score.tolist()}')
print(f'Cross validation mean : {np.mean(score)}')

Cross validation score : [0.9666666666666667, 1.0, 0.9333333333333333, 0.9666666666666667, 1.0]
Cross validation mean : 0.9733333333333334


### Stratified K-Fold cross-validation

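Why stratification (or shuffling) matters: the iris labels are sorted by class, so plain `KFold` without shuffling builds folds whose class proportions differ sharply from the full dataset. The cells below show this effect and how shuffling fixes it; stratified k-fold addresses the same problem by preserving the class proportions in every fold.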

In [114]:
# Class labels; note that they are ordered by class (all 0s, then 1s, then 2s).
iris.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [116]:
# Five folds taken in dataset order (no shuffling requested).
kfold = KFold(n_splits=5)
score = cross_val_score(estimator=log_reg, X=X, y=y, cv=kfold)
print(score)
print(score.mean())

[1.         1.         0.86666667 0.93333333 0.83333333]
0.9266666666666665


In [127]:
# With three unshuffled folds over class-sorted labels, every test fold holds
# a single class that is absent from the training folds, hence the zero scores.
kfold = KFold(n_splits=3)
score = cross_val_score(estimator=log_reg, X=X, y=y, cv=kfold)
print(score)
print(np.mean(score))

[0. 0. 0.]
0.0


In [131]:
# Shuffling before splitting mixes the classes across folds; the fixed
# random_state keeps the split reproducible between runs.
kfold = KFold(n_splits=5, shuffle=True, random_state=7)
score = cross_val_score(estimator=log_reg, X=X, y=y, cv=kfold)
print(score)
print(np.mean(score))

[0.86666667 1.         1.         0.96666667 1.        ]
0.9666666666666668


### Leave-one-out cross-validation

In [134]:
# One split per sample: each sample is held out once as the test set, so the
# number of scores equals the number of samples.
loo = LeaveOneOut()
score = cross_val_score(estimator=log_reg, X=X, y=y, cv=loo)
print(score.size)
print(np.mean(score))

150
0.9666666666666667


### Shuffle-split cross-validation

In [143]:
# Five independent random splits; each trains on 80% of the samples and tests
# on 10%, leaving the remaining 10% unused in that split.
splits = ShuffleSplit(n_splits=5, train_size=0.8, test_size=0.1, random_state=7)
score = cross_val_score(estimator=log_reg, X=X, y=y, cv=splits, n_jobs=-1)
print(score)
print(np.mean(score))

[0.86666667 1.         1.         1.         1.        ]
0.9733333333333334
