<a href="https://colab.research.google.com/github/waghvaishnav/Model-Evaluation-Fine-Tuning-in-the-Machine-Learning-Toolkit/blob/main/K_Fold_Stratified_K_Fold_Cross_Validation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# K - Fold Cross Validation :

In [34]:
from sklearn.model_selection import KFold
from sklearn.datasets import make_classification
import pandas as pd

model = LogisticRegression()

x, y = make_classification(n_samples=500, n_features=5, n_classes=2, random_state=42)

kf = KFold(n_splits=5,shuffle=True,random_state=42)
scores = []
# manually splitting the data.
for train_index,test_index in kf.split(x,y):
  x_train,x_test = x[train_index],x[test_index]
  y_train,y_test = y[train_index],y[test_index]

model.fit(x_train,y_train)
scores.append(model.score(x_test,y_test))



# Evaluate Logistic Regression :

In [41]:
from sklearn.model_selection import cross_val_score
import numpy as np
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()


score = cross_val_score(model,x,y,cv=kf,scoring="roc_auc")                      # use of cross_val_score().
# np.average(score)
score

array([0.96467282, 0.97454545, 0.93309295, 0.91187271, 0.9644    ])

In [31]:
from sklearn.tree import DecisionTreeClassifier

dt_model = DecisionTreeClassifier()

dt_scores = cross_val_score(dt_model,x,y,cv=kf)

np.average(dt_scores)

np.float64(0.916)

# Evaluate Random Forest :

In [32]:
# implementation of the cross_val_score over RandomForestClassifier.

from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(n_estimators=51)

rf_scores = cross_val_score(rf_model,x,y,cv=kf)

np.average(rf_scores)

np.float64(0.9460000000000001)

In [33]:
# implementation of the cross_validate(). over RandomForestClassifier.
from sklearn.model_selection import cross_validate

validate_score = cross_validate(rf_model,x,y,cv=kf,scoring=["accuracy","roc_auc"])

print(np.average(validate_score['test_accuracy']))

validate_score

0.9440000000000002


{'fit_time': array([0.07197285, 0.06373549, 0.06878972, 0.06456113, 0.07328367]),
 'score_time': array([0.00925779, 0.00916553, 0.0088203 , 0.00848174, 0.00888467]),
 'test_accuracy': array([0.96, 0.96, 0.93, 0.91, 0.96]),
 'test_roc_auc': array([0.99357688, 0.9779798 , 0.98958333, 0.97144023, 0.9848    ])}

# Stratified K - Fold Cross Validation :

In [44]:
# Evaluate Stratified K - Fold Cross Validation using RandomForestClassifier :

from sklearn.model_selection import StratifiedKFold,cross_val_score
from sklearn.datasets import make_classification
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

RF_model = RandomForestClassifier(n_estimators=100)


x, y = make_classification(n_samples=500, n_features=5, n_classes=2, random_state=42,weights=[0.9,0.1])

skf = StratifiedKFold(n_splits=5,shuffle=True,random_state=42)

for train_index,test_index in skf.split(x,y):
  x_train,x_test = x[train_index],x[test_index]
  y_train,y_test = y[train_index],y[test_index]

val_sco = cross_val_score(RF_model,x,y,cv=skf,scoring="accuracy")

print(val_sco.mean())

0.966


In [12]:
import numpy as np
np.average(val_sco)

np.float64(0.968)

In [42]:
from sklearn.model_selection import cross_validate


validate = cross_validate(RF_model,x,y,cv=skf,scoring=["roc_auc","accuracy"])
validate

{'fit_time': array([0.16069579, 0.14725852, 0.12398934, 0.12060976, 0.12345648]),
 'score_time': array([0.02661538, 0.01559925, 0.01348567, 0.01288271, 0.01407719]),
 'test_roc_auc': array([0.99459784, 0.988     , 0.9984    , 0.9678    , 0.9872    ]),
 'test_accuracy': array([0.97, 0.95, 0.97, 0.9 , 0.95])}