In [1]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris

In [2]:
def k_fold_cv(model, X, y, k, random_state=None):
    """Estimate classification metrics with shuffled k-fold cross-validation.

    The samples are shuffled once and split into ``k`` contiguous folds whose
    sizes differ by at most one. Each fold is used once as the held-out test
    set while ``model`` is refit on the remaining samples.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. It is refit in place
        on every fold, so on return it holds the fit from the last fold.
    X : array-like
        Features, one row per sample. Converted with ``np.asarray``.
    y : array-like
        Labels, one per row of ``X``. Converted with ``np.asarray``.
    k : int
        Number of folds; must satisfy ``2 <= k <= len(y)``.
    random_state : int or None, optional
        Seed for the shuffle. With ``None`` (the default) the shuffle draws
        from NumPy's global random state, so ``np.random.seed`` controls it.

    Returns
    -------
    dict
        Maps ``'Accuracy'``, ``'Precision'``, ``'Recall'`` and ``'F1 Score'``
        to a ``(mean, std)`` tuple computed across the folds. Precision,
        recall and F1 are computed with ``average='weighted'``.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` have different lengths, or if ``k`` is outside
        ``[2, len(y)]`` (which would leave a fold with no train or test data).
    """
    # Accept lists / DataFrames as well as ndarrays: fancy indexing below
    # needs real arrays.
    X = np.asarray(X)
    y = np.asarray(y)
    n_samples = len(y)

    if len(X) != n_samples:
        raise ValueError(
            f"X and y must have the same number of samples, got {len(X)} and {n_samples}"
        )
    if not 2 <= k <= n_samples:
        raise ValueError(
            f"k must be between 2 and the number of samples ({n_samples}), got {k}"
        )

    # Shuffle once up front so the contiguous folds are random subsets.
    # The global RNG is used when no seed is given to keep the old behaviour.
    rng = np.random if random_state is None else np.random.default_rng(random_state)
    indices = np.arange(n_samples)
    rng.shuffle(indices)
    X = X[indices]
    y = y[indices]

    scores = {'Accuracy': [], 'Precision': [], 'Recall': [], 'F1 Score': []}

    # array_split spreads the remainder over the first folds instead of
    # dumping all leftover samples into the last one.
    for test_data in np.array_split(np.arange(n_samples), k):
        train_mask = np.ones(n_samples, dtype=bool)
        train_mask[test_data] = False

        X_train, X_test = X[train_mask], X[test_data]
        y_train, y_test = y[train_mask], y[test_data]

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        scores['Accuracy'].append(accuracy_score(y_test, y_pred))
        scores['Precision'].append(precision_score(y_test, y_pred, average='weighted'))
        scores['Recall'].append(recall_score(y_test, y_pred, average='weighted'))
        scores['F1 Score'].append(f1_score(y_test, y_pred, average='weighted'))

    metrics = {
        name: (np.mean(values), np.std(values))
        for name, values in scores.items()
    }

    return metrics

In [3]:
# Load the Iris dataset and separate the feature matrix from the class labels.
iris = load_iris()
X, y = iris.data, iris.target
# Last expression of the cell: display the shape of the feature matrix.
X.shape

(150, 4)

In [4]:
# k_fold_cv shuffles the samples with NumPy's global RNG; seed it so the
# fold assignment (and therefore the reported metrics) is reproducible on
# every Restart & Run All.
np.random.seed(42)

# random_state pins the estimator's own source of randomness as well.
model = DecisionTreeClassifier(random_state=42)

# Number of cross-validation folds.
k = 10

metrics = k_fold_cv(model, X, y, k)

In [5]:
# Refit on the complete dataset and predict the very same samples: nothing is
# held out here, so the printed value is accuracy on the training data.
y_pred = model.fit(X, y).predict(X)
print(f"Before Validation :{accuracy_score(y, y_pred)}")

Before Validation :1.0


In [7]:
# Report every cross-validated metric as "mean ± std" across the folds.
print("After Cross Validation ")
for metric in metrics:
    mean, std = metrics[metric]
    print(f"{metric}: {mean:.2f} ± {std:.2f}")

After Cross Validation 
Accuracy: 0.95 ± 0.05
Precision: 0.95 ± 0.05
Recall: 0.95 ± 0.05
F1 Score: 0.95 ± 0.05
