In [2]:
import numpy as np
from sklearn.model_selection import KFold
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

# Iris feature matrix and class labels
data = load_iris()
X = data.data
y = data.target

# Classifiers to compare, keyed by the display name used in the report
models = {}
models['Logistic Regression'] = LogisticRegression(max_iter=200)
models['Decision Tree'] = DecisionTreeClassifier(max_depth=5, random_state=42)

# 5-fold splitter; shuffling uses a fixed seed so the folds are reproducible
kf = KFold(shuffle=True, n_splits=5, random_state=42)

# Run K-Fold CV for every model and report rough bias/variance indicators.
# NOTE: training error and test error are only proxies for bias and variance;
# this is not a formal bias-variance decomposition.
for model_name, model in models.items():
    # Per-fold error rates (1 - score) on the training and held-out splits
    train_errors = []
    test_errors = []

    for train_idx, test_idx in kf.split(X):
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        # The same estimator object is refit on each fold's training split
        model.fit(X_train, y_train)

        # Training and validation error for this fold
        train_errors.append(1 - model.score(X_train, y_train))
        test_errors.append(1 - model.score(X_test, y_test))

    mean_train_error = np.mean(train_errors)
    mean_test_error = np.mean(test_errors)
    # Spread of the held-out error across folds. This is a standard
    # deviation (np.std), so it is labelled as such in the report below.
    std_test_error = np.std(test_errors)
    mean_accuracy = 1 - mean_test_error

    print(f"{model_name}:")
    print(f"  Mean Accuracy: {mean_accuracy:.4f}")
    print(f"  Bias (Training Error): {mean_train_error:.4f}")
    print(f"  Variance (Test Error Mean): {mean_test_error:.4f}")
    print(f"  Std Dev of Test Errors: {std_test_error:.4f}\n")

Logistic Regression:
  Mean Accuracy: 0.9733
  Bias (Training Error): 0.0250
  Variance (Test Error Mean): 0.0267
  Variance of Test Errors: 0.0249

Decision Tree:
  Mean Accuracy: 0.9533
  Bias (Training Error): 0.0033
  Variance (Test Error Mean): 0.0467
  Variance of Test Errors: 0.0267

