In [100]:
import tensorflow as tf
from matplotlib import pyplot as plt
import numpy as np


def input_prep_fn(x):
    """Turn a batch of 28x28 images into binarised float32 arrays.

    The batch is reshaped to ``(N, 28, 28, 1)``, divided by 255, and every
    value strictly greater than 0.5 becomes 1.0 while all others become 0.0.
    """
    target_shape = (x.shape[0], 28, 28, 1)
    scaled = x.reshape(target_shape) / 255.
    binarised = np.where(scaled > .5, 1.0, 0.0)
    return binarised.astype('float32')


# MNIST is used here purely as demo data.
(X0, L0), (X1, L1) = tf.keras.datasets.mnist.load_data()

# Binarise the images of both splits.
X0, X1 = input_prep_fn(X0), input_prep_fn(X1)

# One-hot encode the integer labels of both splits.
num_classes = 10
L0, L1 = (tf.one_hot(labels, num_classes) for labels in (L0, L1))

# code from: https://github.com/christianversloot/machine-learning-articles/blob/main/how-to-use-k-fold-cross-validation-with-keras.md
# Make sure your classes are one-hot encoded before using these metrics.
# The order of this list matters: score_to_json pairs it positionally with
# the values returned by model.evaluate.
MODEL_PERFORMANCE_METRICS = [
    tf.keras.metrics.CategoricalAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
    tf.keras.metrics.AUC(name='auc'),
    tf.keras.metrics.AUC(name='prc', curve='PR'),  # precision-recall curve
]

The next cell can be skipped on a first read: it only defines a basic MLP used to demonstrate training.

In [101]:
def create_mlp():
    """Build a fresh, uncompiled MLP classifier for 28x28x1 inputs.

    The network flattens the input, applies a 256-unit ReLU layer followed by
    dropout (rate 0.2), and ends in a 10-way softmax layer.
    """
    layers = tf.keras.layers
    stack = [
        layers.Input(shape=(28, 28, 1)),
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.Dropout(rate=0.2),
        layers.Dense(10, activation='softmax'),
    ]
    return tf.keras.Sequential(stack)

def train_mlp(mlp, x=None, y=None, batch_size=100, num_epochs=10):
    """Compile ``mlp`` and fit it, returning the model and its training history.

    Args:
        mlp: Keras model to compile and train, e.g. the result of ``create_mlp()``.
        x: Training inputs. When omitted (together with ``y``) the module-level
            ``X0`` is used, which is the original behaviour.
        y: One-hot encoded training targets. When omitted (together with ``x``)
            the module-level ``L0`` is used.
        batch_size: Batch size passed to ``fit``.
        num_epochs: Number of epochs passed to ``fit``.

    Returns:
        A ``(mlp, history)`` tuple, where ``history`` is the object returned by
        ``mlp.fit``.

    Raises:
        ValueError: If only one of ``x`` and ``y`` is supplied.
    """
    # Supplying just one of x/y would silently pair caller data with the
    # default data of the other kind, so reject that combination outright.
    if (x is None) != (y is None):
        raise ValueError("x and y must be provided together or both omitted")
    if x is None:
        x, y = X0, L0

    mlp.compile(
        optimizer='adam',
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=MODEL_PERFORMANCE_METRICS,
    )

    # validation_split holds back 20% of the supplied data for validation.
    history = mlp.fit(
        x=x,
        y=y,
        batch_size=batch_size,
        epochs=num_epochs,
        validation_split=0.2,
        verbose=1,
    )

    return mlp, history

In [103]:
from sklearn.model_selection import KFold
import pandas as pd
import numpy as np

NUM_FOLDS = 5
FOLD_SEED = 42  # fixed so the shuffled fold assignment is identical on every run

# Pool both MNIST splits into a single dataset that the folds are drawn from.
x = np.concatenate((X0, X1), axis=0)
y = np.concatenate((L0, L1), axis=0)
kfold = KFold(n_splits=NUM_FOLDS, shuffle=True, random_state=FOLD_SEED)

# One dict of metric values is appended here per fold.
results_table = []

def score_to_json(fold_num, scores):
    """Map each metric name to its value in ``scores``.

    ``scores`` is indexed positionally: index 0 is the loss, and the values
    for ``MODEL_PERFORMANCE_METRICS`` follow in list order. ``fold_num`` is
    accepted but not used.
    """
    return {
        metric.name: scores[position]
        for position, metric in enumerate(MODEL_PERFORMANCE_METRICS, start=1)
    }

for fold_num, (train, test) in enumerate(kfold.split(x, y)):
    # train your model
    # NOTE(review): train_mlp is called with only the model, so it fits on its
    # default training data (X0/L0) and the `train` indices are never used.
    # Because x contains X0, the held-out fold overlaps the data the model was
    # fitted on; for a genuine cross-validation estimate the fit should use
    # x[train] / y[train] instead.
    model, history = train_mlp(create_mlp())

    # evaluate your model on this fold's held-out indices
    # (the original referenced undefined names `inputs` / `targets` here)
    scores = model.evaluate(x[test], y[test], verbose=0)

    # record it for results (folds are numbered from 1)
    results_table.append(score_to_json(fold_num + 1, scores))

# view your results over n folds: one row per fold, one column per metric
results_table = pd.json_normalize(results_table)
results_table

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10


Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Unnamed: 0,accuracy,precision,recall,auc,prc
0,0.9895,0.990056,0.9885,0.99916,0.997627
1,0.991214,0.992128,0.990214,0.999299,0.998029
2,0.990786,0.991986,0.990286,0.999485,0.998425
3,0.990429,0.991344,0.989857,0.999243,0.997764
4,0.989643,0.990413,0.988786,0.999416,0.998479


In [104]:
# Mean and standard deviation of each metric across the folds.
results_table.describe().loc[['mean', 'std'], :]

Unnamed: 0,accuracy,precision,recall,auc,prc
mean,0.990314,0.991185,0.989529,0.99932,0.998065
std,0.000735,0.000926,0.000831,0.000131,0.000382
