In [1]:
import tensorflow as tf
from sklearn.model_selection import KFold

In [2]:
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()


In [3]:
x_train, x_test = x_train / 255.0, x_test / 255.0

In [4]:
kf = KFold(n_splits = 5)

In [5]:
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(10)
])


In [6]:
kf.split(x_train)

<generator object _BaseKFold.split at 0x000001EDB91A52C8>

In [7]:
predictions = model(x_train[:5]).numpy()
predictions

array([[-0.6103816 , -0.41997966,  0.14946318, -0.04782834,  0.17167467,
         0.20476808,  0.4076767 , -0.23447783, -0.60223657,  0.35189342],
       [-0.643192  , -0.4727316 ,  0.20201564, -0.08087192,  0.06850795,
        -0.3013882 ,  0.4520133 , -0.21353357, -1.2889848 ,  0.00162144],
       [-0.47593737, -0.39901006, -0.10537577, -0.1590545 , -0.25225252,
        -0.351565  , -0.0794953 ,  0.22365116, -0.7914154 ,  0.08833379],
       [ 0.03803064, -0.01910157,  0.06106504,  0.2536093 ,  0.06013159,
         0.04439706, -0.06494199,  0.378047  , -0.45915103,  0.2240289 ],
       [-0.4285374 , -0.12855946,  0.5438916 , -0.27724633,  0.16613375,
        -0.45871526,  0.05142509,  0.29334736, -0.8370523 ,  0.04908826]],
      dtype=float32)

In [8]:
tf.nn.softmax(predictions).numpy()

array([[0.054353  , 0.06575276, 0.11620368, 0.09539756, 0.11881362,
        0.12281135, 0.15043905, 0.07915465, 0.05479752, 0.14227685],
       [0.05998658, 0.07113513, 0.13967644, 0.1052608 , 0.12221978,
        0.08443017, 0.17934768, 0.09218334, 0.03144778, 0.11431233],
       [0.07534929, 0.0813745 , 0.10914706, 0.10344266, 0.09423762,
        0.08532836, 0.11200871, 0.15167242, 0.0549628 , 0.13247655],
       [0.09655428, 0.09119255, 0.09880417, 0.11978327, 0.09871198,
        0.09717095, 0.08710661, 0.13565592, 0.05872842, 0.11629193],
       [0.06715271, 0.09064467, 0.17757593, 0.07812112, 0.12170991,
        0.06515645, 0.1085197 , 0.13822103, 0.04463215, 0.10826641]],
      dtype=float32)

In [9]:
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [10]:
loss_fn(y_train[:5], predictions).numpy()

2.3781235

In [11]:
model.compile(optimizer='adam',
              loss=loss_fn,
              metrics=['accuracy'])

In [12]:
for train_index, val_index in kf.split(x_train):
    x_trainNew, x_validate = x_train[train_index], x_train[val_index]
    y_trainNew, y_validate = y_train[train_index], y_train[val_index]
    
    model.fit(x_trainNew, y_trainNew, epochs=5, validation_data = (x_validate, y_validate), batch_size = 50)
    

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [13]:
model.evaluate(x_test,  y_test, verbose=2)

313/313 - 0s - loss: 0.0725 - accuracy: 0.9805


[0.07254493981599808, 0.9804999828338623]

In [14]:
probability_model = tf.keras.Sequential([
  model,
  tf.keras.layers.Softmax()
])

In [15]:
probability_model(x_test[:5])

<tf.Tensor: shape=(5, 10), dtype=float32, numpy=
array([[3.25713796e-11, 5.03521006e-14, 1.83191559e-10, 3.90369337e-07,
        1.13428109e-15, 1.36919230e-13, 1.06727958e-22, 9.99999642e-01,
        1.73815996e-12, 2.80400592e-08],
       [6.71014472e-17, 1.70523364e-07, 9.99999881e-01, 4.72340889e-10,
        6.29816849e-31, 5.28639155e-12, 1.51620433e-15, 2.19816399e-26,
        2.78530454e-14, 5.33089007e-24],
       [1.89692720e-10, 9.99962568e-01, 5.01450756e-07, 6.18578655e-09,
        1.39758455e-07, 6.23047436e-09, 2.79122929e-08, 4.88229625e-06,
        3.19210303e-05, 2.51979743e-10],
       [9.99997616e-01, 8.66567490e-19, 2.37905942e-06, 6.90308435e-11,
        4.34628916e-10, 1.83133029e-08, 1.31349935e-08, 2.39479669e-11,
        1.40284055e-14, 3.93844291e-09],
       [2.28458530e-10, 9.11818146e-19, 1.07226977e-10, 1.41241600e-14,
        9.99678254e-01, 7.02919981e-13, 1.69534642e-09, 4.78800030e-06,
        8.18539903e-09, 3.16962891e-04]], dtype=float32)>

In [16]:
#Without KFold: accuracy = .9764, init loss function = 2.689857
#With KFold: accuracy = .9762, init loss function = 2.3394208