In [1]:
import tensorflow as tf
from sklearn.model_selection import KFold

In [2]:
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()


In [3]:
x_train, x_test = x_train / 255.0, x_test / 255.0

In [4]:
kf = KFold(n_splits = 5)

In [5]:
for train_index, val_index in kf.split(x_train):
    x_trainNew, x_validate = x_train[train_index], x_train[val_index]
    y_trainNew, y_validate = y_train[train_index], y_train[val_index]
    

In [6]:
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(10)
])


In [7]:
kf.split(x_train)

<generator object _BaseKFold.split at 0x0000017E8000A848>

In [8]:
predictions = model(x_trainNew[:5]).numpy()
predictions

array([[-0.571072  ,  0.83509773, -0.15391701, -0.39496958,  0.7063258 ,
         0.37201017,  1.4620622 , -0.9143964 , -0.83755136, -0.38084546],
       [-0.9527608 ,  0.14784567, -0.2679388 , -0.07845181, -0.33480912,
         0.19641508,  1.2308495 , -0.6657421 , -0.68703675, -0.80821353],
       [-0.03526473,  0.28834036, -0.32047972,  0.0362225 ,  0.72351694,
         0.14720201,  0.5273224 , -0.41031712, -0.589844  , -0.6482325 ],
       [-0.27235404,  0.3137295 , -0.5234158 , -0.37652516,  0.49805793,
         0.08474437,  1.098599  , -0.26313987, -0.5855714 , -0.59066826],
       [-0.598346  ,  0.18435042, -0.39772657, -0.5722125 ,  0.30693164,
        -0.06262851,  0.6068341 , -0.0366582 , -0.8595244 , -0.22492827]],
      dtype=float32)

In [9]:
tf.nn.softmax(predictions).numpy()

array([[0.0412054 , 0.16813025, 0.06253488, 0.04913992, 0.14781584,
        0.10581068, 0.31472635, 0.02923144, 0.0315663 , 0.0498389 ],
       [0.03836123, 0.11531344, 0.07608639, 0.09196025, 0.07116486,
        0.12105238, 0.34058264, 0.05111439, 0.05003744, 0.04432704],
       [0.09007046, 0.12448651, 0.06771971, 0.09674507, 0.19236101,
        0.10810024, 0.15809235, 0.06190121, 0.05172871, 0.04879484],
       [0.06909589, 0.12416098, 0.05375483, 0.06226031, 0.1492925 ,
        0.09875023, 0.27217585, 0.0697355 , 0.05051539, 0.05025858],
       [0.05906284, 0.12919186, 0.07218422, 0.06062671, 0.14603989,
        0.10091913, 0.19711399, 0.10357437, 0.04548687, 0.08580007]],
      dtype=float32)

In [10]:
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [11]:
loss_fn(y_trainNew[:5], predictions).numpy()

2.3394208

In [12]:
model.compile(optimizer='adam',
              loss=loss_fn,
              metrics=['accuracy'])

In [13]:
model.fit(x_trainNew, y_trainNew, epochs=5, validation_data = (x_validate, y_validate))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x17e803a9308>

In [14]:
model.evaluate(x_test,  y_test, verbose=2)

313/313 - 0s - loss: 0.0769 - accuracy: 0.9762


[0.07688598334789276, 0.9761999845504761]

In [15]:
probability_model = tf.keras.Sequential([
  model,
  tf.keras.layers.Softmax()
])

In [16]:
probability_model(x_test[:5])

<tf.Tensor: shape=(5, 10), dtype=float32, numpy=
array([[2.25397571e-07, 1.19142718e-09, 3.42984022e-05, 6.62059683e-05,
        1.02392036e-11, 3.96344063e-07, 4.39302491e-14, 9.99892116e-01,
        1.52432449e-08, 6.82486780e-06],
       [2.89757018e-09, 4.54074005e-04, 9.99508023e-01, 2.56610656e-05,
        1.69512373e-15, 1.15225464e-06, 1.62345692e-08, 8.54804712e-14,
        1.11315903e-05, 1.14688415e-11],
       [2.71177578e-06, 9.98369992e-01, 1.16402334e-04, 5.32121476e-06,
        3.82864324e-04, 1.88470040e-05, 5.75354206e-05, 6.99708064e-04,
        3.36046156e-04, 1.06439984e-05],
       [9.99908924e-01, 1.42646250e-09, 5.63766553e-05, 1.76848459e-06,
        1.80404854e-07, 4.18963145e-06, 5.04480977e-06, 3.09938559e-06,
        1.05928546e-08, 2.04251064e-05],
       [3.41913346e-06, 2.97156042e-08, 2.36852247e-06, 2.15189448e-08,
        9.96450067e-01, 2.28523035e-07, 2.42573024e-05, 1.40479897e-04,
        1.56516342e-06, 3.37755168e-03]], dtype=float32)>

In [17]:
#Without KFold: accuracy = .9764, init loss function = 2.689857
#With KFold: accuracy = .9762, init loss function = 2.3394208