In [1]:
import tensorflow as tf
from sklearn.model_selection import KFold

In [2]:
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()


In [3]:
x_train, x_test = x_train / 255.0, x_test / 255.0

In [4]:
kf = KFold(n_splits = 5)

In [5]:
for train_index, val_index in kf.split(x_train):
    x_trainNew, x_validate = x_train[train_index], x_train[val_index]
    y_trainNew, y_validate = y_train[train_index], y_train[val_index]
    

In [6]:
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(10)
])


In [7]:
kf.split(x_train)

<generator object _BaseKFold.split at 0x000002142862A848>

In [8]:
predictions = model(x_trainNew[:5]).numpy()
predictions

array([[-0.85231185, -0.5061152 ,  0.21140946,  0.1127475 , -0.25667885,
        -0.0586899 , -0.04023675, -0.24536544,  0.06112645,  0.4756807 ],
       [-1.0350385 , -0.40580457, -0.93333673,  0.06856631, -0.6305111 ,
         0.2048776 ,  0.00366834,  0.03273303,  0.4099669 , -0.22577204],
       [-0.23865262, -0.42915538, -0.31034172,  0.29246318, -0.12685513,
         0.49829042,  0.26355657,  0.19392394,  0.03699117,  0.3807885 ],
       [-0.60410786,  0.00400908, -0.31472147,  0.48002473, -0.11518032,
        -0.03623477, -0.26611227, -0.14843903, -0.4917173 , -0.31001315],
       [-0.601252  , -0.74800175, -0.12577091,  0.6477207 , -0.19930871,
        -0.130093  ,  0.2809847 ,  0.449478  ,  0.14831081, -0.36909539]],
      dtype=float32)

In [9]:
tf.nn.softmax(predictions).numpy()

array([[0.04481278, 0.06335095, 0.12982856, 0.11763101, 0.0812984 ,
        0.0990986 , 0.10094427, 0.08222339, 0.11171284, 0.16909927],
       [0.04136549, 0.0776088 , 0.04579379, 0.12471756, 0.06199007,
        0.14293115, 0.1168807 , 0.12032764, 0.17546722, 0.09291762],
       [0.07125107, 0.0588921 , 0.06632194, 0.12118588, 0.0796791 ,
        0.14888182, 0.11773296, 0.10981382, 0.09386462, 0.13237661],
       [0.06263799, 0.11506405, 0.08365979, 0.18521264, 0.10213541,
        0.11052537, 0.08782688, 0.09879439, 0.07008877, 0.08405461],
       [0.05345003, 0.04615464, 0.08598977, 0.18636739, 0.07989319,
        0.08561892, 0.12915123, 0.15285309, 0.11310427, 0.06741744]],
      dtype=float32)

In [10]:
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [11]:
loss_fn(y_trainNew[:5], predictions).numpy()

2.577163

In [12]:
model.compile(optimizer='adam',
              loss=loss_fn,
              metrics=['accuracy'])

In [13]:
model.fit(x_train, y_train, epochs=10, validation_data = (x_validate, y_validate), batch_size = 50)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x21429bbb208>

In [14]:
model.evaluate(x_test,  y_test, verbose=2)

313/313 - 0s - loss: 0.0668 - accuracy: 0.9801


[0.06675589084625244, 0.9800999760627747]

In [15]:
probability_model = tf.keras.Sequential([
  model,
  tf.keras.layers.Softmax()
])

In [16]:
probability_model(x_test[:5])

<tf.Tensor: shape=(5, 10), dtype=float32, numpy=
array([[6.97517211e-10, 6.09249262e-10, 1.00058621e-07, 1.47124074e-05,
        9.19766542e-13, 5.60788660e-09, 1.41871391e-15, 9.99984980e-01,
        3.58333736e-08, 2.13455323e-07],
       [8.29135445e-08, 1.04149640e-05, 9.99986887e-01, 2.46234822e-06,
        9.11909401e-24, 6.95834359e-08, 1.65091374e-09, 2.47209068e-18,
        1.96306038e-09, 2.54175635e-16],
       [1.49227134e-07, 9.99889374e-01, 7.91518869e-06, 4.63742367e-07,
        3.54947178e-06, 1.91749990e-07, 1.24896314e-06, 8.42537338e-05,
        1.26392715e-05, 1.80556228e-07],
       [9.99994516e-01, 1.25199165e-11, 3.35348818e-06, 1.74167722e-10,
        1.82579132e-10, 4.33108438e-09, 4.49571075e-07, 1.44902606e-06,
        2.82718982e-09, 2.78990939e-07],
       [3.34322863e-06, 8.63313518e-11, 7.52770063e-07, 1.31489157e-08,
        9.74148631e-01, 2.60043464e-07, 1.00414617e-07, 4.89943603e-04,
        2.73274054e-07, 2.53568552e-02]], dtype=float32)>

In [17]:
#Without KFold: accuracy = .9764, init loss function = 2.689857
#With KFold: accuracy = .9762, init loss function = 2.3394208