In [2]:
import tensorflow as tf
# Show which TensorFlow release this notebook was executed with.
print(f"Tensorflow version: {tf.__version__}")

Tensorflow version: 2.12.0


In [17]:
# MNIST handwritten-digit dataset bundled with Keras.
mnist = tf.keras.datasets.mnist

# Training split: 60,000 images of 28 x 28 pixels plus a label vector of length 60,000.
# Test split: 10,000 images of 28 x 28 pixels.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# The images are grayscale (not RGB); dividing by 255 rescales the pixel
# intensities to the range [0, 1].
x_train = x_train / 255.0
x_test = x_test / 255.0

In [18]:
# Sequential suits a plain stack of layers in which every layer has exactly
# one input tensor and one output tensor.
model = tf.keras.models.Sequential()
# Unroll each 28 x 28 image into a flat vector.
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
# Hidden layer: 128 is the dimensionality of the output space, ReLU the activation.
model.add(tf.keras.layers.Dense(128, activation='relu'))
# Drop this fraction of the input units to help prevent overfitting.
model.add(tf.keras.layers.Dropout(0.2))
# Output layer: 10 units, one per class; no activation, so the outputs are logits.
model.add(tf.keras.layers.Dense(10))

# TensorFlow layers (and the model itself) are callable objects: they take one
# or more tensors as input and return one or more tensors.

# Run the untrained model on the first training image; the result holds 10
# values, one per output neuron.
predictions = model(x_train[:1]).numpy()
predictions


array([[ 0.2963053 ,  0.14756723, -0.6664161 ,  0.24374151, -0.7157932 ,
         0.37048018, -0.27088916,  0.04335836,  0.32366914,  0.13400002]],
      dtype=float32)

In [19]:
# Softmax over the last (class) axis turns the logits into one probability per class.
tf.nn.softmax(predictions, axis=-1).numpy()

array([[0.12731655, 0.10972074, 0.04861612, 0.12079716, 0.04627389,
        0.13711931, 0.07220303, 0.09886246, 0.13084853, 0.10824219]],
      dtype=float32)

In [20]:
# Sparse categorical cross-entropy: takes the ground-truth labels and the
# model's logits (from_logits=True, because the model's last layer applies no softmax).
# For a single example it equals the negative log probability assigned to the
# true class, so it is 0 when the model is certain of the correct class.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Loss of the untrained model on the first training example.
loss_fn(y_true=y_train[:1], y_pred=predictions).numpy()

1.9869039

In [21]:
# Configure training: Adam optimizer, the `loss_fn` cross-entropy loss, and
# accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss=loss_fn,
    metrics=['accuracy'],
)

# Train for 5 epochs on the training set.
model.fit(x_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f8dba28b160>

In [22]:
# Score the trained model on the test split; returns [loss, accuracy].
# verbose=2 prints one summary line instead of a progress bar.
model.evaluate(x=x_test, y=y_test, verbose=2)

313/313 - 0s - loss: 0.0730 - accuracy: 0.9773 - 451ms/epoch - 1ms/step


[0.0729941800236702, 0.9772999882698059]

In [24]:
# Wrap the trained model with a softmax layer so that it returns class
# probabilities instead of raw logits.
probability_model = tf.keras.Sequential([model, tf.keras.layers.Softmax()])

In [25]:
# Class probabilities for the first five test images: one row of 10 values per image.
probability_model(x_test[:5])

<tf.Tensor: shape=(5, 10), dtype=float32, numpy=
array([[9.8200353e-08, 4.6678917e-08, 8.4296471e-06, 2.2822036e-03,
        2.4834771e-11, 1.2738034e-06, 4.6347023e-12, 9.9768829e-01,
        3.6118693e-06, 1.5959387e-05],
       [1.7208076e-07, 2.4937055e-04, 9.9971122e-01, 3.6132180e-05,
        3.8593336e-17, 3.0620374e-06, 3.1018448e-08, 1.7073157e-12,
        1.4677804e-07, 1.2432409e-14],
       [5.0664295e-07, 9.9835169e-01, 1.5325740e-04, 4.9570488e-05,
        1.3099970e-05, 3.1815975e-07, 1.2902348e-05, 1.2764594e-03,
        1.4195866e-04, 2.5244759e-07],
       [9.9980754e-01, 1.3942762e-10, 2.0430000e-06, 2.8619922e-07,
        5.5715653e-08, 2.3757700e-07, 1.7231685e-04, 1.6946042e-05,
        5.8802413e-10, 6.0339505e-07],
       [5.5125838e-06, 4.5542357e-09, 2.6862447e-06, 1.8159693e-07,
        9.9684876e-01, 1.3176949e-06, 1.5876491e-05, 2.1805114e-04,
        1.6417913e-06, 2.9059348e-03]], dtype=float32)>