In [1]:
# Import necessary libraries:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

In [2]:
# Load the MNIST dataset and normalize the pixel values:
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, 28, 28, 1).astype("float32") / 255.0
x_test = x_test.reshape(-1, 28, 28, 1).astype("float32") / 255.0

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
# CNN -> BatchNorm -> ReLU (common structure)
# x10 (a lot of code to write!)

In [4]:
# Define a CNNBlock subclass of Layer to create a block of Convolutional, BatchNormalization and ReLU layers:
class CNNBlock(layers.Layer):
    def __init__(self, out_channels, kernel_size=3):
        super(CNNBlock, self).__init__()
        self.conv = layers.Conv2D(out_channels, kernel_size, padding="same")
        self.bn = layers.BatchNormalization()

    def call(self, input_tensor, training=False):
        x = self.conv(input_tensor)
        x = self.bn(x, training=training)
        x = tf.nn.relu(x)
        return x

In [5]:
# Define the model as a subclass of Sequential: 
class MyModel(keras.Sequential):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv_block1 = CNNBlock(32)
        self.conv_block2 = CNNBlock(64)
        self.conv_block3 = CNNBlock(128)
        self.flatten = layers.Flatten()
        self.dense = layers.Dense(10)

    def call(self, input_tensor, training=False):
        x = self.conv_block1(input_tensor, training=training)
        x = layers.MaxPooling2D(pool_size=(2, 2))(x)
        x = self.conv_block2(x, training=training)
        x = layers.MaxPooling2D(pool_size=(2, 2))(x)
        x = self.conv_block3(x, training=training)
        x = self.flatten(x)
        x = self.dense(x)
        return x

In [6]:
# Create an instance of the model:
model = MyModel()

In [7]:
# Compile the model with Adam optimizer, SparseCategoricalCrossentropy loss and accuracy metric:
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)


In [8]:
# Train the model on the training dataset and evaluate on the test dataset:
model.fit(x_train, y_train, batch_size=64, epochs=10, verbose=2)
model.evaluate(x_test, y_test, batch_size=64, verbose=2)

Epoch 1/10
938/938 - 11s - loss: 2.6433 - accuracy: 0.1365 - 11s/epoch - 12ms/step
Epoch 2/10
938/938 - 2s - loss: 2.6434 - accuracy: 0.1364 - 2s/epoch - 2ms/step
Epoch 3/10
938/938 - 2s - loss: 2.6435 - accuracy: 0.1361 - 2s/epoch - 2ms/step
Epoch 4/10
938/938 - 3s - loss: 2.6432 - accuracy: 0.1372 - 3s/epoch - 3ms/step
Epoch 5/10
938/938 - 2s - loss: 2.6433 - accuracy: 0.1368 - 2s/epoch - 2ms/step
Epoch 6/10
938/938 - 2s - loss: 2.6433 - accuracy: 0.1361 - 2s/epoch - 2ms/step
Epoch 7/10
938/938 - 2s - loss: 2.6435 - accuracy: 0.1367 - 2s/epoch - 2ms/step
Epoch 8/10
938/938 - 2s - loss: 2.6434 - accuracy: 0.1365 - 2s/epoch - 2ms/step
Epoch 9/10
938/938 - 3s - loss: 2.6433 - accuracy: 0.1368 - 3s/epoch - 3ms/step
Epoch 10/10
938/938 - 2s - loss: 2.6432 - accuracy: 0.1365 - 2s/epoch - 2ms/step
157/157 - 1s - loss: 2.6325 - accuracy: 0.1316 - 640ms/epoch - 4ms/step


[2.6324687004089355, 0.1316000074148178]

In [9]:
# Define the Residual Block for the ResNet-like model
class ResBlock(layers.Layer):
    def __init__(self, channels):
        super(ResBlock, self).__init__()
        self.channels = channels
        self.cnn1 = CNNBlock(channels[0], 3)
        self.cnn2 = CNNBlock(channels[1], 3)
        self.cnn3 = CNNBlock(channels[2], 3)
        self.pooling = layers.MaxPooling2D()
        self.identity_mapping = layers.Conv2D(channels[1], 3, padding="same")

    def call(self, input_tensor, training=False):
        x = self.cnn1(input_tensor, training=training)
        x = self.cnn2(x, training=training)
        x = self.cnn3(x + self.identity_mapping(input_tensor), training=training,)
        x = self.pooling(x)
        return x

In [10]:
# Define the ResNet-like model
class ResNet_Like(keras.Model):
    def __init__(self, num_classes=10):
        super(ResNet_Like, self).__init__()
        self.block1 = ResBlock([32, 32, 64])
        self.block2 = ResBlock([128, 128, 256])
        self.block3 = ResBlock([128, 256, 512])
        self.pool = layers.GlobalAveragePooling2D()
        self.classifier = layers.Dense(num_classes)

    def call(self, input_tensor, training=False):
        x = self.block1(input_tensor, training=training)
        x = self.block2(x, training=training)
        x = self.block3(x, training=training)
        x = self.pool(x, training=training)
        x = self.classifier(x)
        return x
    
    def model(self):
        x = keras.Input(shape=(28, 28, 1))
        return keras.Model(inputs=[x], outputs=self.call(x))

In [12]:
# Create an instance of the ResNet-like model and replace the output layer with a new one for classification
model_1 = ResNet_Like().model()
base_input = model_1.layers[0].input
base_output = model_1.layers[2].output
output = layers.Dense(10)(layers.Flatten()(base_output))
model = keras.Model(base_input, output)

In [13]:
# Compile the model with Adam optimizer, SparseCategoricalCrossentropy loss and accuracy metric
model_1.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

In [14]:
print(model_1.summary())

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 res_block_3 (ResBlock)      (None, 14, 14, 64)        28896     
                                                                 
 res_block_4 (ResBlock)      (None, 7, 7, 256)         592512    
                                                                 
 res_block_5 (ResBlock)      (None, 3, 3, 512)         2364032   
                                                                 
 global_average_pooling2d_1   (None, 512)              0         
 (GlobalAveragePooling2D)                                        
                                                                 
 dense_2 (Dense)             (None, 10)                5130      
                                                           

In [15]:
# Train the model on the training set
model_1.fit(x_train, y_train, batch_size=64, epochs=1, verbose=2)

938/938 - 36s - loss: 0.2188 - accuracy: 0.9292 - 36s/epoch - 38ms/step


<keras.callbacks.History at 0x7f86a5da4d00>

In [16]:
# Evaluate the trained model on the test set
model_1.evaluate(x_test, y_test, batch_size=64, verbose=2)

157/157 - 2s - loss: 0.0363 - accuracy: 0.9893 - 2s/epoch - 12ms/step


[0.03632822260260582, 0.989300012588501]