<a href="https://colab.research.google.com/github/wiso/TutorialML-AtlasItalia2022/blob/main/notebooks/1.1-ImageClassification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Image classification with CNN
Redo the same exercise using a CNN

In [None]:
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt

In [None]:
# Fashion-MNIST ships with Keras; load_data() returns the train and test
# splits, each as an (images, labels) pair.
fashion_mnist = tf.keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

# Human-readable class names, indexed by the integer label.
class_names = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]
nclasses = len(class_names)

# Sanity report on the raw data: array shapes, the set of labels and the
# pixel range of the first training image (before any rescaling).
print('Train: X=%s, y=%s' % (train_images.shape, train_labels.shape))
print('Test: X=%s, y=%s' % (test_images.shape, test_labels.shape))
print("unique train labels=%s" % np.unique(train_labels))
print("range values first train img = %s, %s" % (train_images[0].min(), train_images[0].max()))

# Preprocessing: divide the pixel values by 255 (this also converts the
# arrays to floating point).
train_images = train_images / 255.
test_images = test_images / 255.

## Define several CNN models
They are just a sequence of convolutional layers and max pooling layers. At the end, the output of the filters is flattened and fed to a simple dense neural network to do the multi-class classification.

In [None]:
# Smallest model: one convolution + pooling stage followed by a dense
# classification head.
# 693,962 parameters, 30s training on GPU, 91.0% accuracy
model_simpler = tf.keras.Sequential([
    # --- feature extraction ---
    tf.keras.layers.Conv2D(
        32, 3,
        activation='relu',
        kernel_initializer='he_normal',
        input_shape=(28, 28, 1),
    ),
    tf.keras.layers.MaxPooling2D(pool_size=2),
    tf.keras.layers.Dropout(rate=0.25),

    # --- classification head ---
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=128, activation='relu'),
    tf.keras.layers.Dropout(rate=0.3),
    # one output per class; softmax turns the outputs into probabilities
    tf.keras.layers.Dense(units=nclasses, activation='softmax'),
])



# kerasnet https://arxiv.org/pdf/1801.09403.pdf  594,922 parameters, 92.5% accuracy
# Two (conv, conv, max-pool, dropout) stages followed by a dense head.
model_kerasnet = tf.keras.Sequential([
    # --- stage 1: 32 filters ---
    tf.keras.layers.Conv2D(
        32, 3,
        activation='relu',
        kernel_initializer='he_normal',
        input_shape=(28, 28, 1),
    ),
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=2),
    tf.keras.layers.Dropout(rate=0.25),

    # --- stage 2: 64 filters ---
    tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'),
    tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=2),
    tf.keras.layers.Dropout(rate=0.25),

    # --- classification head ---
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=512, activation='relu'),
    tf.keras.layers.Dropout(rate=0.5),
    tf.keras.layers.Dense(units=nclasses, activation='softmax'),
])


# ConvNet, accuracy 92.1%
# Four 'same'-padded 3x3 convolutions (32, 32, 64, 128 filters) interleaved
# with batch normalisation, dropout and one max-pooling step, followed by a
# two-layer dense head with batch normalisation.
model_convnet = tf.keras.Sequential([
    # --- conv 1 (declares the model input) ---
    tf.keras.layers.Conv2D(
        32, 3, strides=1, padding='same', activation='relu',
        data_format='channels_last', input_shape=(28, 28, 1),
    ),
    tf.keras.layers.BatchNormalization(),

    # --- conv 2 ---
    tf.keras.layers.Conv2D(
        32, 3, strides=1, padding='same', activation='relu',
        data_format='channels_last',
    ),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(rate=0.25),

    # --- conv 3, the only stage with spatial down-sampling ---
    tf.keras.layers.Conv2D(
        64, 3, strides=1, padding='same', activation='relu',
        data_format='channels_last',
    ),
    tf.keras.layers.MaxPooling2D(pool_size=2),
    tf.keras.layers.Dropout(rate=0.25),

    # --- conv 4 ---
    tf.keras.layers.Conv2D(
        128, 3, strides=1, padding='same', activation='relu',
        data_format='channels_last',
    ),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(rate=0.25),

    # --- classification head ---
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=512, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(rate=0.5),
    tf.keras.layers.Dense(units=128, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(rate=0.5),
    tf.keras.layers.Dense(units=nclasses, activation='softmax'),
])


# LeNet5 61,706 parameters, accuracy 85%
# The 28x28 input is first resized to 32x32, then passes through three 5x5
# tanh convolutions (6, 16 and 120 filters); the first two are each followed
# by 2x2 average pooling and a sigmoid. A tanh dense layer of 84 units feeds
# the softmax classifier.
model_lenet5 = tf.keras.Sequential([
    # The Resizing layer is the model input: it is the only layer that
    # declares input_shape.
    tf.keras.layers.experimental.preprocessing.Resizing(32, 32, interpolation="bilinear", input_shape=(28,28,1)),
    # This convolution receives the resized 32x32 images, so it must not
    # declare a (28, 28, 1) input shape of its own.
    tf.keras.layers.Conv2D(6, 5, activation='tanh'),
    tf.keras.layers.AveragePooling2D(2),
    tf.keras.layers.Activation('sigmoid'),
    tf.keras.layers.Conv2D(16, 5, activation='tanh'),
    tf.keras.layers.AveragePooling2D(2),
    tf.keras.layers.Activation('sigmoid'),
    tf.keras.layers.Conv2D(120, 5, activation='tanh'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(84, activation='tanh'),
    tf.keras.layers.Dense(nclasses, activation='softmax')
])


# AlexNet 21,598,922 parameters, 15min training, accuracy: 91%
# AlexNet-style network: the 28x28 input is resized to 224x224, goes through
# five convolutions (the first two followed by local response normalisation,
# ReLU and max pooling) and a dense head of two 4096-unit layers with dropout.
# NOTE(review): every Conv2D below uses strides=4, whereas the AlexNet paper
# only strides the first convolution. With padding='same' the feature map
# should already be 1x1 at the third convolution, which is consistent with the
# parameter count quoted above -- confirm this is intended before changing it.
model_alexnet = tf.keras.Sequential([
    # Up-sample the images to the 224x224 resolution used by this architecture.
    tf.keras.layers.experimental.preprocessing.Resizing(224, 224, interpolation="bilinear", input_shape=(28,28,1)),
    # Convolution 1: 96 filters, 11x11 kernel.
    tf.keras.layers.Conv2D(96, 11, strides=4, padding='same'),
    tf.keras.layers.Lambda(tf.nn.local_response_normalization),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.MaxPooling2D(3, strides=2),
    # Convolution 2: 256 filters, 5x5 kernel.
    tf.keras.layers.Conv2D(256, 5, strides=4, padding='same'),
    tf.keras.layers.Lambda(tf.nn.local_response_normalization),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.MaxPooling2D(3, strides=2),
    # Convolutions 3-5: 3x3 kernels, no normalisation or pooling in between.
    tf.keras.layers.Conv2D(384, 3, strides=4, padding='same'),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Conv2D(384, 3, strides=4, padding='same'),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Conv2D(256, 3, strides=4, padding='same'),
    tf.keras.layers.Activation('relu'),
    # Dense classification head.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(4096, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(4096, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(nclasses, activation='softmax'),
])


# as homework you can have a look at:
# https://ai.plainenglish.io/vggnet-with-tensorflow-transfer-learning-with-vgg16-included-7e5f6fa9479a
# https://ai.plainenglish.io/googlenet-inceptionv1-with-tensorflow-9e7f3a161e87
# https://medium.com/swlh/resnet-with-tensorflow-transfer-learning-13ff0773cf0c
# https://medium.com/swlh/essentials-of-convolutional-neural-networks-with-lenet-alexnet-vgg-googlenet-and-resnet-3f9dd477f666



# choose here the model
model = model_simpler


# Every model defined above ends with a softmax layer, so the network outputs
# class probabilities, not logits. The loss must therefore be built with
# from_logits=False; with from_logits=True it would treat the probabilities as
# logits and apply a second softmax to them.
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

model.summary()

In [None]:
# Train for at most 50 epochs, using 33% of the training data for validation;
# training stops early once the validation loss has not improved for
# 3 consecutive epochs.
history = model.fit(
    x=train_images,
    y=train_labels,
    batch_size=512,
    epochs=50,
    validation_split=0.33,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3),
    ],
)

In [None]:
# Training curves: one panel per metric (accuracy on the left, loss on the
# right), each comparing the training and the validation values per epoch.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(16, 5))
for quantity, ax in zip(('accuracy', 'loss'), axs):
    train_curve = history.history[quantity]
    validation_curve = history.history[f'val_{quantity}']
    ax.plot(train_curve, label='train')
    ax.plot(validation_curve, label='validation')
    ax.set_xlabel('epoch', fontsize=15)
    ax.set_ylabel(quantity, fontsize=15)
    ax.legend()


In [None]:
# Loss and accuracy on the held-out test set (shown as the cell output).
model.evaluate(x=test_images, y=test_labels)

In [None]:
# Show the kernels learned by the first convolutional layer of the model.
#
# The first layer is not always a convolution: model_lenet5 and model_alexnet
# start with a Resizing layer, which has no weights. Look the layer up by type
# instead of assuming it sits at index 0.
first_conv = next(
    layer for layer in model.layers
    if isinstance(layer, tf.keras.layers.Conv2D)
)
filters, bias = first_conv.get_weights()
nfilters = filters.shape[-1]

# Lay the filters out on 4 rows; ceil division keeps every filter visible when
# their number is not a multiple of 4 (e.g. the 6 filters of LeNet5).
nrows = 4
ncols = -(-nfilters // nrows)
fig, axs = plt.subplots(nrows, ncols, figsize=(6, 3), squeeze=False)
for ifilter, ax in enumerate(axs.flat):
    ax.set_xticks([])
    ax.set_yticks([])
    if ifilter >= nfilters:
        # unused cell of the grid
        ax.axis('off')
        continue
    # only the first input channel is drawn (the images are single-channel)
    ax.imshow(filters[:, :, 0, ifilter], cmap='bwr', vmin=-1, vmax=1)
    ax.set_aspect('equal')
fig.subplots_adjust(wspace=0.1, hspace=0.1)

In [None]:
# Show what the first convolutional layer produces for one test image.
example_image = test_images[10]

plt.imshow(example_image, cmap='binary')

# The first layer is not always a convolution: model_lenet5 and model_alexnet
# start with a Resizing layer. Look the layer up by type instead of assuming
# it sits at index 0.
first_conv = next(
    layer for layer in model.layers
    if isinstance(layer, tf.keras.layers.Conv2D)
)

# Auxiliary model sharing the trained weights, truncated after that layer.
model_feature = tf.keras.models.Model(inputs=model.inputs, outputs=first_conv.output)
# predict() expects a batch, hence the extra leading axis
features = model_feature.predict(np.expand_dims(example_image, 0))
print(features.shape)
nfilters = features.shape[-1]

# Lay the feature maps out on 4 rows; ceil division keeps every map visible
# when their number is not a multiple of 4.
nrows = 4
ncols = -(-nfilters // nrows)
fig, axs = plt.subplots(nrows, ncols, figsize=(6, 3), squeeze=False)
for ifilter, ax in enumerate(axs.flat):
    ax.set_xticks([])
    ax.set_yticks([])
    if ifilter >= nfilters:
        # unused cell of the grid
        ax.axis('off')
        continue
    ax.imshow(features[0, :, :, ifilter], cmap='gray', vmin=0, vmax=0.5)
    ax.set_aspect('equal')
fig.subplots_adjust(wspace=0.1, hspace=0.1)

In [None]:
# Per-class scores for every test image (one row per image, one column per
# class); the predicted label is the column holding the largest score.
output_predictions = model.predict(x=test_images)
predicted_labels = output_predictions.argmax(axis=1)

In [None]:
# Inspect the first misclassified test images: for each one draw the predicted
# class probabilities (true class highlighted in red) next to the image.
mask_wrong = test_labels != predicted_labels
wrong_images = test_images[mask_wrong]
wrong_labels = test_labels[mask_wrong]
wrong_predicted_labels = predicted_labels[mask_wrong]
# Select the rows once instead of re-indexing inside the loop.
wrong_probabilities = output_predictions[mask_wrong]

# Show at most 10 examples (fewer if the model makes fewer mistakes).
for idx in range(min(10, len(wrong_images))):
    fig, ax = plt.subplots(1, 2, figsize=(5, 2))
    # The models end with a softmax layer, so the predictions are already
    # probabilities: applying Softmax again would flatten the distribution.
    probs = wrong_probabilities[idx]
    ax[0].bar(np.arange(len(probs)), probs)
    ax[0].set_xticks(np.arange(len(probs)))
    ax[0].set_xticklabels(class_names, rotation=90)
    # mark the true class of the image
    ax[0].get_xticklabels()[wrong_labels[idx]].set_color("red")
    ax[0].set_ylim(0, 1.1)
    ax[0].set_yticklabels([])

    ax[1].imshow(wrong_images[idx], cmap='binary')
    ax[1].set_xticks([])
    ax[1].set_yticks([])
    fig.subplots_adjust(wspace=0)