# CNN - introduction

In this tutorial we learn how convolutional networks work and how they are used for image classification.

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib.image as mpimg

## Convolution for border detection

In [None]:
# Read the Eiffel tower photo and keep only the first three channels (R, G, B),
# discarding any extra channel the file might carry.
img = mpimg.imread('eifel.jpg')[:, :, :3]

# Display the picture on a large canvas without axis ticks.
fig = plt.figure(figsize=(15, 12))
plt.imshow(img)
plt.axis('off')
plt.show()

print('Image shape: {}'.format(img.shape))

In [None]:
# Inspect the raw pixel values of the loaded image array
img

In [None]:
# Array dimensions; the last axis holds the three colour channels kept above
img.shape

In [None]:
# convert to grayscale
def rgb_to_grayscale(red, green, blue):
    """Return the luma (perceived brightness) of an RGB colour.

    Uses the ITU-R BT.601 weights 0.299 / 0.587 / 0.114. The frequently quoted
    0.3 / 0.59 / 0.11 are rounded versions of these numbers.

    Parameters
    ----------
    red, green, blue : number or array-like
        Channel intensities; arrays are combined element-wise.

    Returns
    -------
    Weighted sum of the three channels, on the same scale as the inputs.
    """
    return 0.299*red + 0.587*green + 0.114*blue

# Use TensorFlow's ready-made conversion instead of the hand-written formula.
# The result still has a trailing channel axis, which is squeezed away before
# the tensor is turned into a plain NumPy array.
img_gray = np.array(tf.squeeze(tf.image.rgb_to_grayscale(img)))

In [None]:
# Render the single-channel image with a gray colour map.
fig = plt.figure(figsize=(15, 12))
plt.imshow(img_gray, cmap='gray')
plt.show()

In [None]:
def detect_borders(img, mask, threshold=0.001):
    """Filter an image with ``mask`` and plot the response and its thresholded version.

    The function works on the image passed as ``img`` (it no longer reads the
    notebook-level ``img_gray`` variable).

    Parameters
    ----------
    img : array-like
        A 2-D grayscale image, or a 3-D image with the channels on the last
        axis. Multi-channel input is converted to grayscale with
        ``tf.image.rgb_to_grayscale`` (using its first three channels).
    mask : array-like
        2-D filter kernel; it may be rectangular.
    threshold : float, optional
        In the second plot, responses strictly greater than ``threshold`` are
        drawn black (0) and all other pixels white (1).

    Returns
    -------
    None. Two matplotlib figures are shown.

    Raises
    ------
    ValueError
        If the image or the mask is not 2-D after conversion, or if the mask is
        larger than the image.
    """
    gray = np.asarray(img)
    if gray.ndim == 3:
        if gray.shape[-1] == 1:
            gray = gray[..., 0]
        else:
            gray = np.array(tf.image.rgb_to_grayscale(gray[..., :3]))[..., 0]
    if gray.ndim != 2:
        raise ValueError('img must be a 2-D or 3-D array, got shape {}'.format(gray.shape))
    # Work in floating point so small integer dtypes cannot overflow.
    gray = gray.astype(np.float64)

    kernel = np.asarray(mask, dtype=np.float64)
    if kernel.ndim != 2:
        raise ValueError('mask must be a 2-D array, got shape {}'.format(kernel.shape))
    k_rows, k_cols = kernel.shape

    out_rows = gray.shape[0] - k_rows + 1
    out_cols = gray.shape[1] - k_cols + 1
    if out_rows <= 0 or out_cols <= 0:
        raise ValueError('mask is larger than the image')
    img_processed = np.zeros((out_rows, out_cols))

    # Slide the kernel over every position where it fits entirely inside the
    # image ("valid" mode). The kernel is not flipped, so strictly speaking this
    # is a cross-correlation, which is also what CNN "convolution" layers compute.
    for row in range(out_rows):
        for col in range(out_cols):
            img_section = gray[row:row + k_rows, col:col + k_cols]
            img_processed[row, col] = np.sum(img_section * kernel)

    # Raw filter response
    fig = plt.figure(figsize=(15, 12))
    plt.imshow(img_processed, cmap=plt.get_cmap('gray'))
    plt.axis('off')
    plt.show()

    # Binary edge map: 0 (black) where the response exceeds the threshold
    img_processed = 1 - (img_processed > threshold).astype(int)
    fig = plt.figure(figsize=(15, 12))
    plt.imshow(img_processed, cmap=plt.get_cmap('gray'))
    plt.axis('off')
    plt.show()

In [None]:
# Identify vertical borders
# Sobel kernel: right column minus left column, so it responds to intensity
# changes along the horizontal direction (i.e. vertical edges).
mask = [
    [-1,  0,  1],
    [-2,  0,  2],
    [-1,  0,  1]
]

detect_borders(img, mask, threshold=0.25)

In [None]:
# Identify horizontal borders
# Same Sobel kernel as for vertical borders; it is transposed in the call below
# so that it compares the bottom row with the top row instead.
mask = [
    [-1,  0,  1],
    [-2,  0,  2],
    [-1,  0,  1]
]

detect_borders(img, np.transpose(mask), threshold=0.25)

In [None]:
# Laplace edge detector, should do both horizontal and vertical

# The entries sum to zero, so a region of constant intensity gives zero response.
mask = [
    [0,  1,  0],
    [1,  -4, 1],
    [0,  1,  0]
]

# The kernel is symmetric, so np.transpose leaves it unchanged here.
detect_borders(img, np.transpose(mask), threshold=0.25)

In [None]:
# now with negative sign
# Negated Laplace kernel with the centre raised from 4 to 5: the entries sum
# to one, i.e. this is the original pixel plus the negated Laplace response
# (commonly used as a sharpening kernel).
mask = [
    [0, -1, 0],
    [-1, 5, -1],
    [0, -1, 0]
]

detect_borders(img, mask, threshold=0.3)

In [None]:
# Identify changes around a point
# 5x5 kernel with a positive centre and negative surroundings; the entries sum
# to zero, so a region of constant intensity gives zero response.
mask = [
    [0,   0,  -1,   0,   0],
    [0,  -1,  -2,  -1,   0],
    [-1, -2,  16,  -2,  -1],
    [0,  -1,  -2,  -1,   0],
    [0,   0,  -1,   0,   0]
]

detect_borders(img, mask, threshold=0.3)

In [None]:

# Smoothing (blur) kernel: a weighted average of the 3x3 neighbourhood.
# The weights sum to 16 and are not normalised here, so the response is
# 16 times the weighted mean of the neighbourhood.
mask = [
    [1,   2,  1,],
    [2,   4,  2,] ,
    [1,   2,  1,]
]

detect_borders(img, mask, threshold=0.3)

<span style="color:red">**TO DO:** Try your own masks, e.g. for sharpening or blurring an image.</span>


https://en.wikipedia.org/wiki/Kernel_(image_processing)

## Images and processing

TensorFlow contains several datasets for image classification
 - https://www.tensorflow.org/datasets/catalog/overview
 
Useful guide for image processing:
 - https://www.tensorflow.org/tutorials/images/data_augmentation
 
We have two options:
 1. Use keras processing layers - make them as a part of the network
 2. Apply transformation on tf.data.Dataset using .map()

### Check some datasets

In [None]:
# can be used to suppress the progress bar
# tfds.disable_progress_bar()

# Load the cats vs dogs data (only the "train" split is requested here;
# train/validation subsets are carved out of it later in the notebook)
cats_vs_dogs = tfds.load(
    "cats_vs_dogs",
    split="train",
    as_supervised=True, # Include labels
    shuffle_files=False # Change to true for training
)

In [None]:
# Display the dataset object itself (its description), not the images
cats_vs_dogs

Documentation for the TensorFlow Dataset
- https://www.tensorflow.org/api_docs/python/tf/data/Dataset

In [None]:
# Plot the first nine images in a 3x3 grid, each titled with its integer label
plt.figure(figsize=(10, 10))
for i, (image, label) in enumerate(cats_vs_dogs.take(9)):
    plt.subplot(3, 3, i + 1)
    plt.imshow(image.numpy())
    plt.title(int(label))
    plt.axis('off')
# Render the finished figure once, after all nine panels have been drawn
plt.show()

In [None]:
# Load the mnist data
# The "test" split is used as the development (validation) set in this notebook
mnist_train, mnist_dev = tfds.load(
    "mnist",
    split=["train", "test"],
    as_supervised=True, # Include labels
    shuffle_files=False # Change to true for training
)

In [None]:
# Plot the first nine digits in a 3x3 grid, each titled with its integer label
plt.figure(figsize=(10, 10))
for i, (image, label) in enumerate(mnist_train.take(9)):
    plt.subplot(3, 3, i + 1)
    plt.imshow(image.numpy())
    plt.title(int(label))
    plt.axis('off')
# Render the finished figure once, after all nine panels have been drawn
plt.show()

### Image processing

#### Resize an image

In [None]:
# take(1) yields another Dataset restricted to the first element, not the
# element itself; iterate over it to obtain the (image, label) pair.
# NOTE(review): `a` is not referenced by any other cell visible in this notebook.
a = cats_vs_dogs.take(1)

In [None]:
# Build the resized view of the dataset first; the map is applied lazily.
size = (128, 128)
resized_cvd = cats_vs_dogs.map(lambda x, y: (tf.image.resize(x, size), y))

plt.figure(figsize=(10, 10))

# Left panel: original picture. Right panel: resized picture, whose pixels are
# cast to int32 for display.
panels = [(cats_vs_dogs, None), (resized_cvd, "int32")]
for position, (dataset, cast_to) in enumerate(panels, start=1):
    ax = plt.subplot(1, 2, position)
    for image, label in dataset.take(1):
        pixels = image.numpy()
        if cast_to is not None:
            pixels = pixels.astype(cast_to)
        plt.imshow(pixels)
        plt.axis('off')
        plt.plot()

### Create a pre-processing pipeline

Documentation of the pre-processing functions
- https://www.tensorflow.org/api_docs/python/tf/keras/layers/experimental/preprocessing

Image data augmentation: These layers apply random augmentation transforms to a batch of images. They are only active during training.

In [None]:
# Define data augmentation
# `size` comes from the resize cell above; the dataset is resized again here so
# that every image has the same shape.
resized_cvd = cats_vs_dogs.map(lambda x, y: (tf.image.resize(x, size), y))

# Random transformations, applied one after another in the listed order.
# NOTE(review): newer TensorFlow releases expose these layers directly under
# tf.keras.layers - confirm against the installed version.
dog_augmentation = tf.keras.Sequential(
    [
        tf.keras.layers.experimental.preprocessing.RandomFlip("horizontal"),
        tf.keras.layers.experimental.preprocessing.RandomRotation(0.1),
        tf.keras.layers.experimental.preprocessing.RandomZoom(0.2, 0.2),
        tf.keras.layers.experimental.preprocessing.RandomTranslation(0.1, 0.1)
    ]
)

In [None]:
# resized_cvd is not batched, so each element is a single (image, label) pair.
# Draw twelve independent random augmentations of the first image.
for image, _ in resized_cvd.take(1):
    plt.figure(figsize=(12, 12))
    # The augmentation layers expect a leading batch axis.
    batch_of_one = tf.expand_dims(image, 0)
    for i in range(12):
        ax = plt.subplot(4, 3, i + 1)
        augmented_image = dog_augmentation(batch_of_one, training=True)
        plt.imshow(augmented_image[0].numpy().astype("int32"))
        plt.axis("off")

In [None]:
# Augmentation pipeline for the digits: no flip, unlike the cats-vs-dogs pipeline.
# NOTE(review): per the Keras docs the rotation factor is a fraction of a full
# turn, so 0.5 would allow up to half a turn either way - confirm this is intended.
mnist_aug = tf.keras.Sequential(
    [
        tf.keras.layers.experimental.preprocessing.RandomRotation(0.5),
        tf.keras.layers.experimental.preprocessing.RandomZoom(0.1, 0.1),
        tf.keras.layers.experimental.preprocessing.RandomTranslation(0.1, 0.1)
    ]
)

In [None]:
# One batch of five digits is enough for the demonstration.
train_ds = mnist_train.cache().batch(5).prefetch(buffer_size=10)

# For each of the five digits draw one figure with twelve random augmentations.
for images, labels in train_ds.take(1):
    for digit in images[0:5]:
        # The augmentation layers expect a leading batch axis.
        digit_batch = tf.expand_dims(digit, 0)
        plt.figure(figsize=(4, 4))
        for i in range(12):
            ax = plt.subplot(4, 3, i + 1)
            augmented_image = mnist_aug(digit_batch, training=True)
            plt.imshow(augmented_image[0].numpy().astype("int32"))
            plt.axis("off")
        plt.show()

# Training CNNs

A tutorial dedicated to CNNs and images:
 - https://www.tensorflow.org/tutorials/images/cnn

## Mnist
Low resolution images

### Simple CNN
 - With sequential API
 - Two blocks of convolutions with max pooling and a fully connected layer

In [None]:
# Define the architecture as an ordered list of layers:
#   conv(16) -> pool -> conv(32) -> pool -> flatten -> softmax(10)
model = tf.keras.models.Sequential([
    # First convolution block
    tf.keras.layers.Conv2D(filters=16,
                           kernel_size=(3, 3),
                           strides=1,
                           padding='same',
                           activation='relu',
                           input_shape=(28, 28, 1)),
    tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same'),

    # Second convolution block
    tf.keras.layers.Conv2D(filters=32,
                           kernel_size=(3, 3),
                           strides=1,
                           padding='same',
                           activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same'),

    # Classifier head: flatten the feature maps and map them straight to the
    # ten class probabilities (there is no extra hidden dense layer)
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10, activation='softmax'),
])

model.summary()

In [None]:
# Number of inputs to the dense layer: the two stride-2 poolings shrink
# 28 -> 14 -> 7 in each spatial dimension, and the last convolution has 32 filters
7*7*32

In [None]:
# TODO: To make the following line work you need to install graphviz (if you have not done so in one of the previous classes)
# 1) follow the instructions at https://graphviz.gitlab.io/download/
# 2) this notebook has been tested with version 8.0.3
# 3) make sure you add it to the PATH variable (you are specifically asked during the installation) at least for local user

# Draw the layer graph, annotated with layer names and tensor shapes
tf.keras.utils.plot_model(model, show_shapes=True, show_layer_names=True)

In [None]:
# Compile the model
# The labels are integer class ids, hence the "sparse" loss and accuracy.
# `metrics` is passed as a list, the form documented for Model.compile.
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

In [None]:
# Load the mnist data
# Same call as in the dataset-exploration part above; presumably repeated so the
# training section can be run without it - TODO confirm.
mnist_train, mnist_dev = tfds.load(
    "mnist",
    split=["train", "test"],
    as_supervised=True, # Include labels
    shuffle_files=False # Change to true for training
)

In [None]:
# Create batches from the dataset
batch_size = 128

# cache -> batch -> prefetch; no shuffling and no pixel rescaling is applied here
train_ds = mnist_train.cache().batch(batch_size).prefetch(buffer_size=10)
# Alternative: apply the mnist_aug augmentation to each training batch
#train_ds = mnist_train.cache().batch(batch_size).prefetch(buffer_size=10).map(lambda x,y: (mnist_aug(x), y))
validation_ds = mnist_dev.cache().batch(batch_size).prefetch(buffer_size=10)

Note on how to read the numbers reported during training: "For training loss, keras does a running average over the batches. For validation loss, a conventional average over all the batches in validation data is performed. The training accuracy is the average of the accuracy values for each batch of training data during training."

In [None]:
epochs = 3

# Write training/validation metrics to logs/mnist_simple for TensorBoard
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='logs/mnist_simple')

# The callback only has an effect when it is handed to fit()
model.fit(train_ds,
          epochs=epochs,
          validation_data=validation_ds,
          callbacks=[tensorboard_callback])

In [None]:
# if we want to get prediction manually
# (one row of class probabilities per validation image)
prediction = model.predict(validation_ds)

In [None]:
# we have softmax activation, so we get 10 probabilities per image
prediction.shape

In [None]:
# we probably want to select the label with the highest predicted probability as our prediction
# (axis=1 takes the argmax across the 10 classes separately for every image)
np.argmax(prediction, axis=1)

### Bigger CNN with regularization
 - With functional API
 - Increase number of convolution layers and channels
 - Uses label smoothing, dropout, L2, early stopping

In [None]:
# Label smoothing

def label_smooth_train(image, labels, alpha, n_labels):
    """Turn integer class labels into smoothed one-hot targets for training.

    After smoothing, the true class holds ``1 - alpha + alpha / n_labels`` and
    every other class holds ``alpha / n_labels``.

    Parameters
    ----------
    image : passed through unchanged.
    labels : integer class ids (cast to int32 before one-hot encoding).
    alpha : smoothing strength; 0 leaves the one-hot vector as it is.
    n_labels : number of classes, i.e. the length of the one-hot vector.

    Returns
    -------
    Tuple ``(image, smoothed_labels)`` with float32 labels.
    """
    one_hot = tf.one_hot(tf.cast(labels, tf.int32), n_labels)
    one_hot = tf.cast(one_hot, tf.float32)
    smoothed = one_hot * (1 - alpha) + alpha / n_labels
    return image, smoothed

def label_smooth_dev(image, labels, n_labels):
    """One-hot encode integer labels for evaluation; no smoothing is applied.

    Gives the validation labels the same float32 one-hot layout as the
    smoothed training labels. ``image`` is returned unchanged.
    """
    class_ids = tf.cast(labels, tf.int32)
    return image, tf.cast(tf.one_hot(class_ids, n_labels), tf.float32)

In [None]:
# Create a bigger CNN
reg = tf.keras.regularizers.L1L2(l2=0.001)

# Settings shared by all convolution layers / all pooling layers of this model
conv_args = dict(kernel_size=(3, 3),
                 strides=1,
                 padding='same',
                 activation='relu',
                 kernel_regularizer=reg)
pool_args = dict(pool_size=3, strides=2, padding='same')

inputs = tf.keras.layers.Input(shape=[28, 28, 1])

# Augment data - creates distortion in the evaluation data as well (not good for well centered mnist)
# x = mnist_aug(inputs)

# First convolution block: two 16-filter convolutions, then downsampling
x = tf.keras.layers.Conv2D(filters=16, **conv_args)(inputs)
x = tf.keras.layers.Conv2D(filters=16, **conv_args)(x)
x = tf.keras.layers.MaxPool2D(**pool_args)(x)

# Second convolution block: 32 and 64 filters, then downsampling
x = tf.keras.layers.Conv2D(filters=32, **conv_args)(x)
x = tf.keras.layers.Conv2D(filters=64, **conv_args)(x)
x = tf.keras.layers.MaxPool2D(**pool_args)(x)

# Classifier head: flatten, dropout, then the 10-way softmax output
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dropout(rate=0.5)(x)

outputs = tf.keras.layers.Dense(10, activation='softmax')(x)

model = tf.keras.Model(inputs, outputs)
model.summary()

In [None]:
# Draw the layer graph of the regularized model (needs graphviz, see the earlier plot_model cell)
tf.keras.utils.plot_model(model, show_shapes=True, show_layer_names=True)

In [None]:
# Create batches with label smoothing
batch_size = 128
smooth_alpha = 0.05
n_labels = 10

# Training labels are one-hot encoded and smoothed; validation labels are only
# one-hot encoded. Both pipelines then cache, batch and prefetch.
train_ds = (
    mnist_train
    .map(lambda x, y: label_smooth_train(x, y, smooth_alpha, n_labels))
    .cache()
    .batch(batch_size)
    .prefetch(buffer_size=10)
)
validation_ds = (
    mnist_dev
    .map(lambda x, y: label_smooth_dev(x, y, n_labels))
    .cache()
    .batch(batch_size)
    .prefetch(buffer_size=10)
)

In [None]:
# Compile the model - label smoothing -> no sparse loss and metric
# (the labels are now one-hot vectors instead of integer class ids).
# `metrics` is passed as a list, the form documented for Model.compile.
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=[tf.keras.metrics.CategoricalAccuracy()]
)

In [None]:
# With a single epoch the early-stopping patience of 3 can never be reached;
# increase `epochs` to see early stopping in action.
epochs = 1

# Stop when the validation accuracy has not improved for 3 epochs and restore
# the weights of the best epoch
early_call = tf.keras.callbacks.EarlyStopping(
    monitor='val_categorical_accuracy', patience=3, restore_best_weights=True
)

# Write training/validation metrics to logs/mnist_regularized for TensorBoard
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='logs/mnist_regularized')

# Both callbacks must be handed to fit() to have any effect
model.fit(train_ds,
          epochs=epochs,
          validation_data=validation_ds,
          callbacks=[early_call, tensorboard_callback])

## Cats vs dogs
Higher resolution images

### Deeper network with regularization
 - Uses data_augmentation, label smoothing, dropout, L2, early stopping, batch normalization, global pooling at the end
 - Functional API used
 
A batch normalization layer behaves differently in training and in inference mode, and it has both trainable and non-trainable parameters; see https://www.tensorflow.org/api_docs/python/tf/keras/layers/BatchNormalization

In [None]:
reg = tf.keras.regularizers.L1L2(l2=0.0001)

# Settings shared by all convolution layers / all pooling layers of this model
conv_args = dict(kernel_size=3, padding='same', kernel_regularizer=reg)
pool_args = dict(pool_size=3, strides=2, padding='same')

inputs = tf.keras.layers.Input(shape=[128, 128, 3])

# Apply data augmentation
x = dog_augmentation(inputs)

# First convolution block (64 x 64) - the only convolution with stride 2.
# Each block is convolution -> batch normalization -> ReLU (-> max pooling).
x = tf.keras.layers.Conv2D(filters=16, strides=2, **conv_args)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.activations.relu(x)
x = tf.keras.layers.MaxPool2D(**pool_args)(x)

# Second (32 x 32) and third (16 x 16) convolution blocks
for filters in (32, 64):
    x = tf.keras.layers.Conv2D(filters=filters, strides=1, **conv_args)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.activations.relu(x)
    x = tf.keras.layers.MaxPool2D(**pool_args)(x)

# Fourth convolution block (8 x 8) - not followed by max pooling
x = tf.keras.layers.Conv2D(filters=128, strides=1, **conv_args)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.activations.relu(x)

# Apply global average pooling
x = tf.keras.layers.GlobalAveragePooling2D()(x)

# Dropout and fully connected
x = tf.keras.layers.Dropout(0.2)(x)
outputs = tf.keras.layers.Dense(1, activation=tf.keras.activations.sigmoid)(x)

model = tf.keras.Model(inputs, outputs)

# There are some non-trainable parameters
model.summary()

In [None]:
# Draw the layer graph of the cats-vs-dogs model (needs graphviz, see the earlier plot_model cell)
tf.keras.utils.plot_model(model, show_shapes=True, show_layer_names=True)

In [None]:
# Create train and validation data and batches
train_size = 10000
valid_size = 2000
batch_size = 128
image_size = (128, 128)

# The first `train_size` examples form the training set and the following
# `valid_size` examples the validation set; every image is resized to `image_size`.
train_ds = (
    cats_vs_dogs
    .take(train_size)
    .map(lambda x, y: (tf.image.resize(x, image_size), y))
)
validation_ds = (
    cats_vs_dogs
    .skip(train_size)
    .take(valid_size)
    .map(lambda x, y: (tf.image.resize(x, image_size), y))
)

# Could also apply label smoothing
# smooth_alpha = 0.01
# n_labels = 2
# train_ds = train_ds.map(lambda x, y: label_smooth_train(x, y, smooth_alpha, n_labels))
# validation_ds = validation_ds.map(lambda x, y: label_smooth_dev(x, y, n_labels))

train_ds = train_ds.cache().batch(batch_size).prefetch(buffer_size=10)
validation_ds = validation_ds.cache().batch(batch_size).prefetch(buffer_size=10)

In [None]:
# Compile the model - a single sigmoid output with 0/1 labels -> binary loss and metric
# `metrics` is passed as a list, the form documented for Model.compile.
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=[tf.keras.metrics.BinaryAccuracy()]
)

In [None]:
# With a single epoch the early-stopping patience of 3 can never be reached;
# increase `epochs` to see early stopping in action.
epochs = 1

# Stop when the validation accuracy has not improved for 3 epochs and restore
# the weights of the best epoch
early_call = tf.keras.callbacks.EarlyStopping(
    monitor='val_binary_accuracy', patience=3, restore_best_weights=True
)

# Write training/validation metrics to logs/cats_vs_dogs for TensorBoard
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='logs/cats_vs_dogs')

# Both callbacks must be handed to fit() to have any effect
model.fit(train_ds,
          epochs=epochs,
          validation_data=validation_ds,
          callbacks=[early_call, tensorboard_callback])

<span style="color:red">**TO DO:** CIFAR 10</span>

Try to build your own convolutional network on the CIFAR-10 dataset using residual connections and other features from the CNNs presented above.

see https://www.tensorflow.org/guide/keras/functional#a_toy_resnet_model - includes example with loading the cifar10 dataset and the usage of functional api for the residual connection

Optionally, you can add the data augmentation at the beginning of the network