# Semantic Segmentation

In [None]:
%load_ext tensorboard

import os
import time
import datetime
import numpy as np
import tensorflow as tf

import models as models
import ops.semantic_segmentation.datasets as datasets
import ops.semantic_segmentation.imageops as imageops
import ops.semantic_segmentation.trains as trains
import ops.semantic_segmentation.tests as tests

Define parameters:

In [None]:
# Paths
cwd = os.getcwd()
model_path = 'models_checkpoints'
dat_path = 'leaderboard/semantic-segmentation'

# Select the dataset here: 'camvid' or 'cityscape'.
dataset_name = 'cityscape'

# Per-dataset settings. Keeping both entries in one table means switching
# datasets only requires changing `dataset_name` above, instead of relying on
# a later group of assignments silently overwriting an earlier one.
# Image and crop sizes are half of the 720x960 / 1024x2048 base sizes.
dataset_configs = {
    'camvid': {
        'img_size': (720 // 2, 960 // 2),
        'crop_size': (720 // 2, 960 // 2),
        'dataset_root': '%s/datasets/camvid' % cwd,
        'seq_root': '%s/datasets/camvid/seq' % cwd,
    },
    'cityscape': {
        'img_size': (1024 // 2, 2048 // 2),
        'crop_size': (1024 // 2, 2048 // 2),
        'dataset_root': '%s/datasets/cityscape' % cwd,
        'seq_root': '%s/datasets/cityscape' % cwd,
    },
}

if dataset_name not in dataset_configs:
    raise ValueError('Unknown dataset_name %r; expected one of %s'
                     % (dataset_name, sorted(dataset_configs)))

# Expose the selected settings under the names used by the rest of the notebook.
dataset_config = dataset_configs[dataset_name]
img_size = dataset_config['img_size']
crop_size = dataset_config['crop_size']
dataset_root = dataset_config['dataset_root']
seq_root = dataset_config['seq_root']


Load the label information, namely the color map and the number of classes:

In [None]:
# Color table of the selected dataset, as returned by `datasets.colors`.
colors = datasets.colors(dataset_name)

# A class is counted once per distinct value in `colors`, so entries that
# share the same value collapse into a single class.
distinct_colors = set(colors.values())
num_classes = len(distinct_colors)
print('%d classes are loaded. ' % num_classes)

Load the training, validation, and test datasets, together with the frame-sequence dataset:

In [None]:
# Frame offsets passed to the sequence loader (see the explanation below the cell).
offset = (5, 0)

dataset_train, dataset_val, dataset_test = datasets.dataset(
    dataset_name, dataset_root, img_size, crop_size, cache=True)
dataset_seq = datasets.dataset_seq(
    dataset_name, dataset_root, seq_root, img_size, offset=offset)


def select_frame(image, label):
    """Keep only the frame at index `offset[0]` of a sequence element.

    NOTE(review): with `offset[0]` preceding frames, this index is presumably
    the current frame -- confirm against `datasets.dataset_seq`.
    """
    return image[offset[0]], label


# Rebinds `dataset_test`: the test split returned by `datasets.dataset` above
# is replaced by single frames taken from the sequence dataset.
dataset_test = dataset_seq.map(select_frame)

The `offset` indicates how many past and future frames (5 and 0 respectively in this example) to use when evaluating the model by using the video stream.

Since semantic segmentation datasets such as CamVid and CityScape are imbalanced, we use Median Frequency Balancing to correct for the imbalance.

In [None]:
# Two ways to obtain the class weights:
#   * compute them from the training set (manual calculation):
#       class_weights = datasets.median_freq_weights(dataset_train, num_classes)
#   * load memorized weights for the dataset (cost efficient) -- used here.
class_weights = datasets.memorized_median_freq_weights(dataset_name)

weights_array = class_weights.numpy()
print('Class weights: \n', weights_array)

We use U-Net and SegNet with MC dropout layers.

In [None]:
# Deterministic NN: the same architectures constructed without the `rate` argument.
# model = models.UNet(num_classes, name='u-net-dnn')
# model = models.SegNet(num_classes, name='segnet-dnn')

# Bayesian NN (MC dropout). `rate` is presumably the dropout rate -- confirm in `models`.
# Exactly one `model = ...` line should be active; `model.name` is reused below
# for the TensorBoard log directory and the checkpoint file name.
model = models.UNet(num_classes, rate=0.5, name='u-net-bnn')
# model = models.SegNet(num_classes, rate=0.5, name='segnet-bnn')

# Uncomment to start from previously saved weights. NOTE(review): the '_1' suffix
# does not match the name written by the save cell below -- verify the file exists.
# model.load_weights('%s/%s_%s' % (model_path, dataset_name, model.name + '_1'))

Load TensorBoard variables:

In [None]:
# One log directory per run, keyed by dataset, model name and start time,
# with separate sub-directories (and writers) for training and test summaries.
current_time = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
log_dir = 'logs/gradient_tape/%s_%s/%s' % (dataset_name, model.name, current_time)
train_log_dir, test_log_dir = '%s/train' % log_dir, '%s/test' % log_dir

# Writers are created in the same order as the directories: train first, then test.
train_summary_writer, test_summary_writer = (
    tf.summary.create_file_writer(summary_dir)
    for summary_dir in (train_log_dir, test_log_dir)
)

print('Create TensorBoard Log dir: ', log_dir)

## Train 

In [None]:
epochs = 100
log_every = 1   # report training metrics every `log_every` epochs
eval_every = 5  # evaluate on the validation set every `eval_every` epochs
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3, beta_1=0.9, beta_2=0.999)

# Running means accumulated between two training reports.
epochtime_metric = tf.keras.metrics.Mean(name='epoch_time')
loss_metric = tf.keras.metrics.Mean(name='train_loss')
nll_metric = tf.keras.metrics.Mean(name='train_nll')
train_metrics = (epochtime_metric, loss_metric, nll_metric)

for epoch in range(epochs):
    # Train for one epoch and record its wall-clock duration and losses.
    epoch_start = time.time()
    loss, nll = trains.train_epoch(
        optimizer, model, dataset_train, num_classes, class_weights, batch_size=3)
    epochtime_metric(time.time() - epoch_start)
    loss_metric(loss)
    nll_metric(nll)

    # The interval checks use the 1-based epoch count; printed and logged
    # steps keep the 0-based `epoch`.
    epochs_done = epoch + 1

    if epochs_done % log_every == 0:
        mean_time = epochtime_metric.result()
        mean_loss = loss_metric.result()
        mean_nll = nll_metric.result()
        print('(%.2f sec) Epoch: %d, Loss: %.4f, NLL: %.4f'
              % (mean_time, epoch, mean_loss, mean_nll))

        with train_summary_writer.as_default():
            tf.summary.scalar('loss', mean_loss, step=epoch)
            tf.summary.scalar('nll', mean_nll, step=epoch)

        # Start a fresh averaging window for the next report.
        for metric in train_metrics:
            metric.reset_states()

    if epochs_done % eval_every == 0:
        # Validation with 5 forward passes per input.
        metrics = tests.test_sampling(
            model, 5, dataset_val, num_classes,
            batch_size=1, cutoffs=(0.0, 0.9), verbose=False)

        # (tag, value) pairs in logging order. NOTE(review): the second index
        # presumably selects the cutoff (0 -> 0.0, 1 -> 0.9) -- confirm against
        # the return value of `tests.test_sampling`.
        scalar_summaries = (
            ('nll', metrics[0]),
            ('iou', metrics[2][0]),
            ('iou-90', metrics[2][1]),
            ('acc', metrics[3][0]),
            ('acc-90', metrics[3][1]),
            ('unc-90', metrics[4][1]),
            ('freq-90', metrics[5][1]),
            ('ece', metrics[9]),
        )

        with test_summary_writer.as_default():
            for tag, value in scalar_summaries:
                tf.summary.scalar(tag, value, step=epoch)
            tf.summary.image('calibration diagrams', metrics[10], step=epoch)


In [None]:
# Save the trained weights as <model_path>/<dataset_name>_<model.name>.
weights_path = '%s/%s_%s' % (model_path, dataset_name, model.name)
model.save_weights(weights_path)

## Test

The following operation evaluates the model by using _deterministic neural network (DNN)_ prediction, i.e., $p(\textbf{y} \vert \textbf{x}_0, \textbf{w}_0)$ for an observed input $\textbf{x}_0$ and one execution $\textbf{w}_0$:

In [None]:
# DNN evaluation on the test set. The return value is bound to `_` so the
# notebook does not echo it; with verbose=True the helper presumably prints
# its own report.
vanilla_options = dict(batch_size=3, cutoffs=(0.0, 0.7, 0.9), verbose=True)
_ = tests.test_vanilla(model, dataset_test, num_classes, **vanilla_options)

The following operation evaluates the model by using the predictive distribution of a _Bayesian neural network (BNN)_ with an MC estimator, i.e., the average of the results of several forward passes $p(\textbf{y} \vert \textbf{x}_0, \mathcal{D}) \simeq \sum_{\textbf{w}_i} \frac{1}{N} p(\textbf{y} \vert \textbf{x}_0, \textbf{w}_i)$ where $p(\textbf{y} \vert \textbf{x}_0, \textbf{w}_i)$ is the neural network prediction for an observed input $\textbf{x}_0$ and different executions $\textbf{w}_i$:

In [None]:
# BNN evaluation on the test set using 30 forward passes per input.
num_samples = 30
sampling_options = dict(batch_size=3, cutoffs=(0.0, 0.7, 0.9), verbose=True)
_ = tests.test_sampling(model, num_samples, dataset_test, num_classes, **sampling_options)

In this example, we use 30 samples. Since semantic segmentation is a classification task, the prediction $p(\textbf{y} \vert \textbf{x}_0, \textbf{w}_i)$ is the softmax of the NN logits.

The following operation evaluates the model by using the predictive distribution of a _vector quantized Bayesian neural network (VQ-BNN)_ for a data stream, i.e., _temporal smoothing_ or an _exponential moving average (EMA)_ of recent predictions $p(\textbf{y} \vert \textbf{x}_0, \mathcal{D}) \simeq \sum_{t=-K}^{0} \pi(\textbf{x}_t \vert \mathcal{S}) \, p(\textbf{y} \vert \textbf{x}_t, \textbf{w}_t)$ where $\{ \textbf{x}_0, \textbf{x}_{-1}, \cdots, \textbf{x}_{-K} \}$ are recent video frames, $p(\textbf{y} \vert \textbf{x}_t, \textbf{w}_t)$ are recent NN predictions, and $\pi(\textbf{x}_t \vert \mathcal{S}) = \frac{\exp(- \vert t \vert / \tau )}{\sum_{t'=-K}^{0} \exp(- \vert t' \vert / \tau)}$ are exponentially decaying importances of the predictions:

In [None]:
# VQ-BNN evaluation on the frame-sequence dataset; 0.8 is the tau of the
# temporal-smoothing weights and `offset` is the same pair used to build
# `dataset_seq`.
tau = 0.8
smoothing_options = dict(batch_size=3, cutoffs=(0.0, 0.7, 0.9), verbose=True)
_ = tests.test_temporal_smoothing(model, tau, offset, dataset_seq, num_classes,
                                  **smoothing_options)

We use $\tau=0.8$ and $K=5$ (`offset[0]`) in this example. We can further improve the predictive performance of VQ-BNN by setting `offset[1]` to 1 or more when loading the data sequence. This means that we use future predictions as well as past predictions for VQ-BNN inference.