# Overview

## Data specifications

DSen2-CR is trained using all 13 Sentinel-2 bands in their native resolution. The TML data is produced using 10 of 13 Sentinel-2 bands. Aerosol, SWIR Cirrus, and water vapor bands are not used. 

The original training data for DSen2-CR should be used to retrain the model, but numpy operations should be placed within the generator functions to drop the 3 bands that aren't used, and to resize the Sentinel-1 data (from native resolution, which may be 10 or 20 meters (?)) to 40 meters, using a simple mean.

## Model specifications

I have taken the liberty to reduce the model size to 64 features and 16 depth to keep the number of parameters to approximately 1 million. We can modify this in the future if need be, but this should be a good starting trade-off between compute complexity and performance. The model is a fairly standard residual network, and you can read more about this type of model here: https://towardsdatascience.com/understanding-and-visualizing-resnets-442284831be8


## Table of data specifications
| Band | Resolution | Used? | Resolution used |
| --- | --- | --- | --- |
| B1 - Aerosol | 60 |  |  |
| B2 - Blue | 10 | X | 10 |
| B3 - Green | 10 | X | 10 |
| B4 - Red | 10 | X | 10 |
| B5 - REdge 1 | 20 | X | 20 |
| B6 - REdge 2 | 20 | X | 20 |
| B7 - REdge 3 | 20 | X | 20 | 
| B8 - NIR | 10 | X | 10 | 
| B8A - Narrow NIR | 20 | X | 20 | 
| B9 - Water Vapor | 60 |  |  | 
| B10 - SWIR Cirrus | 60 |  |  | 
| B11 - SWIR | 20 | X | 20 | 
| B12 - SWIR | 20 | X | 20 | 
| Sentinel 1 VV | 20 | X | 40 | 
| Sentinel 1 VH | 20 | X | 40 | 

# Boilerplate starting code
This notebook runs with the following environment:
- numpy 1.18
- tensorflow 1.15.5
- keras 2.2.4
- rasterio 1.2.6

In [1]:
import keras.backend as K
import tensorflow as tf
from keras.layers import Conv2D, Concatenate, Activation, Lambda, Add
from keras.models import Model, Input
from keras.optimizers import Nadam
import numpy as np

# Use channels-first tensors, i.e. (bands, height, width) per sample. This is the
# layout of the input shapes and of the axis=1 concatenations used in this notebook.
K.set_image_data_format('channels_first')
# Verified to compile on Tensorflow 1.15.4

Using TensorFlow backend.


In [2]:

def resBlock(input_l, feature_size, kernel_size, scale=0.1):
    """Build one residual block: conv -> ReLU -> conv, scaled, plus identity skip.

    Args:
        input_l: Tensor fed into the block; it is also the skip-connection operand.
        feature_size: Number of filters used by both convolutions.
        kernel_size: Kernel size used by both convolutions.
        scale: Constant factor applied to the residual branch before the addition.

    Returns:
        The element-wise sum of ``input_l`` and the scaled residual branch.
    """
    conv_options = {'kernel_initializer': 'he_uniform', 'padding': 'same'}

    residual = Conv2D(feature_size, kernel_size, **conv_options)(input_l)
    residual = Activation('relu')(residual)
    residual = Conv2D(feature_size, kernel_size, **conv_options)(residual)

    # Damp the residual branch before merging it back into the trunk.
    residual = Lambda(lambda branch: branch * scale)(residual)

    return Add()([input_l, residual])



def DSen2CR_model(input_shape,
                  batch_per_gpu=2,
                  num_layers=16,
                  feature_size=64,
                  use_cloud_mask=True,
                  include_sar_input=True):
    """Build the DSen2-CR residual network.

    It is important that the model fits in 4 GB of RAM, ideally with fewer than
    1M parameters; 16 layers x 64 features is a possible first pass.

    Args:
        input_shape: Pair ``(optical_shape, sar_shape)`` of channels-first
            per-sample shapes, e.g. ``((10, 128, 128), (2, 128, 128))``.
        batch_per_gpu: Retained for backward compatibility only. The zero
            channel appended for the cloud mask now follows the runtime batch
            size, so this value no longer constrains the batch dimension.
        num_layers: Number of residual blocks in the body of the network.
        feature_size: Number of filters of the body convolutions.
        use_cloud_mask: If True, the output is the prediction concatenated
            (axis 1) with the optical input and one all-zero channel, which is
            the layout ``carl_error`` slices.
        include_sar_input: If True, the SAR input is concatenated to the
            optical input before the first convolution. The SAR ``Input`` is
            part of the model inputs either way.

    Returns:
        Tuple ``(model, shape_n)`` where ``model`` is the (uncompiled) Keras
        model and ``shape_n`` is the dynamic shape tensor of the optical input.
    """
    # Kept as a module-level global because callers may read it after the call.
    global shape_n

    # define dimensions
    input_opt = Input(shape=input_shape[0])
    input_sar = Input(shape=input_shape[1])

    # Assigned unconditionally so the return statement cannot hit an unbound
    # global when use_cloud_mask is False.
    shape_n = tf.shape(input_opt)

    if include_sar_input:
        x = Concatenate(axis=1)([input_opt, input_sar])
    else:
        x = input_opt

    # Treat the concatenation
    x = Conv2D(feature_size, (3, 3), kernel_initializer='he_uniform', padding='same')(x)
    x = Activation('relu')(x)

    # main body of network as succession of resblocks
    for _ in range(num_layers):
        x = resBlock(x, feature_size, kernel_size=[3, 3])

    # One more convolution, back to the number of optical bands
    x = Conv2D(input_shape[0][0], (3, 3), kernel_initializer='he_uniform', padding='same')(x)

    # Add first layer (long skip connection)
    x = Add()([x, input_opt])

    if use_cloud_mask:
        def concatenate_array(x):
            # zeros_like on a one-channel slice yields a (batch, 1, H, W) block
            # for whatever batch size is fed at run time. The function closes
            # over no tensors, which keeps the Lambda layer serialisable.
            return K.concatenate([x, K.zeros_like(x[:, 0:1, :, :])], axis=1)

        x = Concatenate(axis=1)([x, input_opt])

        x = Lambda(concatenate_array)(x)

    model = Model(inputs=[input_opt, input_sar], outputs=x)

    return model, shape_n

In [3]:
# Per-sample input shapes, channels first:
#   input_shape[0] -> Sentinel-2, 10 channels because only 10 S2 bands are used
#   input_shape[1] -> Sentinel-1, 2 channels
crop_size = 128
input_shape = tuple((channels, crop_size, crop_size) for channels in (10, 2))

model, shape_n = DSen2CR_model(
    input_shape,
    batch_per_gpu=2,
    num_layers=16,
    feature_size=64,
    use_cloud_mask=True,
    include_sar_input=True,
)












In [None]:
# `random` is used for seeding below but was never imported in this notebook.
import random

config = tf.ConfigProto()
# Don't pre-allocate memory; allocate as-needed
config.gpu_options.allow_growth = True

# Only allow a total % of the GPU memory to be allocated
# config.gpu_options.per_process_gpu_memory_fraction = 0.3

# Create a session with the above options specified.
K.tensorflow_backend.set_session(tf.Session(config=config))

# Set random seeds for repeatability
random_seed_general = 42
random.seed(random_seed_general)  # random package
np.random.seed(random_seed_general)  # numpy package
tf.set_random_seed(random_seed_general)  # tensorflow

In [4]:
def carl_error(y_true, y_pred):
    """Computes the Cloud-Adaptive Regularized Loss (CARL).

    Channel layout along axis 1 (channels first), as sliced below:
        y_pred: [n predicted bands | n cloudy input bands | 1 trailing channel]
        y_true: [n target bands | 1 cloud / cloud-shadow mask]

    The band count ``n`` is derived from the static channel dimension of
    ``y_pred`` (``2 * n + 1`` channels) instead of being hard-coded to 13, so
    the loss also works for the 10-band model built in this notebook. If the
    channel dimension is not statically known, the original value of 13 is used.

    Args:
        y_true: Target tensor with the mask stored in the last channel.
        y_pred: Model output tensor in the layout described above.

    Returns:
        Scalar tensor: mean of the mask-weighted absolute errors plus the plain
        mean absolute error between prediction and target.
    """
    pred_shape = K.int_shape(y_pred)
    n_channels = pred_shape[1] if pred_shape is not None and len(pred_shape) > 1 else None
    # y_pred carries 2 * n_bands + 1 channels; fall back to the original 13 bands.
    n_bands = (n_channels - 1) // 2 if n_channels else 13

    cloud_cloudshadow_mask = y_true[:, -1:, :, :]
    clearmask = K.ones_like(cloud_cloudshadow_mask) - cloud_cloudshadow_mask
    predicted = y_pred[:, 0:n_bands, :, :]
    # The n bands directly before the trailing channel are the cloudy input.
    input_cloudy = y_pred[:, -(n_bands + 1):-1, :, :]
    target = y_true[:, 0:n_bands, :, :]

    # Clear pixels are pulled towards the input, cloudy pixels towards the target;
    # the second term is an unmasked MAE regulariser against the target.
    cscmae = K.mean(clearmask * K.abs(predicted - input_cloudy) + cloud_cloudshadow_mask * K.abs(
        predicted - target)) + 1.0 * K.mean(K.abs(predicted - target))

    return cscmae

In [5]:
# Optimizer: Nadam with a fixed base learning rate.
lr = 7e-5
optimizer = Nadam(
    lr=lr,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-8,
    schedule_decay=0.004,
)

# CARL is both the training objective and the reported metric.
loss = carl_error
metrics = [loss]

model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
print('Model compiled successfully!')


Model compiled successfully!


In [6]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 11, 128, 128) 0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 2, 128, 128)  0                                            
__________________________________________________________________________________________________
concatenate_1 (Concatenate)     (None, 13, 128, 128) 0           input_1[0][0]                    
                                                                 input_2[0][0]                    
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 64, 128, 128) 7552        concatenate_1[0][0]              
__________

# Model parameters
The ones that are commented out still need to be filled in!

In [None]:
# Training parameters
shuffle_train = True
data_augmentation = True
random_crop = True
cloud_threshold = 0.2  # set threshold for binarisation of cloud mask
scale = 2000
max_val_sar = 2
initial_epoch = 0  # start at epoch number
epochs_nr = 8  # train for this amount of epochs. Checkpoints will be generated at the end of each epoch
# Defined once: an earlier `batch_size = 8` in this cell was silently overridden by this value.
batch_size = 16  # training batch size to distribute over GPUs


# Per-channel clipping bounds, one list per data group (2, 10 and 10 channels).
# NOTE(review): presumably [Sentinel-1, cloudy Sentinel-2, target Sentinel-2] - confirm
# against the data generator.
clip_min = [[-25.0, -32.5], [0] * 10, [0] * 10]
clip_max = [[0, 0], [10000] * 10, [10000] * 10]

# To figure out
# model_name = 
# base_out_path = 
# resume_file =
# train_filelist = 
# val_filelist = 
# log_step_freq = 
# input_data_folder = 
# max_queue_size = 
# use_multi_processing = 
# workers = 

# Data generator! Needs to be updated

In [None]:
# Here needs the data generator
# From: https://github.com/ameraner/dsen2-cr/blob/main/Code/tools/dataIO.py
# This needs to be modified to drop the bands that are not used
# And to downsample (avg) the Sentinel-1 data to 40m

In [None]:
def train_dsen2cr(model, model_name, base_out_path, resume_file,
                  train_filelist, val_filelist, lr, log_step_freq,
                  shuffle_train, data_augmentation, random_crop,
                  batch_size, scale, clip_max, clip_min, max_val_sar,
                  use_cloud_mask, cloud_threshold, crop_size,
                  epochs_nr, initial_epoch, input_data_folder, input_shape,
                  max_queue_size, use_multi_processing, workers):
    """Start or resume training of DSen2-CR model.

    Writes a model plot and a YAML description to the output directory, sets up
    checkpoint, CSV, per-batch and TensorBoard callbacks, builds the training
    and validation ``DataGenerator`` instances and runs ``model.fit_generator``.

    NOTE(review): ``make_dir``, ``plot_model``, ``ModelCheckpoint``,
    ``CSVLogger``, ``CSV_NBatchLogger``, ``NBatchLogger``, ``shuffle``,
    ``DataGenerator`` and ``TensorBoardWrapper`` are not defined in the visible
    part of this file and must be in scope before calling this function.

    Args:
        model: Keras model to train (expected to be compiled already).
        model_name: Name of the output sub-directory and prefix of all files
            written there.
        base_out_path: Directory under which ``<model_name>/`` is created.
        resume_file: Weights file to load before training, or None to start
            from the model's current weights.
        train_filelist: Samples handed to the training ``DataGenerator``.
        val_filelist: Samples handed to the validation ``DataGenerator``; a
            shuffled copy of its first ``batch_size`` entries feeds TensorBoard.
            The list itself is not modified.
        lr: Learning rate; only forwarded to ``NBatchLogger``.
        log_step_freq: Forwarded to ``NBatchLogger``.
        shuffle_train, batch_size, scale, clip_max, clip_min, max_val_sar,
        use_cloud_mask, cloud_threshold, crop_size, input_data_folder,
        input_shape: Forwarded unchanged to every ``DataGenerator``.
        data_augmentation, random_crop: Forwarded to the training generator
            only; both are forced to False for the validation generators.
        epochs_nr, initial_epoch, max_queue_size, use_multi_processing,
        workers: Forwarded to ``model.fit_generator``.
    """
    import os  # local import: the notebook never imports `os` at the top

    print('Training model name: {}'.format(model_name))

    # Joining with '' keeps a trailing separator. The previous '/' component was
    # absolute, which made os.path.join discard everything before it and return '/'.
    out_path_train = make_dir(os.path.join(base_out_path, model_name, ''))

    # generate model information and metadata
    plot_model(model, to_file=os.path.join(out_path_train, model_name + 'model.png'), show_shapes=True,
               show_layer_names=True)
    model_yaml = model.to_yaml()
    with open(os.path.join(out_path_train, model_name + "model.yaml"), 'w') as yaml_file:
        yaml_file.write(model_yaml)
    print("Model information files created at ", out_path_train)

    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Initialize callbacks %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # instantiate model checkpoint callback
    model_filepath = os.path.join(out_path_train, model_name + '_{epoch:02d}-{val_loss:.4f}' + '.hdf5')
    checkpoint = ModelCheckpoint(model_filepath,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=False,
                                 save_weights_only=False,
                                 mode='auto')

    # instantiate csv logging callback
    csv_filepath = os.path.join(out_path_train, model_name + '_csvlog.csv')
    csv_logger = CSVLogger(csv_filepath, append=True, separator=";")
    csv_batch_logger = CSV_NBatchLogger(1, out_path_train, model_name, initial_epoch, separator=';')

    # instantiate NBatch logger
    batch_logger = NBatchLogger(log_step_freq, out_path_train, model_name, initial_epoch, lr)

    # Generator settings shared by all three generators; only the augmentation
    # and random-crop switches differ between training and validation.
    train_params = {'input_dim': input_shape,
                    'batch_size': batch_size,
                    'shuffle': shuffle_train,
                    'scale': scale,
                    'include_target': True,
                    'data_augmentation': data_augmentation,
                    'random_crop': random_crop,
                    'crop_size': crop_size,
                    'clip_min': clip_min,
                    'clip_max': clip_max,
                    'input_data_folder': input_data_folder,
                    'use_cloud_mask': use_cloud_mask,
                    'max_val_sar': max_val_sar,
                    'cloud_threshold': cloud_threshold}
    # keep augmentation and random cropping off for validation data
    val_params = dict(train_params, data_augmentation=False, random_crop=False)

    # instantiate Tensorboard logger
    # extract sample from validation dataset; shuffle a copy so the caller's
    # val_filelist is not reordered as a side effect
    val_filelist_tensorboard = list(val_filelist)
    shuffle(val_filelist_tensorboard)
    val_filelist_tensorboard = val_filelist_tensorboard[0:batch_size]

    val_tensorboard_generator = DataGenerator(val_filelist_tensorboard, **val_params)

    tensorboard = TensorBoardWrapper(val_tensorboard_generator, input_dim=input_shape, nb_steps=1,
                                     batch_size=batch_size, log_dir=out_path_train, histogram_freq=1,
                                     write_graph=False,
                                     batch_nr=batch_size, write_grads=True, update_freq=500,
                                     learning_phase=False)

    # define callbacks list
    callbacks_list = [checkpoint, csv_logger, batch_logger, csv_batch_logger, tensorboard]

    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Initialize training %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    training_generator = DataGenerator(train_filelist, **train_params)
    validation_generator = DataGenerator(val_filelist, **val_params)

    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Run training %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    print('Training starts...')

    if resume_file is not None:
        print("Will resume from the weights in file {}".format(resume_file))
        # Keras models expose load_weights(); there is no Model.load_model method.
        model.load_weights(resume_file)

    # shuffle=False: ordering is left to the generators (see the 'shuffle' entry above)
    model.fit_generator(generator=training_generator,
                        validation_data=validation_generator,
                        epochs=epochs_nr,
                        verbose=1,
                        callbacks=callbacks_list,
                        shuffle=False,
                        initial_epoch=initial_epoch,
                        use_multiprocessing=use_multi_processing,
                        max_queue_size=max_queue_size,
                        workers=workers)