# Tree segmentation with multitemporal Sentinel 1/2 imagery

## John Brandt
## December 2023

## This notebook finetunes the TTC decoder for a new task

## Package Loading

In [1]:
from tqdm import tqdm_notebook, tnrange
import tensorflow as tf

sess = tf.Session()
from keras import backend as K
K.set_session(sess)

from time import sleep

import keras
from tensorflow.python.keras.layers import *
from tensorflow.python.keras.layers import ELU
from keras.losses import binary_crossentropy
from tensorflow.python.ops import array_ops
from tensorflow.python.keras.layers import Conv2D, Lambda, Dense, Multiply, Add
from tensorflow.initializers import glorot_normal, lecun_normal
from scipy.ndimage import median_filter
from skimage.transform import resize

import pandas as pd
import numpy as np
from random import shuffle
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import os
import random
import itertools
from tensorflow.contrib.framework import arg_scope
from keras.regularizers import l1
from tensorflow.layers import batch_normalization
from tensorflow.python.util import deprecation as deprecation
deprecation._PRINT_DEPRECATION_WARNINGS = False

os.environ['KMP_DUPLICATE_LIB_OK']='True'

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])





  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
2023-12-19 09:30:13.067345: I tensorflow/core/platform/cpu_feature_guard.cc:145] This TensorFlow binary is optimized with Intel(R) MKL-DNN to use the following CPU instructions in performance critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in non-MKL-DNN operations, rebuild TensorFlow with the appropriate compiler flags.
2023-12-19 09:30:13.067818: I tensorflow/core/common_runtime/process_util.cc:115] Creating new thread pool with default inter op setting: 8. Tune using inter_op_parallelism_threads for best performance.
Using TensorFlow backend.





## Utility scripts

In [2]:
%run ../src/layers/zoneout.py
%run ../src/layers/losses.py
%run ../src/layers/adabound.py
%run ../src/layers/convgru.py
%run ../src/layers/dropblock.py
%run ../src/layers/extra_layers.py
%run ../src/layers/stochastic_weight_averaging.py
%run ../src/preprocessing/indices.py
%run ../src/preprocessing/slope.py
#%run ../src/utils/metrics.py
#%run ../src/utils/lovasz.py




# Hyperparameter definitions

In [3]:
# Global hyperparameters for the notebook.
# NOTE(review): within the cells visible in this notebook, ACTIVATION_FUNCTION,
# INITIAL_LR, DROPBLOCK_MAXSIZE, N_CONV_BLOCKS, FINAL_ALPHA, LABEL_SMOOTHING,
# START_EPOCH and END_EPOCH are never read again; L2_REG and MAX_DROPBLOCK only
# appear in commented-out code. Confirm whether they are still needed.
ZONE_OUT_PROB = 0.90
ACTIVATION_FUNCTION = 'swish'

INITIAL_LR = 1e-3
DROPBLOCK_MAXSIZE = 5

N_CONV_BLOCKS = 1
FINAL_ALPHA = 0.33
LABEL_SMOOTHING = 0.03

L2_REG = 0.
BATCH_SIZE = 32  # reassigned to 4 in the training-loop cell
MAX_DROPBLOCK = 0.6

FRESH_START = True  # reassigned by several later cells
best_val = 0.2  # reassigned to 0.72 in the training-loop cell

START_EPOCH = 1
END_EPOCH = 100

# The values below are provisional: the placeholder and layer cells further
# down reassign n_bands, initial_flt, mid_flt, high_flt, temporal_model,
# input_size and output_size before the graph is built.
n_bands = 17
initial_flt = 32
mid_flt = 32 * 2
high_flt = 32 * 2 * 2

temporal_model = True
input_size = 28
output_size = 14

# Custom layer definitions

### Conv GRU Block

In [4]:
def gru_block(inp, length, size, flt, scope, train, normalize = True,
              zoneout_prob = 0.75):
    '''Bidirectional convolutional GRU block with
       zoneout and CSSE blocks in each time step

         Parameters:
          inp (tf.Variable): (B, T, H, W, C) layer
          length (tf.Variable): (B, T) layer denoting number of
                                steps per sample
          size (int): spatial size passed to ConvGRUCell as `shape`
          flt (int): number of convolution filters
          scope (str): tensorflow variable scope
          train (tf.Bool): flag to differentiate between train/test ops
          normalize (bool): whether to compute layer normalization
          zoneout_prob (float): zoneout drop probability given to both
                                ZoneoutWrapper instances. Defaults to 0.75,
                                the value that was previously hard-coded.

         Returns:
          gru (tf.Variable): (B, H, W, flt*2) bi-gru output
          steps (tf.Variable): (B, T, H, W, flt*2) output of each step
    '''
    with tf.variable_scope(scope):
        # Report the probability that is actually applied. The previous message
        # printed the global ZONE_OUT_PROB even though the wrappers used 0.75.
        print(f"GRU input shape {inp.shape}, zoneout: {zoneout_prob}")

        # normalize is internal group normalization within the reset gate
        # sse is internal SSE block within the state cell
        cell_kwargs = dict(shape = size, filters = flt,
                           kernel = [3, 3], padding = 'VALID',
                           normalize = normalize, sse = True)

        # Construction order (forward cell, backward cell, then the two
        # wrappers) is kept identical to the original implementation.
        cell_fw = ConvGRUCell(**cell_kwargs)
        cell_bw = ConvGRUCell(**cell_kwargs)

        cell_fw = ZoneoutWrapper(
            cell_fw, zoneout_drop_prob = zoneout_prob, is_training = train)
        cell_bw = ZoneoutWrapper(
            cell_bw, zoneout_drop_prob = zoneout_prob, is_training = train)

        steps, out = convGRU(inp, cell_fw, cell_bw, length)
        # Forward and backward results are joined along the channel axis.
        gru = tf.concat(out, axis = -1)
        steps = tf.concat(steps, axis = -1)
        print(f"GRU block output shape {gru.shape}")
    return gru, steps

# Model definition

## Placeholders

In [5]:
# Graph inputs. These assignments override the provisional values of
# temporal_model, input_size, n_bands and output_size set in the
# hyperparameter cell.
reg = tf.contrib.layers.l2_regularizer(0.)
temporal_model = True
input_size = 124
n_bands = 17
output_size = input_size - 14

if temporal_model:
    # Five time steps per sample; the layer cell feeds the first four to the
    # GRU and treats the last one as the median composite.
    inp = tf.placeholder(tf.float32, shape=(None, 5, input_size, input_size, n_bands))
    length = tf.placeholder_with_default(np.full((1,), 4), shape = (None,))
else:
    # NOTE(review): `length` is not defined on this branch although the layer
    # cell passes it to gru_block unconditionally.
    inp = tf.placeholder(tf.float32, shape=(None, input_size, input_size, n_bands))
    
labels = tf.placeholder(tf.float32, shape=(None, output_size, output_size))#, 1))
# `mask` has no default value, so it must be fed whenever a loss that was
# built with it is evaluated.
mask = tf.placeholder(tf.float32, shape = (None, output_size, output_size))
keep_rate = tf.placeholder_with_default(1.0, ()) # For DropBlock
is_training = tf.placeholder_with_default(False, (), 'is_training') # For DropBlock
alpha = tf.placeholder(tf.float32, shape = ()) # For loss scheduling
# Fine-tuning learning rate. It is fed by the training loop, but within the
# visible cells only the disabled fine-tuning optimizer refers to it.
ft_lr = tf.placeholder_with_default(0.001, shape = ())
loss_weight = tf.placeholder_with_default(1.0, shape = ())
beta_ = tf.placeholder_with_default(0.0, shape = ()) # For loss scheduling, not currently implemented

INFO:tensorflow:Scale of 0 disables regularizer.




## Layers

In [6]:
# Builds the encoder/decoder graph. Statement order matters here: TensorFlow
# derives automatic op names (e.g. the 'conv2d_5/Sigmoid' output node that the
# freezing cells look up) from the order in which layers are created.
# Filter counts set here override the values from the hyperparameter cell.
initial_flt = 64
mid_flt = initial_flt * 2
high_flt = 64 * 2 * 2
INPUT_SIZE =124
SIZE_X = 124

# All time steps except the last go through the bidirectional ConvGRU.
gru_input = inp[:, :-1, ...]
gru, steps = gru_block(inp = gru_input, length = length,
                            size = [INPUT_SIZE, SIZE_X, ],
                            flt = initial_flt // 2,
                            scope = 'down_16',
                            train = is_training)
with tf.variable_scope("gru_drop"):
    drop_block = DropBlock2D(keep_prob=keep_rate, block_size=4)
    gru = drop_block(gru, is_training)
    
# Median conv: the last time step of the input is convolved on its own.
median_input = inp[:, -1, ...]
median_conv = conv_swish_gn(inp = median_input, is_training = is_training, stride = (1, 1),
            kernel_size = 3, scope = 'conv_median', filters = initial_flt, 
            keep_rate = keep_rate, activation = True, use_bias = False, norm = True,
            csse = True, dropblock = True, weight_decay = None)
print(f"Median conv: {median_conv.shape}")

# GRU features and median features are joined on the channel axis and fused.
concat1 = tf.concat([gru, median_conv], axis = -1)
concat = conv_swish_gn(inp = concat1, is_training = is_training, stride = (1, 1),
            kernel_size = 3, scope = 'conv_concat', filters = initial_flt,
            keep_rate = keep_rate, activation = True, use_bias = False, norm = True,
            csse = True, dropblock = True, weight_decay = None, padding = "SAME")
print(f"Concat: {concat.shape}")

# MaxPool-conv-swish-GroupNorm-csse
# (the recorded cell output shows 124 -> 60 spatially for this stage)
pool1 = MaxPool2D()(concat)
conv1 = conv_swish_gn(inp = pool1, is_training = is_training, stride = (1, 1),
            kernel_size = 3, scope = 'conv1', filters = mid_flt,
            keep_rate = keep_rate, activation = True, use_bias = False, norm = True, padding = "VALID",
            csse = True, dropblock = True, weight_decay = None)
print(f"Conv1: {conv1.shape}")

# MaxPool-conv-swish-csse-DropBlock
pool2 = MaxPool2D()(conv1)
conv2 = conv_swish_gn(inp = pool2, is_training = is_training, stride = (1, 1),
            kernel_size = 3, scope = 'conv2', filters = high_flt, 
            keep_rate = keep_rate, activation = True, use_bias = False, norm = True,
            csse = True, dropblock = True, weight_decay = None, block_size = 4, padding = "VALID")
print("Encoded", conv2.shape)

# First decoder stage: upsample-conv-swish-csse-concat-conv-swish.
# NOTE(review): the earlier "4 - 8" size annotation predates the 124 px
# input; presumably this stage now maps 28 -> 56 px -- confirm.
up2 = tf.keras.layers.UpSampling2D((2, 2), interpolation = 'nearest')(conv2)
#up2 = ReflectionPadding2D((1, 1,))(up2)
up2 = conv_swish_gn(inp = up2, is_training = is_training, stride = (1, 1),
                    kernel_size = 3, scope = 'up2', filters = mid_flt, 
                    keep_rate = keep_rate, activation = True, use_bias = False, norm = True,
                    csse = True, dropblock = True, weight_decay = None)
# Skip connection: conv1 is cropped by 2 px per side before being joined to
# the upsampled features.
conv1_crop = Cropping2D(2)(conv1)
print(conv1_crop.shape)
up2 = tf.concat([up2, conv1_crop], -1)
#up2 = ReflectionPadding2D((1, 1,))(up2)
up2 = conv_swish_gn(inp = up2, is_training = is_training, stride = (1, 1),
                    kernel_size = 3, scope = 'up2_out', filters = mid_flt, 
                    keep_rate =  keep_rate, activation = True, use_bias = False, norm = True,
                    csse = True, dropblock = True, weight_decay = None)

# Second decoder stage: upsample-conv-swish-csse-concat-conv-swish.
# NOTE(review): the earlier "8 - 14" size annotation is likewise stale.
up3 = tf.keras.layers.UpSampling2D((2, 2), interpolation = 'nearest')(up2)
#up3 = ReflectionPadding2D((1, 1,))(up3)
up3 = conv_swish_gn(inp = up3, is_training = is_training, stride = (1, 1),
                    kernel_size = 3, scope = 'up3', filters = initial_flt, 
                    keep_rate = keep_rate, activation = True, use_bias = False, norm = True,
                    csse = True, dropblock = True, weight_decay = None)
# Skip connection from the fused full-resolution features, cropped by 6 px
# per side.
gru_crop = Cropping2D(6)(concat)

up3 = tf.concat([up3, gru_crop], -1)
up3out = conv_swish_gn(inp = up3, is_training = is_training, stride = (1, 1),
                    kernel_size = 3, scope = 'out', filters = initial_flt, 
                    keep_rate  = keep_rate, activation = True, use_bias = False, norm = True,
                    csse = True, dropblock = False, weight_decay = None, padding = "VALID")

# Bias initialiser for the output layer: -log(0.7 / 0.3).
init = tf.constant_initializer([-np.log(0.7/0.3)]) # For focal loss
# NOTE(review): the "receptive field" in both messages below is the literal 1,
# not a computed value.
print(f"The output is {up3out.shape}, with a receptive field of {1}")

# 1x1 convolution with sigmoid activation producing the per-pixel output map.
fm = tf.layers.Conv2D(filters = 1,
            kernel_size = (1, 1),
            padding = 'valid',
            activation = 'sigmoid',
            bias_initializer = init, name = 'conv2d')(up3out)#,

print(f"The output, sigmoid is {fm.shape}, with a receptive field of {1}")


GRU input shape (?, 4, 124, 124, 17), zoneout: 0.9

(3, 3, 49, 64)
(3, 3, 49, 64)
GRU block output shape (?, 124, 124, 64)

conv_median 3 Conv 2D Group Norm RELU CSSE NoBias DropBlock
The non normalized feats are Tensor("conv_median_conv/conv_median/x/mul:0", shape=(?, 124, 124, 64), dtype=float32)
The non normalized feats are Tensor("swish_f32:0", shape=(?, 124, 124, 64), dtype=float32)

Median conv: (?, 124, 124, 64)
conv_concat 3 Conv 2D Group Norm RELU CSSE NoBias DropBlock
The non normalized feats are Tensor("conv_concat_conv/conv_concat/x/mul:0", shape=(?, 124, 124, 64), dtype=float32)
The non normalized feats are Tensor("swish_f32_1:0", shape=(?, 124, 124, 64), dtype=float32)
Concat: (?, 124, 124, 64)
conv1 3 Conv 2D Group Norm RELU CSSE NoBias DropBlock
The non normalized feats are Tensor("conv1_conv/conv1/ws_conv2d_2/Conv2D:0", shape=(?, 60, 60, 128), dtype=float32)
The non normalized feats are Tensor("swish_f32_2:0", shape=(?, 60, 60, 128), dtype=float32)
Conv1: (?, 60, 60, 

In [7]:
# Variable scopes of the output head and decoder that are candidates for
# fine-tuning. tf.get_collection filters by scope prefix, so a scope such as
# "up2" also returns the variables of "up2_out"; the resulting list may
# therefore contain repeated entries, exactly as before.
_finetune_scopes = (
    "conv2d_5",
    "conv2d",
    "csse_out",
    "out",
    "up3",
    "up3_drop",
    "csse_up3",
    "up2_out",
    "up2_out_drop",
    "csse_up2_out",
    "up2",
    "up2_drop",
    "csse_up2",
)

finetune_vars = []
for _finetune_scope in _finetune_scopes:
    finetune_vars.extend(
        tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, _finetune_scope))

In [8]:
def grad_norm(gradients):
    '''Global L2 norm over a collection of gradients.

         Parameters:
          gradients (list): either bare gradient tensors, or the
                            (gradient, variable) pairs returned by
                            Optimizer.compute_gradients. Entries whose
                            gradient is None are skipped.

         Returns:
          norm (tf.Tensor): scalar norm of the stacked per-tensor norms
    '''
    grads_only = []
    for entry in gradients:
        # compute_gradients yields (grad, var) tuples. Taking the norm of the
        # tuple itself would fold the variable's values into the result, and
        # a tuple is never None, so the None filter would not fire either.
        if isinstance(entry, (tuple, list)) and len(entry) == 2:
            grad = entry[0]
        else:
            grad = entry
        if grad is not None:
            grads_only.append(grad)

    norm = tf.compat.v1.norm(
        tf.stack([tf.compat.v1.norm(grad) for grad in grads_only])
    )
    return norm

# Builds the losses, the optimizer ops and an initial Saver, then initialises
# all variables. Everything below only runs when FRESH_START is True.
FRESH_START = True
#print(f"Starting model with: \n {ZONE_OUT_PROB} zone out \n {L2_REG} l2 \n"
 #     f"{INITIAL_LR} initial LR \n {total_parameters} parameters")

# Spatial size of the output map; labels are reshaped to (-1, OUT, OUT, 1).
OUT = 110
if FRESH_START:
    # We use the Adabound optimizer
    optimizer = AdaBoundOptimizer(5e-5, 5e-3)
    #train_loss1 = logcosh(tf.reshape(labels, (-1, 14, 14, 1)), output)
    
    train_loss2 = bce_surface_loss(tf.reshape(labels, (-1, OUT, OUT, 1)), fm,
                                  weight = loss_weight, 
                             alpha = alpha, beta = beta_, mask = mask)

    train_loss = train_loss2# + train_loss2
    
    # If there is any L2 regularization, add it. Current model does not use
    l2_loss = tf.losses.get_regularization_loss()
    if len(tf.losses.get_regularization_losses()) > 0:
        train_loss = train_loss + l2_loss
        
    # Built with the same arguments as train_loss2; used for evaluation.
    test_loss = bce_surface_loss(tf.reshape(labels, (-1, OUT, OUT, 1)),
                            fm, weight = loss_weight, 
                            alpha = alpha, beta = beta_, mask = mask)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    
    # train_op updates every trainable variable; restricting it to
    # finetune_vars is currently commented out.
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(train_loss)#, var_list = finetune_vars)
        #ft_op = ft_optimizer.minimize(train_loss)
    
    # The following code blocks are for sharpness aware minimization
    # Adapted from https://github.com/sayakpaul/Sharpness-Aware-Minimization-TensorFlow
    # For tensorflow 1.15
    # NOTE(review): the results of param.assign_add / param.assign_sub below
    # are discarded and nothing depends on them, so in a Session-based graph
    # they look like they never execute -- sam_gradients would then equal the
    # plain gradients. The training loop in this notebook also runs train_op,
    # not train_step. Confirm whether SAM is meant to be active.
    # NOTE(review): `grad * scale` assumes no gradient returned by
    # compute_gradients is None -- confirm.
    trainable_params = tf.trainable_variables()
    gradients = optimizer.compute_gradients(loss=train_loss, var_list=None)
    gradient_norm = grad_norm(gradients)
    scale = 0.05 / (gradient_norm + 1e-12)
    e_ws = []
    for (grad, param) in gradients:
        e_w = grad * scale
        param.assign_add(e_w)
        e_ws.append(e_w)

    sam_gradients = optimizer.compute_gradients(loss=train_loss, var_list=None)
    for (param, e_w) in zip(trainable_params, e_ws):
        param.assign_sub(e_w)
    train_step = optimizer.apply_gradients(sam_gradients)
    
    # Create a saver to save the model each epoch
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    saver = tf.train.Saver(max_to_keep = 150)#, var_list = all_vars)

(?, 110, 110, 1)
(?, 1)
<unknown>
(?, 110, 110)


(?, 110, 110, 1)
(?, 1)
<unknown>
(?, 110, 110)


2023-12-19 09:30:18.124326: I tensorflow/core/common_runtime/process_util.cc:115] Creating new thread pool with default inter op setting: 8. Tune using inter_op_parallelism_threads for best performance.





In [17]:
def make_saver_varlist(path):
    '''Pairs checkpoint variable names with the current graph's variables.

       The current global variables are sorted by op name and matched
       position-by-position with the checkpoint's variable names (bookkeeping
       entries excluded). The result can be passed to tf.train.Saver so that a
       checkpoint whose names differ from the graph's can still be restored.

         Parameters:
          path (str): checkpoint directory or prefix, as accepted by
                      tf.train.list_variables

         Returns:
          ckptdict (dict): checkpoint variable name -> current tf.Variable
    '''
    import warnings

    # Checkpoint entries that must not be matched against model variables.
    excluded_tokens = ('BackupVariables', 'StochasticWeightAveraging',
                       'global_step', 'is_training', 'n_models')

    current_items = sorted(tf.global_variables(), key = lambda var: var.op.name)

    ckpt_items = [entry[0] for entry in tf.train.list_variables(path)
                  if not any(token in entry[0] for token in excluded_tokens)]

    # The pairing is purely positional, so differing counts mean that zip()
    # silently drops the surplus and the remaining pairs are probably
    # misaligned. Surface that instead of hiding it.
    if len(ckpt_items) != len(current_items):
        warnings.warn(
            f"Checkpoint lists {len(ckpt_items)} variables but the graph has "
            f"{len(current_items)}; the positional name mapping may be misaligned.")

    return dict(zip(ckpt_items, current_items))

In [18]:
# Map the variable names stored under '../models/loss-avg-tf2/' onto the
# variables of the current graph, for use as a Saver var_list.
ckptdict = make_saver_varlist('../models/loss-avg-tf2/')

In [19]:
# Restore weights through the name-mapped saver and load (or start) the
# metrics history for this model directory.
saver = tf.train.Saver(ckptdict)
model_path  = "../models/loss-avg/"
FRESH_START = False

if not os.path.exists(model_path):
    os.makedirs(model_path)

# metrics holds one row per tracked quantity and one column per epoch.
if not os.path.isfile(f"{model_path}metrics.npy"):
    print("Starting anew")
    metrics = np.zeros((6, 300))
else:
    metrics = np.load(f"{model_path}metrics.npy")
    print(f"Loading {model_path}metrics.npy")

if not FRESH_START:
    path = model_path
    saver.restore(sess, tf.train.latest_checkpoint(path))

Starting anew
INFO:tensorflow:Restoring parameters from ../models/loss-avg/-0


In [13]:
# Write the session's variables to a checkpoint with prefix
# '../models/loss-avg-tf2/model', using whichever `saver` is currently bound.
saver.save(sess, '../models/loss-avg-tf2/model')

'../models/loss-avg-tf2/model'

In [14]:
# Restore the latest checkpoint from model_path into the model variables only
# (optimizer, backup and SWA bookkeeping variables are left out), then freeze
# the graph for inference.
FRESH_START = False
model_path  = "../models/loss-avg/"

_non_model_tags = ('Momentum', 'Backup', 'StochasticWeight', 'is_training', 'n_models')
all_vars = [
    var for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    if not any(tag in var.name for tag in _non_model_tags)
]

saver = tf.train.Saver(max_to_keep = 150, var_list = all_vars)

if not os.path.exists(model_path):
    os.makedirs(model_path)

if not os.path.isfile(f"{model_path}metrics.npy"):
    print("Starting anew")
    metrics = np.zeros((6, 300))
else:
    metrics = np.load(f"{model_path}metrics.npy")
    print(f"Loading {model_path}metrics.npy")

if not FRESH_START:
    path = model_path
    saver.restore(sess, tf.train.latest_checkpoint(path))

# Fold the restored variables into constants, keeping only what the sigmoid
# output node needs.
output_node_names = ['conv2d_5/Sigmoid']
frozen_graph_def = tf.graph_util.convert_variables_to_constants(
    sess, sess.graph_def, output_node_names)

#Save the frozen graph
#with open('../models/epoch31/predict_graph.pb', 'wb') as f:
#    f.write(frozen_graph_def.SerializeToString())

Starting anew
INFO:tensorflow:Restoring parameters from ../models/epoch31/model
INFO:tensorflow:Froze 65 variables.
INFO:tensorflow:Converted 65 variables to const ops.


In [10]:
def initialize_uninitialized(sess):
    '''Initialises only those global variables that are not yet initialised.

         Parameters:
          sess (tf.Session): session used both to query the initialisation
                             state and to run the initialiser

         Returns:
          None
    '''
    candidates = tf.global_variables()
    # One boolean per variable: True when it already holds a value.
    ready_flags = sess.run([tf.is_variable_initialized(v) for v in candidates])
    pending = [v for v, ready in zip(candidates, ready_flags) if not ready]

    if pending:
        sess.run(tf.variables_initializer(pending))

"""
ft_optimizer = tf.train.MomentumOptimizer(ft_lr, momentum = 0.8, use_nesterov = True)
train_loss = bce_surface_loss(tf.reshape(labels, (-1, 14, 14, 1)), 
                             fm, weight = loss_weight, 
                             alpha = alpha, beta = beta_)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    
with tf.control_dependencies(update_ops):
    #train_op = optimizer.minimize(train_loss)   
    ft_op = ft_optimizer.minimize(train_loss, var_list = finetune_vars)

trainable_params = tf.trainable_variables()
gradients = ft_optimizer.compute_gradients(loss=train_loss, var_list=None)
gradient_norm = grad_norm(gradients)
scale = 0.05 / (gradient_norm + 1e-12)
e_ws = []
for (grad, param) in gradients:
    e_w = grad * scale
    param.assign_add(e_w)
    e_ws.append(e_w)

sam_gradients = ft_optimizer.compute_gradients(loss=train_loss, var_list=None)
for (param, e_w) in zip(trainable_params, e_ws):
    param.assign_sub(e_w)
train_step_ft = ft_optimizer.apply_gradients(sam_gradients)

initialize_uninitialized(sess)
"""

'\nft_optimizer = tf.train.MomentumOptimizer(ft_lr, momentum = 0.8, use_nesterov = True)\ntrain_loss = bce_surface_loss(tf.reshape(labels, (-1, 14, 14, 1)), \n                             fm, weight = loss_weight, \n                             alpha = alpha, beta = beta_)\nupdate_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)\n    \nwith tf.control_dependencies(update_ops):\n    #train_op = optimizer.minimize(train_loss)   \n    ft_op = ft_optimizer.minimize(train_loss, var_list = finetune_vars)\n\ntrainable_params = tf.trainable_variables()\ngradients = ft_optimizer.compute_gradients(loss=train_loss, var_list=None)\ngradient_norm = grad_norm(gradients)\nscale = 0.05 / (gradient_norm + 1e-12)\ne_ws = []\nfor (grad, param) in gradients:\n    e_w = grad * scale\n    param.assign_add(e_w)\n    e_ws.append(e_w)\n\nsam_gradients = ft_optimizer.compute_gradients(loss=train_loss, var_list=None)\nfor (param, e_w) in zip(trainable_params, e_ws):\n    param.assign_sub(e_w)\ntrain_step_ft = ft

In [11]:
# Implementation of stochastic weight averaging
# Creates the SWA update op, one backup variable per trainable variable, and
# three grouped assign ops for swapping weights in and out of the model.

   
model_vars = tf.trainable_variables()
swa = StochasticWeightAveraging()
swa_op = swa.apply(var_list=model_vars)
with tf.variable_scope('BackupVariables'):
    # Non-trainable copies of the model variables, initialised from the
    # current values.
    # NOTE(review): an earlier comment said these are forced onto the CPU,
    # but no device placement is visible here -- confirm.
    backup_vars = [tf.get_variable(var.op.name, dtype=var.value().dtype, trainable=False,
                                   initializer=var.initialized_value())
                   for var in model_vars]

# operation to assign SWA weights to model
swa_to_weights = tf.group(*(tf.assign(var, swa.average(var).read_value()) for var in model_vars))
# operation to store model into backup variables
save_weight_backups = tf.group(*(tf.assign(bck, var.read_value()) for var, bck in zip(model_vars, backup_vars)))
# operation to get back values from backup variables to model
restore_weight_backups = tf.group(*(tf.assign(var, bck.read_value()) for var, bck in zip(model_vars, backup_vars)))

# The SWA and backup variables were created after the global initialiser ran,
# so initialise whatever is still uninitialised.
initialize_uninitialized(sess)

# Data loading

*  Load in CSV data from Collect Earth
*  Reconstruct the X, Y grid for the Y data per sample
*  Calculate remote sensing indices
*  Stack X, Y, length data
*  Apply median filter to DEM

In [12]:
import hickle as hkl

# Pre-allocate the training arrays. The first dimension is written as a sum
# of 17 addends (117 in total), presumably the per-batch sample counts in the
# order of `batches` below -- TODO confirm. If the files on disk do not add up
# to this total, trailing rows stay zero or the slice assignment fails.
train_x = np.zeros((12+12+8+8+8+8+5 + 4 + 7 + 6 + 6 + 6 + 4 + 5 + 6 + 6 + 6, 12, 172, 172, 17), dtype = np.float32)
train_y = np.zeros((12+12+8+8+8+8+5 + 4 + 7 + 6 + 6 + 6 + 4 + 5 + 6 + 6 + 6, 158, 158), dtype = np.float32)
batches = [ '369X1279Y', '445X1241Y', '1638X1088Y',
           '448X1239Y', '2283X714Y', '2288X763Y', 
           '1637X1088Y', '449X1239Y', '371X1277Y',
          '412X1198Y', '465X1246Y', '1668X1076Y', 
           '359X1281Y', '2382X944Y', '1642X1140Y', '2309X679Y', '1669X1125Y']#'1650X1092Y (4)'
# Write offset into train_x / train_y for the next batch.
start = 0
for batch in batches:
    batchx = hkl.load(f'development/{batch}_x.hkl')
    batchy = np.load(f'development/{batch}_y.npy')
    
    _length = batchx.shape[0]
    print(_length)
    # Label arrays whose last dimension is 14 are replaced by all-ones labels
    # at the 158 px label size.
    # NOTE(review): presumably these are legacy 14 px labels -- confirm.
    if batchy.shape[-1] == 14:
        batchy = np.ones((_length, 158, 158))
    train_x[start:start+_length] = batchx
    train_y[start:start+_length] = batchy
    start += _length
    
# Append the per-pixel temporal median as an extra (13th) time step.
med = np.median(train_x, axis = 1)
med = med[:, np.newaxis, :, :, :]
train_x = np.concatenate([train_x, med], axis = 1)

KeyboardInterrupt: 

## Standardization

In [None]:
# Per-band lower and upper bounds: the 0.1 and 99.9 percentiles of each band.
min_all = [
    0.006576638437476157, 0.0162050812542916, 0.010040436408026246,
    0.013351644159609368, 0.01965362020294499, 0.014229037918669413,
    0.015289539940489814, 0.011993591210803388, 0.008239871824216068,
    0.006546120393682765, 0.0, 0.0, 0.0, -0.1409399364817101,
    -0.4973397113668104, -0.09731556326714398, -0.7193834232943873,
]

max_all = [
    0.2691233691920348, 0.3740291447318227, 0.5171435111009385, 0.6027466239414053,
    0.5650263218127718, 0.5747005416952773, 0.5933928435187305, 0.6034943160143434,
    0.7472037842374304, 0.4, 0.509269855802243, 0.948334642387533,
    0.6729257769285485, 0.8177635298774327, 0.35768999002433816, 0.7545951919107605,
    0.7602693339366691,
]

# Clip every band to its bounds, then rescale it linearly to [-1, 1].
# train_x is modified in place, so this cell must run exactly once per load.
for band in tnrange(0, train_x.shape[-1]):
    lower, upper = min_all[band], max_all[band]
    centre = (upper + lower) / 2
    half_width = (upper - lower) / 2
    train_x[..., band] = np.clip(train_x[..., band], lower, upper)
    train_x[..., band] = (train_x[..., band] - centre) / half_width

## Load and process test data

# Evaluation metrics

In [None]:
def compute_f1_score_at_tolerance(true, pred, tolerance = 1):
    """Counts true positives, false positives and false negatives while
    tolerating small coregistration errors.

    A positive label counts as detected when any prediction lies within
    `tolerance` pixels of it; a positive prediction only counts as a false
    positive when no positive label lies within `tolerance` pixels of it.

    Parameters:
     true (arr): 2D binary label array
     pred (arr): 2D binary prediction array with the same shape as `true`
     tolerance (int): half-width, in pixels, of the square search window

    Returns:
     tp, fp, fn: sums of the per-pixel true positive, false positive and
                 false negative indicator arrays
    """
    tp = np.zeros_like(true)
    fp = np.zeros_like(true)
    fn = np.zeros_like(true)

    n_rows, n_cols = true.shape[0], true.shape[1]

    for x in range(n_rows):
        min_x = max(0, x - tolerance)
        max_x = min(n_rows, x + tolerance + 1)
        for y in range(n_cols):
            min_y = max(0, y - tolerance)
            # The column window is bounded by the column count; bounding it
            # by the row count truncated the window on non-square inputs.
            max_y = min(n_cols, y + tolerance + 1)
            if true[x, y] == 1:
                if np.sum(pred[min_x:max_x, min_y:max_y]) > 0:
                    tp[x, y] = 1
                else:
                    fn[x, y] = 1
            if pred[x, y] == 1:
                if np.sum(true[min_x:max_x, min_y:max_y]) > 0:
                    # A prediction next to (but not on) a label is neither a
                    # true positive nor a false positive.
                    if true[x, y] == 1:
                        tp[x, y] = 1
                else:
                    fp[x, y] = 1

    return np.sum(tp), np.sum(fp), np.sum(fn)

def calculate_metrics(al = 0.4, canopy_thresh = 100):
    '''Evaluates the model on the test set and prints a one-line summary.

       For every test sample below the canopy threshold the model output and
       the test loss are computed. Predictions are then binarised at the
       candidate thresholds 0.35 and 0.40, and the threshold with the highest
       tolerance-relaxed F1 is reported.

         Parameters:
          al (float): currently unused; the loss is evaluated with alpha
                      fixed at 0.33
          canopy_thresh (int): samples whose label sum is at or above
                               (canopy_thresh / 100) * 197 are skipped

         Returns:
          val_loss (float): mean test loss over the evaluated samples
          best_f1 (float): best relaxed F1 over the candidate thresholds
          error (float): mean absolute difference between label and
                         prediction sums at the best threshold
    '''
    start_idx = 0
    stop_idx = len(test_x)
    best_f1, best_thresh = 0, 0
    # Defaults for the summary line: without them the print below raises a
    # NameError whenever no threshold achieves an F1 above zero (e.g. NaN F1).
    p, r, error = 0, 0, 0
    preds, trues, vls = [], [], []

    test_ids = [x for x in range(len(test_x))]
    for test_sample in test_ids[start_idx:stop_idx]:
        if np.sum(test_y[test_sample]) < ((canopy_thresh/100) * 197):
            # NOTE(review): this reshape assumes 13-step, 28 px test samples,
            # which does not match the 5-step, 124 px `inp` placeholder
            # defined in this notebook -- confirm the test data layout.
            x_input = test_x[test_sample].reshape(1, 13, 28, 28, n_bands)
            # NOTE(review): result is unused; kept because calc_median_input
            # is defined outside this notebook and may have side effects.
            x_median_input = calc_median_input(x_input)
            y_true = test_y[test_sample].reshape(1, OUT, OUT)
            y, vl = sess.run([fm, test_loss], feed_dict={inp: x_input,
                                                          length: np.full((1,), 12),
                                                          is_training: False,
                                                          labels: y_true,
                                                          # `mask` has no default value and test_loss
                                                          # is built with it, so feed an all-ones
                                                          # mask as the training loop does.
                                                          mask: np.ones_like(y_true),
                                                          loss_weight: 0.1,
                                                          alpha: 0.33,
                                                          })
            preds.append(y.reshape((OUT, OUT)))
            vls.append(vl)
            trues.append(test_y[test_sample].reshape((OUT, OUT)))

    # Candidate binarisation thresholds: thresh * 0.05 for thresh in {7, 8}.
    for thresh in range(7, 9):
        tps_relaxed = np.empty((len(preds), ))
        fps_relaxed = np.empty((len(preds), ))
        fns_relaxed = np.empty((len(preds), ))
        abs_error = np.empty((len(preds), ))

        for sample in range(len(preds)):
            pred = np.copy(preds[sample])
            true = trues[sample]

            pred[np.where(pred >= thresh*0.05)] = 1
            pred[np.where(pred < thresh*0.05)] = 0

            # The first and last rows are left out of the area comparison.
            true_s = np.sum(true[1:-1])
            pred_s = np.sum(pred[1:-1])
            abs_error[sample] = abs(true_s - pred_s)
            tp_relaxed, fp_relaxed, fn_relaxed = compute_f1_score_at_tolerance(true, pred)
            tps_relaxed[sample] = tp_relaxed
            fps_relaxed[sample] = fp_relaxed
            fns_relaxed[sample] = fn_relaxed

        oa_error = np.mean(abs_error)
        precision_r = np.sum(tps_relaxed) / (np.sum(tps_relaxed) + np.sum(fps_relaxed))
        recall_r = np.sum(tps_relaxed) / (np.sum(tps_relaxed) + np.sum(fns_relaxed))
        f1_r = 2*((precision_r* recall_r) / (precision_r + recall_r))

        if f1_r > best_f1:
            best_f1 = f1_r
            p = precision_r
            r = recall_r
            error = oa_error
            best_thresh = thresh*0.05

    # Precision is reported under "P" and recall under "R"; the two labels
    # were swapped before.
    print(f"Val loss: {np.around(np.mean(vls), 3)}"
          f" Thresh: {np.around(best_thresh, 2)}"
          f" F1: {np.around(best_f1, 3)} P: {np.around(p, 3)} R: {np.around(r, 3)}"
          f" Error: {np.around(error, 3)}")
    return np.mean(vls), best_f1, error

# Data augmentation

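The code block below augments each batch by randomly subsampling time steps and applying random flips. It also contains a call to CutMix, currently commented out, which splices together random samples whose output labels have similar tree cover distributions (i.e. fall within the same k-means cluster). CutMix is not strictly necessary, but it does give a small performance improvement.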

In [None]:
LEN = 4
def augment_batch(batch_ids, batch_size):
    '''Builds one augmented training batch from train_x / train_y.

       For every sample, LEN time steps are drawn from overlapping windows of
       the 12 available steps, a median composite over a random subset of the
       steps is appended as the final step, and one of four flip variants
       (none, vertical, both, horizontal) is applied identically to the
       imagery and the labels.

         Parameters:
          batch_ids (list): indices into train_x / train_y; must contain
                            batch_size entries
          batch_size (int): number of samples in the batch

         Returns:
          x_batch (arr): (batch_size, LEN + 1, H, W, bands) imagery
          y_batch (arr): (batch_size, H_out, W_out) labels
    '''
    x = np.copy(train_x[batch_ids])
    y = train_y[batch_ids]

    # Random subset of the 12 time steps used for each sample's median.
    samples_to_median = np.random.randint(0, 12, size=(batch_size, 12))
    # One time step from each of four overlapping windows.
    samples_to_select = np.zeros((batch_size, 4))
    samples_to_select[:, 0] = np.random.randint(0, 4, size=(batch_size))
    samples_to_select[:, 1] = np.random.randint(3, 7, size=(batch_size))
    samples_to_select[:, 2] = np.random.randint(6, 10, size=(batch_size))
    samples_to_select[:, 3] = np.random.randint(8, 12, size=(batch_size))
    # The builtin int replaces the np.int alias, which newer NumPy releases
    # no longer provide.
    samples_to_select = samples_to_select.astype(int)
    # Unused draw, retained so the random stream consumed per batch is the
    # same as before.
    np.random.randint(2, 5, size=(batch_size))

    # Spatial and band sizes follow the data instead of being hard-coded.
    x_selected = np.zeros((x.shape[0], LEN + 1) + x.shape[2:])
    for samp in range(batch_size):
        samps = samples_to_median[samp, :]
        lower_samp = np.min(samps)
        upper_samp = np.max(samps)
        med_samp = np.median(x[samp][np.unique(samps)], axis = 0)

        if x_selected.shape[1] == 5:
            x_selected[samp, :-1, ...] = x[samp, samples_to_select[samp]]
        else:
            # Full-length variant: steps outside the sampled range are
            # replaced by the nearest step inside it.
            x[samp, :lower_samp] = x[samp, lower_samp]
            x[samp, upper_samp:] = x[samp, upper_samp]
            x_selected[samp, :-1] = x[samp]
        x_selected[samp, -1, ...] = med_samp

    # Flips are written to fresh arrays so no element is read and written in
    # the same assignment.
    x_batch = np.zeros_like(x_selected)
    y_batch = np.zeros_like(y)

    flips = np.random.choice(np.array([0, 1, 2, 3]), batch_size, replace = True)
    for i in range(x_selected.shape[0]):
        current_flip = flips[i]
        # Imagery is (T, H, W, C) and labels are (H, W): imagery axes 1 / 2
        # correspond to label axes 0 / 1.
        if current_flip == 0:
            x_batch[i] = x_selected[i]
            y_batch[i] = y[i]
        if current_flip == 1:
            x_batch[i] = np.flip(x_selected[i], 1)
            y_batch[i] = np.flip(y[i], 0)
        if current_flip == 2:
            x_batch[i] = np.flip(x_selected[i], (2, 1))
            y_batch[i] = np.flip(y[i], (1, 0))
        if current_flip == 3:
            x_batch[i] = np.flip(x_selected[i], 2)
            y_batch[i] = np.flip(y[i], 1)

    y_batch = y_batch.reshape((batch_size,) + y.shape[1:])

    #x_batch, y_batch = cut_mix(x_batch, y_batch, batch_ids, 0.5, batch_size)
    #x_batch = np.delete(x_batch, [11, 12], axis = -1)
    return x_batch, y_batch

#x_batch_test, y_batch_test= augment_batch([x for x in range(32)], 32)

#x_batch_test, y_batch_test= augment_batch([x for x in range(32)], 32)

In [None]:
# Plot the tracked metrics for epochs [start, end).
# The seaborn return values are no longer bound to l1 / f / l / e: `l1`
# overwrote the `l1` regularizer imported from keras.regularizers, and `f`
# was rebound from the Figure to an Axes. sns.scatterplot draws on the axes
# passed through `ax`, so limits are set on those axes directly.
start = 75
end = 125
epochs = np.arange(start, end)

fig, ((c1r1, c1r2), (c2r1, c2r2)) = plt.subplots(2, 2, sharey=False)
fig.set_size_inches(15, 12)

c1r1.set_title(f"Train loss - {model_path}")
sns.scatterplot(y = metrics[0, start:end], x = epochs, ax = c1r1)
c1r1.set(ylim=(0.30, .40))

c1r2.set_title("F1 score")
sns.scatterplot(y = metrics[5, start:end], x = epochs, ax = c1r2)
c1r2.set(ylim=(0.84, .91))

c2r1.set_title("Test loss")
sns.scatterplot(y = metrics[1, start:end], x = epochs, ax = c2r1)
c2r1.set(ylim=(0.140, .165))

c2r2.set_title("Absolute % error")
sns.scatterplot(y = metrics[2, start:end] / 2, x = epochs, ax = c2r2)
c2r2.set(ylim=(2.2, 3.5))
plt.show()

In [None]:
# Importing the submodule explicitly makes tqdm.notebook available even when
# no earlier cell has triggered its lazy import.
import tqdm.notebook

best_val = 0.72
fine_tune = False
ft_epochs = 0
BATCH_SIZE = 4
# DropBlock keep rate fed on every training step.
dropblock_keep = 0.5

# loss2 125-160 is 0.4 alpha, >0.6 surface, 0.33 loss weight
# loss45 is 0.4 alpha, >0.45 surface, 0.4 loss weight
# loss45 250 - 300 is 0.4 alpha, >0.45 surface, 0.4 loss weight with minimum surface loss
nepochs = 2
for i in range(351, 360):
    # Both branches of the previous `i >= 120` check assigned False.
    SWA = False  # set to True (for i >= 120) to start SWA
    al = 0.35

    # Learning-rate warm-up over the first epochs of this run.
    # NOTE(review): ft_lr is fed below, but train_op was built with the
    # AdaBound optimizer, which does not appear to read it -- confirm.
    ft_learning_rate = 5e-3
    if nepochs < 5:
        ft_learning_rate *= (0.2 * nepochs)
    nepochs += 1
    be = 0.0
    test_al = al
    op = train_op# if fine_tune else train_op

    train_ids = [x for x in range(len(train_y))]
    np.random.shuffle(train_ids)
    randomize = train_ids
    #randomize = equibatch(train_ids, 0)
    # Log the keep rate that is actually fed, not an unused schedule value.
    print(f"starting epoch {i}, "
          f"alpha: {al}, beta: {be}, "
          f"drop: {dropblock_keep} "
          f"Learning rate: {ft_learning_rate}"
         )

    loss = train_loss
    #test_ids = [x for x in range(0, len(test_x))]
    losses = []

    # Samples beyond the last full batch are skipped for this epoch.
    for k in tqdm.notebook.tnrange(int(len(randomize) // BATCH_SIZE)):
        batch_ids = randomize[k*BATCH_SIZE:(k+1)*BATCH_SIZE]
        x_batch, y_batch = augment_batch(batch_ids, BATCH_SIZE)
        #x_batch = augment_batch(batch_ids, BATCH_SIZE)
        opt, tr = sess.run([op, loss],
                          feed_dict={inp: x_batch,
                                     length: np.full((BATCH_SIZE,), 4),
                                     labels: y_batch,
                                     mask: np.ones_like(y_batch),
                                     is_training: True,
                                     loss_weight: 0.4,
                                     keep_rate: dropblock_keep,#np.max(((1. - (i * 0.01)), MAX_DROPBLOCK)),
                                     alpha: al,
                                     beta_: be,
                                     ft_lr: ft_learning_rate,
                                     })
        losses.append(tr)

    # NOTE(review): the last batch's loss is left out of the mean -- confirm
    # that this is intended.
    print(f"Epoch {i}: Loss {np.around(np.mean(losses[:-1]), 3)}")

    # Make sure both output directories exist before writing into them.
    checkpoint_dir = f"../models/epoch30-{str(i)}"
    os.makedirs(checkpoint_dir, exist_ok = True)
    saver.save(sess, f"{checkpoint_dir}/model")

    output_node_names = ['conv2d_5/Sigmoid']
    frozen_graph_def = tf.graph_util.convert_variables_to_constants(
        sess,
        sess.graph_def,
        output_node_names)

    # Save the frozen graph
    # NOTE(review): checkpoints go to epoch30-<i>/ while frozen graphs go to
    # epoch19/ -- confirm the intended directory.
    frozen_graph_dir = '../models/epoch19'
    os.makedirs(frozen_graph_dir, exist_ok = True)
    with open(f'{frozen_graph_dir}/predict_graph-{str(i)}.pb', 'wb') as graph_file:
        graph_file.write(frozen_graph_def.SerializeToString())

In [None]:
#sess.run(swa_to_weights)
# Replace `saver` with one that covers every saveable variable in the graph
# (no var_list is given) and write a checkpoint with prefix
# '../models/loss2/model'.
saver = tf.train.Saver(max_to_keep = 150)
#os.mkdir(f"../models/loss2/")
save_path = saver.save(sess, f"../models/loss2/model")


In [None]:

# Freeze the current session: variables are folded into constants and only
# the sub-graph needed for the sigmoid output node is kept.
output_node_names = ['conv2d_5/Sigmoid']
frozen_graph_def = tf.graph_util.convert_variables_to_constants(
    sess,
    sess.graph_def,
    output_node_names)


# Save the frozen graph
# Nothing else in the notebook creates this directory, and open() does not
# create missing parents, so create it here.
frozen_graph_path = '../models/loss3/predict_graph.pb'
os.makedirs(os.path.dirname(frozen_graph_path), exist_ok = True)
with open(frozen_graph_path, 'wb') as f:
    f.write(frozen_graph_def.SerializeToString())