# Master model development

## John Brandt

### Last updated: November 1 2019

*  Package loading
*  Hyperparameter definitions
*  Additional layer definitions
*  Model definition
*  Data loading
*  Data preprocessing
*  K-means clustering
*  Augment training data
*  Loss definition
*  Equibatch creation
*  Model training
*  Model validation and sanity checks

In [None]:
#TODO Remove imports that aren't needed to save RAM
from tqdm import tqdm_notebook, tnrange
import tensorflow as tf

sess = tf.Session()
from keras import backend as K
K.set_session(sess)

import keras
from tensorflow.python.keras.layers import *
from tensorflow.python.keras.layers import ELU
from keras.losses import binary_crossentropy
from tensorflow.python.ops import array_ops
from tensorflow.python.keras.layers import Conv2D, Lambda, Dense, Multiply, Add

import tensorflow.contrib.slim as slim
from tensorflow.contrib.slim import conv2d

import pandas as pd
import numpy as np
from random import shuffle
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import os
import itertools
from tflearn.layers.conv import global_avg_pool
from tensorflow.contrib.framework import arg_scope
from keras.regularizers import l1
from tensorflow.layers import batch_normalization
from tensorflow.python.util import deprecation as deprecation
deprecation._PRINT_DEPRECATION_WARNINGS = False

os.environ['KMP_DUPLICATE_LIB_OK']='True'

In [None]:
%run ../src/zoneout.py
%run ../src/convgru.py
%run ../src/lovasz.py
%run ../src/utils.py
%run ../src/adabound.py
%run ../src/slope.py

# Hyperparameter definitions

In [None]:
# Global hyperparameters and dataset-size constants used throughout the notebook.
# NOTE(review): the trailing "(low, high, step) --> n" annotations look like
# search ranges and candidate counts from a hyperparameter sweep -- confirm.
ZONE_OUT_PROB = 0.10 #(0.05, 0.20, 0.05) --> 4   (passed to ZoneoutWrapper)
L2_REG = 0.0005 #(1-e6, 1-e1, x10) --> 5          (passed to keras.regularizers.l2)
INITIAL_LR = 2e-6 #(1e-6, 1e-3, x5) --> 10
FINAL_LR = 2e-4 # (1e - 5, 1e-2, x5) --> 10
BN_MOMENTUM = 0.9 # --> 3                          (batch-normalisation momentum)
BATCH_SIZE = 4 # -->4                              (reassigned in the training cell)
TRAIN_RATIO = 0.8
TEST_RATIO = 0.2


AUGMENTATION_RATIO = 4
IMAGE_SIZE = 16
# Sample ids are the file names in ../data/final/ with the last four characters
# (presumably the ".npy" extension) stripped; names containing ".DS" are skipped.
existing = [int(x[:-4]) for x in os.listdir('../data/final/') if ".DS" not in x]
N_SAMPLES = len(existing)

LABEL_SIZE = 14

    
# Expected sample counts after augmentation; the print shows the corresponding
# un-augmented train / test counts.
TRAIN_SAMPLES = int((N_SAMPLES * AUGMENTATION_RATIO) * TRAIN_RATIO)
TEST_SAMPLES = int((N_SAMPLES * AUGMENTATION_RATIO) - TRAIN_SAMPLES)
print(TRAIN_SAMPLES // AUGMENTATION_RATIO, N_SAMPLES - (TRAIN_SAMPLES // AUGMENTATION_RATIO))

# Additional layer definitions

In [None]:
def conv_bn_elu(inp, is_training, kernel_size, scope, filter_count = 16):
    """Square convolution followed by ELU and batch normalisation.

    A 3x3 kernel is applied to a reflection-padded input with 'valid'
    convolution padding; any other kernel size uses 'same' padding on the
    unpadded input. The batch-norm scope is ``scope + "bn"``.
    """
    reflect = kernel_size == 3
    conv_input = ReflectionPadding2D((1, 1,))(inp) if reflect else inp
    conv_layer = Conv2D(filters=filter_count,
                        kernel_size=(kernel_size, kernel_size),
                        padding='valid' if reflect else 'same',
                        kernel_regularizer=reg)
    activated = ELU()(conv_layer(conv_input))
    return Batch_Normalization(activated, training=is_training, scope=scope + "bn")
    
    
def fpa(inp, filter_count):
    """Feature pyramid attention block.

    Combines a 1x1 branch, a two-level pyramid (3x3 then 2x2 convolutions) and
    a globally pooled branch: ``one * (three_f + two_f) + upsampled_global``.
    Reads the module-level ``is_training`` placeholder.
    """
    def branch(tensor, kernel, name):
        # Every branch shares the training flag and the filter count.
        return conv_bn_elu(tensor, is_training, kernel, name, filter_count)

    one = branch(inp, 1, 'forward1')
    three = branch(inp, 3, 'down1')
    three_f = branch(three, 3, 'down1_f')
    two = branch(three, 2, 'down2')
    two_f = branch(two, 2, 'down2_f')

    # Global-context branch: pool to 1x1, convolve, then tile back to 4x4.
    pooled = tf.keras.layers.GlobalAveragePooling2D()(inp)
    pooled_map = tf.reshape(pooled, (-1, 1, 1, pooled.shape[-1]))
    one_top = branch(pooled_map, 1, 'top1')
    four_top = tf.keras.layers.UpSampling2D((4, 4))(one_top)

    pyramid_sum = tf.add(three_f, two_f)
    attended = tf.add(tf.multiply(one, pyramid_sum), four_top)
    print("Feature pyramid attention shape {}".format(attended.shape))
    return attended


def upconv2d(X, filters, is_training, scope):
    """Double the spatial size with nearest-neighbour resizing, then apply a 3x3 conv block."""
    doubled_size = [X.shape[1]*2, X.shape[2]*2]
    upsampled = tf.image.resize_images(X, doubled_size,
                                       method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    return conv_bn_elu(upsampled, is_training, 3, scope, filter_count=filters)

    
def create_deconv_init(filter_size, num_channels):
    """Build a constant initializer holding a bilinear-upsampling kernel.

    The returned weights have shape (filter_size, filter_size, num_channels,
    num_channels); the bilinear kernel sits on the channel diagonal and all
    cross-channel entries are zero.
    """
    scale_factor = (filter_size + 1) // 2
    center = scale_factor - 1 if filter_size % 2 == 1 else scale_factor - 0.5

    # The 2-D kernel is separable: the outer product of a 1-D triangular taper.
    taper = 1 - np.abs(np.arange(filter_size) - center) / scale_factor
    bilinear_kernel = np.outer(taper, taper).astype(np.float32)

    weights = np.zeros((filter_size, filter_size, num_channels, num_channels))
    for channel in range(num_channels):
        weights[:, :, channel, channel] = bilinear_kernel

    # Wrap the numpy array so it can be passed as a kernel initializer.
    return tf.constant_initializer(value=weights, dtype=tf.float32)


def get_deconv2d(inp, filter_count, num_channels, scope, is_training):
    """2x transposed-convolution upsampling (bilinear init) followed by ELU and batch norm.

    ``num_channels`` is accepted for interface compatibility but is not used.
    """
    deconv = tf.keras.layers.Conv2DTranspose(
        filters=filter_count,
        kernel_size=(4, 4),
        strides=(2, 2),
        padding='same',
        kernel_initializer=create_deconv_init(4, filter_count),
    )
    activated = ELU()(deconv(inp))
    return Batch_Normalization(activated, training=is_training, scope=scope + "bn")


def Batch_Normalization(x, training, scope):
    """Batch renormalisation of ``x`` using the global BN_MOMENTUM; ``scope`` names the layer."""
    bn_options = dict(
        momentum=BN_MOMENTUM,
        training=training,
        renorm=True,
        reuse=None,
        name=scope,
    )
    return batch_normalization(inputs=x, **bn_options)

    

def gau(x_low_level, x_high_level, scope, is_training, filter_count, uptype, size = 4):
    """
    The global attention upsample to replace the up_cat_conv element.

    ``x_low_level`` is convolved and gated by a channel descriptor derived from
    the global average of ``x_high_level``; the gated map is upsampled 2x and
    added to a convolved copy of ``x_high_level``.

    Args:
        x_low_level: 4-D tensor to be gated and upsampled.
        x_high_level: 4-D tensor providing the gate and the residual branch.
        scope: string used to build unique layer / batch-norm names.
        is_training: training flag forwarded to batch normalisation.
        filter_count: number of filters in every convolution of the block.
        uptype: "upconv" (resize + conv) or "transpose" (transposed conv).
        size: spatial size the 1x1 gate is tiled to before multiplication.

    Raises:
        ValueError: if ``uptype`` is not "upconv" or "transpose".
    """
    # Fail early with a clear message; previously an unknown uptype left
    # `gated_high` unbound and surfaced as an UnboundLocalError at the end.
    if uptype not in ("upconv", "transpose"):
        raise ValueError(
            "uptype must be 'upconv' or 'transpose', got {!r}".format(uptype))

    print(x_low_level.shape)
    print(x_high_level.shape)
    low_feat = conv_bn_elu(x_low_level, is_training, 3, 'gauforward' + scope, filter_count)

    # Channel gate computed from the globally pooled high-level features.
    high_gap = tf.keras.layers.GlobalAveragePooling2D()(x_high_level)
    high_feat = tf.keras.layers.Dense(filter_count, activation='linear', use_bias=False)(high_gap)
    high_feat = ELU()(high_feat)
    high_feat = Batch_Normalization(high_feat, training=is_training, scope = scope + "bn_highfeat")
    high_feat = tf.keras.layers.Reshape((1, 1, -1))(high_feat)
    high_feat_gate = tf.keras.layers.UpSampling2D((size, size))(high_feat)

    gated_low = tf.keras.layers.multiply([low_feat, high_feat_gate])
    gated_low = conv_bn_elu(gated_low, is_training, 3, 'gauforward5' + scope, filter_count)

    if uptype == "upconv":
        gated_high = upconv2d(gated_low, filter_count, is_training, scope + "conv")
    else:  # "transpose"
        bilinear_init = create_deconv_init(4, filter_count)
        gated_high = tf.keras.layers.Conv2DTranspose(filters = filter_count, kernel_size = (4, 4),
                                                     strides=(2, 2), padding='same',
                                                     kernel_initializer = bilinear_init)(gated_low)
        gated_high = ELU()(gated_high)
        gated_high = Batch_Normalization(gated_high, training=is_training, scope = scope + "bn_gatedhigh")

    high_clamped = conv_bn_elu(x_high_level, is_training, 3, 'gauforward1' + scope, filter_count)
    return tf.keras.layers.add([gated_high, high_clamped])


def attention(inputs, attention_size, time_major=False, return_alphas=False):
    """Additive attention pooling over the time axis of an RNN output.

    Args:
        inputs: tensor of shape (B, T, D), or (T, B, D) when ``time_major`` is
            True, or a tuple of forward/backward outputs that is concatenated
            on the last axis.
        attention_size: size A of the hidden attention projection.
        time_major: whether ``inputs`` is laid out as (T, B, D).
        return_alphas: also return the (B, T) attention weights.

    Returns:
        The (B, D) attention-weighted sum over time, and optionally the
        weights.
    """
    if isinstance(inputs, tuple):
        # In case of Bi-RNN, concatenate the forward and the backward RNN outputs.
        inputs = tf.concat(inputs, 2)

    if time_major:
        # (T,B,D) => (B,T,D). Uses the public tf.transpose rather than the
        # non-public `tf.array_ops` module path.
        inputs = tf.transpose(inputs, [1, 0, 2])

    hidden_size = inputs.shape[2].value  # D value - hidden size of the RNN layer

    # Trainable parameters
    w_omega = tf.Variable(tf.random_normal([hidden_size, attention_size], stddev=0.1))
    b_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))
    u_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))

    with tf.name_scope('v'):
        # Applying fully connected layer with non-linear activation to each of the B*T timestamps;
        #  the shape of `v` is (B,T,D)*(D,A)=(B,T,A), where A=attention_size
        v = tf.tanh(tf.tensordot(inputs, w_omega, axes=1) + b_omega)

    # For each of the timestamps its vector of size A from `v` is reduced with `u` vector
    vu = tf.tensordot(v, u_omega, axes=1, name='vu')  # (B,T) shape
    alphas = tf.nn.softmax(vu, name='alphas')         # (B,T) shape

    # Output of (Bi-)RNN is reduced with attention vector; the result has (B,D) shape
    output = tf.reduce_sum(inputs * tf.expand_dims(alphas, -1), 1)

    if return_alphas:
        return output, alphas
    return output

In [None]:
def cse_block(prevlayer, prefix):
    """Channel squeeze-and-excitation: rescale channels by a learned sigmoid gate."""
    n_channels = K.int_shape(prevlayer)[3]
    # Squeeze: average over the two spatial axes.
    squeezed = Lambda(lambda xin: K.mean(xin, axis=[1, 2]))(prevlayer)
    # Excite: bottleneck (C/2, relu) followed by a per-channel sigmoid gate.
    bottleneck = Dense(n_channels // 2, name=prefix + 'cse_lin1', activation='relu')(squeezed)
    gate = Dense(n_channels, name=prefix + 'cse_lin2', activation='sigmoid')(bottleneck)
    return Multiply()([prevlayer, gate])


def sse_block(prevlayer, prefix):
    """Spatial squeeze-and-excitation: gate the input with a sigmoid 1x1 convolution."""
    # Bug? Should be 1 here?  (the gate currently has as many filters as the
    # input has channels rather than a single spatial map)
    gate_filters = K.int_shape(prevlayer)[3]
    gate_layer = Conv2D(gate_filters, (1, 1),
                        padding="same",
                        kernel_initializer="he_normal",
                        activation='sigmoid',
                        strides=(1, 1),
                        name=prefix + "_conv")
    gate = gate_layer(prevlayer)
    return Multiply(name=prefix + "_mul")([prevlayer, gate])


def csse_block(x, prefix):
    '''
    Concurrent spatial and channel squeeze & excitation
    (https://arxiv.org/abs/1803.02579): the channel-gated and spatially-gated
    copies of ``x`` are summed element-wise.
    '''
    channel_branch = cse_block(x, prefix)
    spatial_branch = sse_block(x, prefix)
    return Add(name=prefix + "_csse_mul")([channel_branch, spatial_branch])

class ReflectionPadding2D(Layer):
    """Reflection-pad the two spatial axes of a channels-last 4-D tensor.

    ``padding`` is ``(height_pad, width_pad)``: axis 1 is padded by
    ``padding[0]`` on each side and axis 2 by ``padding[1]`` on each side.
    """

    def __init__(self, padding=(1, 1), **kwargs):
        self.padding = tuple(padding)
        self.input_spec = [InputSpec(ndim=4)]
        super(ReflectionPadding2D, self).__init__(**kwargs)

    def compute_output_shape(self, s):
        """ If you are using "channels_last" configuration"""
        return (s[0], s[1] + 2 * self.padding[0], s[2] + 2 * self.padding[1], s[3])

    def call(self, x, mask=None):
        # Unpack in the same (height, width) order compute_output_shape uses;
        # the previous (w, h) unpacking swapped the axes for asymmetric padding.
        h_pad, w_pad = self.padding
        return tf.pad(x, [[0, 0], [h_pad, h_pad], [w_pad, w_pad], [0, 0]], 'REFLECT')


# Model definition

In [None]:
# Shared L2 kernel regulariser, read as a global by conv_bn_elu.
reg = keras.regularizers.l2(L2_REG)
# Graph inputs: imagery (batch, 24, IMAGE_SIZE, IMAGE_SIZE, 14), per-sample
# sequence length (batch, 1) and 14x14 labels.
inp = tf.placeholder(tf.float32, shape=(None, 24, IMAGE_SIZE, IMAGE_SIZE, 14))
length = tf.placeholder(tf.int32, shape = (None, 1))
labels = tf.placeholder(tf.float32, shape=(None, 14, 14))#, 1))
# NOTE(review): `alpha` is not referenced by any code visible in this notebook.
alpha = tf.placeholder(tf.float32, shape = (None))

# Flattened (batch,) view of `length`, passed to the recurrent down block.
length2 = tf.reshape(length, (-1,))
# Defaults to False so that inference needs no explicit feed.
is_training = tf.placeholder_with_default(False, (), 'is_training')
    
FILTER_SIZE = IMAGE_SIZE

# Filter counts for the down-sampling path.
down_16 = 18
down_8 = 72

# Filter counts for the up-sampling path.
up_8 = 36
up_16 = 18

def down_block(inp, length, size, flt, scope, train):
    """Recurrent encoder block.

    Runs a bidirectional ConvGRU (with zoneout) over the time axis, pools the
    time steps with attention, applies a 3x3 conv block and max-pools by 2.

    Returns:
        (down, flattened): the pooled feature map and the pre-pooling map
        used later as a skip connection.
    """
    with tf.variable_scope(scope):
        # Forward and backward cells are built first, then both are wrapped.
        raw_cells = [ConvGRUCell(shape=size, filters=flt,
                                 kernel=[3, 3], padding='VALID')
                     for _ in range(2)]
        cell_fw, cell_bw = [ZoneoutWrapper(cell,
                                           zoneout_drop_prob=ZONE_OUT_PROB,
                                           is_training=train)
                            for cell in raw_cells]
        gru = convGRU(inp, cell_fw, cell_bw, length)
        print(gru[0].shape)

        # Collapse each time step to a vector so attention can weight the steps.
        per_step = tf.reshape(gru[0], (-1, 24, 16*16*flt*2))
        attended = attention(per_step, flt*2, time_major=False, return_alphas=False)
        flattened = tf.reshape(attended, (-1, 16, 16, flt*2))
        flattened = conv_bn_elu(flattened, train, 3, scope, filter_count=flt*2)
        down = MaxPool2D(pool_size=(2, 2))(flattened)
        print("Down block shape: {}".format(down.shape))
    return down, flattened


def down_block_no_gru(inp, flt, scope, train):
    """Non-recurrent encoder block: two 3x3 conv blocks, csSE, then 2x2 max-pool.

    Args:
        inp: 4-D input feature map.
        flt: number of filters in both convolutions.
        scope: variable scope and name prefix of the block.
        train: training flag forwarded to batch normalisation.

    Returns:
        The max-pooled feature map.
    """
    with tf.variable_scope(scope):
        # Use the `train` argument; the block previously ignored it and read
        # the module-level `is_training` placeholder instead.
        x = conv_bn_elu(inp, train, 3, scope + "_1", filter_count = flt)
        x = conv_bn_elu(x, train, 3, scope + "_2", filter_count = flt)
        x = csse_block(x, prefix='csse_block_{}'.format(scope))
        down = MaxPool2D(pool_size = (2, 2))(x)
        print("Down block shape: {}".format(down.shape))
    return down


def up_block(inp, concat_inp, flt, sq, scope, concat, is_training, uptype, padding = True):
    """Decoder block: global attention upsample followed by csSE recalibration.

    ``sq``, ``concat`` and ``padding`` are accepted but not used by the body.
    """
    with tf.variable_scope(scope):
        # The gate is tiled to the width of the tensor being upsampled.
        gate_size = inp.shape[-2]
        attended = gau(inp, concat_inp, scope, is_training, flt, uptype, gate_size)
        recalibrated = csse_block(attended, prefix='csse_block_{}'.format(scope))
        print("Up block conv 1 shape: {}".format(recalibrated.shape))
        return recalibrated
        
        
# Assemble the network: recurrent encoder, convolutional encoder, feature
# pyramid attention, two attention-gated decoder stages, a hypercolumn
# concatenation and a sigmoid output head. Statement order matters because
# every call adds layers and variables to the default graph.

# Down block - 16 - 8
down_1, copy_1 = down_block(inp = inp, 
                            length = length2, 
                            size = [FILTER_SIZE, FILTER_SIZE], 
                            flt = down_16, 
                            scope = 'down_16', 
                            train = is_training)

# Down block - 8 - 4
down_2 = down_block_no_gru(down_1, down_8, 'down_8', is_training)

# Feature pyramid attention block - 4 - 4
down_fpa = fpa(down_2, down_8)

# Up block 4 - 8
up_3 = up_block(inp = down_fpa,
                concat_inp = down_1, 
                flt = up_8, 
                sq = up_8,
                scope = 'up_8', 
                concat = True, 
                is_training = is_training, 
                uptype = "transpose", 
                padding =  True) # 4 - 8

# Up block 8 - 16
up_2 = up_block(inp = up_3,
                concat_inp = copy_1,
                flt = up_16, 
                sq = up_16, 
                scope = 'up_16',
                concat = True, 
                is_training = is_training,
                uptype = "transpose",
                padding = True) # 8 - 16

# Hypercolumns: the 8x8 decoder output is upsampled once more and concatenated
# with the 16x16 decoder output along the channel axis.
#up_4_8 = get_deconv2d(down_2, down_8, up_8, "upfinal1", is_training)
#up_4_8_16 = getdeconv2d(up_4_8, up_8, up_8, 'upfinal1_1', is_training)
#print("Hypercolumn 1 {}".format(up_4_16.shape))
up_8_16 = get_deconv2d(up_3, up_8, up_16, 'upfinal2', is_training)   
print("Hypercolumn 2 {}".format(up_8_16.shape))
concat_final = tf.concat([up_2, up_8_16], axis = -1)


# Down block 16 - 14 (a 3x3 'valid' convolution trims one pixel per border)
up_2 = Conv2D(filters = 64, kernel_size = (3, 3), padding = 'valid')(concat_final)
elu = ELU()(up_2)
bn = Batch_Normalization(elu, training=is_training, scope =  "conv32_bn")


# Final conv block, with concatenation of DEM Slope
# (the slope concatenation is currently commented out)
#slope = tf.reshape(inp[:, 0, 1:15, 1:15, -1], (-1, 14, 14, 1))
#up_2 = tf.concat([bn, slope], axis = -1)
up_2 = conv_bn_elu(bn, is_training, 3, "final_out", filter_count = 48)

#B = tf.Variable([-np.log(0.99/0.01)]) 
# `init` is only needed by the commented-out bias_initializer below.
init = tf.constant_initializer([-np.log(0.98/0.02)]) # For focal loss
# Output head: 1x1 convolution with a sigmoid, one value per label pixel.
fm = Conv2D(filters = 1,
            kernel_size = (1, 1), 
            padding = 'valid',
            activation = 'sigmoid',
            #bias_initializer = init, # For focal loss
            )(up_2)
print(fm.shape)

In [None]:
# Count trainable parameters: the product of every variable's dimensions,
# summed over all trainable variables in the graph.
total_parameters = sum(
    int(np.prod([dim.value for dim in variable.get_shape()]))
    for variable in tf.trainable_variables()
)
print("This model has {} parameters".format(total_parameters))

# Data loading

In [None]:
# Load the four label CSVs, dropping the imagery-title column and any
# incomplete rows from each before they are combined.
subplot_paths = ["../data/subplot.csv", "../data/subplot2.csv",
                 "../data/subplot3.csv", "../data/subplot4.csv"]
df, df1, df2, df3 = [
    pd.read_csv(path).drop('IMAGERY_TITLE', axis = 1).dropna(axis = 0)
    for path in subplot_paths
]

# Row counts per source file, recorded before concatenation.
lens = [len(frame) for frame in (df, df1, df2, df3)]

df = pd.concat([df, df1, df2, df3], ignore_index = True).dropna(axis = 0)

# Plot ids that have an imagery file on disk (the last four characters of the
# file name are stripped).
existing = [int(name[:-4]) for name in os.listdir('../data/correct_dem/') if ".DS" not in name]
N_SAMPLES = len(existing)

In [None]:
# Keep only label rows whose plot has imagery on disk.
df = df[df['PLOT_ID'].isin(existing)]
# NOTE(review): 196 is presumably 14 * 14 label rows per plot -- confirm.
N_SAMPLES = int(df.shape[0]/196)
N_YEARS = 1

plot_ids = sorted(df['PLOT_ID'].unique())
# Pair each entry of weights.npy with a plot id, in sorted plot-id order.
# NOTE(review): assumes weights.npy was saved in that same order -- confirm.
weight = np.load("weights.npy")
weight = dict(zip(plot_ids, weight))

def reconstruct_images(plot_id):
    """Rebuild the label grid of one plot from the global ``df``.

    Rows are ordered by descending latitude; within a row, TREE values are
    ordered by ascending longitude. Returns a list of lists.
    """
    plot_rows = df[df['PLOT_ID'] == plot_id]
    latitudes = sorted(plot_rows['LAT'].unique(), reverse=True)
    return [
        list(plot_rows[plot_rows['LAT'] == lat].sort_values('LON', axis = 0)['TREE'])
        for lat in latitudes
    ]

# Label grids for every plot.
# NOTE(review): `data` is not read again in the visible code; the loop below
# recomputes the same grids one plot at a time.
data = [reconstruct_images(x) for x in plot_ids]

# Initiate empty lists to store the X and Y data in
data_x, data_y, lengths = [], [], []

# Iterate over each plot
for i in tnrange(len(plot_ids)):
    # Load the sentinel imagery
    for year in ["correct_dem"]:  
        x = np.load("../data/" + year + "/" + str(plot_ids[i]) + ".npy")
        # ndvi / evi / savi / remove_blank_steps come from ../src/utils.py.
        # NOTE(review): presumably each index call appends one channel, giving
        # the 14 channels used below -- confirm against utils.py.
        x = ndvi(x, image_size = 16)
        x = evi(x, image_size = 16)
        x = savi(x, image_size = 16)
        x = remove_blank_steps(x)
        y = reconstruct_images(plot_ids[i])
        # Channel 10 is divided by 90.
        # NOTE(review): looks like a slope in degrees scaled to [0, 1] -- confirm.
        x[:, :, :, 10] /= 90
        # Record the number of time steps before padding.
        lengths.append(x.shape[0])
        # Zero-pad shorter series up to 24 time steps.
        if x.shape[0] < 24:
            padding = np.zeros((24 - x.shape[0], IMAGE_SIZE, IMAGE_SIZE, 14))
            x = np.concatenate((x, padding), axis = 0)
        data_x.append(x)
        data_y.append(y)
print("Finished data loading")

data_x = np.stack(data_x)
data_y = np.stack(data_y)
lengths = np.stack(lengths)

In [None]:
# Sanity check: largest value of channel 10 (the channel divided by 90 above).
np.max(data_x[:, :, :, :, 10])

# Data preprocessing

In [None]:
# Flag samples whose values fall outside [-1.05, 1.05].
sample_mins = [np.min(sample) for sample in data_x]
sample_maxs = [np.max(sample) for sample in data_x]
below_1 = [idx for idx, low in enumerate(sample_mins) if low < -1.05]
above_1 = [idx for idx, high in enumerate(sample_maxs) if high > 1.05]
min_vals = [sample_mins[idx] for idx in below_1]
max_vals = [sample_maxs[idx] for idx in above_1]
outliers = below_1 + above_1
print("The outliers are: {}, totalling {}".format(outliers, len(outliers)))
print("\n")
print(min_vals, max_vals)

# Drop the flagged samples from all three parallel arrays.
flagged = set(outliers)
keep = [idx for idx in range(len(data_x)) if idx not in flagged]
data_x = data_x[keep]
data_y = data_y[keep]
lengths = lengths[keep]

# Min-max scale every channel independently, remembering the bounds used.
min_all = []
max_all = []
for band in range(data_x.shape[-1]):
    channel = data_x[..., band]
    mins, maxs = np.min(channel), np.max(channel)
    min_all.append(mins)
    max_all.append(maxs)
    data_x[..., band] = (channel - mins) / (maxs - mins)

print("The data has been scaled to [{}, {}]".format(np.min(data_x), np.max(data_x)))

In [None]:
# Plot ids that survived outlier removal (the set is built once, not per item).
outlier_ids = set(outliers)
plot_ids2 = [val for x, val in enumerate(plot_ids) if x not in outlier_ids]
#plot_ids2[ordering[460]//4] 
N_SAMPLES = len(data_x)

# Number of retained plots per source CSV; the first region is whatever is
# left after counting df1, df2 and df3.
retained_plots = set(plot_ids2)
region_lengths = [len(set(region['PLOT_ID']) & retained_plots)
                  for region in [df1, df2, df3]]
region_lengths = [N_SAMPLES - sum(region_lengths)] + region_lengths

print("The region sample distribution is {}".format(region_lengths))
print(sum(region_lengths))

# Split every region at TRAIN_RATIO: the leading part goes to train and the
# remainder to test. A single split index per region replaces the previous
# float-equality checks against the literals 0.8 / 0.2, which silently
# produced empty orderings whenever the ratio constants were changed.
# NOTE(review): this assumes samples are stored contiguously by region, while
# data_x follows sorted plot-id order -- confirm the two orderings agree.
train_ordering = []
test_ordering = []
ordering = []
total_samples = 0
start = 0
for val in region_lengths:
    end = start + val
    split = start + int(val * TRAIN_RATIO)
    train_ordering += list(range(start, split))
    test_ordering += list(range(split, end))
    total_samples += end - start
    start = end

ordering = train_ordering + test_ordering

data_x = data_x[ordering]
data_y = data_y[ordering]
lengths = lengths[ordering]

# K-means clustering

In [None]:
from sklearn.cluster import KMeans

NONZERO_CLUSTERS = 10
ZERO_CLUSTERS = 6

kmeans = KMeans(n_clusters=NONZERO_CLUSTERS, random_state = 50)
kmeans_zero = KMeans(n_clusters = ZERO_CLUSTERS, random_state = 50)

# Partition sample indices into all-zero labels and everything else.
unaugmented = list(range(len(data_y)))
zeros = [idx for idx in unaugmented if np.sum(data_y[idx]) == 0]
zero_lookup = set(zeros)
nonzero = [idx for idx in unaugmented if idx not in zero_lookup]

# Non-zero samples are clustered on their flattened label maps.
label_vectors = data_y[nonzero, :, :].reshape((len(nonzero), 14*14))
kmeans.fit(label_vectors)
# All-zero samples are clustered on their time-averaged imagery.
time_averaged = np.mean(data_x[zeros, :, :], axis = 1)
kmeans_zero.fit(time_averaged.reshape((len(zeros), 16*16*14)))

In [None]:
def multiplot(matrices):
    '''Draw each matrix as a heatmap in a single row of four bare subplots.'''
    fig, axs = plt.subplots(ncols=4)
    fig.set_size_inches(20, 4)
    for column, matrix in enumerate(matrices):
        panel = axs[column]
        sns.heatmap(data=matrix, ax=panel, vmin=0, vmax=0.9)
        # Strip labels and ticks so only the heatmap remains.
        panel.set_xlabel("")
        panel.set_ylabel("")
        panel.set_yticks([])
        panel.set_xticks([])
    plt.show()

In [None]:
import random 
# Show four random label maps from non-zero cluster 1.
samples_x = [sample_id for position, sample_id in enumerate(nonzero)
             if kmeans.labels_[position] == 1]
print(samples_x)
randoms = [data_y[sample_id].reshape((14, 14))
           for sample_id in random.sample(samples_x, 4)]
multiplot(randoms)

In [None]:
def balance_data(data_y, labels, labels2, unaugmented = unaugmented):
    """Group sample indices by k-means cluster.

    Args:
        data_y: unused; kept for interface compatibility.
        labels: cluster label of every entry of the global ``nonzero`` list.
        labels2: cluster label of every entry of the global ``zeros`` list.
        unaugmented: unused; kept for interface compatibility.

    Returns:
        dict mapping cluster id to a list of sample indices. Non-zero clusters
        use keys 0..NONZERO_CLUSTERS-1; zero clusters follow immediately after.
    """
    ids = {}
    # Loop over the nonzero clusters
    for i in range(0, NONZERO_CLUSTERS):
        ids[i] = [val for val, label in zip(nonzero, labels) if label == i]
    # Loop over the zero clusters. The key offset follows NONZERO_CLUSTERS
    # instead of a hard-coded 10 so keys cannot collide if that count changes.
    for i in range(0, ZERO_CLUSTERS):
        ids[NONZERO_CLUSTERS + i] = [val for val, label in zip(zeros, labels2) if label == i]
    return ids



In [None]:
# Cluster id -> sample indices, then one flat list of every clustered sample.
ids = balance_data(data_y, kmeans.labels_, kmeans_zero.labels_ )
items = list(itertools.chain.from_iterable(ids.values()))
print("The {} samples have been balanced between the sampling sites".format(len(items)))

In [None]:
# Split every cluster at TRAIN_RATIO so train and test share the same cluster
# mix, recording the cluster label of each training sample along the way.
train_ids = []
test_ids = []
train_labels = []
for i in ids:
    ln = len(ids[i])
    # floor() of the scalar product; wrapping it in a one-element list made
    # int() convert an array, which recent NumPy releases deprecate.
    train_len = int(np.floor(ln * TRAIN_RATIO))
    test_len = ln - train_len
    print(train_len, test_len, ln)
    trains = ids[i][:train_len]
    tests = ids[i][train_len:]
    train_ids += trains
    test_ids += tests
    # Clusters are disjoint, so the label is simply the current cluster key;
    # this replaces a scan over every cluster for every training id.
    train_labels += [i] * len(trains)

In [None]:
# Compare label statistics of the train and test splits.
print("Train and test characteristics:")
print("Train mean Y {}".format(np.mean([np.sum(x) for x in data_y[train_ids]])))
print("Test mean Y {}".format(np.mean([np.sum(x) for x in data_y[test_ids]])))
# NOTE(review): the zero-tree counts are multiplied by 0.2 (train) and 0.8
# (test), apparently to put both on a comparable scale for an 80/20 split;
# the printed values are therefore not raw counts -- confirm this is intended.
print("Train number with zero trees {}".format(0.2*len([x for x in data_y[train_ids] if np.sum(x) == 0])))
print("Test number with zero trees {}".format(0.8*len([x for x in data_y[test_ids] if np.sum(x) == 0])))
# TODO: the two NDVI lines print only their headings; no value is computed.
print("Train mean NDVI")
print("Test mean NDVI")
print("There are {} train and {} test samples".format(len(train_ids), len(test_ids)))
# Leakage check: any id present in both splits is counted here.
print("There is {} overlap between train and test".format(len([x for x in train_ids if x in test_ids])))


# Augment training data

In [None]:
# Augment every training sample four ways: identity, vertical flip, both
# flips, horizontal flip. Images are (time, row, col, channel) and labels are
# (row, col), so the image flip axes are the label flip axes shifted by one.
data_x_augmented = []
data_y_augmented = []
lengths_augmented = []
labels_augmented = []
for i, val in enumerate(train_ids):
    image, label = data_x[val], data_y[val]
    variants = [
        (image, label),
        (np.flip(image, 1), np.flip(label, 0)),
        (np.flip(image, (2, 1)), np.flip(label, (1, 0))),
        (np.flip(image, 2), np.flip(label, 1)),
    ]
    for x1, y1 in variants:
        data_x_augmented.append(x1)
        data_y_augmented.append(y1)
        # Use the true (pre-padding) sequence length. The padded array's first
        # dimension was recorded here before, which is always 24 and made the
        # training feed inconsistent with the test feed built from `lengths`.
        lengths_augmented.append(lengths[val])
        labels_augmented.append(train_labels[i])

train_x = np.stack(data_x_augmented)
train_y = np.stack(data_y_augmented)
train_y = np.reshape(train_y, (train_y.shape[0], 14, 14, 1))
train_l = np.stack(lengths_augmented)
train_l = np.reshape(train_l, (train_y.shape[0], 1))

In [None]:
# Test data is taken straight from the un-augmented arrays.
test_x = data_x[test_ids]
test_y = data_y[test_ids]
test_lengths = lengths[test_ids]

In [None]:
# Plots the first four training labels (not a random draw, despite the message).
print("RANDOM TRAIN SAMPLES - SHOULD BE AUGMENTED")
multiplot([x.reshape(14, 14) for x in train_y[:4]])

In [None]:
# Plots the first four test labels (not a random draw, despite the message).
print("RANDOM TEST SAMPLES - SHOULD BE NOT AUGMENTED")
multiplot([x.reshape(14, 14) for x in test_y[:4]])

# Loss definition

In [None]:
import math
def dice_loss(y_true, y_pred):
    """Soft Dice loss (1 - Dice coefficient) over the flattened batch, smoothed by 1."""
    smooth = 1.
    truth, pred = K.flatten(y_true), K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    denominator = K.sum(truth) + K.sum(pred) + smooth
    return 1. - (2. * overlap + smooth) / denominator

def smooth_jaccard(y_true, y_pred, smooth=1):
    """Per-sample smoothed Jaccard distance on 14x14 maps, scaled by ``smooth``."""
    flat_true = tf.reshape(y_true, (-1, 14*14))
    flat_pred = tf.reshape(y_pred, (-1, 14*14))
    intersection = K.sum(K.abs(flat_true * flat_pred), axis=-1)
    total = K.sum(K.abs(flat_true) + K.abs(flat_pred), axis=-1)
    union = total - intersection
    jac = (intersection + smooth) / (union + smooth)
    return (1 - jac) * smooth

def focal_loss_fixed(y_true, y_pred, gamma = 2., alpha = 0.25):
    """Element-wise binary focal loss on (batch, 14, 14, 1) predictions."""
    y_true = tf.reshape(y_true, (-1, 14, 14, 1))
    y_pred = K.clip(y_pred, 1e-8, 1-1e-8)
    # Predictions at positive pixels (1 elsewhere) and at negative pixels
    # (0 elsewhere), so each term vanishes outside its own class.
    positives = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    negatives = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
    epsilon = K.epsilon()
    # clip to prevent NaN's and Inf's
    pt_1 = K.clip(positives, epsilon, 1. - epsilon)
    pt_0 = K.clip(negatives, epsilon, 1. - epsilon)
    positive_term = alpha * K.pow(1. - pt_1, gamma) * K.log(K.epsilon()+pt_1)
    negative_term = (1-alpha) * K.pow( pt_0, gamma) * K.log(1. - pt_0 + K.epsilon())
    return -(positive_term) - (negative_term)

In [None]:
from keras.losses import binary_crossentropy

def bce_lovasz(y_true, y_pred):
    """Binary cross-entropy plus 0.25 x Lovasz loss for class 1.

    Args:
        y_true: labels reshapeable to (batch, 14, 14).
        y_pred: sigmoid predictions of the network.

    Returns:
        The combined loss tensor.

    An earlier per-sample adaptive focal-loss variant was kept here as a
    string literal together with locals that only it used; that dead code
    (and the unused reshape op it left in the graph) has been removed.
    """
    bce = binary_crossentropy(y_true, y_pred)
    lovasz = lovasz_softmax(y_pred, tf.reshape(y_true, (-1, 14, 14)), classes=[1], per_image=True)
    return bce + 0.25*lovasz

# Equibatch creation

In [None]:
# Print every 8th percentile of the per-sample label sums.
label_sums = [np.sum(sample) for sample in train_y]
for pct in range(0, 100, 8):
    print(np.percentile(label_sums, pct))

# From here on train_ids indexes the augmented training arrays.
train_ids = list(range(len(train_y)))

def equibatch(train_ids, batch_size):
    """Order training ids so consecutive groups of ten are label-balanced.

    Samples are binned by the sum of their label map, read from the global
    ``train_y``. Every group of ten holds two samples from the all-zero bin
    and one from each of the eight non-zero bins, shuffled within the group.
    Bins smaller than the number of groups are cycled through repeatedly.

    Fixes relative to the earlier version:
      * a label sum of exactly 41 fell between two bins and was never sampled;
      * the zero-bin cursor advanced by two but wrapped as if by one, so it
        ran past the end of the bin (IndexError);
      * an empty bin now raises a descriptive ValueError;
      * the caller's ``train_ids`` list is no longer shuffled in place.

    Args:
        train_ids: indices into ``train_y``.
        batch_size: unused; kept for interface compatibility.

    Returns:
        Flat list of ids, ten per group, ``len(train_ids) // 9`` groups.

    Raises:
        ValueError: if a bin is empty while at least one group is requested.
    """
    shuffled = list(train_ids)
    np.random.shuffle(shuffled)
    if not shuffled:
        return []

    tree_counts = np.sum(train_y[shuffled].reshape(len(shuffled), -1), axis=1)

    # Upper (inclusive) edges: bin 0 is count == 0, bin k is
    # (edges[k-1], edges[k]], and the last bin is everything above 100.
    bin_edges = [0, 6, 11, 16, 23, 41, 63, 100]
    bin_index = np.digitize(tree_counts, bin_edges, right=True)
    bins = [[] for _ in range(len(bin_edges) + 1)]
    for sample_id, which in zip(shuffled, bin_index):
        bins[which].append(sample_id)

    n_groups = len(shuffled) // len(bins)
    if n_groups == 0:
        return []
    empty_bins = [k for k, members in enumerate(bins) if not members]
    if empty_bins:
        raise ValueError(
            "equibatch: no training samples fall in bin(s) {}".format(empty_bins))

    zero_bin, nonzero_bins = bins[0], bins[1:]
    new_batches = []
    for step in range(n_groups):
        # Two zero samples per group, then one from every non-zero bin; the
        # modulo wraps each cursor back to the start of its bin.
        to_append = [zero_bin[(2 * step) % len(zero_bin)],
                     zero_bin[(2 * step + 1) % len(zero_bin)]]
        to_append += [members[step % len(members)] for members in nonzero_bins]
        np.random.shuffle(to_append)
        new_batches.extend(to_append)
    return new_batches

In [None]:
# Build one balanced ordering and plot the labels of its first four samples.
batch = equibatch(train_ids, 32)
multiplot([x.reshape((14, 14)) for x in train_y[batch[:4]]])

In [None]:
# Labels of samples 5-8 of the balanced ordering.
multiplot([x.reshape((14, 14)) for x in train_y[batch[4:8]]])

In [None]:
# Labels of samples 9-12 of the balanced ordering.
multiplot([x.reshape((14, 14)) for x in train_y[batch[8:12]]])

# Model training

In [None]:
# Training cell: builds the loss / optimiser ops on a fresh start, then runs
# 200 epochs of balanced mini-batch training with per-epoch validation.
#
# Fixes relative to the earlier version:
#   * the feed dict used `learning_rate = learning_rate` (a SyntaxError);
#   * the learning-rate placeholder was overwritten by a Python float, so the
#     optimiser was built with a constant and the decay never reached it;
#   * a skipped mini-batch still appended `tr`, i.e. a stale (or, on the very
#     first batch, undefined) loss value.
FRESH_START = True
FINE_TUNE = False
from tensorflow.python.keras.optimizers import SGD

# Checkpoint prefix used when the validation loss improves.
MODEL_PATH = "../models/equibatch26/model"
VAL_BATCH_SIZE = 8

BATCH_SIZE = 20
print("Starting model with: \n {} zone out \n {} l2 \n {} initial LR \n {} final LR \n {} parameters"
     .format(ZONE_OUT_PROB, L2_REG, INITIAL_LR, FINAL_LR, total_parameters))
best_val = 0.270
if not FRESH_START:
    # Continues with the ops, session state and `current_lr` left in memory by
    # a previous run of this cell.
    print("Resuming training with a best validation score of {}".format(best_val))
if FRESH_START:
    print("Restarting training from scratch on {} train and {} test samples, total {}".format(len(train_ids), len(test_ids), N_SAMPLES))
    #optimizer = AdaBoundOptimizer(learning_rate=1e-6,
    #                              final_lr=1e-4,
    #                              beta1=0.9, beta2=0.999, 
    #                              amsbound=True)
    # The optimiser reads its step size from this placeholder, which is fed
    # with `current_lr` on every training step. It is created here, next to
    # the optimiser, so a resumed run keeps feeding the tensor that the
    # existing train_op actually depends on.
    learning_rate = tf.placeholder(tf.float32, shape=[])
    current_lr = 1e-6
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    
    train_loss = bce_lovasz(tf.reshape(labels, (-1, 14, 14, 1)), fm)
    test_loss = binary_crossentropy(tf.reshape(labels, (-1, 14, 14, 1)), fm)
    l2_loss = tf.losses.get_regularization_loss()
    train_loss += l2_loss
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    
    # Batch-norm moving statistics must be updated together with the weights.
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(train_loss)   

    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    saver = tf.train.Saver(max_to_keep = 2)

# From here on test_ids are positions into test_x / test_y / test_lengths.
test_ids = [x for x in range(0, len(test_x))]

# Run training loop
for i in range(0, 200):
    # Halve the learning rate every 25 epochs (including epoch 0).
    if i % 25 == 0:
        current_lr /= 2
        print("Decaying learning rate to {}".format(current_lr))
    print("Lovasz loss")
    op = train_op
    loss = train_loss
    randomize = equibatch(train_ids, 32)

    losses = []
    val_loss = []
    
    for k in tnrange(int(len(train_ids) // BATCH_SIZE)):
        batch_ids = randomize[k*BATCH_SIZE:(k+1)*BATCH_SIZE]
        batch_y = train_y[batch_ids, :, :].reshape(len(batch_ids), 14, 14)
        if batch_y.sum() > 0:
            opt, tr = sess.run([op, loss],
                                  feed_dict={inp: train_x[batch_ids, :, :, :],
                                             length: train_l[batch_ids].reshape((-1, 1)),
                                             labels: batch_y,
                                             is_training: True,
                                             learning_rate: current_lr,
                                             })
            # Only record losses of batches that were actually trained on.
            losses.append(tr)
        else:
            print("Skipping minibatch for equibatch reasons")

    # Validation loss over full batches of VAL_BATCH_SIZE; a trailing partial
    # batch is not evaluated.
    for j in range(len(test_ids) // VAL_BATCH_SIZE):
        batch_ids = test_ids[j*VAL_BATCH_SIZE:(j+1)*VAL_BATCH_SIZE]
        vl, y = sess.run([test_loss, fm], 
                         feed_dict={inp: test_x[batch_ids, :, :, :],
                                    length: test_lengths[batch_ids].reshape((-1, 1)),
                                    labels: test_y[batch_ids, :, :].reshape(VAL_BATCH_SIZE, 14, 14),
                                    is_training: False,
                                    })
        val_loss.append(np.mean(vl))
        
    # Per-sample precision / recall at a 0.4 threshold, using thirty_meter
    # (from ../src/utils.py) to count true/false positives and false negatives.
    tps, fps, fns = [], [], []
    for m in test_ids:
        y = sess.run([fm], feed_dict={inp: test_x[m, :, :, :].reshape(1, 24, 16, 16, 14),
                                  length: test_lengths[m].reshape(1, 1),
                                  is_training: False,
                                  })[0]
        true = test_y[m].reshape((LABEL_SIZE, LABEL_SIZE))
        pred = y.reshape((14, 14))
        pred[np.where(pred > 0.4)] = 1
        pred[np.where(pred < 0.4)] = 0
        tp, fp, fn = thirty_meter(true, pred)
        tps.append(tp)
        fps.append(fp)
        fns.append(fn)
        
    precision = np.sum(tps) / (np.sum(tps) + np.sum(fps))
    recall = np.sum(tps) / (np.sum(tps) + np.sum(fns))
    if np.mean(val_loss) < best_val:
        best_val = np.mean(val_loss)
        print("Saving model with {}".format(best_val))
        save_path = saver.save(sess, MODEL_PATH)
    # TODO: F1 and IoU are still printed as the placeholder string 'hey'.
    print("Epoch {}: Loss {} Val: {} P {} R {} F1 {} iou {}".format(i + 1,
                                                             np.mean(losses), np.mean(val_loss),
                                                             precision, recall, 'hey', "hey"))

# Model validation and sanity checks





In [None]:
start = 0
test_ids = [x for x in range(0, len(test_x))]

def multiplot(matrices, nrows = 2, ncols = 4):
    '''Plot multiple heatmaps with subplots

    Draws `matrices` row by row onto an `nrows` x `ncols` grid of seaborn
    heatmaps sharing a fixed colour scale (vmin = 0, vmax = 0.9), removes
    axis labels and ticks from every drawn panel, then shows the figure.

    Parameters:
     matrices (list): 2-D arrays to plot, in row-major grid order; entries
                      beyond nrows * ncols are ignored, and grid cells
                      without a matching entry are left undrawn
     nrows (int): number of subplot rows
     ncols (int): number of subplot columns

    Returns:
     None
    '''
    # Honour the requested column count, and keep `axs` two-dimensional
    # (squeeze = False) so that axs[row, col] also works for a single row.
    fig, axs = plt.subplots(ncols = ncols, nrows = nrows, squeeze = False)
    fig.set_size_inches(20, 4*nrows)
    for row in range(nrows):
        # Row `row` shows the next `ncols` matrices of the flat input list
        row_matrices = matrices[row*ncols:(row + 1)*ncols]
        for col, matrix in enumerate(row_matrices):
            ax = axs[row, col]
            sns.heatmap(data = matrix, ax = ax, vmin = 0, vmax = 0.9)
            ax.set_xlabel("")
            ax.set_ylabel("")
            ax.set_yticks([])
            ax.set_xticks([])
    plt.show()
start = 0



In [None]:
import random 

    
test_losses = []
# Fraction of the test ids that lies before the current offset
print(start/len(test_ids))
test_ids = sorted(test_ids)
# Eight consecutive ids beginning at the current offset
matrix_ids = [test_ids[start + offset] for offset in range(8)]
#matrix_ids = random.sample(test_ids, 4)

preds = []
trues = []
for idx in matrix_ids:
    print(idx)
    # Feed a single sample (batch dimension of 1) with training mode off
    feed = {inp: test_x[idx].reshape(1, 24, IMAGE_SIZE, IMAGE_SIZE, 14),
            length: test_lengths[idx].reshape(1, 1),
            is_training: False,
            }
    y = np.array(sess.run([fm], feed_dict=feed)).reshape(14, 14)
    preds.append(y)
    true = test_y[idx].reshape(LABEL_SIZE, LABEL_SIZE)
    trues.append(true)


# Only the first four samples are drawn: labels on the top row and the
# matching predictions on the bottom row. The offset then advances by four.
to_plot = trues[:4] + preds[:4]# + trues[5:] + preds[5:]
multiplot(to_plot, nrows = 2, ncols = 4)
#plot_ids[ordering[976]//4] 
start += 4

In [None]:
# Positions in plot_ids to drop: each outlier sample index is mapped to a
# plot position by integer division by 4 (presumably AUGMENTATION_RATIO,
# TODO confirm). Built once as a set instead of once per plot id.
outlier_positions = {sample // 4 for sample in outliers}
plot_ids2 = [val for pos, val in enumerate(plot_ids) if pos not in outlier_positions]
plot_ids2[ordering[460]//4] 

# Calculate ROC for best threshold selection

In [None]:


# Sweep the decision threshold from 0.20 to 0.85 in steps of 0.05 and print
# the precision and recall (as counted by thirty_meter) at each threshold.

# The raw predictions do not depend on the threshold, so the model is run
# once per sample here and the outputs are reused for every threshold.
# NOTE(review): the per-epoch validation indexes test_x / test_y /
# test_lengths with test_ids, while this cell indexes data_x / data_y /
# lengths. Confirm that test_ids refers to rows of data_x at this point.
sweep_samples = []
for i in test_ids:
    y = sess.run([fm], feed_dict={inp: data_x[i].reshape(1, 24, 16, 16, 14),
                                  length: lengths[i].reshape(1, 1),
                                  is_training: False,
                                  })[0]
    sweep_samples.append((data_y[i].reshape((14, 14)), y.reshape((14, 14))))

for j in range(4, 18):
    threshold = j*0.05
    tps = []
    fps = []
    fns = []
    for true, raw_pred in sweep_samples:
        # Single comparison so every pixel becomes exactly 0 or 1, including
        # values equal to the threshold (they count as negative).
        pred = (raw_pred > threshold).astype(raw_pred.dtype)
        tp, fp, fn = thirty_meter(true, pred)
        tps.append(tp)
        fps.append(fp)
        fns.append(fn)

    precision = np.sum(tps) / (np.sum(tps) + np.sum(fps))
    recall = np.sum(tps) / (np.sum(tps) + np.sum(fns))
    print(threshold, precision, recall)

## TODO @jombrandt: top 10 worst training and test samples by IoU

These should be written to a `.txt` file under `tmp/` and indexed by `validate-data.ipynb`, both to verify that the original classifications were correct and to identify regions that need more training data.