# LSTM - UNET -- 16x16, no input padding

## John Brandt

### Last updated: Oct 3 2019, 75% precision, 76% recall

In [None]:
from tqdm import tqdm_notebook, tnrange
import tensorflow as tf
#tf.reset_default_graph()

sess = tf.Session()
from keras import backend as K
K.set_session(sess)

import keras
from tensorflow.python.keras.layers import *
from tensorflow.python.keras.layers import ELU, LeakyReLU
from keras.losses import binary_crossentropy
from tensorflow.python.ops import array_ops
from tensorflow.python.keras.layers import Conv2D, Lambda, Dense, Multiply, Add

import pandas as pd
import numpy as np
from random import shuffle
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import os
import itertools
from tflearn.layers.conv import global_avg_pool
from tensorflow.contrib.framework import arg_scope
from keras.regularizers import l1
from tensorflow.layers import batch_normalization
import tensorflow.contrib.slim as slim

os.environ['KMP_DUPLICATE_LIB_OK']='True'

In [None]:
%run ../src/zoneout.py
%run ../src/convgru.py
%run ../src/lovasz.py
%run ../src/utils.py
%run ../src/adabound.py

In [None]:
# Experiment configuration shared by every later cell.
# The trailing "(lo, hi, step) --> n" comments look like notes on
# hyperparameter search ranges and candidate counts -- TODO confirm.
ZONE_OUT_PROB = 0.3 #(0.05, 0.20, 0.05) --> 4
L2_REG = 0.005 #(1-e6, 1-e1, x10) --> 5
INITIAL_LR = 2e-6 #(1e-6, 1e-3, x5) --> 10
FINAL_LR = 2e-4 # (1e - 5, 1e-2, x5) --> 10
LOSS_WEIGHTING = 0.5 #(0.2, 1, 0.2) --> 5
SQUEEZE_RATIO = 4 # --> 4, 8, 12, 16 --> 4
BN_MOMENTUM = 0.9 # --> 3
N_LAYERS = 4 # --> 3
REG_TYPE = 'kernel' # kernel # --> 2
SQUEEZE = True
LAYER_NORM = True 
BATCH_SIZE = 4 # -->4
LOSS_TYPE = 'bce-jaccard' #bce-jaccard, bce-dice, bce-lovasz, focal-jaccard, etc. --> 4
N_CONV_PER_LAYER = 1 # --> 2
ACTIVATION_TYPE = 'ELU' #RELU, PRELU --> 2
MASK_LOSS = False # --> 2
PAD_INPUT_TYPE = 'none' # zero, reflect, none # --> 2
RENORM_CLIPPING = None # --> 5
FRESH_START = False
TRAIN_RATIO = 0.8
TEST_RATIO = 0.2


# Number of samples generated per plot (the loader below stores the original
# plus three flipped copies).
AUGMENTATION_RATIO = 4
IMAGE_SIZE = 16
# Plot ids are the file names in ../data/2018/ with the last four characters
# removed (presumably the ".npy" extension); ".DS" entries are skipped.
existing = [int(x[:-4]) for x in os.listdir('../data/2018/') if ".DS" not in x]
#existing = [x for x in existing1 if x in existing2]
N_SAMPLES = len(existing)
RESIZE_OUTPUT = False

LABEL_SIZE = 14
#if LABEL_SIZE == 16 and not RESIZE_OUTPUT:
#    LABEL_SIZE = IMAGE_SIZE
    
# Sample counts after augmentation; the test count is the remainder so the
# two always add up to N_SAMPLES * AUGMENTATION_RATIO.
TRAIN_SAMPLES = int((N_SAMPLES * AUGMENTATION_RATIO) * TRAIN_RATIO)
TEST_SAMPLES = int((N_SAMPLES * AUGMENTATION_RATIO) - TRAIN_SAMPLES)
# Prints the train / test split expressed in un-augmented plots.
print(TRAIN_SAMPLES // AUGMENTATION_RATIO, N_SAMPLES - (TRAIN_SAMPLES // AUGMENTATION_RATIO))

In [None]:
def conv_bn_elu(inp, is_training, kernel_size, scope, filter_count = 16):
    """Apply Conv2D -> ELU -> batch normalization to ``inp``.

    A 3x3 kernel is preceded by one pixel of reflection padding and run with
    'valid' convolution; any other kernel size uses 'same' convolution on the
    unpadded input. The kernel regularizer is the module-level ``reg``.

    Args:
        inp: input tensor.
        is_training: training flag forwarded to batch normalization.
        kernel_size: side length of the square convolution kernel.
        scope: name prefix; the batch-norm op is named ``scope + "bn"``.
        filter_count: number of convolution filters.

    Returns:
        The batch-normalized activation tensor.
    """
    reflect = kernel_size == 3
    conv_input = ReflectionPadding2D((1, 1,))(inp) if reflect else inp
    conv_mode = 'valid' if reflect else 'same'

    convolved = Conv2D(filters = filter_count,
                       kernel_size = (kernel_size, kernel_size),
                       padding = conv_mode,
                       kernel_regularizer=reg)(conv_input)
    activated = ELU()(convolved)
    return Batch_Normalization(activated, training=is_training, scope = scope + "bn")
    
def fpa(inp, filter_count):
    """Build the feature-pyramid-attention head on top of ``inp``.

    Combines a 1x1 branch, a chained 3x3 branch, a 2x2 branch fed from the
    first 3x3 output, and a globally pooled branch upsampled by a factor of 4.
    Reads the module-level ``is_training`` placeholder.

    Args:
        inp: input feature map.
        filter_count: number of filters used by every branch.

    Returns:
        ``one * (three_f + two_f) + upsampled_global`` as a tensor.
    """
    keras_layers = tf.keras.layers

    # Pyramid branches (creation order matters for variable naming).
    branch_1x1 = conv_bn_elu(inp, is_training, 1, 'forward1', filter_count)
    branch_3x3 = conv_bn_elu(inp, is_training, 3, 'down1', filter_count)
    branch_3x3_out = conv_bn_elu(branch_3x3, is_training, 3, 'down1_f', filter_count)
    branch_2x2 = conv_bn_elu(branch_3x3, is_training, 2, 'down2', filter_count)
    branch_2x2_out = conv_bn_elu(branch_2x2, is_training, 2, 'down2_f', filter_count)

    # Global-context branch: pool to a vector, view it as a 1x1 map, upsample.
    global_vec = keras_layers.GlobalAveragePooling2D()(inp)
    global_map = tf.reshape(global_vec, (-1, 1, 1, global_vec.shape[-1]))
    global_feat = conv_bn_elu(global_map, is_training, 1, 'top1', filter_count)
    global_up = keras_layers.UpSampling2D((4, 4))(global_feat)

    attended = tf.multiply(branch_1x1, tf.add(branch_3x3_out, branch_2x2_out))
    output = tf.add(attended, global_up)
    print("Feature pyramid attention shape {}".format(output.shape))
    return output
    
    

def gau(x_low_level, x_high_level, scope, filter_count, size = 4):
    """Global attention upsample, used in place of an up/concat/conv block.

    ``x_low_level`` is convolved, gated by the globally pooled
    ``x_high_level`` (tiled to ``size`` x ``size``), convolved again and
    upsampled 2x with a transposed convolution; the result is added to a
    convolved copy of ``x_high_level``. Reads the module-level
    ``is_training`` and ``reg``.

    Args:
        x_low_level: feature map that gets gated and upsampled.
        x_high_level: feature map providing the gate and the additive branch.
        scope: suffix appended to the internal scope names.
        filter_count: number of filters for every convolution.
        size: upsampling factor applied to the 1x1 gate.

    Returns:
        The sum of the upsampled gated branch and the convolved
        ``x_high_level`` branch.
    """
    keras_layers = tf.keras.layers

    low_branch = conv_bn_elu(x_low_level, is_training, 3, 'gauforward' + scope, filter_count)

    # Channel gate derived from the other input's global average.
    gate = keras_layers.GlobalAveragePooling2D()(x_high_level)
    gate = keras_layers.Reshape((1, 1, -1))(gate)
    gate = keras_layers.UpSampling2D((size, size))(gate)

    gated = keras_layers.multiply([low_branch, gate])
    gated = conv_bn_elu(gated, is_training, 3, 'gauforward5' + scope, filter_count)
    upsampled = keras_layers.Conv2DTranspose(filters = filter_count,
                                             kernel_size = (3, 3),
                                             strides=(2, 2),
                                             padding='same',
                                             kernel_regularizer = reg)(gated)

    high_branch = conv_bn_elu(x_high_level, is_training, 3, 'gauforward1' + scope, filter_count)
    return keras_layers.add([upsampled, high_branch])

In [None]:
import tensorflow.contrib.slim as slim
import numpy as np
import os
from tensorflow.contrib.slim import conv2d
from tensorflow.contrib.resampler import resampler

def cse_block(prevlayer, prefix):
    """Channel squeeze-and-excitation: rescale channels by a learned gate.

    The input is averaged over axes 1 and 2, passed through a ReLU dense
    layer with half as many units as channels, then a sigmoid dense layer
    with one unit per channel; the result multiplies the input.

    Args:
        prevlayer: 4-D input tensor (channel count read from axis 3).
        prefix: name prefix for the two dense layers.

    Returns:
        The channel-gated tensor.
    """
    n_channels = K.int_shape(prevlayer)[3]
    squeezed = Lambda(lambda t: K.mean(t, axis=[1, 2]))(prevlayer)
    hidden = Dense(n_channels // 2, name=prefix + 'cse_lin1', activation='relu')(squeezed)
    gate = Dense(n_channels, name=prefix + 'cse_lin2', activation='sigmoid')(hidden)
    return Multiply()([prevlayer, gate])


def sse_block(prevlayer, prefix):
    """Spatial squeeze-and-excitation: gate ``prevlayer`` with a 1x1 sigmoid conv.

    Args:
        prevlayer: 4-D input tensor (channel count read from axis 3).
        prefix: name prefix for the conv (``prefix + "_conv"``) and multiply
            (``prefix + "_mul"``) layers.

    Returns:
        ``prevlayer`` multiplied element-wise by the sigmoid gate.
    """
    # Bug? Should be 1 here?
    # NOTE(review): the gate has as many filters as the input has channels,
    # so it is per-pixel AND per-channel; a single-filter conv would give a
    # purely spatial gate. Changing it alters the variable set, so confirm
    # before touching it if checkpoints must stay loadable.
    conv = Conv2D(K.int_shape(prevlayer)[3], (1, 1), padding="same", kernel_initializer="he_normal",
                  activation='sigmoid', strides=(1, 1),
                  name=prefix + "_conv")(prevlayer)
    conv = Multiply(name=prefix + "_mul")([prevlayer, conv])
    return conv


def csse_block(x, prefix):
    """Concurrent spatial and channel squeeze-and-excitation.

    Follows "Concurrent Spatial and Channel 'Squeeze & Excitation' in Fully
    Convolutional Networks" (https://arxiv.org/abs/1803.02579): the channel
    gated and spatially gated versions of ``x`` are added together.

    Args:
        x: input tensor.
        prefix: name prefix shared by all sub-layers.

    Returns:
        Sum of ``cse_block(x)`` and ``sse_block(x)``.
    """
    channel_branch = cse_block(x, prefix)
    spatial_branch = sse_block(x, prefix)
    return Add(name=prefix + "_csse_mul")([channel_branch, spatial_branch])

def Batch_Normalization(x, training, scope):
    """Batch-renormalize ``x`` with the project-wide momentum.

    Args:
        x: input tensor.
        training: training flag passed to the layer.
        scope: name given to the batch-normalization op.

    Returns:
        The normalized tensor.
    """
    bn_options = dict(
        momentum = BN_MOMENTUM,
        training = training,
        renorm = True,
        reuse = None,
        name = scope,
    )
    return batch_normalization(inputs=x, **bn_options)

class ReflectionPadding2D(Layer):
    """Keras layer that reflect-pads axes 1 and 2 of a 4-D tensor.

    ``padding`` is ``(pad_axis1, pad_axis2)``: ``padding[0]`` is applied
    symmetrically to axis 1 and ``padding[1]`` to axis 2, matching
    ``compute_output_shape``. (Previously ``call`` applied them the other way
    round, which disagreed with the reported output shape whenever the two
    values differed; symmetric padding such as ``(1, 1)`` is unaffected.)
    """

    def __init__(self, padding=(1, 1), **kwargs):
        self.padding = tuple(padding)
        self.input_spec = [InputSpec(ndim=4)]
        super(ReflectionPadding2D, self).__init__(**kwargs)

    def compute_output_shape(self, s):
        """Return the padded shape, assuming the "channels_last" layout."""
        return (s[0], s[1] + 2 * self.padding[0], s[2] + 2 * self.padding[1], s[3])

    def call(self, x, mask=None):
        """Reflect-pad ``x``; the first and last axes are left untouched."""
        h_pad, w_pad = self.padding
        return tf.pad(x, [[0,0], [h_pad,h_pad], [w_pad,w_pad], [0,0] ], 'REFLECT')

# Model definition

In [None]:
# 16x16 mask, flattened: ones on the inner 14x14 window, zeros on the
# one-pixel border.
weights = tf.ones([14, 14], tf.float32)
weights = tf.pad(weights, [[1, 1], [1, 1]], 'constant')
weights = tf.reshape(weights, (16*16,)) 

# Shared L2 kernel regularizer and the graph's input placeholders.
reg = keras.regularizers.l2(L2_REG)
inp = tf.placeholder(tf.float32, shape=(None, 9, 24, IMAGE_SIZE, IMAGE_SIZE, 15))
length = tf.placeholder(tf.int32, shape = (None, 1))
labels = tf.placeholder(tf.float32, shape=(None, 14, 14))#, 1))



# Sequence lengths as a flat vector, as passed to the recurrent block.
length2 = tf.reshape(length, (-1,))
is_training = tf.placeholder_with_default(False, (), 'is_training')
power = tf.placeholder_with_default(1.0, (), 'power')

# NOTE(review): `inp` is declared with six dimensions but the padding specs
# below list five entries. Neither branch runs with PAD_INPUT_TYPE = 'none';
# confirm the intended input rank before enabling them. Any other value of
# PAD_INPUT_TYPE leaves `inp_pad` undefined.
if PAD_INPUT_TYPE == 'zero':
    inp_pad = tf.pad(inp, [[0, 0], [0, 0], [1, 1], [1, 1], [0, 0]], "CONSTANT")

if PAD_INPUT_TYPE == 'reflect':
    inp_pad = tf.pad(inp, [[0, 0], [0, 0], [1,1], [1,1], [0,0] ], 'REFLECT')
    
if PAD_INPUT_TYPE == 'none':
    inp_pad = inp
    
# Spatial size handed to the recurrent cell.
FILTER_SIZE = LABEL_SIZE if RESIZE_OUTPUT else IMAGE_SIZE

# Filter counts per stage; the suffix appears to be the spatial resolution of
# the stage -- TODO confirm.
down_16 = 10
down_8 = 30
down_4f = 45
#down_2f = 2
#up_4 = 30
up_8 = 30
up_16 = 20

def down_block(inp, length, size, flt, scope, train):
    """Bidirectional ConvGRU encoder stage followed by 2x2 max pooling.

    Args:
        inp: input sequence tensor.
        length: per-sample sequence lengths passed to ``convGRU``.
        size: spatial shape given to each ``ConvGRUCell``.
        flt: number of filters per cell.
        scope: variable scope name.
        train: training flag forwarded to the zoneout wrappers.

    Returns:
        Tuple ``(pooled, state)`` where ``pooled`` is the time-distributed
        max-pooled first output of ``convGRU`` and ``state`` is its second
        output.
    """
    with tf.variable_scope(scope):
        # Forward cell first, then backward, then wrap them in the same order.
        raw_cells = [ConvGRUCell(shape = size, filters = flt,
                                 kernel = [3, 3], padding = 'SAME')
                     for _ in range(2)]
        fw_cell, bw_cell = [ZoneoutWrapper(cell,
                                           zoneout_drop_prob = ZONE_OUT_PROB,
                                           is_training = train)
                            for cell in raw_cells]
        gru_out = convGRU(inp, fw_cell, bw_cell, length)
        sequence, state = gru_out[0], gru_out[1]
        pooled = TimeDistributed(MaxPool2D(pool_size = (2, 2)))(sequence)
        print("Down block shape: {}".format(state.shape))
    return pooled, state

def down_block_no_gru(inp, flt, scope, train):
    """Convolutional encoder stage: pad -> conv -> ELU -> BN -> csSE -> pool.

    Args:
        inp: 4-D input feature map.
        flt: number of convolution filters.
        scope: variable scope name, also used to name the BN and csSE layers.
        train: training flag forwarded to batch normalization. (The block
            previously ignored this argument and read the global
            ``is_training`` placeholder instead.)

    Returns:
        The 2x2 max-pooled feature map.
    """
    with tf.variable_scope(scope):
        padded = ReflectionPadding2D((1, 1))(inp)

        # Conv block 1: reflection padding keeps the 'valid' 3x3 conv
        # size-preserving.
        conv = Conv2D(filters = flt, kernel_size = (3, 3),
                      padding = 'valid', kernel_regularizer=reg)(padded)
        elu = ELU()(conv)
        bn = Batch_Normalization(elu, training=train, scope = scope + "bn")
        x = csse_block(bn, prefix='csse_block_{}'.format(scope))
        down = MaxPool2D(pool_size = (2, 2))(x)
        print("Down block shape: {}".format(down.shape))
    return down


def up_block(inp, concat_inp, flt, sq, scope, concat, is_training, padding = True):
    """Decoder stage: global attention upsample followed by a csSE block.

    Args:
        inp: feature map to be upsampled.
        concat_inp: skip-connection feature map combined inside ``gau``.
        flt: number of filters.
        sq, concat, is_training, padding: accepted for interface
            compatibility; not used by the body.
        scope: variable scope name, also used for layer names.

    Returns:
        The recalibrated, upsampled feature map.
    """
    with tf.variable_scope(scope):
        gate_size = inp.shape[-2]
        attended = gau(inp, concat_inp, scope, flt, gate_size)
        recalibrated = csse_block(attended, prefix='csse_block_{}'.format(scope))
        print("Up block conv 1 shape: {}".format(recalibrated.shape))
        return recalibrated
        
        
# Encoder: one recurrent stage, then two convolutional stages, each ending in
# a 2x2 max pool. `down_1` (the pooled sequence output) is not used below;
# the recurrent state `copy_1` feeds the next stage and the last skip.
down_1, copy_1 = down_block(inp_pad, length2, [FILTER_SIZE, FILTER_SIZE], down_16, 'down_16', is_training)
down_2 = down_block_no_gru(copy_1, down_8, 'down_8', is_training)
down_3 = down_block_no_gru(down_2, down_4f, 'down_4', is_training)

# Bottleneck attention, then two attention-upsample decoder stages.
down_fpa = fpa(down_3, down_4f)
up_3 = up_block(down_fpa, down_2, up_8, up_8, 'up_8', True, is_training, padding =  True) # 4 - 8
up_2 = up_block(up_3, copy_1, up_16, up_16, 'up_16', True, is_training, padding = True) # 8 - 16
# Extra 2x transposed-conv upsamplings of `down_2` and `up_3`, concatenated
# with the final decoder output along the channel axis.
up_4_16 = tf.keras.layers.Conv2DTranspose(filters = up_8, kernel_size = (3, 3),
                                             strides=(2, 2), padding='same', kernel_regularizer = reg)(down_2)

up_8_16 = tf.keras.layers.Conv2DTranspose(filters = up_16, kernel_size = (3, 3),
                                             strides=(2, 2), padding='same', kernel_regularizer = reg)(up_3)

concat_final = tf.concat([up_2, up_4_16, up_8_16], axis = -1)

# Output head: two unpadded ('valid') 3x3 convolutions, each of which trims
# one pixel from every border.
up_2 = Conv2D(filters = 20, kernel_size = (3, 3), padding = 'valid', kernel_regularizer=reg)(concat_final)
elu = ELU()(up_2)
bn = Batch_Normalization(elu, training=is_training, scope = "out1bn")
x = csse_block(bn, prefix='csse_block_{}'.format("out1"))

up_2 = Conv2D(filters = 20, kernel_size = (3, 3), padding = 'valid', kernel_regularizer=reg)(x)
elu = ELU()(up_2)

#B = tf.Variable([-np.log(0.99/0.01)]) 
# Bias starts at -log(0.9/0.1), i.e. sigmoid(bias) = 0.1, so the initial
# per-pixel prediction is 0.1 when the kernel contribution is zero.
init = tf.constant_initializer([-np.log(0.9/0.1)])
fm = Conv2D(filters = 1,
            kernel_size = (1, 1), 
            padding = 'valid',
            activation = 'sigmoid',
            bias_initializer = init,
            )(elu)
print(fm.shape)

In [None]:
# Count the trainable parameters in the current graph by multiplying out the
# static shape of every trainable variable.
total_parameters = 0
for variable in tf.trainable_variables():
    # shape is an array of tf.Dimension
    shape = variable.get_shape()
    variable_parameters = 1
    for dim in shape:
        # dim.value is the static size of this dimension
        variable_parameters *= dim.value
    total_parameters += variable_parameters
print(total_parameters)

# Data loading

In [None]:
# Load the four label CSV files.
df = pd.read_csv("../data/subplot.csv")
df1 = pd.read_csv("../data/subplot2.csv")
df2 = pd.read_csv("../data/subplot3.csv")
df3 = pd.read_csv("../data/subplot4.csv")

# Drop the imagery-title column, then every row with a missing value.
df = df.drop('IMAGERY_TITLE', axis = 1).dropna(axis = 0)
df1 = df1.drop('IMAGERY_TITLE', axis = 1).dropna(axis = 0)
df2 = df2.drop('IMAGERY_TITLE', axis = 1).dropna(axis = 0)
df3 = df3.drop('IMAGERY_TITLE', axis = 1).dropna(axis = 0)

# Row counts per source file, recorded before concatenation; a later cell
# divides these by 196 to get per-file plot counts.
lens = [len(x) for x in [df, df1, df2, df3]]

df = pd.concat([df, df1, df2, df3], ignore_index = True)
df = df.dropna(axis = 0)

#existing1 = [int(x[:-4]) for x in os.listdir('../data/2017_data/') if ".DS" not in x]
# Plot ids that have an imagery file on disk (file name minus its last four
# characters).
existing = [int(x[:-4]) for x in os.listdir('../data/2018/') if ".DS" not in x]
#existing = [x for x in existing1 if x in existing2]
N_SAMPLES = len(existing)

In [None]:
# Keep only label rows whose plot has imagery on disk.
df = df[df['PLOT_ID'].isin(existing)]
# 196 = 14 * 14 label rows per plot, matching the (14, 14) label reshape used
# by the loader below.
N_SAMPLES = int(df.shape[0]/196)
N_YEARS = 1

plot_ids = sorted(df['PLOT_ID'].unique())

locs_ls = []

def reconstruct_images(plot_id):
    """Rebuild the label grid of one plot from the module-level ``df``.

    Args:
        plot_id: value to match against the ``PLOT_ID`` column.

    Returns:
        A list of rows, one per distinct ``LAT`` in descending order; each
        row is the list of ``TREE`` values sorted by ascending ``LON``.
    """
    plot_rows = df[df['PLOT_ID'] == plot_id]
    latitudes = sorted(plot_rows['LAT'].unique(), reverse=True)
    return [
        list(plot_rows[plot_rows['LAT'] == lat].sort_values('LON', axis = 0)['TREE'])
        for lat in latitudes
    ]

data = [reconstruct_images(x) for x in plot_ids]

# Initiate empty lists to store the X and Y data in
data_x, data_y, lengths = [], [], []

# Iterate over each plot
pad = True
flip = True
for i in plot_ids:
    # Load the sentinel imagery
    for year in ["2018"]: #"2017_data", 
        x = np.load("../data/" + year + "/" + str(i) + ".npy")
        # Append the vegetation-index bands, then drop blank time steps
        x = ndvi(x, image_size = 16)
        x = evi(x, image_size = 16)
        x = savi(x, image_size = 16)
        x = remove_blank_steps(x)
        # Gradient magnitude of band 10 at the first time step, scaled by 10
        # and repeated for every time step as one extra band
        x_grad, y_grad = np.gradient(np.reshape(x[0, :, :, 10], (16, 16)))
        #x[:, :, :, 10] = (x[:, :, :, 10] - np.min(x[:, :, :, 10]) / np.max(x[:, :, :, 10])
        mag = np.stack([np.reshape(np.sqrt(x_grad**2 + y_grad**2)*10, (16, 16, 1))]*x.shape[0])
        #if np.max(mag) > 0:
        #    mag = (mag - np.min(mag)) / np.max(mag)
        x = np.concatenate([x, mag], axis = -1)
        print(np.max(x[:, :, :, 14]))
        y = reconstruct_images(i)

        # Remember the true number of time steps BEFORE zero padding so that
        # all four augmented copies record the same sequence length.
        n_steps = x.shape[0]
        lengths.append(n_steps)
        if n_steps < 24:
            # Pad with as many channels as x actually has; a hard-coded
            # channel count would make the concatenate below fail.
            padding = np.zeros((24 - n_steps, IMAGE_SIZE, IMAGE_SIZE, x.shape[-1]))
            x = np.concatenate((x, padding), axis = 0)
        data_x.append(x)
        data_y.append(y)
        if flip:
            # FLIP HORIZONTAL
            x1 = np.flip(x, 1)
            data_x.append(x1)
            data_y.append(np.flip(y, 0))
            lengths.append(n_steps)

            # FLIP BOTH
            x2 = np.flip(x, 2)
            x2 = np.flip(x2, 1)
            data_x.append(x2)
            data_y.append(np.flip(y, [0, 1]))
            lengths.append(n_steps)

            # FLIP VERTICAL
            x3 = np.flip(x, 2)
            data_x.append(x3)
            data_y.append(np.flip(y, 1))
            lengths.append(n_steps)

data_x = np.stack(data_x)
data_y = np.stack(data_y)
# Four samples per plot and year: the original plus three flips
data_y = np.reshape(data_y, (N_SAMPLES*4*N_YEARS, 14, 14, 1))
lengths = np.stack(lengths)
lengths = np.reshape(lengths, (lengths.shape[0], 1))

if PAD_INPUT_TYPE == 'zero' and RESIZE_OUTPUT:
    data_y = np.pad(data_y, [[0, 0], [1, 1], [1, 1], [0, 0]], 'constant')
    
if PAD_INPUT_TYPE == 'reflect' and RESIZE_OUTPUT:
    data_y = np.pad(data_y, [[0, 0], [1, 1], [1, 1], [0, 0]], 'reflect')
print("Finished data loading")
print(data_y.shape)

In [None]:
# Plots per source CSV (196 label rows per plot).
len2 = [x//196 for x in lens]
print(len2)
# Samples per plot after augmentation.
MULT = 4 * N_YEARS

# The first TRAIN_RATIO share of each source file's plot-index range.
# NOTE(review): 200, 136 and 162 are hard-coded while only the fourth range
# uses len2; presumably they equal len2[0..2] -- verify against the printed
# values above.
ordering = [[x for x in range(0,int(200*TRAIN_RATIO))],
            [x for x in range(200,200+(int(136*TRAIN_RATIO)))], 
            [x for x in range(200+136,200+136+(int(162*TRAIN_RATIO)))],
           [x for x in range(200+136+162,200+136+162+(int(len2[3]*TRAIN_RATIO)))]]

ordering = [item for sublist in ordering for item in sublist]
# Every remaining plot index goes first, followed by the indices picked above.
test_ordering = [x for x in range(0, N_SAMPLES) if x not in ordering]
ordering = test_ordering + ordering
# Expand each plot index into the indices of its four augmented samples.
ordering = [[x*MULT, (x*MULT)+1, (x*MULT)+2, (x*MULT)+3] for x in ordering]
ordering = [item for sublist in ordering for item in sublist]
#randomized = [[x*4, (x*4)+1, (x*4)+2, (x*4)+3] for x in ordering]
##shuffle(randomized)
#randomized = [item for sublist in randomized for item in sublist]

#randomized = [x for x in range(0, N_SAMPLES)]
#shuffle(randomized)
#randomized = [[x*4, (x*4)+1, (x*4)+2, (x*4)+3] for x in randomized]
#randomized = [item for sublist in randomized for item in sublist]

In [None]:
# Reorder all three arrays with the same index list so samples stay aligned.
data_x = data_x[ordering]
data_y = data_y[ordering]
lengths = lengths[ordering]


# Total label sum of the first sample of every group of MULT (one value per
# plot), then a histogram of those sums over fixed bins.
percs = [sum(sum(val)) for x, val in enumerate(data_y) if x % MULT == 0]
percs = np.array(percs).flatten()
zero = len([x for x in percs if x == 0])# number with 0
one = len([x for x in percs if 0 < x <= 8])
two = len([x for x in percs if 8 < x <= 20])
three = len([x for x in percs if 20 < x <= 35])
four = len([x for x in percs if 35 < x <= 70])
five = len([x for x in percs if 70 < x <= 100])
six = len([x for x in percs if 100 < x])

print("{} {} {} {} {} {} {}".format(zero, one, two, three, four, five, six))

In [None]:
# Notebook display: number of plots that have a label sum.
len(percs)


In [None]:
# Plot indices grouped by label-sum bin. The bin edges must match the
# histogram counts computed from `percs`: in particular the 70-100 bin is
# closed on the right, otherwise a plot whose sum is exactly 100 lands in no
# bin and is silently dropped from both the train and the test split.
idx = [x for x in range(0, len(percs))]
zero_ids = [x for x, z in zip(idx, percs) if z == 0]
one_ids = [x for x, z in zip(idx, percs) if 0 < z <= 8]
two_ids = [x for x, z in zip(idx, percs) if 8 < z <= 20]
three_ids = [x for x, z in zip(idx, percs) if 20 < z <= 35]
four_ids = [x for x, z in zip(idx, percs) if 35 < z <= 70]
five_ids = [x for x, z in zip(idx, percs) if 70 < z <= 100]
six_ids = [x for x, z in zip(idx, percs) if 100 < z]

In [None]:
# Stratified split: within every label-sum bin the first TEST_RATIO share of
# plot indices goes to the test set and the rest to the training set.
_strata = [zero_ids, one_ids, two_ids, three_ids, four_ids, five_ids, six_ids]

train_ids = [plot
             for stratum in _strata
             for plot in stratum[int(len(stratum) * TEST_RATIO):]]

test_ids = [plot
            for stratum in _strata
            for plot in stratum[:int(len(stratum) * TEST_RATIO)]]

In [None]:
#train_ids = [[(x*8), (x*8)+1, (x*8)+2, (x*8)+3, (x*8)+4, (x*8)+5, (x*8)+6, (x*8)+7] for x in train_ids]
# Expand every training plot index into the sample indices of its four
# augmented copies (samples are stored in groups of MULT per plot).
train_ids = [[(x*MULT), (x*MULT)+1, (x*MULT)+2, (x*MULT)+3] for x in train_ids]
train_ids = [item for sublist in train_ids for item in sublist]

# Test on the first sample of each held-out plot only (the loader stores the
# un-flipped original first). Use MULT, not a literal 4, so the stride stays
# consistent with the training indices above.
test_ids = [x*MULT for x in test_ids]
#test_ids = [item for sublist in test_ids for item in sublist]

# Loss definition

In [None]:
import math
def bin_foc(y_true, y_pred):
    """Binary focal loss (alpha = 0.5, gamma = 2), summed over all entries.

    :param y_true: tensor of 0/1 labels, compatible with shape (-1, 14, 14)
    :param y_pred: sigmoid output; reshaped to (-1, 14, 14)
    :return: scalar loss tensor
    """
    eps = K.epsilon()
    probs = tf.reshape(y_pred, (-1, 14,14))

    # Probabilities at positive / negative pixels; other pixels get the
    # neutral value (1 resp. 0) so they contribute nothing. Clipping
    # prevents NaN and Inf in the logarithms.
    pos_probs = tf.where(tf.equal(y_true, 1), probs, tf.ones_like(probs))
    neg_probs = tf.where(tf.equal(y_true, 0), probs, tf.zeros_like(probs))
    pos_probs = K.clip(pos_probs, eps, 1. - eps)
    neg_probs = K.clip(neg_probs, eps, 1. - eps)

    pos_term = K.sum(0.5 * K.pow(1. - pos_probs, 2) * K.log(pos_probs))
    neg_term = K.sum((1 - 0.5) * K.pow(neg_probs, 2) * K.log(1. - neg_probs))
    return -pos_term - neg_term
    
def focal_loss(target_tensor, prediction_tensor, weights=None, alpha=0.25, gamma=1.5):
    """Focal loss on sigmoid probabilities, summed over all entries.

    Args:
        target_tensor: ground-truth tensor; entries > 0 count as positive.
        prediction_tensor: sigmoid output, reshaped to (-1, 14, 14).
        weights: accepted for interface compatibility; unused.
        alpha: weight of the positive term (negatives get ``1 - alpha``).
        gamma: focusing exponent.

    Returns:
        Scalar loss tensor.
    """
    probs = tf.reshape(prediction_tensor, (-1, 14, 14))
    zero_map = array_ops.zeros_like(probs, dtype=probs.dtype)
    is_positive = target_tensor > zero_map

    # Positive pixels are weighted by (z - p), negative pixels by p; the
    # other class contributes zero in each case.
    pos_coeff = array_ops.where(is_positive, target_tensor - probs, zero_map)
    neg_coeff = array_ops.where(is_positive, zero_map, probs)

    log_p = tf.log(tf.clip_by_value(probs, 1e-8, 1.0))
    log_not_p = tf.log(tf.clip_by_value(1.0 - probs, 1e-8, 1.0))
    pos_term = alpha * (pos_coeff ** gamma) * log_p
    neg_term = (1 - alpha) * (neg_coeff ** gamma) * log_not_p
    return tf.reduce_sum(-pos_term - neg_term)

def foc_lovasz(y_true, y_pred):
    """Lovasz hinge loss plus the log of the binary focal loss.

    Args:
        y_true: ground-truth tensor.
        y_pred: prediction tensor.

    Returns:
        ``lovasz_hinge(y_pred, y_true) + log(bin_foc(y_true, y_pred))``.
    """
    lovasz = lovasz_hinge(y_pred, y_true)
    # Named `focal` so the module-level focal_loss function is not shadowed.
    focal = bin_foc(y_true, y_pred)
    # The focal term is a graph tensor, so the logarithm has to be a
    # TensorFlow op; NumPy's log cannot operate on it.
    return lovasz + tf.log(focal)

def weighted_cross_entropy(y_true, y_pred):
    """Cross entropy with positives weighted 1.5x, computed from probabilities.

    Probabilities are clipped away from 0 and 1 and converted back to logits
    before calling ``tf.nn.weighted_cross_entropy_with_logits``.
    """
    eps = tf.keras.backend.epsilon()
    clipped = tf.clip_by_value(y_pred, eps, 1 - eps)
    logits = tf.log(clipped / (1 - clipped))
    return tf.nn.weighted_cross_entropy_with_logits(logits=logits, targets=y_true, pos_weight=1.5)

def dice_loss(y_true, y_pred):
    """Return 1 minus the smoothed Dice coefficient over all flattened entries."""
    smooth = 1.
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    dice = (2. * overlap + smooth) / (K.sum(truth) + K.sum(pred) + smooth)
    return 1. - dice

def smooth_jaccard(y_true, y_pred, smooth=1):
    """Per-sample smoothed Jaccard distance on 12x12 maps.

    Both inputs are reshaped to (-1, 144); the result has one value per row
    and is scaled by ``smooth``.
    """
    truth = tf.reshape(y_true, (-1, 12*12))
    pred = tf.reshape(y_pred, (-1, 12*12))
    overlap = K.sum(K.abs(truth * pred), axis=-1)
    total = K.sum(K.abs(truth) + K.abs(pred), axis=-1)
    jaccard = (overlap + smooth) / (total - overlap + smooth)
    return (1 - jaccard) * smooth

def bce_dice(y_true, y_pred):
    """Return 0.5 * binary cross entropy + Dice loss on (-1, 14, 14, 1) labels."""
    truth = tf.reshape(y_true, (-1, 14, 14, 1))
    bce_term = 0.5*binary_crossentropy(truth, y_pred)
    dice_term = dice_loss(truth, y_pred)
    return bce_term + dice_term


def bce_lovasz(y_true, y_pred):
    """Per-image Lovasz-softmax loss for class 1.

    Despite the name, no cross-entropy term is added; only the Lovasz loss
    is returned.
    """
    truth = tf.reshape(y_true, (-1, 14, 14, 1))
    return lovasz_softmax(y_pred, truth, classes=[1], per_image=True)

def focal_loss_fixed(y_true, y_pred, gamma = 2., alpha = 0.25):
    """Binary focal loss summed over all entries, labels viewed as (-1, 14, 14, 1).

    Args:
        y_true: 0/1 label tensor.
        y_pred: predicted probabilities.
        gamma: focusing exponent.
        alpha: weight of the positive term (negatives get ``1 - alpha``).
    """
    truth = tf.reshape(y_true, (-1, 14, 14, 1))
    pos_probs = tf.where(tf.equal(truth, 1), y_pred, tf.ones_like(y_pred))
    neg_probs = tf.where(tf.equal(truth, 0), y_pred, tf.zeros_like(y_pred))
    pos_term = K.sum(alpha * K.pow(1. - pos_probs, gamma) * K.log(K.epsilon()+pos_probs))
    neg_term = K.sum((1-alpha) * K.pow( neg_probs, gamma) * K.log(1. - neg_probs + K.epsilon()))
    return -pos_term - neg_term

def focal_dice(y_true, y_pred):
    """Return 0.5 * (focal loss / 196) + log(Dice loss).

    The focal term uses gamma = 0.5 and alpha = 0.25.
    """
    truth = tf.reshape(y_true, (-1, 14, 14, 1))
    focal_term = focal_loss_fixed(truth, y_pred, gamma = 0.5, alpha = 0.25) / 196
    dice_term = dice_loss(truth, y_pred)
    return 0.5*focal_term + tf.log(dice_term)

def foc_jaccard(y_true, y_pred):
    """Return (focal loss / 196) + 0.5 * ``dice_loss``.

    The overlap term is computed with ``dice_loss``; the focal term uses
    gamma = 1.3 and alpha = 0.25.
    """
    overlap_term = dice_loss(y_true, y_pred)
    focal_term = focal_loss_fixed(y_true, y_pred, gamma = 1.3, alpha = 0.25)
    return (focal_term / 196) + 0.5*overlap_term


def soft_dice_loss(y_true, y_pred, epsilon=1e-6):
    """Soft Dice loss for channels-last tensors.

    Arguments:
        y_true: ground truth; reshaped to (-1, 14, 14, 1).
        y_pred: network output, expected to have the same rank as the
            reshaped ``y_true``.
        epsilon: added to the denominator to avoid division by zero.

    Returns:
        ``1 - mean(2 * sum(p * t) / (sum(p^2 + t^2) + epsilon))``, where the
        sums run over every axis except the batch and channel axes and the
        mean runs over batch and channel.

    References:
        V-Net: Fully Convolutional Neural Networks for Volumetric Medical
        Image Segmentation, https://arxiv.org/abs/1606.04797
        More details on the Dice loss formulation:
        https://mediatum.ub.tum.de/doc/1395260/1395260.pdf (page 72)
        Adapted from
        https://github.com/Lasagne/Recipes/issues/99#issuecomment-347775022
    """
    y_true = tf.reshape(y_true, (-1, 14, 14, 1))
    print(y_true.shape)
    print(y_pred.shape)

    # Skip the batch and class axes when calculating the Dice score.
    axes = tuple(range(1, len(y_pred.shape)-1))
    # The inputs are graph tensors, so the reductions must be TensorFlow ops.
    numerator = 2. * tf.reduce_sum(y_pred * y_true, axes)
    denominator = tf.reduce_sum(tf.square(y_pred) + tf.square(y_true), axes)

    # Average over classes and batch.
    return 1 - tf.reduce_mean(numerator / (denominator + epsilon))

In [None]:
from keras.losses import binary_crossentropy
import keras.backend as K
import tensorflow as tf 

# Module-level constants for the metric helpers in this cell. `smooth` is
# read by tversky(); several other helpers define their own local `smooth`.
epsilon = 1e-5
smooth = 1

def dsc(y_true, y_pred):
    """Return the smoothed Dice coefficient over all flattened entries."""
    smooth = 1.
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    numerator = 2. * overlap + smooth
    denominator = K.sum(truth) + K.sum(pred) + smooth
    return numerator / denominator

def dice_loss(y_true, y_pred):
    """Return 1 minus the Dice coefficient computed by ``dsc``."""
    return 1 - dsc(y_true, y_pred)

def log_dice(y_true, y_pred):
    """Return the natural log of the Dice coefficient computed by ``dsc``."""
    return tf.log(dsc(y_true, y_pred))

def bce_dice_loss(y_true, y_pred):
    """Return 0.5 * binary cross entropy + Dice loss."""
    bce_term = 0.5*binary_crossentropy(y_true, y_pred)
    dice_term = dice_loss(y_true, y_pred)
    return bce_term + dice_term

def confusion(y_true, y_pred):
    """Return smoothed soft ``(precision, recall)``.

    Both inputs are clipped to [0, 1] but not rounded, so the counts are
    sums of probabilities.
    """
    smooth=1
    pred_pos = K.clip(y_pred, 0, 1)
    pred_neg = 1 - pred_pos
    true_pos_mask = K.clip(y_true, 0, 1)
    true_neg_mask = 1 - true_pos_mask

    n_tp = K.sum(true_pos_mask * pred_pos)
    n_fp = K.sum(true_neg_mask * pred_pos)
    n_fn = K.sum(true_pos_mask * pred_neg)

    precision = (n_tp + smooth)/(n_tp+n_fp+smooth)
    recall = (n_tp+smooth)/(n_tp+n_fn+smooth)
    return precision, recall

def tp(y_true, y_pred):
    """Return the smoothed true-positive rate after rounding both inputs."""
    smooth = 1
    pred_pos = K.round(K.clip(y_pred, 0, 1))
    actual_pos = K.round(K.clip(y_true, 0, 1))
    hits = K.sum(actual_pos * pred_pos)
    return (hits + smooth)/ (K.sum(actual_pos) + smooth)

def tn(y_true, y_pred):
    """Return the smoothed true-negative rate after rounding both inputs."""
    smooth = 1
    pred_neg = 1 - K.round(K.clip(y_pred, 0, 1))
    actual_neg = 1 - K.round(K.clip(y_true, 0, 1))
    correct_rejections = K.sum(actual_neg * pred_neg)
    return (correct_rejections + smooth) / (K.sum(actual_neg) + smooth )

def tversky(y_true, y_pred):
    """Return the Tversky index with alpha = 0.7.

    False negatives are weighted by alpha, false positives by 1 - alpha.
    Uses the module-level ``smooth``.
    """
    alpha = 0.7
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    hits = K.sum(truth * pred)
    misses = K.sum(truth * (1-pred))
    false_alarms = K.sum((1-truth)*pred)
    denominator = hits + alpha*misses + (1-alpha)*false_alarms + smooth
    return (hits + smooth)/denominator

def tversky_loss(y_true, y_pred):
    """Return 1 minus the Tversky index."""
    index = tversky(y_true,y_pred)
    return 1 - index

def focal_tversky(y_true,y_pred):
    """Return the focal Tversky loss: (1 - Tversky index) ** 0.75."""
    gamma = 0.75
    index = tversky(y_true, y_pred)
    return K.pow((1-index), gamma)

def ftl_bce(y_true, y_pred):
    """Return focal Tversky loss + 0.5 * binary cross entropy on (-1, 14, 14, 1) labels."""
    truth = tf.reshape(y_true, (-1, 14, 14, 1))
    tversky_term = focal_tversky(truth, y_pred)
    bce_term = 0.5*binary_crossentropy(truth, y_pred)
    return tversky_term + bce_term


def focal_dice(y_true, y_pred):
    """Return 0.5 * binary cross entropy minus the log Dice coefficient.

    Labels are viewed as (-1, 14, 14, 1). Despite the name, no focal term is
    used here.
    """
    truth = tf.reshape(y_true, (-1, 14, 14, 1))
    bce_term = 0.5*binary_crossentropy(truth, y_pred)
    return bce_term - log_dice(truth, y_pred)

def bce_dice_count(y_true, y_pred):
    """Return 0.5 * BCE - log Dice + 0.5 * count loss on (-1, 14, 14, 1) labels."""
    truth = tf.reshape(y_true, (-1, 14, 14, 1))
    bce_term = 0.5*binary_crossentropy(truth, y_pred)
    dice_term = -log_dice(truth, y_pred)
    count_term = 0.5*count_loss(truth, y_pred)
    return bce_term + dice_term + count_term

def lvz_bce(y_true, y_pred):
    """Return per-image Lovasz-softmax loss (class 1) + 0.5 * binary cross entropy.

    The Lovasz term sees predictions reshaped to (-1, 14, 14) and the labels
    as given; the cross-entropy term sees labels reshaped to (-1, 14, 14, 1).
    """
    truth_4d = tf.reshape(y_true, (-1, 14, 14, 1))
    pred_3d = tf.reshape(y_pred, (-1, 14, 14))
    lovasz_term = lovasz_softmax(pred_3d, y_true, classes=[1], per_image=True)
    bce_term = 0.5*binary_crossentropy(truth_4d, y_pred)
    return lovasz_term + bce_term

In [None]:
def count_loss(y_true, y_pred):
    """Absolute difference between predicted and true totals, divided by 196.

    Both tensors are flattened and summed over every entry (across the whole
    batch); the prediction sum uses the raw probabilities, not thresholded
    values.

    Args:
        y_true: ground-truth tensor.
        y_pred: prediction tensor.

    Returns:
        Scalar tensor ``|sum(y_pred) - sum(y_true)| / 196``.
    """
    true_sum = K.sum(K.flatten(y_true))
    pred_sum = K.sum(K.flatten(y_pred))
    # 196 = 14 * 14 pixels per label map.
    return K.abs(pred_sum - true_sum) / 196

def dsc_np(y_true, y_pred):
    """Smoothed Dice coefficient for NumPy inputs.

    Args:
        y_true: array-like ground truth.
        y_pred: array-like prediction with the same number of elements.

    Returns:
        ``(2 * sum(t * p) + 1) / (sum(t) + sum(p) + 1)`` computed in float32;
        1.0 when both inputs are empty or all zero.
    """
    smooth = 1.
    # np.asarray lets plain lists through; the reductions run inside NumPy
    # instead of iterating element by element in Python.
    y_true_f = np.asarray(y_true).flatten().astype(np.float32)
    y_pred_f = np.asarray(y_pred).flatten().astype(np.float32)
    intersection = np.sum(y_true_f * y_pred_f)
    score = (2. * intersection + smooth) / (np.sum(y_true_f) + np.sum(y_pred_f) + smooth)
    return score

def bce_shift(true, pred, power):
    """Shift-tolerant BCE + Jaccard loss for 12x12 predictions vs 14x14 labels.

    For each of the first BATCH_SIZE samples, the 12x12 prediction is compared
    with the nine 12x12 windows of the 14x14 label (row and column offsets
    0..2). Each window's losses are blended with weights derived from the
    inverse losses raised to ``power``.

    Args:
        true: label tensor; ``true[i]`` must reshape to (1, 14, 14, 1).
        pred: prediction tensor; ``pred[i]`` must reshape to (1, 12, 12, 1).
        power: exponent applied to the inverse losses when forming weights.

    Returns:
        Tensor of shape (BATCH_SIZE, 12, 12, 1).
    """
    losses = []
    for i in range(BATCH_SIZE):
        true_i = tf.reshape(true[i], (1, 14, 14, 1))
        pred_i = tf.reshape(pred[i], (1, 12, 12, 1))
        true_p = true_i
        #loss_o = binary_crossentropy(true_p, pred)
        # extract out the candidate shifts
        # (true_m is the centred window; the others are offset by one pixel
        # along one or both spatial axes)
        true_m = true_i[:, 1:13, 1:13]
        true_l = true_i[:, 0:12, 1:13]
        true_r = true_i[:, 2:14, 1:13]
        true_u = true_i[:, 1:13, 0:12]
        true_d = true_i[:, 1:13, 2:14]
        true_dr = true_i[:, 2:14, 0:12]
        true_dl = true_i[:, 0:12, 0:12]
        true_ur = true_i[:, 2:14, 2:14]
        true_ul = true_i[:, 0:12, 2:14]
        true_shifts = [true_m, true_l, true_r, true_u, true_d, true_dr, true_dl, true_ur, true_ul]
        # One BCE map and one Jaccard distance per candidate window.
        bce_shifts = tf.stack([binary_crossentropy(x, pred_i) for x in true_shifts])
        jac_shifts = tf.stack([smooth_jaccard(x, pred_i) for x in true_shifts])

        # Calculate BCE
        
        
        # Inverse mean loss raised to `power`; 0.1 is added to the Jaccard
        # distance before inverting.
        bce_power = tf.math.pow(1/(tf.reduce_mean(bce_shifts, axis = [2,3])), power)
        jac_power = tf.math.pow(1/(jac_shifts+0.1), power)
        
        # Normalise each set of weights so it sums to one.
        sums = tf.reduce_sum(bce_power)
        sum_jac = tf.reduce_sum(jac_power)
        weights = bce_power/sums
        weights_jac = jac_power/sum_jac
    
        # Blend: BCE-derived weights count twice as much as Jaccard-derived.
        weights = (2*weights + weights_jac)/3
        loss = tf.reshape(bce_shifts, (1, 9, 12, 12)) * tf.reshape(weights, (1, 9, 1, 1))
        loss = tf.reduce_sum(loss, axis = 1)
        # The Jaccard term is weighted with the same blended weights.
        loss_j = tf.reshape(jac_shifts, (1, 9)) * tf.reshape(weights, (1, 9))
        loss_j = tf.reduce_sum(loss_j, axis = 1)
        losses.append(loss + 0.5*loss_j)
    loss = tf.reshape(tf.stack(losses), (BATCH_SIZE, 12, 12, 1))
    return loss

def get_shifts_batched(arr):
    """Return the nine 12x12 windows of a batch of 14x14 maps.

    Args:
        arr: array or tensor indexed as ``arr[batch, row, col, ...]`` with at
            least 14 rows and 14 columns.

    Returns:
        List of nine slices ``arr[:, r:r+12, c:c+12]``. The centred window
        comes first, then the four one-axis shifts, then the four diagonal
        shifts.
    """
    # (row_start, col_start) of every window, in the order callers expect.
    offsets = [(1, 1), (0, 1), (2, 1), (1, 0), (1, 2),
               (2, 0), (0, 0), (2, 2), (0, 2)]
    return [arr[:, r:r + 12, c:c + 12] for r, c in offsets]

def lovasz_shift(true, pred, power):
    """Lovasz-softmax loss of ``pred`` against each of the nine shifted labels.

    Args:
        true: label tensor that reshapes to (-1, 14, 14, 1).
        pred: prediction tensor that reshapes to (-1, 12, 12).
        power: exponent used for the BCE-derived shift weights.

    Returns:
        Stack of nine Lovasz-softmax losses, one per shift.

    NOTE(review): the per-sample shift weights are computed and their shape is
    printed, but they are never applied to the returned losses (the weighting
    code is commented out below) -- confirm whether that is intended.
    """
    batch_shifted = get_shifts_batched(tf.reshape(true, (-1, 14, 14, 1)))
    shift_weights = []
    for i in range(BATCH_SIZE):
        true_i = tf.reshape(true[i], (1, 14, 14, 1))
        pred_i = tf.reshape(pred[i], (1, 12, 12, 1))
        true_p = true_i
        # Nine 12x12 windows of the label at row/column offsets 0..2.
        true_m = true_i[:, 1:13, 1:13]
        true_l = true_i[:, 0:12, 1:13]
        true_r = true_i[:, 2:14, 1:13]
        true_u = true_i[:, 1:13, 0:12]
        true_d = true_i[:, 1:13, 2:14]
        true_dr = true_i[:, 2:14, 0:12]
        true_dl = true_i[:, 0:12, 0:12]
        true_ur = true_i[:, 2:14, 2:14]
        true_ul = true_i[:, 0:12, 2:14]
        true_shifts = [true_m, true_l, true_r, true_u, true_d, true_dr, true_dl, true_ur, true_ul]
        bce_shifts = tf.stack([binary_crossentropy(x, pred_i) for x in true_shifts])
        # Inverse mean BCE raised to `power`, normalised to sum to one.
        bce_power = tf.math.pow(1/(tf.reduce_mean(bce_shifts, axis = [2,3])), power)
        sums = tf.reduce_sum(bce_power)
        weights = bce_power/sums
        weights = tf.reshape(weights, (1, 9))
        shift_weights.append(weights)
    weights = tf.reshape(tf.stack(shift_weights), (BATCH_SIZE, 9))
    print("WEIGHT", weights.shape)
    # Unweighted Lovasz loss of the whole batch against each shifted label set.
    lovasz = tf.stack([lovasz_softmax(tf.reshape(pred, (-1, 12, 12)), x, classes = [1], per_image = True) for x in batch_shifted])
    #losses = []
    #for i in range(0, 9):
    #    lovasz = lovasz_softmax(tf.reshape(pred, (-1, 12, 12)), batch_shifted[:, i, :, :, :], classes=[1], per_image=True)
    #    lovasz = lovasz * weights[:, i]
    #losses.append(lovasz)
    #losses = tf.reshape(tf.stack(losses), (-1,))
   
    return lovasz

def ce(targets, predictions, epsilon=1e-12):
    """Mean cross-entropy term between two 144-element arrays.

    Both inputs are reshaped to (1, 144); predictions are clipped to
    [epsilon, 1 - epsilon] before the logarithm.

    Input: targets      ndarray with 144 elements
           predictions  ndarray with 144 elements
    Returns: scalar ``-mean(targets * log(predictions + 1e-9)) / 1``
    """
    flat_targets = targets.reshape(1, 144)
    flat_preds = np.clip(predictions.reshape(1, 144), epsilon, 1. - epsilon)
    n_rows = flat_preds.shape[0]
    log_likelihood = flat_targets * np.log(flat_preds + 1e-9)
    return -np.mean(log_likelihood) / n_rows

from sklearn.metrics import precision_score
def prec_shift(true, pred):
    """Return the best ``dsc_np`` score of ``pred`` against nine shifted windows of ``true``.

    Nine 12x12 windows are cut from ``true``: the centred one plus the eight
    one-pixel shifts (including diagonals). Each is scored against ``pred``
    with ``dsc_np`` and the maximum is returned, so a prediction that is off
    by a single pixel is not penalised.

    Despite the name, this does not compute precision; it returns whatever
    ``dsc_np`` returns (presumably a Dice coefficient -- confirm against its
    definition).

    Input: true  2-D array, indexed up to [0:14, 0:14]
           pred  array passed unchanged to ``dsc_np`` (12x12 at the call site)
    Returns: the largest of the nine ``dsc_np`` scores

    An earlier experiment, since removed, averaged the nine scores with
    weights derived from the inverse cross-entropy of each shift instead of
    taking the maximum.
    """
    # (row_start, col_start) of each window, in the original evaluation order:
    # centre, l, r, u, d, dr, dl, ur, ul.
    offsets = [(1, 1), (0, 1), (2, 1), (1, 0), (1, 2),
               (2, 0), (0, 0), (2, 2), (0, 2)]
    scores = [dsc_np(true[r:r + 12, c:c + 12], pred) for r, c in offsets]
    return max(scores)
    
def multi_shift(true, pred, power):
    """Return ``bce_shift`` plus ``lovasz_shift`` for the same arguments.

    Both terms are called with ``(true, pred, power)`` unchanged and their
    results are added.
    """
    bce_term = bce_shift(true, pred, power)
    lovasz_term = lovasz_shift(true, pred, power)
    return bce_term + lovasz_term

In [None]:
# Quick check of dsc_np on two 3x3 binary masks that differ in one pixel:
# img_1 has a full middle row, img_2 is missing the last element of that row.
img_1 = np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]])
img_2 = np.array([[0, 0, 0], [1, 1, 0], [0, 0, 0]])
dsc_np(img_1, img_2)

# Model training

In [None]:
# Training setup: choose between building the loss/optimizer from scratch
# (FRESH_START) and reusing objects that already exist in the notebook session.
# With FRESH_START = False nothing below defines `loss`, `train_op` or `saver`,
# so they must have been created by an earlier run -- verify before executing.
FRESH_START = False
FINE_TUNE = False
from tensorflow.python.keras.optimizers import SGD


# Overrides the BATCH_SIZE set in the hyperparameter cell at the top of the notebook.
BATCH_SIZE = 24
print("Starting model with: \n {} zone out \n {} l2 \n {} initial LR \n {} final LR \n {} parameters"
     .format(ZONE_OUT_PROB, L2_REG, INITIAL_LR, FINAL_LR, total_parameters))
# Validation loss an epoch has to beat before the model is written to dev_best.
best_val = 0.66
if not FRESH_START:
    print("Resuming training with a best validation score of {}".format(best_val))
if FRESH_START:
    # Plain gradient descent with a fixed 1e-6 learning rate drives train_op.
    optimizer = tf.train.GradientDescentOptimizer(1e-6)
    print("Restarting training from scratch on {} train and {} test samples, total {}".format(len(train_ids), len(test_ids), N_SAMPLES))
    #optimizer = AdaBoundOptimizer(learning_rate=INITIAL_LR/3,
    #                              final_lr=FINAL_LR/6,
    #                              beta1=0.9, beta2=0.999, 
    #                              amsbound=True)
    
    # NOTE(review): optimizer2 is created but never used in this cell;
    # presumably it backs `tune_op`, which is defined elsewhere -- confirm.
    optimizer2 = AdaBoundOptimizer(learning_rate=INITIAL_LR/5,
                                  final_lr=FINAL_LR/5,
                                  beta1=0.9, beta2=0.999, 
                                  amsbound=True)
    
    # Total loss = shifted BCE on the model output `fm` plus the graph's regularization loss.
    loss = bce_shift(labels, fm, power)
    #loss = bce_dice_count(labels, fm)
    l2_loss = tf.losses.get_regularization_loss()
    loss += l2_loss
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    
    # Make every op in the UPDATE_OPS collection run as part of each training step.
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(loss)    

    # Initialise all variables only after minimize() has been called, then create the checkpoint saver.
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    saver = tf.train.Saver(max_to_keep = 2)
    
# Select which loss/op pair the training loop runs. `loss2` and `tune_op` are
# not defined in this cell and must already exist when FINE_TUNE is True.
if FINE_TUNE:
    loss_to_run = loss2
    op_to_run = tune_op
else:
    loss_to_run = loss
    op_to_run = train_op

# Run training loop
# Each epoch does: one pass over the shuffled training ids, one validation-loss
# pass over the shuffled test ids, then per-sample precision / recall / dsc on
# every test id. The epoch counter starts at 17 and also sets the `power`
# value fed to the loss (1 + 0.06 * epoch).
for i in range(17, 100):
    # These are aliases, not copies: train_ids / test_ids are shuffled in place.
    randomize = train_ids
    np.random.shuffle(randomize)
    test_randomize = test_ids
    np.random.shuffle(test_randomize)

    losses = []
    val_loss = []

    # Training pass; a trailing partial batch is dropped.
    for k in tnrange(int(len(train_ids) // BATCH_SIZE)):
        batch_ids = randomize[k*BATCH_SIZE:(k+1)*BATCH_SIZE]
        batch_y = data_y[batch_ids, :, :].reshape(BATCH_SIZE, 14, 14)
        # Idea not currently enabled: skip batches whose labels are all zero
        # (if sum(sum(sum(batch_y))) > 0).
        op, tr = sess.run([op_to_run, loss_to_run],
                              feed_dict={inp: data_x[batch_ids, :, :, :],
                                         length: lengths[batch_ids],
                                         labels: batch_y,
                                         is_training: True,
                                         power: 1 + (i*0.06),
                                         })
        losses.append(tr)

    # Validation-loss pass.
    # NOTE(review): this always evaluates `loss`, even when FINE_TUNE selects
    # `loss2` for training -- confirm that is intended.
    for j in range(len(test_ids) // BATCH_SIZE):
        batch_ids = test_randomize[j*BATCH_SIZE:(j+1)*BATCH_SIZE]
        vl, y = sess.run([loss, fm], 
                         feed_dict={inp: data_x[batch_ids, :, :, :],
                                    length: lengths[batch_ids],
                                    labels: data_y[batch_ids, :, :].reshape(BATCH_SIZE, 14, 14),
                                    is_training: False,
                                    power: 1 + (i*0.06)
                                    })
        val_loss.append(vl)

    # Per-sample metrics on the test ids, one forward pass per sample.
    recalls = []
    precisions = []
    ious = []
    for m in test_ids:
        y = sess.run([fm], feed_dict={inp: data_x[m].reshape(1, 24, IMAGE_SIZE, IMAGE_SIZE, 15),
                                  length: lengths[m].reshape(1, 1),
                                  is_training: False,
                                  })[0]
        true = data_y[m].reshape((LABEL_SIZE, LABEL_SIZE))
        pred = y.reshape((12, 12))
        #TODO @jombrandt figure out difference between this in train and inference time
        #TODO @jombrandt convert to ROC-AUC instead of threshold F1
        # Binarise at 0.45. `<=` (rather than `<`) ensures a value exactly on
        # the threshold is not left un-binarised.
        pred[np.where(pred > 0.45)] = 1
        pred[np.where(pred <= 0.45)] = 0
        shifts = get_shifts(true)
        f1s = []
        precs = []
        recs = []
        for s in shifts:
            rec, prec = thirty_meter(s, pred)
            rec = np.mean(rec)
            prec = np.mean(prec)
            # NaN when prec + rec == 0 (or when either mean is NaN).
            f1_score = 2 * ((prec * rec) / (prec + rec))
            f1s.append(f1_score)
            precs.append(prec)
            recs.append(rec)
        # np.argmax treats NaN as the maximum, which would select a shift with
        # an undefined F1 over a genuinely good one. Rank NaN below every real
        # score; if all scores are NaN this still picks index 0, as before.
        f1_rank = np.asarray(f1s, dtype=float)
        best_shift = int(np.argmax(np.where(np.isnan(f1_rank), -np.inf, f1_rank)))
        rec = recs[best_shift]
        prec = precs[best_shift]
        recalls.append(rec)
        precisions.append(prec)
        # prec_shift returns the best dsc_np score over the shifts; it is
        # reported under the "iou" label below.
        iou = prec_shift(true, pred)
        ious.append(iou)
    # Samples whose precision / recall is undefined are left out of the averages.
    precision = np.mean([x for x in precisions if not np.isnan(x)])
    recall = np.mean([x for x in recalls if not np.isnan(x)])
    iou = np.mean(ious)
    f1_score = 2 * ((precision * recall) / (precision + recall))
    # Always checkpoint the latest weights; additionally keep the best-by-validation-loss model.
    save_path = saver.save(sess, "../models/dev/model")
    if np.mean(val_loss) < best_val:
        best_val = np.mean(val_loss)
        print("Saving model with {}".format(best_val))
        save_path = saver.save(sess, "../models/dev_best/model")
    print("Epoch {}: Loss {} Val: {} P {} R {} F1 {} iou {}".format(i + 1,
                                                             np.mean(losses), np.mean(val_loss),
                                                             precision, recall, f1_score, iou))

# Model validation and sanity checks

In [None]:
# Reset the sample cursor. The plotting cell below adds 4 to `start` every time
# it runs and uses it to index into the sorted test ids.
start = 0

In [None]:
#TODO @jombrandt 
#TODO @jombrandt -- remove augmentation of val set
import random 

def multiplot(matrices, vmin=0, vmax=0.5):
    '''Plot every matrix as a heatmap, side by side in one row.

    One subplot column is created per matrix (previously fixed at four, which
    raised IndexError for more than four inputs and left empty axes for
    fewer). Axis labels and ticks are removed from every panel.

    matrices: iterable of 2-D arrays accepted by seaborn.heatmap
    vmin, vmax: colour-scale limits shared by all panels; the defaults
                reproduce the original fixed 0 to 0.5 range
    Returns None; the figure is displayed with plt.show(). Nothing is drawn
    when `matrices` is empty.
    '''
    matrices = list(matrices)
    if not matrices:
        return
    n_panels = len(matrices)
    # squeeze=False keeps `axs` two-dimensional even when there is one panel.
    fig, axs = plt.subplots(ncols=n_panels, squeeze=False)
    # 5 inches per panel gives the original 20 x 4 figure for four matrices.
    fig.set_size_inches(5 * n_panels, 4)
    for ax, matrix in zip(axs[0], matrices):
        sns.heatmap(data=matrix, ax=ax, vmin=vmin, vmax=vmax)
        ax.set_xlabel("")
        ax.set_ylabel("")
        ax.set_yticks([])
        ax.set_xticks([])
    plt.show()
    
# Step the cursor forward by four and plot the model output for the next four
# test samples (taken from the sorted test ids). Re-run the cell to page
# through the test set; the printed value is the cursor position as a
# fraction of the test set.
# To inspect training samples instead, matrix_ids can be set by hand, e.g.
# matrix_ids = random.sample(train_ids, 4).
test_losses = []
start += 4
print(start/len(test_ids))
test_ids = sorted(test_ids)
matrix_ids = [test_ids[start + offset] for offset in range(4)]
preds = []
trues = []
for i in matrix_ids:
    idx = i
    print(i)
    # Single-sample forward pass; labels are not needed to evaluate `fm`.
    feed = {inp: data_x[idx].reshape(1, 24, IMAGE_SIZE, IMAGE_SIZE, 15),
            length: lengths[idx].reshape(1, 1),
            is_training: False}
    y = sess.run([fm], feed_dict=feed)
    # Zero-pad the 12x12 output by one pixel on each side (giving 14x14).
    y = np.pad(np.array(y).reshape(12, 12), [[1, 1], [1, 1]], mode="constant")
    preds.append(y)
    true = data_y[idx].reshape(LABEL_SIZE, LABEL_SIZE)
    trues.append(true)

multiplot(preds)

In [None]:
# Plot the ground-truth labels collected in `trues` by the previous cell, for
# comparison with the predictions plotted there.
multiplot(trues) # 140, 160, 236, 296, 324, 416, 460, 504, 976

In [None]:
# Threshold sweep: for each binarisation threshold from 0.20 to 0.55 (step
# 0.05), print the mean over the test set of the best-shift F1 per sample.
for j in range(4, 12):
    threshold = j * 0.05
    f1_all = []
    for i in test_ids:
        # Same input shape as the other inference cells (15 channels); the
        # previous value of 13 cannot reshape the same data_x rows.
        y = sess.run([fm], feed_dict={inp: data_x[i].reshape(1, 24, IMAGE_SIZE, IMAGE_SIZE, 15),
                                  length: lengths[i].reshape(1, 1),
                                  is_training: False,
                                  })[0]
        true = data_y[i].reshape((14, 14))
        pred = y.reshape((12, 12))
        # `<=` ensures a value exactly on the threshold is not left un-binarised.
        pred[np.where(pred > threshold)] = 1
        pred[np.where(pred <= threshold)] = 0
        shifts = get_shifts(true)
        f1s = []
        # `s`, not `i`: the inner loop must not overwrite the sample id.
        for s in shifts:
            rec, prec = thirty_meter(s, pred)
            rec = np.mean(rec)
            prec = np.mean(prec)
            # NaN when prec + rec == 0 (or when either mean is NaN).
            f1s.append(2 * ((prec * rec) / (prec + rec)))
        f1s = np.asarray(f1s, dtype=float)
        # Built-in max() gives order-dependent results with NaN, and one NaN
        # would turn the reported mean into NaN. Take the best defined F1 and
        # skip samples where no shift has one, matching how the training loop
        # drops NaN precision / recall values.
        if not np.all(np.isnan(f1s)):
            f1_all.append(np.nanmax(f1s))
    print(np.mean(f1_all))
    #print("{}: Recall: {}\t Precision: {}".format(j*0.05, np.mean(recalls), np.mean(precisions)))
    #TEST: 1161, 1076, 1267, 1187, 1197,  1109, 1235 TRAIN: 290, 184, 294, 890, 807
# 135224667

## TODO @jombrandt: list the 10 worst training and test samples by IoU

These should be written to a .txt file under tmp/ and read by validate-data.ipynb, both to check that the original classifications were correct and to identify regions that need more training data.