**Team: Spatial Extent**

Challenge 2

Notebook 2 

Tiling with:

UNet-like model with residual connections

UNet classical model

In [None]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

In [None]:
import os
import tensorflow as tf
import numpy as np
import shutil

# Set the seed for random operations.
# This makes our experiments reproducible.
SEED = 1234
tf.random.set_seed(SEED)
# The Keras ImageDataGenerator draws its random transforms from NumPy's
# global random generator, so that one has to be seeded as well.
np.random.seed(SEED)

In [None]:
cwd = os.getcwd()

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!unzip /content/drive/My\ Drive/Development_Dataset.zip

Archive:  /content/drive/My Drive/Development_Dataset.zip
  inflating: Development_Dataset/LICENSE.txt  
   creating: Development_Dataset/Test_Dev/
   creating: Development_Dataset/Test_Dev/Bipbip/
   creating: Development_Dataset/Test_Dev/Bipbip/Haricot/
   creating: Development_Dataset/Test_Dev/Bipbip/Haricot/Images/
  inflating: Development_Dataset/Test_Dev/Bipbip/Haricot/Images/Bipbip_haricot_im_00211.jpg  
  inflating: Development_Dataset/Test_Dev/Bipbip/Haricot/Images/Bipbip_haricot_im_00581.jpg  
  inflating: Development_Dataset/Test_Dev/Bipbip/Haricot/Images/Bipbip_haricot_im_00721.jpg  
  inflating: Development_Dataset/Test_Dev/Bipbip/Haricot/Images/Bipbip_haricot_im_00951.jpg  
  inflating: Development_Dataset/Test_Dev/Bipbip/Haricot/Images/Bipbip_haricot_im_01341.jpg  
  inflating: Development_Dataset/Test_Dev/Bipbip/Haricot/Images/Bipbip_haricot_im_02421.jpg  
  inflating: Development_Dataset/Test_Dev/Bipbip/Haricot/Images/Bipbip_haricot_im_02781.jpg  
  inflating: Developm

In [None]:
!unzip /content/drive/MyDrive/_Polimi/NN/Development_Dataset.zip

In [None]:
#!ls /content/Development_Dataset
dataset_dir = os.path.join(cwd, 'Development_Dataset')

# Prepare dataset

In [None]:
# ImageDataGenerator
# ------------------

from tensorflow.keras.preprocessing.image import ImageDataGenerator

apply_data_augmentation = True

# Geometric augmentation shared by images and masks. Both generators must be
# configured identically so that the same seed produces the same transform.
augmentation_args = dict(rotation_range=10,
                         width_shift_range=10,
                         height_shift_range=10,
                         zoom_range=0.3,
                         horizontal_flip=True,
                         vertical_flip=True,
                         fill_mode='reflect')

# Create training ImageDataGenerator objects.
# We need two different generators for images and corresponding masks.
if apply_data_augmentation:
    # NOTE(review): `rescale` is only applied by flow()/standardize(); code that
    # calls apply_transform() directly presumably never rescales - confirm.
    img_data_gen = ImageDataGenerator(rescale=1./255, **augmentation_args)
    mask_data_gen = ImageDataGenerator(**augmentation_args)
else:
    # Keep both names defined so that later cells can pass them on unchanged;
    # None means "no augmentation".
    img_data_gen = None
    mask_data_gen = None

In [None]:
# custom function that needs to be used to convert images to arrays
def read_rgb_mask(img_path, resize_shape):
    '''
    Read an RGB-coded segmentation mask and convert it to class indices.

    img_path: path to the mask file
    resize_shape: (width, height) the mask is resized to, using nearest-neighbour
                  resampling so that no new (interpolated) colours appear
    Returns a 2D numpy array containing the target values (0, 1 or 2).
    Pixels whose colour is not listed below are mapped to 0.
    '''

    # Close the file as soon as the pixels are loaded. The conversion to RGB
    # makes the colour comparison below work for RGBA or palette masks too.
    with Image.open(img_path) as mask_file:
        mask_img = mask_file.convert('RGB').resize(resize_shape, resample=Image.NEAREST)

    mask_arr = np.array(mask_img)

    new_mask_arr = np.zeros(mask_arr.shape[:2], dtype=mask_arr.dtype)

    # Use RGB dictionary in 'RGBtoTarget.txt' to convert RGB to target
    rgb_to_target = (
        ([254, 124, 18], 0),
        ([255, 255, 255], 1),
        ([216, 67, 82], 2),
    )
    for rgb, target in rgb_to_target:
        new_mask_arr[np.all(mask_arr == rgb, axis=-1)] = target

    return new_mask_arr

In [None]:
# split each image into tiles
from PIL import Image
import matplotlib.pyplot as plt

# Helper: offsets of the tiles along one image axis.
def _tile_starts(length, tile_size, overlapping):
  """Return the start offsets of the tiles covering an axis of `length` pixels.

  Tiles are placed every `tile_size - overlapping` pixels; if the regular grid
  does not reach the border, one extra tile aligned to the border is added.
  An axis shorter than `tile_size` gets a single tile starting at 0.
  """
  step = tile_size - overlapping
  if step <= 0:
    raise ValueError('overlapping must be smaller than tile_size')
  if length <= tile_size:
    return [0]
  starts = list(range(0, length - tile_size + 1, step))
  # Add the border-aligned tile only when the grid leaves pixels uncovered,
  # so that no tile position is emitted twice.
  if starts[-1] + tile_size < length:
    starts.append(length - tile_size)
  return starts


# This function takes an image, divides it into tiles and records the position
# (top-left corner) of each tile with respect to the original image.
# If train_data_img_dir and train_data_mask_dir are specified, the generated tiles are saved into
# those folders. This case occurs when the dataset is generated for training purposes.
# If train_data_img_dir is NOT specified (empty string), nothing is saved and only the returned
# objects are used. This case occurs when test images are cropped for doing prediction.
def create_split_img(img_dir, img_filename, train_data_img_dir, train_data_mask_dir, tile_size, overlapping, SplitMask=True):
  """Split an image (and optionally its mask) into square, overlapping tiles.

  img_dir: folder containing the image; the mask is looked up in the sibling 'Masks' folder
  img_filename: file name of the image inside img_dir
  train_data_img_dir / train_data_mask_dir: output folders, or '' to skip saving
  tile_size: side of the square tiles, in pixels
  overlapping: number of pixels shared by two neighbouring tiles
  SplitMask: when True the mask with the same base name ('.png') is tiled as well

  Returns (tile_imgs, tile_masks, tile_positions): two lists of PIL images and a
  list of (start_row, start_col) tuples, all in row-major tile order.
  tile_masks is empty when SplitMask is False.
  """
  img_path = os.path.join(img_dir, img_filename)
  print(img_path)
  # Convert to RGB so that the array always has three channels.
  with Image.open(img_path) as img:
    img_arr = np.array(img.convert('RGB'))
  img_h, img_w = img_arr.shape[0:2]

  base_name, img_ext = os.path.splitext(img_filename)

  if SplitMask:
    # The 'Masks' folder sits next to the 'Images' folder.
    masks_dir = os.path.join(os.path.dirname(os.path.normpath(img_dir)), 'Masks')
    mask_path = os.path.join(masks_dir, base_name + '.png')
    print(mask_path)
    with Image.open(mask_path) as mask:
      mask_arr = np.array(mask.convert('RGB'))

  # define the top-left index of each tile
  vertical_tiles = _tile_starts(img_h, tile_size, overlapping)
  horizontal_tiles = _tile_starts(img_w, tile_size, overlapping)

  tile_imgs = []
  tile_masks = []
  tile_positions = []
  for i, start_row in enumerate(vertical_tiles):
    for j, start_col in enumerate(horizontal_tiles):
      tile_positions.append((start_row, start_col))
      tile_suffix = '_' + str(i) + '_' + str(j)

      tile_img_arr = img_arr[start_row:start_row+tile_size, start_col:start_col+tile_size, :]
      tile_img = Image.fromarray(tile_img_arr)
      tile_imgs.append(tile_img)

      if SplitMask:
        tile_mask_arr = mask_arr[start_row:start_row+tile_size, start_col:start_col+tile_size, :]
        tile_mask = Image.fromarray(tile_mask_arr)
        tile_masks.append(tile_mask)

      # Tiles are written to disk only when an output folder was given.
      if train_data_img_dir:
        tile_img.save(os.path.join(train_data_img_dir, base_name + tile_suffix + img_ext))
        if SplitMask:
          tile_mask.save(os.path.join(train_data_mask_dir, base_name + tile_suffix + '.png'))

  return tile_imgs, tile_masks, tile_positions

In [None]:
#used to eliminate some directories and files
# Every path is checked on its own, so a partially generated dataset
# (e.g. images folder present but masks folder missing) is cleaned up
# without raising.
for generated_dir in ('train_data_images', 'train_data_masks'):
  generated_dir_path = os.path.join(dataset_dir, generated_dir)
  if os.path.isdir(generated_dir_path):
    shutil.rmtree(generated_dir_path)
for split_file in ('train.txt', 'valid.txt'):
  split_file_path = os.path.join(dataset_dir, split_file)
  if os.path.isfile(split_file_path):
    os.remove(split_file_path)

In [None]:
# Helper: copy every file of a folder into another folder, keeping the names.
def _copy_folder_contents(src_dir, dst_dir):
  for file_name in os.listdir(src_dir):
    shutil.copyfile(os.path.join(src_dir, file_name), os.path.join(dst_dir, file_name))


# this function defines the generation of the dataset
# if Splitted is True, images are generated using the create_split_img function
# otherwise they are simply copied into a specified path
def generate_folder_dataset(teams, tile_size, overlapping, Haricot=False, Mais=False, Splitted=True):
  """Fill <dataset_dir>/train_data_images and <dataset_dir>/train_data_masks.

  teams: names of the team folders inside <dataset_dir>/Training
  tile_size, overlapping: forwarded to create_split_img (used only when Splitted)
  Haricot, Mais: select which crop folders of each team are included
  Splitted: True to tile every image and mask, False to copy the files as they are

  Reads the module-level `dataset_dir`.
  """
  train_data_images_dir = os.path.join(dataset_dir, 'train_data_images')
  train_data_masks_dir = os.path.join(dataset_dir, 'train_data_masks')
  training_dataset_dir = os.path.join(dataset_dir, 'Training')
  # Create each output folder independently: one may exist without the other.
  os.makedirs(train_data_images_dir, exist_ok=True)
  os.makedirs(train_data_masks_dir, exist_ok=True)

  selected_crops = [crop for crop, selected in (('Haricot', Haricot), ('Mais', Mais)) if selected]

  for team in teams:
    for crop in selected_crops:
      images_dir = os.path.join(training_dataset_dir, team, crop, 'Images')
      if Splitted:
        for img_name in os.listdir(images_dir):
          create_split_img(images_dir, img_name, train_data_images_dir, train_data_masks_dir, tile_size, overlapping)
      else:
        masks_dir = os.path.join(training_dataset_dir, team, crop, 'Masks')
        _copy_folder_contents(images_dir, train_data_images_dir)
        _copy_folder_contents(masks_dir, train_data_masks_dir)

In [None]:
#generate dataset to be trained
teams = ['Bipbip','Roseau','Weedelec','Pead']
tile_size = 224
overlapping = 20
Haricot = True
Mais = True
generate_folder_dataset(teams, tile_size, overlapping, Haricot=Haricot, Mais=Mais)

#check the consistency of the generate_folder_dataset function
num_tile_images = len(os.listdir(os.path.join(dataset_dir, 'train_data_images')))
num_tile_masks = len(os.listdir(os.path.join(dataset_dir, 'train_data_masks')))
if num_tile_images == num_tile_masks:
  print('Images and Masks are ' + str(num_tile_images))
else:
  # Report both counts so that the mismatch can be investigated.
  print('An error occurred: ' + str(num_tile_images) + ' images but ' + str(num_tile_masks) + ' masks')

# Number of original (untiled) images per team and crop.
for team in teams:
  for crop, selected in (('Haricot', Haricot), ('Mais', Mais)):
    if selected:
      length = len(os.listdir(os.path.join(dataset_dir, 'Training', team, crop, 'Images')))
      print(str(team) + ' ' + crop + ' has ' + str(length) + ' elements')

In [None]:
# further check on the generated dataset
# Paths are derived from dataset_dir, the folder the tiles were written to.
train_dir = os.path.join(dataset_dir, 'train_data_images')
mask_dir = os.path.join(dataset_dir, 'train_data_masks')
num_train_elems = len(os.listdir(train_dir))
num_mask_elems = len(os.listdir(mask_dir))
if num_train_elems == num_mask_elems:
  print('Each directory has ' + str(num_train_elems))
else:
  print('Directories differ: ' + str(num_train_elems) + ' images and ' + str(num_mask_elems) + ' masks')

## Definition of the split files dividing training images from validation images 

In [None]:
import math 

def create_splitfiles(dataset_dir, val_split, seed=1234):
  """Write train.txt and valid.txt, randomly splitting the tiles in two subsets.

  dataset_dir: folder containing 'train_data_images'; the two text files are
               written here, one file name (without extension) per line
  val_split: fraction of the tiles assigned to the validation subset
  seed: seed of the shuffle, so that the split is reproducible
  """
  import random  # local import: only needed here

  images_dir = os.path.join(dataset_dir, 'train_data_images')
  # os.listdir returns the names in arbitrary order: sort them first so that
  # the seeded shuffle always starts from the same sequence.
  images = sorted(os.listdir(images_dir))
  # Shuffle in place (a shuffle that returns a new object and whose result is
  # discarded would leave the list untouched).
  random.Random(seed).shuffle(images)

  num_elems = len(images)
  train_elems = math.ceil(num_elems * (1 - val_split))
  # Everything that is not written to train.txt goes to valid.txt.
  valid_elems = num_elems - train_elems
  print(num_elems)

  # The context manager guarantees both files are flushed and closed before
  # they are read back.
  with open(os.path.join(dataset_dir, 'train.txt'), 'w') as t, \
       open(os.path.join(dataset_dir, 'valid.txt'), 'w') as v:
    for count, filename in enumerate(images, start=1):
      target = t if count <= train_elems else v
      target.write(os.path.splitext(filename)[0] + '\n')

  print('Original number of elements is ' + str(num_elems) + '\nElements in train and valid files are ' + str(train_elems) + ' and ' + str(valid_elems))

In [None]:
val_split = 0.1
create_splitfiles(dataset_dir, val_split)

## Definition of training and validation dataset 

In [None]:
from PIL import Image


class CustomDataset(tf.keras.utils.Sequence):

  """
    CustomDataset inheriting from tf.keras.utils.Sequence.

    3 main methods:
      - __init__: save dataset params like directory, filenames..
      - __len__: return the total number of samples in the dataset
      - __getitem__: return a sample from the dataset

    Note: 
      - the custom dataset returns a single sample from the dataset. Then, we use 
        a tf.data.Dataset object to group samples into batches.
      - in this case we have a different structure of the dataset in memory. 
        We have all the images in the same folder and the training and validation splits
        are defined in text files.

  """

  def __init__(self, dataset_dir, which_subset, img_generator=None, mask_generator=None, 
               preprocessing_function=None, out_shape=None, seed=None):
    """
      dataset_dir: folder containing train.txt / valid.txt, 'train_data_images' and 'train_data_masks'
      which_subset: 'training' or 'validation'
      img_generator, mask_generator: ImageDataGenerator objects used for augmentation
                                     (training only, and only when both are given)
      preprocessing_function: optional function applied to the image array
      out_shape: (width, height) every image and mask is resized to
      seed: seed of the augmentation sequence; the module-level SEED is used when None
    """
    if which_subset == 'training':
      subset_file = os.path.join(dataset_dir, 'train.txt')
    elif which_subset == 'validation':
      subset_file = os.path.join(dataset_dir, 'valid.txt')
    else:
      raise ValueError("which_subset must be 'training' or 'validation', got " + repr(which_subset))

    # One file name (without extension) per line; blank lines are ignored.
    with open(subset_file, 'r') as f:
      subset_filenames = [line.strip() for line in f if line.strip()]

    self.which_subset = which_subset
    self.dataset_dir = dataset_dir
    self.subset_filenames = subset_filenames
    self.img_generator = img_generator
    self.mask_generator = mask_generator
    self.preprocessing_function = preprocessing_function
    self.out_shape = out_shape
    # Source of the per-sample augmentation seeds (reproducible sequence).
    self._rng = np.random.RandomState(SEED if seed is None else seed)

  def __len__(self):
    return len(self.subset_filenames)

  def __getitem__(self, index):
    """Return (image array, float32 mask array of shape (H, W, 1)) for sample `index`."""
    # Read Image
    curr_filename = self.subset_filenames[index]
    # Tiles whose name starts with "Roseau" are stored as .png, all the others as .jpg
    img_ext = '.png' if curr_filename[:6] == "Roseau" else '.jpg'
    img_path = os.path.join(self.dataset_dir, 'train_data_images', curr_filename + img_ext)

    mask_arr = read_rgb_mask(os.path.join(self.dataset_dir, 'train_data_masks', curr_filename + '.png'), self.out_shape)

    # Resize image (the mask is resized by read_rgb_mask)
    with Image.open(img_path) as img_file:
      img = img_file.resize(self.out_shape)
    img_arr = np.array(img)

    mask_arr = np.expand_dims(mask_arr, -1)

    # Without augmentation the mask is returned as it is.
    out_mask = mask_arr

    if self.which_subset == 'training':
      if self.img_generator is not None and self.mask_generator is not None:
        # Perform data augmentation
        # We can get a random transformation from the ImageDataGenerator using get_random_transform
        # and we can apply it to the image using apply_transform.
        # A new seed is drawn for every sample: image and mask share it (same
        # transform for both), while different samples get different transforms.
        transform_seed = int(self._rng.randint(0, 2**31 - 1))
        img_t = self.img_generator.get_random_transform(img_arr.shape, seed=transform_seed)
        mask_t = self.mask_generator.get_random_transform(mask_arr.shape, seed=transform_seed)
        img_arr = self.img_generator.apply_transform(img_arr, img_t)
        # ImageDataGenerator use bilinear interpolation for augmenting the images.
        # Thus, when applied to the masks it will output 'interpolated classes', which
        # is an unwanted behaviour. As a trick, we can transform each class mask 
        # separately and then we can cast to integer values (as in the binary segmentation notebook).
        # Finally, we merge the augmented binary masks to obtain the final segmentation mask.
        out_mask = np.zeros_like(mask_arr)
        for c in np.unique(mask_arr):
          if c > 0:
            curr_class_arr = np.float32(mask_arr == c)
            curr_class_arr = self.mask_generator.apply_transform(curr_class_arr, mask_t)
            # from [0, 1] to {0, 1}
            curr_class_arr = np.uint8(curr_class_arr)
            # recover original class
            curr_class_arr = curr_class_arr * c 
            out_mask += curr_class_arr

    if self.preprocessing_function is not None:
        img_arr = self.preprocessing_function(img_arr)
    #else:
    #    img_arr = img_arr / 255.0 #for unclear reasons this didn't help

    return img_arr, np.float32(out_mask)

In [None]:
# Shape of the images fed to the model (tiles are square).
img_h = img_w = 224
# Target size used when resizing, in (width, height) order.
out_shape = [img_w, img_h]

for shape_value in (img_h, img_w, out_shape):
    print(shape_value)

# Training subset: augmented through the two generators.
dataset = CustomDataset(
    dataset_dir,
    'training',
    img_generator=img_data_gen,
    mask_generator=mask_data_gen,
    out_shape=out_shape,
)
# Validation subset: no augmentation.
dataset_valid = CustomDataset(dataset_dir, 'validation', out_shape=out_shape)

In [None]:
# Batch size, shared by both pipelines and by the step counts passed to fit().
bs=32

# Wrap the training samples into a tf.data pipeline: batch, then repeat forever.
train_dataset = tf.data.Dataset.from_generator(lambda: dataset,
                                               output_types=(tf.float32, tf.float32),
                                               output_shapes=([None, None, 3], [None, None, 1]))
train_dataset = train_dataset.batch(bs)

train_dataset = train_dataset.repeat()

# Same pipeline for the validation samples.
valid_dataset = tf.data.Dataset.from_generator(lambda: dataset_valid,
                                               output_types=(tf.float32, tf.float32),
                                               output_shapes=([None, None, 3], [None, None, 1]))
valid_dataset = valid_dataset.batch(bs)

valid_dataset = valid_dataset.repeat()

# Definition of the U-net model

In [None]:
# U-net model with residual connections
from tensorflow.keras import layers 

def get_model(img_size, num_classes):
    """Build a U-Net-like segmentation network with residual connections.

    img_size: tuple (height, width) of the input images; 3 channels are appended
    num_classes: number of output channels of the final softmax layer

    The entry convolution and the three pooling blocks each halve the spatial
    size (1/16 overall); the four upsampling blocks each double it, so the
    output has the same height and width as the input.

    Returns the (uncompiled) tf.keras.Model.
    """
    inputs = tf.keras.Input(shape=img_size + (3,))

    ### [First half of the network: downsampling inputs] ###

    # Entry block
    x = layers.Conv2D(32, 3, strides=2, padding="same")(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    previous_block_activation = x  # Set aside residual

    # Blocks 1, 2, 3 are identical apart from the feature depth.
    for filters in [64, 128, 256]:
        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(filters, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)
        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(filters, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)

        x = layers.MaxPooling2D(3, strides=2, padding="same")(x)

        # Project residual: the strided 1x1 convolution matches both the
        # spatial size and the number of channels of x
        residual = layers.Conv2D(filters, 1, strides=2, padding="same")(
            previous_block_activation
        )
        x = layers.add([x, residual])  # Add back residual
        previous_block_activation = x  # Set aside next residual

    ### [Second half of the network: upsampling inputs] ###

    for filters in [256, 128, 64, 32]:
        x = layers.Activation("relu")(x)
        x = layers.Conv2DTranspose(filters, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)
        x = layers.Activation("relu")(x)
        x = layers.Conv2DTranspose(filters, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)

        x = layers.UpSampling2D(2)(x)

        # Project residual: upsample it like x, then match the channels
        residual = layers.UpSampling2D(2)(previous_block_activation)
        residual = layers.Conv2D(filters, 1, padding="same")(residual)
        x = layers.add([x, residual])  # Add back residual
        previous_block_activation = x  # Set aside next residual

    # Add a per-pixel classification layer
    outputs = layers.Conv2D(num_classes, 3, activation="softmax", padding="same")(x)

    # Define the model
    model = tf.keras.Model(inputs, outputs)
    return model


# Free up RAM in case the model definition cells were run multiple times
tf.keras.backend.clear_session()

# Input size (height, width) and number of segmentation classes used to build the model
img_size=(224,224)
num_classes=3

Classic U-Net:

In [None]:
# NOTE(review): star imports from the standalone `keras` package, while the rest
# of the notebook uses `tf.keras` - consider explicit tf.keras imports.
from keras.layers import *
from keras.models import *
def create_model_unet(num_classes):
    """Build a classic U-Net: 4 downsampling steps, a bottleneck and 4 upsampling steps.

    num_classes: number of channels of the final 1x1 softmax convolution

    The input has 3 channels and no fixed height/width.
    NOTE(review): the skip concatenations presumably require height and width
    to be multiples of 16 (four 2x2 poolings) - confirm.

    Returns the (uncompiled) model.
    """
    inputs = Input((None,None,3))
    # Contracting path: two 3x3 convolutions followed by a 2x2 max pooling
    conv1 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(inputs)
    conv1 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    conv2 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool1)
    conv2 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    conv3 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool2)
    conv3 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
    conv4 = Conv2D(512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool3)
    conv4 = Conv2D(512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv4)
    drop4 = Dropout(0.5)(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)

    # Bottleneck
    conv5 = Conv2D(1024, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool4)
    conv5 = Conv2D(1024, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv5)
    drop5 = Dropout(0.2)(conv5)

    # Expanding path: upsample, concatenate with the matching encoder output, two 3x3 convolutions
    up6 = Conv2D(512, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(drop5))
    merge6 = concatenate([drop4,up6], axis = 3)
    conv6 = Conv2D(512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge6)
    conv6 = Conv2D(512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv6)
    #drop6 = Dropout(0.2)(conv6)

    up7 = Conv2D(256, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(conv6))
    merge7 = concatenate([conv3,up7], axis = 3)
    conv7 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge7)
    conv7 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv7)
    #drop7 = Dropout(0.2)(conv7)

    up8 = Conv2D(128, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(conv7))
    merge8 = concatenate([conv2,up8], axis = 3)
    conv8 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge8)
    conv8 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv8)
    #drop8 = Dropout(0.2)(conv8)

    up9 = Conv2D(64, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(conv8))
    merge9 = concatenate([conv1,up9], axis = 3)
    conv9 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge9)
    conv9 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv9)
    # NOTE(review): this layer squeezes the features to 2 channels right before the
    # num_classes-way softmax; it looks inherited from a binary U-Net - confirm it is intended.
    conv9 = Conv2D(2, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv9)
    # Per-pixel classification layer
    conv10 = Conv2D(filters=num_classes, kernel_size=(1, 1),strides=(1, 1),padding='same', activation='softmax')(conv9)

    model = Model(inputs = inputs, outputs = conv10)   
    

    return model

In [None]:
# U-net
# Build the network with residual connections; the commented line below
# builds the classic U-Net instead.
model = get_model(img_size,num_classes)
#model = create_model_unet(3)

model.summary()

# Visualize initialized weights
# model.weights

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 112, 112, 32) 896         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 112, 112, 32) 128         conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 112, 112, 32) 0           batch_normalization[0][0]        
______________________________________________________________________________________________

## Prepare the model for training

In [None]:
# Optimization params
# -------------------

# Loss: the sparse variant of the categorical cross-entropy, so that the masks
# can be used as integers instead of one-hot encoded labels
loss = tf.keras.losses.SparseCategoricalCrossentropy()
# learning rate
lr = 1e-5
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
# -------------------

def meanIoU(y_true, y_pred):
    """Mean intersection over union of classes 1 and 2, computed over the batch.

    y_true: integer-coded target masks
    y_pred: per-pixel softmax output of the model

    The background class 0 is excluded. The IoU of each class is smoothed with
    1e-7 on numerator and denominator, then the two values are averaged.
    """
    # predicted class = index of the largest softmax output
    predicted = tf.expand_dims(tf.argmax(y_pred, -1), -1)

    ious = []
    for class_id in (1, 2):
        # binary maps of the pixels assigned to this class
        pred_hits = tf.cast(tf.where(predicted == class_id, 1, 0), tf.float32)
        true_hits = tf.cast(tf.where(y_true == class_id, 1, 0), tf.float32)

        overlap = tf.reduce_sum(true_hits * pred_hits)
        total = tf.reduce_sum(true_hits) + tf.reduce_sum(pred_hits) - overlap

        ious.append((overlap + 1e-7) / (total + 1e-7))

    return tf.reduce_mean(ious)

# Validation metrics
# ------------------
metrics = ['accuracy', meanIoU]
# ------------------

# Compile Model
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

## Training with callbacks

In [None]:
# Name of the experiment folder: every team followed by a dot, then the crops in use.
dataset_description = ''.join(str(team) + '.' for team in teams)
dataset_description += ''.join(
    crop for crop, selected in (('Haricot', Haricot), ('Mais', Mais)) if selected
)
print(dataset_description)

Bipbip.Roseau.Weedelec.Pead.HaricotMais


In [None]:
import os
from datetime import datetime

# Folder collecting all the experiments run on this dataset selection
exps_dir = os.path.join(cwd, 'drive/My Drive/Keras4/', dataset_description)
os.makedirs(exps_dir, exist_ok=True)

now = datetime.now().strftime('%b%d_%H-%M-%S')

model_name = 'CNN'

# One folder per run, identified by model name and start time
exp_dir = os.path.join(exps_dir, model_name + '_' + str(now))
os.makedirs(exp_dir, exist_ok=True)

callbacks = []

# Model checkpoint
# ----------------
ckpt_dir = os.path.join(exp_dir, 'ckpts')
os.makedirs(ckpt_dir, exist_ok=True)

ckpt_callback = tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(ckpt_dir, 'cp_{epoch:02d}.ckpt'), 
                                                   save_weights_only=True,  # False to save the model directly
                                                   verbose=1
                                                   )
callbacks.append(ckpt_callback)

# Visualize Learning on Tensorboard
# ---------------------------------
tb_dir = os.path.join(exp_dir, 'tb_logs')
os.makedirs(tb_dir, exist_ok=True)

# By default shows losses and metrics for both training and validation
tb_callback = tf.keras.callbacks.TensorBoard(log_dir=tb_dir,
                                             profile_batch=0,
                                             histogram_freq=0)  # if 1 shows weights histograms
callbacks.append(tb_callback)

# Early Stopping
# --------------
early_stop = False
if early_stop:
    # The module is tf.keras.callbacks (plural)
    es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
    callbacks.append(es_callback)

In [None]:
# U-net model weights
# NOTE(review): presumably this checkpoint matches the network built by get_model;
# run this cell only when `model` was created that way - confirm.
model.load_weights('/content/drive/My Drive/Keras4/Bipbip.Roseau.Weedelec.Pead.HaricotMais/CNN_Dec15_08-49-32/ckpts/cp_01.ckpt')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f56cc073b38>

In [None]:
# Classic unet weights
# NOTE(review): presumably this checkpoint matches the network built by create_model_unet;
# run this cell only when `model` was created that way - confirm.
model.load_weights('/content/drive/My Drive/_Polimi/NN/CHALLENGE2/AlessioUnet_Dec27_19-53-48/ckpts/cp_06.ckpt')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f1f9011af60>

Best weights at https://drive.google.com/drive/folders/11GODGy9Sn5dKVAIgfE2mg-4war4sDClm?usp=sharing

In [None]:
# Both datasets repeat forever, so the number of batches per epoch must be given.
# Ceiling division: an epoch covers every sample once (the last, smaller batch
# included) and the step count is never 0 for a non-empty subset.
train_steps = (len(dataset) + bs - 1) // bs
valid_steps = (len(dataset_valid) + bs - 1) // bs

histCallbacks = model.fit(x=train_dataset,
                         epochs=4,  #### set repeat in training dataset
                         steps_per_epoch=train_steps,
                         validation_data=valid_dataset,
                         validation_steps=valid_steps,
                         callbacks=callbacks)

# Definition of the functions used to generate the prediction of the test images

In [None]:
# this function takes the tiles of a test image and feeds them to the model
# it outputs a list of predicted masks, one per tile
def predict_tiles(img_tiles, batch_size=32):
  """Predict the class of every pixel of every tile.

  img_tiles: list of tiles (PIL images or arrays), all with the same shape
  batch_size: number of tiles sent to the model at once

  Uses the module-level `model`.
  Returns a list with one 2D array of class indices per tile, in input order.
  """
  predicted_tiles = []
  # Tiles are predicted in groups instead of one by one: far fewer calls to
  # the model, while the memory needed stays bounded by batch_size.
  for start in range(0, len(img_tiles), batch_size):
    tile_batch = np.stack([np.array(tile) for tile in img_tiles[start:start + batch_size]])
    out_softmax = model.predict(x=tile_batch, verbose=0)

    # predicted class = index of the largest output; iterating over the
    # result yields one (height, width) array per tile
    predicted_tiles.extend(np.argmax(out_softmax, axis=-1))

  return predicted_tiles

In [None]:
# code for generating the entire mask, version 1 (second version is at the end of the notebook)
def combine_tiles_for_prediction(predicted_tiles, mask_tiles_pos, img_h, img_w, tile_size, num_classes=3):
  """Merge the predicted tiles into one mask by per-pixel majority vote.

  predicted_tiles: list of 2D arrays of class indices, one per tile
  mask_tiles_pos: list of (start_row, start_col) of each tile in the full image
  img_h, img_w: size of the full image
  tile_size: side of the tiles (only the top-left tile_size x tile_size part of a tile is used)
  num_classes: number of classes that can appear in the tiles

  Returns a 2D array (img_h, img_w) with the most voted class of each pixel;
  ties are resolved in favour of the lowest class index.
  """
  # third dimension of predicted_mask counts how many tiles define that specific pixel as belonging to each class
  # e.g. predicted_mask[i][j][0] contains the count of how many tiles containing pixel at pos [i][j] predicted this pixel as belonging to class 0
  predicted_mask = np.zeros((img_h, img_w, num_classes), dtype=int)
  class_ids = np.arange(num_classes)
  for mask_tile, (start_row, start_col) in zip(predicted_tiles, mask_tiles_pos):
    mask_tile = np.asarray(mask_tile)[:tile_size, :tile_size]
    tile_h, tile_w = mask_tile.shape
    # One-hot encode the tile, shape (tile_h, tile_w, num_classes), and add it
    # to the votes of the region it covers: one vote per pixel.
    one_hot_votes = mask_tile[..., np.newaxis] == class_ids
    predicted_mask[start_row:start_row + tile_h, start_col:start_col + tile_w] += one_hot_votes
  return np.argmax(predicted_mask, axis=-1)

In [None]:
import json

def rle_encode(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 - foreground, 0 - background
    Returns the runs as a string of space separated "start length" pairs,
    where start is the 1-based index of the run in the flattened array
    '''
    flat = img.flatten()
    # Pad with background on both sides so that every run has a start and an end
    padded = np.concatenate([[0], flat, [0]])
    # 1-based positions where the value changes: run starts and run ends alternate
    change_points = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    starts = change_points[0::2]
    lengths = change_points[1::2] - starts
    pairs = np.column_stack((starts, lengths)).ravel()
    return ' '.join(map(str, pairs))

In [None]:
# Filled by prediction_for_team_crop with one entry per test image
submission_dict = {}
# Side (in pixels) of the square tiles used when combining the predictions
tile_size = 224

In [None]:
from skimage import transform

def prediction_for_team_crop(team, crop, tile_size=224, overlapping=20):
  """Predict the mask of every test image of a team/crop pair and store it in submission_dict.

  team, crop: names of the folders inside 'Development_Dataset/Test_Dev'
  tile_size: side of the tiles the images are split into
  overlapping: number of pixels shared by neighbouring tiles

  For every image an entry keyed by the file name without extension is added to the
  module-level `submission_dict`, with the mask shape, team, crop and the
  run-length encoded crop (class 1) and weed (class 2) masks.
  """
  folder = os.path.join('Development_Dataset/Test_Dev',team,crop,'Images')
  image_filenames = next(os.walk(folder))[2]

  for image_filename in image_filenames:

    # Only the size is needed here: read it without decoding the pixels
    with Image.open(os.path.join(folder, image_filename)) as img:
      img_w, img_h = img.size

    # first define the tiles for each test image
    # (the same tile_size is used for splitting and for combining)
    img_tiles, mask_tiles, mask_tiles_pos = create_split_img(folder, image_filename, '', '', tile_size, overlapping, SplitMask=False)

    # get the prediction for each tile
    predicted_tiles = predict_tiles(img_tiles)

    # combine the predictions of each tile obtaining the final mask with the same dimension of the original test image 
    predicted_mask_arr = combine_tiles_for_prediction(predicted_tiles, mask_tiles_pos, img_h, img_w, tile_size)
    print(predicted_mask_arr.shape)
    print(np.unique(predicted_mask_arr))

    image_name = os.path.splitext(image_filename)[0]
    submission_dict[image_name] = {
        'shape': predicted_mask_arr.shape,
        'team': team,
        'crop': crop,
        # RLE encoding of the binary mask of each class
        'segmentation': {
            'crop': rle_encode(predicted_mask_arr == 1),
            'weed': rle_encode(predicted_mask_arr == 2),
        },
    }

In [None]:
# Predict every team/crop combination of the test set
for test_team in ('Bipbip', 'Pead', 'Roseau', 'Weedelec'):
  for test_crop in ('Haricot', 'Mais'):
    prediction_for_team_crop(test_team, test_crop)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0 2]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0 2]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0 2]
[0]
[0]
[0]
[0]
[0]
[0 2]
[0 2]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0 1]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
(3456, 5184)
(3456, 5184)
[0 1 2]
Development_Dataset/Test_Dev/Weedelec/Mais/Images/Weedelec_mais_2019-09-25T121851-310.jpg
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0 2]
[0 1 2]
[0 1 2]
[0 1 2]
[0 2]
[0]
[0]
[0]
[0]
[0]
[0 2]
[0]
[0]
[0 2]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0 1 2]
[0 1 2]
[0 1 2]
[0 1 2]
[0 2]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0 1 2]
[0 1 2]
[0 1 2]
[0 2]
[0 1 2]
[0 2]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0 1]
[0 1 2]

In [None]:
# Write the collected predictions to the submission.json file
submission_path = '/content/submission.json'
with open(submission_path, 'w') as submission_file:
    json.dump(submission_dict, submission_file)

In [None]:
# code for generating the entire mask (second version, slower than the first one) 
img_dir = '/content/Development_Dataset/Training/Bipbip/Haricot/Images'
img_name = 'Bipbip_haricot_im_00321.jpg'
img = Image.open(os.path.join(img_dir, img_name))
img_arr = np.array(img)
print(img_arr.shape)

img_h = img_arr.shape[0]
img_w = img_arr.shape[1]

# Build the inputs of the voting loop in this cell, so that it does not depend
# on variables left over from other cells.
img_tiles, unused_mask_tiles, tile_positions = create_split_img(img_dir, img_name, '', '', tile_size, 20, SplitMask=False)
predicted_tiles = predict_tiles(img_tiles)
# Bounds of every tile in the full image: top/left inclusive, bottom/right exclusive
indexes_dict = {
    'top': [start_row for start_row, start_col in tile_positions],
    'bottom': [start_row + tile_size for start_row, start_col in tile_positions],
    'left': [start_col for start_row, start_col in tile_positions],
    'right': [start_col + tile_size for start_row, start_col in tile_positions],
}

predicted_mask = np.zeros((img_h, img_w), dtype=int)
for pixel_h in range(img_h):
  for pixel_w in range(img_w):
    tiles_sharing_pixel = []
    pixel_predicted_values = []
    class_count = {2:0, 1:0, 0:0}  #this order is chosen to break ties: the key is the class num and the value is number of tiles predict the pixel as belonging to that class 
    # collect the tiles that contain this pixel
    for index in range(len(img_tiles)):
      if (pixel_h >= indexes_dict['top'][index] and pixel_h < indexes_dict['bottom'][index] and pixel_w >= indexes_dict['left'][index] and pixel_w < indexes_dict['right'][index]):
        tiles_sharing_pixel.append((index, predicted_tiles[index]))
    # read the class each of those tiles predicted for this pixel
    for elem in tiles_sharing_pixel:
      tile_pixel_h = pixel_h - indexes_dict['top'][elem[0]]
      tile_pixel_w = pixel_w - indexes_dict['left'][elem[0]]
      pixel_predicted_values.append(elem[1][tile_pixel_h][tile_pixel_w])
    for i in range(len(pixel_predicted_values)):
      class_count[pixel_predicted_values[i]] += 1
    # majority vote; the strict comparison keeps the first class (in the
    # order of class_count) among those with the same number of votes
    best_count = 0
    predicted_class = 0
    for class_num in class_count.keys():
      if(best_count < class_count[class_num]):
        best_count = class_count[class_num]
        predicted_class = class_num
    predicted_mask[pixel_h][pixel_w] = predicted_class