# UAVid Drone Dataset


UAVid Dataset
The UAVid dataset is a UAV video dataset for the semantic segmentation task, focusing on urban scenes. It has several features:

- Semantic segmentation
- 4K resolution UAV videos
- 8 object categories
- Street scene context

The segmentation categories need to be converted to the format of the Semantic Drone Dataset, as seen below.

## Semantic Annotation

The images are labeled densely using polygons and contain the following 24 classes: 
  - unlabeled
  - paved-area
  - dirt
  - grass
  - gravel
  - water
  - rocks
  - pool
  - vegetation
  - roof
  - wall
  - window
  - door
  - fence
  - fence-pole
  - person
  - dog
  - car
  - bicycle
  - tree
  - bald-tree
  - ar-marker
  - obstacle
  - conflicting

In [None]:
import cv2
import random
import albumentations as A
from matplotlib import pyplot as plt
import numpy as np
import os

# Root prefix that the dataset paths below are built from by string
# concatenation; "." is the setting for running locally.
drive_root = "." 

# Data Augmentation using Albumentations Library - Assumed Complete

Performed as described in README.md.

# Working with Augmented Dataset

In [None]:
import keract
import pickle
import numpy as np
import pandas as pd
from PIL import Image
from IPython.display import SVG
import matplotlib.pyplot as plt
%matplotlib inline
import os, re, sys, random, shutil, cv2

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import applications, optimizers
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.utils import model_to_dot, plot_model
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping, CSVLogger, LearningRateScheduler, TensorBoard
from tensorflow.keras.layers import Input, Lambda, Activation, Conv2D, MaxPooling2D, BatchNormalization, Add, concatenate, Conv2DTranspose

In [None]:
# Dataset to train on: 'uavid' or 'semantic-drone'
dataset = 'semantic-drone'

# The four folders all live under <drive_root>/augmented-<dataset>-dataset/
_augmented_root = f'{drive_root}/augmented-{dataset}-dataset'
train_images = f'{_augmented_root}/train_images/'
train_masks = f'{_augmented_root}/train_masks/'
val_images = f'{_augmented_root}/val_images/'
val_masks = f'{_augmented_root}/val_masks/'

## Labels for Augmented Images

In [None]:
# Class table: one row per class with its name and the r, g, b colour of its mask pixels
class_dict_df = pd.read_csv(
    drive_root + '/augmented-semantic-drone-dataset/class_dict.csv',
    index_col=False,
    skipinitialspace=True,
)
# class_dict_df

label_names = list(class_dict_df.name)

# Colour components as arrays, one entry per class in CSV row order
r = np.asarray(class_dict_df.r)
g = np.asarray(class_dict_df.g)
b = np.asarray(class_dict_df.b)

# label_codes[i] is the (r, g, b) tuple of the class in row i
label_codes = list(zip(r, g, b))
    
# label_codes, label_names
# class_dict_df

## Create Useful Label & Code Conversion Dictionaries
These will be used for:

* One hot encoding the mask labels for model training
* Decoding the predicted labels for interpretation and visualization 

In [None]:
# Class id <-> RGB colour tuple
id2code = dict(enumerate(label_codes))
code2id = {code: class_id for class_id, code in id2code.items()}

# Class id <-> class name
id2name = dict(enumerate(label_names))
name2id = {name: class_id for class_id, name in id2name.items()}

## Define Functions for One Hot Encoding RGB Labels & Decoding Encoded Predictions


In [None]:
def rgb_to_onehot(rgb_image, colormap = id2code):
    '''One hot encode an RGB mask
        Inputs: 
            rgb_image - image matrix (eg. 256 x 256 x 3 dimension numpy ndarray)
            colormap - dictionary of label id to color; it is indexed with 0 .. len(colormap)-1
        Output: int8 array of dimensions (height x width x num_classes) where num_classes = len(colormap);
            channel i is 1 where the pixel equals colormap[i] and 0 elsewhere
    '''
    height, width = rgb_image.shape[:2]
    num_classes = len(colormap)
    # One row per pixel, so a whole colour can be compared at once
    pixels = rgb_image.reshape((-1, 3))
    encoded_image = np.zeros((height, width, num_classes), dtype=np.int8)
    for class_id in range(num_classes):
        is_class = np.all(pixels == colormap[class_id], axis=1)
        encoded_image[:, :, class_id] = is_class.reshape((height, width))
    return encoded_image


def onehot_to_rgb(onehot, colormap = id2code):
    '''Decode one hot (or per-class score) maps back into an RGB mask
        Inputs: 
            onehot - one hot encoded image matrix (height x width x num_classes)
            colormap - dictionary of label id to color
        Output: Decoded RGB image (height x width x 3) as uint8; each pixel gets the
            color of the class with the largest value along the last axis
    '''
    class_ids = np.argmax(onehot, axis=-1)
    rgb = np.zeros(onehot.shape[:2] + (3,))
    for class_id, color in colormap.items():
        rgb[class_ids == class_id] = color
    return np.uint8(rgb)

# Creating Custom Image Data Generators
## Defining Data Generators


In [None]:
# Frames are multiplied by 1/255; masks get no transformation because their
# pixel values are the label colours.
data_gen_args = {'rescale': 1./255}
mask_gen_args = {}

# Frame generators (train / validation)
train_frames_datagen = ImageDataGenerator(**data_gen_args)
val_frames_datagen = ImageDataGenerator(**data_gen_args)

# Mask generators (train / validation)
train_masks_datagen = ImageDataGenerator(**mask_gen_args)
val_masks_datagen = ImageDataGenerator(**mask_gen_args)

# Shared seed so that images and their masks are read in the same order
seed = 1

# Custom Image Data Generators for Creating Batches of Frames and Masks

In [None]:
def TrainAugmentGenerator(train_images_dir, train_masks_dir, seed = 1, batch_size = 8, target_size = (512, 512)):
    '''Endless generator of training batches
        Inputs: 
            train_images_dir - train images directory
            train_masks_dir - train masks directory
            seed - seed provided to both flow_from_directory calls to ensure aligned data flow
            batch_size - number of images to import at a time
            target_size - tuple of integers (height, width)
            
        Output: yields (frames, masks) tuples forever, where frames is element 0 of the
            image batch and masks is an array of one hot encoded masks, one per
            mask in the batch (encoded with rgb_to_onehot and id2code)
    '''
    train_image_generator = train_frames_datagen.flow_from_directory(
        train_images_dir,
        batch_size = batch_size,
        seed = seed,
        target_size = target_size)

    train_mask_generator = train_masks_datagen.flow_from_directory(
        train_masks_dir,
        batch_size = batch_size,
        seed = seed,
        target_size = target_size)

    while True:
        # Built-in next() instead of the iterator's .next() method: the method
        # is not available on newer Keras releases, next() works on all of them.
        X1i = next(train_image_generator)
        X2i = next(train_mask_generator)

        #One hot encoding RGB images
        mask_encoded = [rgb_to_onehot(mask, id2code) for mask in X2i[0]]

        yield X1i[0], np.asarray(mask_encoded)

def ValAugmentGenerator(val_images_dir, val_masks_dir, seed = 1, batch_size = 8, target_size = (512, 512)):
    '''Endless generator of validation batches
        Inputs: 
            val_images_dir - validation images directory
            val_masks_dir - validation masks directory
            seed - seed provided to both flow_from_directory calls to ensure aligned data flow
            batch_size - number of images to import at a time
            target_size - tuple of integers (height, width)
            
        Output: yields (frames, masks) tuples forever, where frames is element 0 of the
            image batch and masks is an array of one hot encoded masks, one per
            mask in the batch (encoded with rgb_to_onehot and id2code)
    '''
    val_image_generator = val_frames_datagen.flow_from_directory(
        val_images_dir,
        batch_size = batch_size,
        seed = seed,
        target_size = target_size)

    val_mask_generator = val_masks_datagen.flow_from_directory(
        val_masks_dir,
        batch_size = batch_size,
        seed = seed,
        target_size = target_size)

    while True:
        # Built-in next() instead of the iterator's .next() method: the method
        # is not available on newer Keras releases, next() works on all of them.
        X1i = next(val_image_generator)
        X2i = next(val_mask_generator)

        #One hot encoding RGB images
        mask_encoded = [rgb_to_onehot(mask, id2code) for mask in X2i[0]]

        yield X1i[0], np.asarray(mask_encoded)


# Function to Create U-Net Model Using VGG-16 Pre-Trained Weights


In [None]:
# !mkdir pretrained_weights

In [None]:
#if not downloaded already
# !wget -O pretrained_weights/vgg16_weights_tf_dim_ordering_tf_kernels.h5  https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5

In [None]:
#training parameters
batch_size = 32 #reduce this if you run out of memory

# Sample counts are the number of entries in the 'train' / 'val' sub-folder of
# each image directory (no sorting needed just to count them).
num_train_samples = len(os.listdir(train_images+'train'))
num_val_samples = len(os.listdir(val_images+'val'))

# Round up so a final partial batch still counts as a step, and convert to int
# because Keras documents steps_per_epoch / validation_steps as integers.
steps_per_epoch = int(np.ceil(float(num_train_samples) / float(batch_size)))
print('steps_per_epoch: ', steps_per_epoch)
validation_steps = int(np.ceil(float(num_val_samples) / float(batch_size)))
print('validation_steps: ', validation_steps)

## Inference Starting point

In [None]:
#Create the model
def dice_coef(y_true, y_pred):
    '''Dice coefficient over all elements of the two tensors, with +1 added to
    numerator and denominator so the ratio stays defined when both sums are zero.
    '''
    overlap = K.sum(y_true * y_pred)
    total = K.sum(y_true) + K.sum(y_pred)
    return (2. * overlap + 1.) / (total + 1.)

def _conv_bn_relu(x, filters, name=None):
    '''Apply a 3x3 'same'-padded convolution, then batch normalization and ReLU.'''
    x = Conv2D(filters, (3, 3), padding='same', name=name)(x)
    x = BatchNormalization()(x)
    return Activation('relu')(x)


def _upconv_bn_relu(x, filters):
    '''Apply a 2x2 transposed convolution with stride 2, then batch normalization and ReLU.'''
    x = Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(x)
    x = BatchNormalization()(x)
    return Activation('relu')(x)


def unet(num_classes, input_shape, lr_init, vgg_weight_path=None):
    '''Build and compile a U-Net whose encoder uses the VGG-16 convolution layout
        Inputs:
            num_classes - number of output channels of the final softmax convolution
            input_shape - shape passed to the Input layer, eg. (height, width, 3)
            lr_init - learning rate given to the Adam optimizer
            vgg_weight_path - optional path to a weights file; when given, the weights
                are loaded by layer name into the encoder and every encoder layer is
                made non-trainable. When None, nothing is loaded or frozen.
        Output: compiled Model (categorical cross-entropy loss, dice_coef metric)
    '''
    img_input = Input(input_shape)

    # Encoder: (filters, number of convolutions) for blocks 1..5. The
    # convolutions are named block<N>_conv<M> so that weights can be loaded
    # into them by name.
    encoder_blocks = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
    skip_outputs = []
    x = img_input
    for block_no, (filters, num_convs) in enumerate(encoder_blocks, start=1):
        for conv_no in range(1, num_convs + 1):
            x = _conv_bn_relu(x, filters, name='block{}_conv{}'.format(block_no, conv_no))
        # Blocks 1-4 feed the decoder through skip connections and are pooled;
        # the block 5 output goes to the decoder un-pooled.
        if block_no < len(encoder_blocks):
            skip_outputs.append(x)
            x = MaxPooling2D()(x)

    # Extra pooling layer used only as the output of the encoder-only model below.
    for_pretrained_weight = MaxPooling2D()(x)

    # Load pretrained weights and freeze the layers they were loaded into.
    # The freeze loop sits inside this branch because `vgg16` exists only when
    # a weight path is given; outside it, the default vgg_weight_path=None
    # raised an error on the undefined name.
    if vgg_weight_path is not None:
        vgg16 = Model(img_input, for_pretrained_weight)
        vgg16.load_weights(vgg_weight_path, by_name=True)
        for layer in vgg16.layers:
            layer.trainable = False

    # Decoder: UP 1..4, each upsamples, concatenates the matching encoder
    # output (block 4 down to block 1) and applies two convolutions.
    for filters, skip in zip((512, 256, 128, 64), reversed(skip_outputs)):
        x = _upconv_bn_relu(x, filters)
        x = concatenate([x, skip])
        x = _conv_bn_relu(x, filters)
        x = _conv_bn_relu(x, filters)

    # last conv
    x = Conv2D(num_classes, (3, 3), activation='softmax', padding='same')(x)

    model = Model(img_input, x)
    model.compile(Adam(learning_rate=lr_init),
                  loss='categorical_crossentropy',
                  metrics=[dice_coef])
    return model

In [None]:
#Load VGG16 pretrained weights
# shape_size = (496,496) #resolution of images after compression, 512,512 is too large for colab

# To preserve the 2:3 aspect ratio with sides divisible by 16, the width is a
# multiple of 48, the LCM of 16 and 3.
scale = 12 #change me for memory constraints, 12 for 384x576
width = scale * 48 #must be divisible by 16
height = (2 * width) // 3
shape_size = (height, width)
print("shape_size: ", shape_size)
vgg16_unet = unet(
    num_classes = 24,
    input_shape = (*shape_size, 3),
    lr_init = 1e-4,
    vgg_weight_path = './pretrained_weights/vgg16_weights_tf_dim_ordering_tf_kernels.h5',
)


In [None]:
# Print the layer table, expanding nested models and showing each layer's trainable flag
vgg16_unet.summary(line_length=125, expand_nested=True, show_trainable=True)

In [None]:
#callback functions for training
def exponential_decay(lr0, s):
    '''Build a learning rate schedule that starts at lr0 and shrinks by a
    factor of 10 every s epochs.
        Inputs:
            lr0 - learning rate at epoch 0
            s - number of epochs over which the rate drops tenfold
        Output: function mapping an epoch number to lr0 * 0.1 ** (epoch / s)
    '''
    def exponential_decay_fn(epoch):
        exponent = epoch / s
        return lr0 * 0.1 ** exponent
    return exponential_decay_fn

# Learning rate: starts at 0.0001 and drops tenfold every 20 epochs
exponential_decay_fn = exponential_decay(lr0=0.0001, s=20)
lr_scheduler = LearningRateScheduler(exponential_decay_fn, verbose=1)

# Save the model whenever val_loss improves
checkpoint = ModelCheckpoint(
    filepath='vgg16_unet_model_sem_drone.h5',
    monitor='val_loss',
    mode='auto',
    save_best_only=True,
    # save_weights_only=False,
    verbose=1,
)

# Stop once val_loss has not improved by at least 0.001 for 6 epochs,
# then roll back to the best weights seen
earlystop = EarlyStopping(
    monitor='val_loss',
    mode='auto',
    min_delta=0.001,
    patience=6,
    restore_best_weights=True,
    verbose=1,
)

# Per-epoch metrics as CSV; the file is overwritten on every run
csvlogger = CSVLogger(filename="model_training_csv.log", separator=",", append=False)

callbacks = [checkpoint, earlystop, csvlogger, lr_scheduler]

In [None]:
# Check whether TensorFlow can see a GPU: prints every physical device it detects
print(tf.config.list_physical_devices())


In [None]:
#train model
# batch_size is passed to both generators explicitly: steps_per_epoch and
# validation_steps were computed from it, so leaving the generators on their
# own default (8) would make every epoch cover only part of the data.
history = vgg16_unet.fit(
    TrainAugmentGenerator(
        train_images_dir = train_images,
        train_masks_dir = train_masks,
        batch_size = batch_size,
        target_size = shape_size),
    steps_per_epoch=steps_per_epoch,
    validation_data = ValAugmentGenerator(
        val_images_dir = val_images,
        val_masks_dir = val_masks,
        batch_size = batch_size,
        target_size = shape_size),
    validation_steps = validation_steps,
    epochs = 100,
    callbacks=callbacks,
    use_multiprocessing=False,
    verbose=1
)

In [None]:
# Persist the history dictionary of the fit call so it can be reloaded later
with open('./trainHistoryDict', 'wb') as file_pi:
    pickle.dump(history.history, file_pi)

In [None]:
# Plot Dice coefficient, loss and learning rate against the epoch number.

# The learning rate is logged as 'lr' by older Keras releases and as
# 'learning_rate' by newer ones; use whichever key the history contains.
lr_key = 'lr' if 'lr' in history.history else 'learning_rate'

# Derive the x ticks from the number of epochs actually run (early stopping
# may end training at any point) instead of assuming 20 epochs.
num_epochs = len(history.history['loss'])
epoch_ticks = np.arange(0, num_epochs + 1, max(1, num_epochs // 10))

fig, ax = plt.subplots(1, 3, figsize=(30, 5))
ax = ax.ravel()
metrics = ['Dice Coefficient', 'Loss', 'Learning Rate']

for i, met in enumerate(['dice_coef', 'loss', lr_key]):
    ax[i].plot(history.history[met], 'o-')
    if met != lr_key:
        # Only the model metrics have a validation counterpart
        ax[i].plot(history.history['val_' + met], 'o-')
        ax[i].legend(['Train', 'Validation'])
    ax[i].set_title('{} vs Epochs'.format(metrics[i]), fontsize=16)
    ax[i].set_xlabel('Epochs')
    ax[i].set_ylabel(metrics[i])
    ax[i].set_xticks(epoch_ticks)
    ax[i].xaxis.grid(True, color = "lightgray", linewidth = 0.8, linestyle = "-")
    ax[i].yaxis.grid(True, color = "lightgray", linewidth = 0.8, linestyle = "-")

plt.savefig('model_metrics_plot.png', facecolor= 'w',transparent= False, bbox_inches= 'tight', dpi= 150)

In [None]:
# Load the weights from the file the ModelCheckpoint callback writes (best val_loss)
vgg16_unet.load_weights("./vgg16_unet_model_sem_drone.h5")

In [None]:
# Run the model on a few demo images and show each one next to its predicted mask
demo_dir = '/augmented-semantic-drone-dataset/demo_img/demo/'
demo_img_names = ["demo_img.jpg", "demo_img2.jpg", "demo_img3.jpg", "demo_img4.jpg"]
for demo_img_name in demo_img_names:
    demo_img_path = drive_root + demo_dir + demo_img_name
    inference_img = cv2.imread(demo_img_path)
    # cv2.imread signals failure by returning None rather than raising
    if inference_img is None:
        raise FileNotFoundError('Could not read demo image: ' + demo_img_path)

    # Same preparation as the training frames: RGB order, network input size, scaled by 1/255
    inference_img = cv2.cvtColor(inference_img,cv2.COLOR_BGR2RGB)
    inference_img = cv2.resize(inference_img, (width, height), interpolation = cv2.INTER_NEAREST)
    inference_img = inference_img[np.newaxis,...]*1./255
    print(inference_img.shape)
    pred = vgg16_unet.predict(inference_img)

    fig = plt.figure(figsize=(20,8))

    ax1 = fig.add_subplot(1,2,1)
    ax1.imshow(inference_img[0])
    ax1.title.set_text('Original Image')
    ax1.grid(False)

    ax3 = fig.add_subplot(1,2,2)
    ax3.set_title('Predicted Mask')
    ax3.imshow(onehot_to_rgb(pred[0],id2code))
    ax3.grid(False)

    plt.show()

In [None]:
# List the entries of the data directory; each one is treated as a subset by the cells below
print('subsets: ', os.listdir( drive_root + '/data-map2dfusion/data/'))

In [None]:
# Inference image splitting to increase the resolution
# tile_count is defined in this cell because the tile size below depends on
# it; without it, running the notebook top to bottom fails with a NameError.
tile_count = 2    # tiles per side; the four corner tiles below assume 2
overlap_px = 100  # extra pixels added to each tile's height and width

for subset in os.listdir( drive_root + '/data-map2dfusion/data/'):
    print('subset: ', subset)
    full_size_dir = f'/data-map2dfusion/data/{subset}/rgb/'
    split_dir = f'/data-map2dfusion/data/{subset}/rgb_split_overlap/'
    os.makedirs(drive_root + split_dir, exist_ok=True)
    full_size_img_names = sorted(os.listdir(drive_root + full_size_dir))

    # divide each image into 4 overlapping patches
    for full_size_img_name in full_size_img_names:
        if not full_size_img_name.endswith('.jpg'):
            continue
        # full_size_img_name = '1459661063.910500.jpg' #test image
        img_path = drive_root + full_size_dir + full_size_img_name
        img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None rather than raising
        if img is None:
            raise OSError('Could not read image: ' + img_path)

        # Tile size: one tile_count-th of the image plus the overlap
        M = img.shape[0]//tile_count + overlap_px
        N = img.shape[1]//tile_count + overlap_px
        # One tile anchored at each corner: top-left, top-right, bottom-left, bottom-right
        tiles = [img[0:M,0:N], img[0:M,-N:], img[-M:,0:N], img[-M:,-N:]]

        base_name = os.path.splitext(full_size_img_name)[0]
        for i,tile in enumerate(tiles):
            cv2.imwrite(drive_root + split_dir + base_name + '_' + str(i) + '.jpg', tile)
        # break


    




# Disabled alternative, kept inside a bare string literal so it is never executed:
# it cuts each image into a plain grid of tiles without any overlap.
'''
# divide each image into 4 tiles

for full_size_img_name in full_size_img_names:
    # full_size_img_name = '1459661063.910500.jpg' #test image
    img = cv2.imread(drive_root + full_size_dir + full_size_img_name)
    M,N = img.shape[0]//tile_count, img.shape[1]//tile_count
    tiles = [img[x:x+M,y:y+N] for x in range(0,img.shape[0],M) for y in range(0,img.shape[1],N)]
    for i,tile in enumerate(tiles):
        cv2.imwrite(drive_root + split_dir + full_size_img_name[:-4] + '_' + str(i) + '.jpg', tile)
    # break
'''

In [None]:
## For overlapping tiles
# Segment every overlapping tile, then stitch each group of tile_count**2
# consecutive tiles back into one full-size mask.
# NOTE: overlap_px is the value set in the image splitting cell and must match
# the overlap the tiles were cut with.
tile_count = 2

# Size of the full frames and, derived from it, of one tile without overlap
original_height, original_width = 1080, 1920
tile_height = original_height // tile_count
tile_width = original_width // tile_count

# Built from drive_root so the subsets listed and the folders read are always the same
pred_datasets_dir = os.path.join(drive_root, 'data-map2dfusion', 'data')
# pred_datasets_dir = os.path.expanduser('~') + '/data/3d_vision/'

for subset in os.listdir(pred_datasets_dir):
    if subset == 'phantom3-factory-kfs':
        continue
    pred_dataset = subset
    print('pred_dataset: ', pred_dataset)
    pred_images_dir = os.path.join(pred_datasets_dir, pred_dataset, 'rgb_split_overlap')
    tiles = []
    # save to inference folder
    segmentation_dir = os.path.join(pred_datasets_dir, pred_dataset, 'sem')
    os.makedirs(segmentation_dir, exist_ok=True)

    # Sorted order keeps the tiles of one image (suffixes _0 .. _3) together and in order
    for img in sorted(os.listdir(pred_images_dir)):
        if not img.endswith('.jpg'):
            continue
        img_path = os.path.join(pred_images_dir, img)
        inference_img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None rather than raising
        if inference_img is None:
            raise OSError('Could not read image: ' + img_path)
        inference_img = cv2.cvtColor(inference_img,cv2.COLOR_BGR2RGB)
        inference_img = cv2.resize(inference_img, (width, height), interpolation = cv2.INTER_NEAREST)
        inference_img = inference_img[np.newaxis,...]*1./255
        pred = vgg16_unet.predict(inference_img)
        segmented_img = onehot_to_rgb(pred[0],id2code)
        segmented_img = cv2.cvtColor(segmented_img,cv2.COLOR_RGB2BGR)
        # #save tile img
        # cv2.imwrite(os.path.join(segmentation_dir, img[:-4] + '.png'), segmented_img)

        #resize to original tile size with overlap
        segmented_img = cv2.resize(segmented_img, (tile_width + overlap_px, tile_height + overlap_px), interpolation = cv2.INTER_NEAREST)

        tiles.append(segmented_img)
        if len(tiles) == tile_count**2:
            # Lay the tiles out as a tile_count x tile_count grid (rows first)
            stack = []
            for i in range(tile_count):
                stack.append(np.hstack(tiles[i*tile_count:(i+1)*tile_count]))
            segmented_img_overlapped = np.vstack(stack)
            # #save overlapped tile img
            # cv2.imwrite(os.path.join(segmentation_dir, img[:-4] + '_overlapped.png'), segmented_img_overlapped)

            # Drop the middle band of rows and of columns (the overlap), keeping
            # tile_height rows / tile_width columns from each outer edge
            segmented_img = np.delete(segmented_img_overlapped, np.s_[tile_height: -tile_height], axis=0)
            segmented_img = np.delete(segmented_img, np.s_[tile_width: -tile_width], axis=1)

            # Strip the '_<i>.jpg' suffix of the last tile to get the source image name
            img_name = img[:-6] + '.png'
            #save merged img
            cv2.imwrite(os.path.join(segmentation_dir, img_name), segmented_img)
            tiles = []