## Parse the TFRecord training data and load it into a `tf.data.Dataset`

In [2]:
## Mount Google Drive at /content/drive/ (Colab only); the paths used below all live under this mount point.
from google.colab import drive
drive.mount('/content/drive/')


Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


### <font color=red> **Note: the TensorFlow version must be consistent with the TensorFlow version on Google AI Platform; we use TensorFlow 2.2.0 here!**</font>

In [3]:
# Uncomment to pin the runtime to the TensorFlow version this notebook was run with:
# !pip install tensorflow==2.2.0
import tensorflow as tf
print(tf.__version__)  # the recorded output of this cell is 2.2.0

2.2.0


In [6]:
import os
# Change into the project folder on Drive before the imports below;
# presumably `utils` and `models` are project-local packages resolved
# relative to this working directory.
os.chdir("/content/drive/My Drive/Earth-Engine-with-Deep-Learning")
from utils import imgShow
import matplotlib.pyplot as plt
from models.models import UNet
import folium
import datetime
import random
# Uncomment to print the source of the model definitions:
# !cat models/models.py

In [7]:
tf.random.set_seed(111)  # global TensorFlow seed, set to make the training process reproducible
random.seed(222)  # seed for Python's built-in `random` module

In [9]:
## Hyper-parameters and configuration
# Drive folders (relative to "My Drive") holding the training / evaluation TFRecords.
Image_Folder_tra = 'EE_Image/MSMT_RF_Impervious_traData'   # NOTE: can't write into the second-level directory
Image_Folder_eva = 'EE_Image/MSMT_RF_Impervious_evaData'

## TFRecord features
# Input band names per sensor family, followed by the single target band.
Bands_l8 = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7']
Bands_l57 = ['B1', 'B2', 'B3', 'B4', 'B5', 'B7']
Targets = ['impervious']
Features_l8 = [*Bands_l8, *Targets]
Features_l57 = [*Bands_l57, *Targets]

# Size and shape of the patches expected by the model; every feature is
# declared as one float32 patch of this shape.
Kernel_shape = [256, 256]

Columns_l8 = [
  tf.io.FixedLenFeature(shape=Kernel_shape, dtype=tf.float32) for _ in Features_l8
]
Features_Dict_l8 = {name: column for name, column in zip(Features_l8, Columns_l8)}

Columns_l57 = [
  tf.io.FixedLenFeature(shape=Kernel_shape, dtype=tf.float32) for _ in Features_l57
]
Features_Dict_l57 = {name: column for name, column in zip(Features_l57, Columns_l57)}

# Model training parameters.
Batch_size = 32
Epochs = 20
Buffer_size = 2000   # shuffle buffer size used by the dataset builders
Optimizer = tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.9, beta_2=0.999)
Loss = tf.keras.losses.BinaryCrossentropy(from_logits=False, label_smoothing=0)
Metrics = ['RootMeanSquaredError']

# Where the trained model and its TensorBoard logs are written.
# NOTE(review): the run name says "50epoch" while Epochs is 20 - confirm the intended name.
path_pretrain = "/content/drive/My Drive/Earth-Engine-with-Deep-Learning/models/pretrain"
model_path = 'unet_MSMT_train_50epoch_CEloss_nosiyAug'
path_save_model = '/'.join([path_pretrain, model_path, 'model'])
## configure the tensorboard
log_dir = '/'.join([path_pretrain, model_path]) + "/logs"
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir)
log_dir
# path_save_model

'/content/drive/My Drive/Earth-Engine-with-Deep-Learning/models/pretrain/unet_MSMT_train_50epoch_CEloss_nosiyAug/logs'

### Parse the tfrecord data

In [10]:
# Glob patterns of the gzipped TFRecord files on Drive, one per sensor family and split.
# NOTE(review): the "l57" patterns only match file names containing "Landsat7" -
# confirm this covers every Landsat 5/7 file that is meant to be loaded.
tra_pattern_l57 = f'/content/drive/My Drive/{Image_Folder_tra}/Train_Landsat7*.tfrecord.gz'
tra_pattern_l8 = f'/content/drive/My Drive/{Image_Folder_tra}/Train_Landsat8*.tfrecord.gz'
eva_pattern_l57 = f'/content/drive/My Drive/{Image_Folder_eva}/Eva_Landsat7*.tfrecord.gz'
eva_pattern_l8 = f'/content/drive/My Drive/{Image_Folder_eva}/Eva_Landsat8*.tfrecord.gz'

# Dataset loading functions
def parse_tfrecord_l57(example_proto):
    """Parse one serialized example using the Landsat 5/7 feature spec.

    Args:
        example_proto: a single serialized example read from a TFRecord file.

    Returns:
        A dict of tensors keyed by feature name, as declared in
        `Features_Dict_l57`.
    """
    parsed = tf.io.parse_single_example(
        serialized=example_proto, features=Features_Dict_l57)
    return parsed

def to_tuple_l57(inputs):
    """Turn a parsed Landsat 5/7 example into an (image, truth) pair.

    Args:
        inputs: dict of per-feature tensors, looked up by the names in
            `Features_l57`.

    Returns:
        Tuple of two tensors with the feature axis last: the first holds the
        `Bands_l57` channels, the second the remaining (target) channel(s).
    """
    n_bands = len(Bands_l57)
    layers = [inputs.get(name) for name in Features_l57]
    # Stack along a new leading axis, then move that axis to the end.
    feature_first = tf.stack(layers, axis=0)
    feature_last = tf.transpose(feature_first, perm=[1, 2, 0])
    image = feature_last[:, :, :n_bands]
    truth = feature_last[:, :, n_bands:]
    return image, truth

def parse_tfrecord_l8(example_proto):
    """Parse one serialized example using the Landsat 8 feature spec.

    Args:
        example_proto: a single serialized example read from a TFRecord file.

    Returns:
        A dict of tensors keyed by feature name, as declared in
        `Features_Dict_l8`.
    """
    parsed = tf.io.parse_single_example(
        serialized=example_proto, features=Features_Dict_l8)
    return parsed
 
def to_tuple_l8(inputs):
    """Turn a parsed Landsat 8 example into an (image, truth) pair.

    Args:
        inputs: dict of per-feature tensors, looked up by the names in
            `Features_l8`.

    Returns:
        Tuple of two tensors with the feature axis last: the first holds the
        `Bands_l8` channels, the second the remaining (target) channel(s).
    """
    n_bands = len(Bands_l8)
    layers = [inputs.get(name) for name in Features_l8]
    # Stack along a new leading axis, then move that axis to the end.
    feature_first = tf.stack(layers, axis=0)
    feature_last = tf.transpose(feature_first, perm=[1, 2, 0])
    image = feature_last[:, :, :n_bands]
    truth = feature_last[:, :, n_bands:]
    return image, truth

def image_aug(image, truth, flip = True, rot = True, noisy = True):
    """Randomly augment an (image, truth) pair with flips, rotations and noise.

    Every random decision is drawn with TensorFlow random ops. This function
    is passed to `Dataset.map`, which traces it once into a graph; values
    drawn with Python's `random` module would be evaluated a single time at
    trace time and then reused for every sample (always the same flip
    direction, rotation count and noise level). TensorFlow ops are
    re-evaluated for each element instead.

    Args:
        image: 3-D image tensor, channels last.
        truth: 3-D label tensor, channels last; it receives the same
            geometric transforms as `image` but never the noise.
        flip: if true, mirror the pair with probability 0.5, choosing
            left-right or up-down with equal probability.
        rot: if true, rotate the pair with probability 0.5 by 1, 2 or 3
            quarter turns (chosen uniformly).
        noisy: if true, add zero-mean Gaussian noise to `image` with
            probability 0.5; the standard deviation is drawn uniformly from
            [0.002, 0.03).

    Returns:
        Tuple `(image, truth)` of the augmented tensors.
    """
    if flip:
        if tf.random.uniform(()) > 0.5:
            if tf.random.uniform(()) > 0.5:  ## horizontal or vertical mirroring
                image = tf.image.flip_left_right(image)
                truth = tf.image.flip_left_right(truth)
            else:
                image = tf.image.flip_up_down(image)
                truth = tf.image.flip_up_down(truth)
    if rot:
        if tf.random.uniform(()) > 0.5:
            # maxval is exclusive for integer dtypes, so this yields 1, 2 or 3.
            quarter_turns = tf.random.uniform((), minval=1, maxval=4, dtype=tf.int32)
            rotated_image = tf.image.rot90(image, k=quarter_turns)
            rotated_truth = tf.image.rot90(truth, k=quarter_turns)
            # With a tensor-valued k the static height/width are lost; for
            # square patches the shape cannot change, so restore it.
            if image.shape[:2].is_fully_defined() and image.shape[0] == image.shape[1]:
                rotated_image.set_shape(image.shape)
            if truth.shape[:2].is_fully_defined() and truth.shape[0] == truth.shape[1]:
                rotated_truth.set_shape(truth.shape)
            image = rotated_image
            truth = rotated_truth
    if noisy:
        if tf.random.uniform(()) > 0.5:
            std = tf.random.uniform((), minval=0.002, maxval=0.03, dtype=tf.float32)
            gnoise = tf.random.normal(shape=tf.shape(image), mean=0.0, stddev=std, dtype=tf.float32)
            image = tf.add(image, gnoise)
    return image, truth

def get_training_dataset():
    """Build the augmented, shuffled, batched and endlessly repeating training set.

    The Landsat 5/7 and Landsat 8 TFRecord files are each read, parsed and
    split into (image, truth) pairs; the Landsat 8 samples are appended after
    the Landsat 5/7 samples before augmentation, shuffling and batching.

    Returns:
        A `tf.data.Dataset` yielding batches of `Batch_size` (image, truth) pairs.
    """
    per_sensor = []
    sensor_specs = (
        (tra_pattern_l57, parse_tfrecord_l57, to_tuple_l57),   # Landsat 5/7
        (tra_pattern_l8, parse_tfrecord_l8, to_tuple_l8),      # Landsat 8
    )
    for pattern, parse_fn, split_fn in sensor_specs:
        files = tf.io.gfile.glob(pattern)
        records = tf.data.TFRecordDataset(files, compression_type='GZIP')
        per_sensor.append(records.map(parse_fn).map(split_fn))
    landsat57, landsat8 = per_sensor
    # NOTE(review): the shuffle only mixes within a window of Buffer_size
    # samples of the concatenated stream - confirm that is sufficient to
    # interleave the two sensors.
    return (
        landsat57.concatenate(landsat8)
        .map(image_aug)
        .shuffle(Buffer_size)
        .batch(Batch_size)
        .repeat()
    )

def get_eval_dataset():
    """Build the shuffled, endlessly repeating evaluation set with batch size 1.

    The Landsat 5/7 and Landsat 8 TFRecord files are each read, parsed and
    split into (image, truth) pairs; the Landsat 8 samples are appended after
    the Landsat 5/7 samples. No augmentation is applied.

    Returns:
        A `tf.data.Dataset` yielding single-sample batches of (image, truth) pairs.
    """
    per_sensor = []
    sensor_specs = (
        (eva_pattern_l57, parse_tfrecord_l57, to_tuple_l57),   # Landsat 5&7
        (eva_pattern_l8, parse_tfrecord_l8, to_tuple_l8),      # Landsat 8
    )
    for pattern, parse_fn, split_fn in sensor_specs:
        files = tf.io.gfile.glob(pattern)
        records = tf.data.TFRecordDataset(files, compression_type='GZIP')
        per_sensor.append(records.map(parse_fn).map(split_fn))
    landsat57, landsat8 = per_sensor
    return (
        landsat57.concatenate(landsat8)
        .shuffle(Buffer_size)
        .batch(1)
        .repeat()
    )


In [11]:
# Build the training / evaluation input pipelines.
traData = get_training_dataset()
evaData = get_eval_dataset()
# Instantiate the network here so that `model` exists when the compile/fit
# cell runs on a fresh kernel (previously this line was commented out and
# `model` was never defined anywhere in the notebook).
model = UNet(input_shape=(256, 256, 6), nclasses=2)
# Optional sanity checks - uncomment to inspect one batch or the architecture:
# one_traBatch = next(iter(traData.take(1)))
# one_traBatch
# one_evaBatch = next(iter(evaData.take(1)))
# one_evaBatch
# model.summary()

### Model training

In [12]:
## or load the pre-trained model
# Cloud authentication.
# from google.colab import auth
# auth.authenticate_user()
# Model_gs_path = 'gs://earth-engine-bucket-1/ai_platform_train/unet_256_l8l5_20epoch/model'
# Model_drive_path = "/content/drive/My Drive/Earth-Engine-with-Deep-Learning/models/pretrain/unet_MSMT_train_50epoch/model"
# model = tf.keras.models.load_model(Model_drive_path)
# model.summary()

In [13]:
# Attach the configured optimizer, loss and metrics to the model.
model.compile(
    optimizer=tf.keras.optimizers.get(Optimizer),
    loss=tf.keras.losses.get(Loss),
    metrics=list(map(tf.keras.metrics.get, Metrics)),
)

# Both datasets repeat forever, so the epoch and validation lengths are set
# explicitly in steps.
# NOTE(review): 1000*6 and 300*6 are presumably the training / evaluation
# sample counts - confirm against the exported data.
model.fit(
    traData,
    validation_data=evaData,
    epochs=Epochs,
    steps_per_epoch=(1000 * 6) // Batch_size,
    validation_steps=300 * 6,
    callbacks=[tensorboard_callback],
)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fb067111dd8>

In [None]:
# Launch TensorBoard inside the notebook. The log directory is given as a
# relative path, so it is resolved against the current working directory
# (the project folder set with os.chdir in the import cell).
%load_ext tensorboard
%tensorboard --logdir models/pretrain/unet_MSMT_train_50epoch_CEloss_nosiyAug/logs


In [14]:
# Export the trained model to Drive in TensorFlow SavedModel format.
model.save(filepath=path_save_model, save_format='tf')


Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: /content/drive/My Drive/Earth-Engine-with-Deep-Learning/models/pretrain/unet_MSMT_train_50epoch_CEloss_nosiyAug/model/assets
