In [1]:
# mount on google drive
from google.colab import drive
drive.mount('/content/drive/')
# go to your code files directory
import os
os.chdir("/content/drive/My Drive/Earth-surface-water-mapping")
# !ls
# !nvidia-smi

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [2]:
%%writefile dataloader/dataloader.py

try:
    get_ipython().magic(u'tensorflow_version 2.x')
except Exception:
    pass
import tensorflow as tf
import numpy as np
import random
import pathlib
from utils.utils import readTiff

###  Get the pathes (string) corresponding to image pathes, return a list
def get_path(folder_Scenes, folder_Truths):    
    path_Scenes = pathlib.Path(folder_Scenes)
    path_Truths = pathlib.Path(folder_Truths)
    Scene_paths = list(path_Scenes.glob('*'))
    Scene_paths = sorted([str(path) for path in Scene_paths])    
    Truth_paths = list(path_Truths.glob('*'))
    Truth_paths = sorted([str(path) for path in Truth_paths])
    return Scene_paths, Truth_paths

### load the scenes
def load_scene(Scene_paths, Truth_paths, Patch_size):
    '''
    output Ratios: Scenes area/Patch area
    '''
    Scenes = list(range(len(Scene_paths)))   ## initialized the list
    Truths = list(range(len(Scene_paths)))
    Radios = list(range(len(Scene_paths)))  
    for i in range(len(Scene_paths)):
        Scenes[i], _, _, im_row,im_col, _ = readTiff(Scene_paths[i])
        Truths[i], _, _, _, _, _ = readTiff(Truth_paths[i])
        Truths[i] = np.expand_dims(Truths[i], axis=2)
        Radios[i] = (im_row//Patch_size+1)*(im_col//Patch_size+1)
    return Scenes, Truths, Radios

#### Data augmentation: noisy, filp, rotate. 
def image_aug(image, truth, flip = True, rot = True, noisy = True):
    if flip == True:
        if tf.random.uniform(()) > 0.5:
            if random.randint(1,2) == 1:  ## horizontal or vertical mirroring
                image = tf.image.flip_left_right(image)
                truth = tf.image.flip_left_right(truth)
            else: 
                image = tf.image.flip_up_down(image)
                truth = tf.image.flip_up_down(truth)
    if rot == True:
        if tf.random.uniform(()) > 0.5: 
            degree = random.randint(1,3)
            image = tf.image.rot90(image, k=degree)
            truth = tf.image.rot90(truth, k=degree)
    if noisy == True:
        if tf.random.uniform(()) > 0.5:
            std = random.uniform(0.002, 0.02)
            gnoise = tf.random.normal(shape=tf.shape(image), mean=0.0, stddev=std, dtype=tf.float32)
            image = tf.add(image, gnoise)
    return image, truth

def get_scene(folder_Scenes, folder_Truths, PATCH_SIZE):
    ## input the path of the folders corresponding to scenes and truth
    path_Scenes, path_Truths = get_path(folder_Scenes, folder_Truths)
    Scenes, Truths, Ratios = load_scene(path_Scenes, path_Truths, PATCH_SIZE)
    Scenes = [np.clip(Scenes/10000, 0, 1) for Scenes in Scenes]  #   Normalization
    return Scenes, Truths

def get_patch(Scenes, Truths, PATCH_SIZE, BATCH_SIZE, BUFFER_SIZE):
    '''
    input: Scenes (list) and Truths (list)
    output: tf.data.Dataset
    '''
    # data augmentation
    stacked = list(zip(Scenes,Truths))
    stacked = [np.concatenate(imgPair,axis=2) for imgPair in stacked]
    stacked = [tf.convert_to_tensor(imgPair, dtype=tf.float32) for imgPair in stacked]
    Patches_aug = []
    PatchTruths_aug = []
    for imgPair in stacked:
        imgPair = tf.image.random_crop(imgPair, size=[512, 512, imgPair.shape[2]])    
        Patch, PatchTruth = imgPair[:,:,:Scenes[0].shape[2]], imgPair[:,:,Scenes[0].shape[2]:]
        Patch_aug, PatchTruth_aug = image_aug(Patch, PatchTruth,\
                                 flip = True, rot = True, noisy = True)
        Patches_aug.append(Patch_aug)
        PatchTruths_aug.append(PatchTruth_aug)
    dataSet = tf.data.Dataset.from_tensor_slices((Patches_aug,PatchTruths_aug))
    dataSet = dataSet.batch(BATCH_SIZE).shuffle(BUFFER_SIZE)
    return dataSet


Overwriting dataloader/dataloader.py


In [3]:
##### test the data loader functions
# from dataloader.dataloader import get_scene, get_patch
# folder_TrainScenes = '/content/drive/My Drive/Colab/WaterMapping/TrainingData/TrainingScene/' 
# folder_TrainTruths = '/content/drive/My Drive/Colab/WaterMapping/TrainingData/TrainingTruth/'
# PATCH_SIZE = 512
# BATCH_SIZE = 4
# BUFFER_SIZE = 200
# Scenes, Truths = get_scene(folder_TrainScenes, folder_TrainTruths, PATCH_SIZE=512)
# TrainSet = get_patch(Scenes, Truths, PATCH_SIZE=512, BATCH_SIZE=4, BUFFER_SIZE=200)
# TrainSet


<ShuffleDataset shapes: ((None, 512, 512, 6), (None, 512, 512, 1)), types: (tf.float32, tf.float32)>