## Collect and export the training data: **satellite image and the corresponding target classification map**

In [None]:
## Colab helpers and the Earth Engine client used by the rest of the notebook
from google.colab import auth, drive
import ee

# Mount Google Drive at /content/drive/
drive.mount('/content/drive/')

# Authenticate this runtime to Google Cloud
auth.authenticate_user()

# Authenticate to Earth Engine, then initialize the client
ee.Authenticate()
ee.Initialize()


In [None]:
import os
# Work from the project folder on the mounted Drive so that relative paths
# used later resolve inside it.
os.chdir("/content/drive/My Drive/Earth-Engine-with-Deep-Learning")
# `%tensorflow_version` is an IPython line magic (notebook-only syntax, not
# plain Python); any exception it raises is deliberately ignored so the cell
# keeps running.
try:
    %tensorflow_version 2.x
except Exception:
    pass
import tensorflow as tf
import folium
import time

## Parameters configuration

In [None]:
### Hyper-parameters

# --- Export destinations ---------------------------------------------------
Project_ID = 'my-project-20200813'
Bucket = 'earth-engine-bucket-1'
# Folder names must be single-level: exports cannot be written into a nested
# (second-level) directory.
Export_GStorage_Folder = 'NLCD_Impervious_Data'
Export_Drive_Folder = 'EE_Image'
Export_Files_Name = 'Train-Landsat-8-2016'

# --- Acquisition window of the imagery --------------------------------------
# NOTE(review): these are passed to filterDate(); per the Earth Engine docs the
# end date is exclusive, so scenes from 2016-12-31 itself are presumably not
# included -- confirm this is intended.
date_start = '2016-01-01'
date_end = '2016-12-31'

# --- Exported bands and patch size -------------------------------------------
Bands = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7']   # input image bands
Targets = ['impervious']                       # target (label) band
Kernel_shape = [256, 256]                      # patch shape expected by the model

# --- Training regions and sampling -------------------------------------------
# Region (polygons) from which the training data is collected.
TrainingPolys = ee.FeatureCollection(
    ee.Geometry.Rectangle(-78.9, 35.79, -80.00, 36.65)
)
# Number of shards per polygon; sampling is split up to avoid the
# "computed value too large" error.
num_shards = 100
# Total sample size per polygon.
num_ploySam = 1000


In [None]:
#### Data collection
## 1) Satellite image source: Landsat 8 surface-reflectance scenes inside the
##    configured date window whose CLOUD_COVER_LAND property is below 50.
image_collection = (
    ee.ImageCollection('LANDSAT/LC08/C01/T1_SR')
    .filterDate(date_start, date_end)
    .filter(ee.Filter.lt('CLOUD_COVER_LAND', 50))
)
# 1.1) cloud mask
def cloudMask(image):
    """Mask the pixels of ``image`` whose ``pixel_qa`` bit 5 (cloud) is set.

    Args:
        image: image with a ``pixel_qa`` band (an ``ee.Image`` in this notebook).

    Returns:
        The result of ``image.updateMask(...)``, keeping only the pixels where
        bit 5 of ``pixel_qa`` is 0.
    """
    cloud_bit = 1 << 5
    cloud_flag = image.select('pixel_qa').bitwiseAnd(cloud_bit)
    return image.updateMask(cloud_flag.eq(0))
# Median composite of the cloud-masked scenes.
image_mask = image_collection.map(cloudMask).median()

# 1.2) Fill the null values of the cloud-masked composite.
# nullMask is the inverse of the composite's mask, i.e. it selects the pixels
# where the cloud-masked composite has no valid value.
nullMask = image_mask.mask().Not()
# Median of the original (not cloud-masked) scenes, restricted to those pixels.
img_nullMask = image_collection.map(
    lambda scene: scene.updateMask(nullMask)
).median()
# Both parts are unmasked to 0 and summed, so each pixel takes its value from
# whichever composite is valid there. The division by 10000 rescales the
# stored values (presumably to reflectance -- confirm against the dataset docs).
img_fill = (
    image_mask.unmask(0)
    .add(img_nullMask.unmask(0))
    .divide(10000)
)

## 2) NLCD dataset: the 'impervious' band, divided by 100 and cast to float.
# NOTE(review): the labels come from NLCD 2011 while the imagery window is
# 2016 -- confirm that this year mismatch is intended.
nlcd = (
    ee.Image('USGS/NLCD/NLCD2011')
    .select('impervious')
    .divide(100)
    .float()
)


In [None]:
####  Use folium to visualize the collected data and training regions.
# Rasterize the training regions (value 1 inside the polygons) and mask out
# everything else so that only the polygons are drawn.
polyImage = ee.Image(0).byte().paint(TrainingPolys, 1)
polyImage = polyImage.updateMask(polyImage)

_EE_ATTRIBUTION = 'Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>'


def _add_ee_layer(target_map, image, vis_params, name):
    """Add an Earth Engine image to a folium map as a named overlay layer.

    Args:
        target_map: the ``folium.Map`` that receives the layer.
        image: the ``ee.Image`` to display.
        vis_params: visualization parameters passed to ``image.getMapId``.
        name: layer name shown in the layer control.
    """
    mapid = image.getMapId(vis_params)
    folium.TileLayer(
        tiles=mapid['tile_fetcher'].url_format,
        attr=_EE_ATTRIBUTION,
        overlay=True,
        name=name,
    ).add_to(target_map)


# Center the view on the training rectangle (lon -80.00..-78.9,
# lat 35.79..36.65) so the displayed layers actually cover the sampled region.
# The map is not bound to the name `map`, which would shadow the builtin.
region_map = folium.Map(location=[36.22, -79.45], zoom_start=8)

_add_ee_layer(
    region_map, img_fill,
    {'bands': ['B4', 'B3', 'B2'], 'min': 0, 'max': 0.3},
    'median composite',
)
_add_ee_layer(region_map, nlcd, {'min': 0, 'max': 1}, 'nlcd impervious')
_add_ee_layer(
    region_map, polyImage,
    {'min': 1, 'max': 1, 'palette': ['red']},
    'training polygons',
)
region_map.add_child(folium.LayerControl())
# Last expression of the cell: renders the map in the notebook.
region_map


### Stack the satellite image and the target classification map

In [None]:
## Stack the satellite image bands and the target classification map into a
## single float image.
ImageStack = ee.Image.cat([
    img_fill.select(Bands),
    nlcd.select(Targets),
]).float()

# Define an all-ones fixed kernel with the patch size. The weights are a list
# of Kernel_shape[1] rows, each holding Kernel_shape[0] ones. The helper names
# avoid `list`, which would shadow the builtin for the rest of the session.
kernel_row = ee.List.repeat(1, Kernel_shape[0])
kernel_weights = ee.List.repeat(kernel_row, Kernel_shape[1])
kernel = ee.Kernel.fixed(Kernel_shape[0], Kernel_shape[1], kernel_weights)  # patch size

# Neighborhood image: every pixel of the image now holds a 2-D array built
# from the kernel window around it.
neighborImage = ImageStack.neighborhoodToArray(kernel)
neighborImage

<ee.image.Image at 0x7f264bcb6be0>

### **Sampling the stacked images to patches, and export the stacked images to tfrecord files.**
### <font color=red>! We could simplify the file export by specifying the formatOptions (awaiting testing).</font> 
e.g., image_export_options = {'patchDimensions': [256, 256], 'maxFileSize': 304857600, 'compressed': True}

In [None]:
# Convert the feature collection to a list so each polygon can be fetched by
# index while iterating.
TrainingPolysList = TrainingPolys.toList(TrainingPolys.size())
num_polygons = TrainingPolys.size().getInfo()
# Number of pixels sampled per shard.
shard_size = num_ploySam // num_shards

# Every started export task is kept so that all of them can be awaited below.
export_tasks = []

for i_polygons in range(num_polygons):   # iterate over every training polygon
    region = ee.Feature(TrainingPolysList.get(i_polygons)).geometry()
    geomSample = ee.FeatureCollection([])
    for i_shards in range(num_shards):
        sample = neighborImage.sample(
            region=region,
            scale=30,
            numPixels=shard_size,
            # Each shard needs its own random seed: with one seed per polygon
            # every shard would draw the same pixels and the merged collection
            # would only contain duplicates of a single shard.
            seed=i_polygons * num_shards + i_shards,
            # Per the Earth Engine docs, a larger tileScale uses smaller
            # computation tiles, which helps avoid out-of-memory errors.
            tileScale=8,
        )
        geomSample = geomSample.merge(sample)

    # Export name of the samples that belong to this polygon.
    Image_Geom_Export_Name = Export_Files_Name + '-Geom-' + str(i_polygons)
    ### export to Google Storage
    task_GStorage = ee.batch.Export.table.toCloudStorage(
        collection=geomSample,
        description=Image_Geom_Export_Name,
        bucket=Bucket,
        fileNamePrefix=Export_GStorage_Folder + '/' + Image_Geom_Export_Name,
        fileFormat='TFRecord',
        selectors=Bands + Targets,
    )
    task_GStorage.start()
    export_tasks.append(task_GStorage)

    # ### export to Google Drive (alternative destination)
    # task_Drive = ee.batch.Export.table.toDrive(
    #     collection = geomSample,
    #     description = Image_Geom_Export_Name,
    #     folder = Export_Drive_Folder,
    #     fileNamePrefix = Image_Geom_Export_Name,
    #     fileFormat = 'TFRecord',
    #     selectors = Bands + Targets
    # )
    # task_Drive.start()
    # export_tasks.append(task_Drive)

# Wait for all export tasks, not only the one that was started last. This also
# works when no task was started at all (empty polygon collection).
pending_tasks = export_tasks[:]
while True:
    pending_tasks = [task for task in pending_tasks if task.active()]
    if not pending_tasks:
        break
    for task in pending_tasks:
        print('Polling for task (id: {}).'.format(task.id))
    time.sleep(30)

# Report tasks that stopped without completing, so a failed export is not
# silently reported as done.
for task in export_tasks:
    state = task.status().get('state')
    if state != 'COMPLETED':
        print('Task (id: {}) ended in state {}.'.format(task.id, state))
print('Done with image export.')