In [18]:
import folium as folium
from google.cloud import storage
from google.cloud.exceptions import NotFound



In [19]:
import os

# Point the Google Cloud client libraries at a service-account key file.
# setdefault() keeps a GOOGLE_APPLICATION_CREDENTIALS value that is already
# present in the environment, so the notebook also runs on machines where the
# key lives somewhere other than the fallback path below.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    "/Users/zhaoyu/credential/zhaoyutest.json")
client = storage.Client()

# Look up the bucket; a missing bucket is reported instead of raising.
try:
    bucket = client.get_bucket("zhaoyutimtest")
except NotFound:
    # Bind the name anyway so a later reference sees None, not a NameError.
    bucket = None
    print("Sorry, that bucket does not exist!")

In [20]:
import tensorflow as tf
# Verify that the output bucket is visible through TensorFlow's file API.
# Write access to this bucket is required.
if tf.io.gfile.exists('gs://' + "zhaoyutimtest"):
  print('Found Cloud Storage bucket.')
else:
  print('Can not find output Cloud Storage bucket.')

Found Cloud Storage bucket.


In [21]:
import ee
# Run the Earth Engine authentication flow; the cell output reports that an
# authorization token was saved.
ee.Authenticate()
# Initialize the Earth Engine client before any other ee.* call is made.
ee.Initialize()


Successfully saved authorization token.


In [22]:
# Account and bucket that receive the exports.
USER_NAME = 'test'
OUTPUT_BUCKET = 'zhaoyutimtest'

# Earth Engine inputs: the image collection, the predictor bands, and the
# labelled points with the name of their label property.
L8SR = ee.ImageCollection('LANDSAT/LC08/C01/T1_SR')
BANDS = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7']
LABEL_DATA = ee.FeatureCollection('projects/google/demo_landcover_labels')
LABEL = 'landcover'
N_CLASSES = 3

# Exported columns: every predictor band followed by the label.
FEATURE_NAMES = BANDS + [LABEL]

# Training / testing table exports and their full Cloud Storage paths.
TRAIN_FILE_PREFIX = 'Training_demo'
TEST_FILE_PREFIX = 'Testing_demo'
file_extension = '.tfrecord.gz'
TRAIN_FILE_PATH = ''.join(
    ['gs://', OUTPUT_BUCKET, '/', TRAIN_FILE_PREFIX, file_extension])
TEST_FILE_PATH = ''.join(
    ['gs://', OUTPUT_BUCKET, '/', TEST_FILE_PREFIX, file_extension])

# Image export prefix, classified-output path, export region and asset id.
IMAGE_FILE_PREFIX = 'Image_pixel_demo_'
OUTPUT_IMAGE_FILE = ''.join(
    ['gs://', OUTPUT_BUCKET, '/Classified_pixel_demo.TFRecord'])
EXPORT_REGION = ee.Geometry.Rectangle([-122.7, 37.3, -121.8, 38.00])
OUTPUT_ASSET_ID = ''.join(['users/', USER_NAME, '/Classified_pixel_demo'])

In [23]:
# Cloud masking function.
def maskL8sr(image):
  """Mask cloud and cloud-shadow pixels, then scale the selected bands.

  Bits 3 (cloud shadow) and 5 (clouds) of the `pixel_qa` band are tested; a
  pixel stays unmasked only when both bits are zero.

  Args:
    image: an image providing a `pixel_qa` band and the bands in BANDS.

  Returns:
    The masked image restricted to BANDS, with every value divided by 10000.
  """
  pixel_qa = image.select('pixel_qa')

  # Single-bit masks built server-side as 2**3 and 2**5.
  shadow_bit = ee.Number(2).pow(3).int()
  cloud_bit = ee.Number(2).pow(5).int()

  shadow_free = pixel_qa.bitwiseAnd(shadow_bit).eq(0)
  cloud_free = pixel_qa.bitwiseAnd(cloud_bit).eq(0)
  clear = shadow_free.And(cloud_free)

  return image.updateMask(clear).select(BANDS).divide(10000)

# The image input data is a 2018 cloud-masked median composite.
# filterDate() treats the end date as exclusive, so the range ends on
# 2019-01-01 to keep scenes acquired on 2018-12-31 in the composite.
image = L8SR.filterDate('2018-01-01', '2019-01-01').map(maskL8sr).median()

# Use folium to visualize the imagery.
mapid = image.getMapId({'bands': ['B4', 'B3', 'B2'], 'min': 0, 'max': 0.3})
# Named `folium_map` rather than `map` so the Python builtin is not shadowed
# for the rest of the notebook session.
folium_map = folium.Map(location=[38., -122.5])

folium.TileLayer(
    tiles=mapid['tile_fetcher'].url_format,
    attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
    overlay=True,
    name='median composite',
).add_to(folium_map)
# Last expression of the cell: its value is what the notebook renders.
folium_map.add_child(folium.LayerControl())

In [24]:
# Sample the image at the labelled points, keeping only the label property.
from pprint import pprint

sample = image.sampleRegions(
  collection=LABEL_DATA, properties=[LABEL], scale=30)
# Add the 'random' column that drives the split below.
sample = sample.randomColumn()

# Partition the sample approximately 70-30 on the 'random' column.
in_training = ee.Filter.lt('random', 0.7)
in_testing = ee.Filter.gte('random', 0.7)
training = sample.filter(in_training)
testing = sample.filter(in_testing)

# Print the first point of each partition to verify.
first_training = training.first().getInfo()
pprint({'training': first_training})
first_testing = testing.first().getInfo()
pprint({'testing': first_testing})

{'training': {'geometry': None,
              'id': '00009f65e3c9ae02b84e_0',
              'properties': {'B2': 0.05220000073313713,
                             'B3': 0.062049999833106995,
                             'B4': 0.03660000115633011,
                             'B5': 0.01140000019222498,
                             'B6': 0.006800000090152025,
                             'B7': 0.005249999929219484,
                             'landcover': 2,
                             'random': 0.6494815572166289},
              'type': 'Feature'}}
{'testing': {'geometry': None,
             'id': '00000aa0a6a6d7b77beb_0',
             'properties': {'B2': 0.05079999938607216,
                            'B3': 0.05820000171661377,
                            'B4': 0.036649998277425766,
                            'B5': 0.010400000028312206,
                            'B6': 0.005849999841302633,
                            'B7': 0.004749999847263098,
                            'landc

In [25]:
# Verify that the output bucket is visible before exporting to it.
# Write access to this bucket is required.
if tf.io.gfile.exists('gs://' + OUTPUT_BUCKET):
  print('Found Cloud Storage bucket.')
else:
  print('Can not find output Cloud Storage bucket.')

Found Cloud Storage bucket.


In [26]:
# Options shared by both table exports: destination bucket, file format and
# the columns to write.
table_export_kwargs = {
  'bucket': OUTPUT_BUCKET,
  'fileFormat': 'TFRecord',
  'selectors': FEATURE_NAMES,
}

# Create the tasks; this cell only defines them.
training_task = ee.batch.Export.table.toCloudStorage(
  collection=training,
  description='Training Export',
  fileNamePrefix=TRAIN_FILE_PREFIX,
  **table_export_kwargs)

testing_task = ee.batch.Export.table.toCloudStorage(
  collection=testing,
  description='Testing Export',
  fileNamePrefix=TEST_FILE_PREFIX,
  **table_export_kwargs)

In [27]:
import time

# Start the tasks.
training_task.start()
testing_task.start()

# Wait for BOTH table exports.  Polling only the training task would let the
# notebook continue while the testing export is still running, so the testing
# file might not exist yet when later cells read it.
for export_task in (training_task, testing_task):
  while export_task.active():
    print('Polling for task (id: {}).'.format(export_task.id))
    time.sleep(30)
print('Done with training and testing export.')

In [29]:
# Confirm that the training export landed in Cloud Storage.
if tf.io.gfile.exists(TRAIN_FILE_PATH):
  print('Found training file.')
else:
  print('No training file found.')

# Confirm that the testing export landed in Cloud Storage.
if tf.io.gfile.exists(TEST_FILE_PATH):
  print('Found testing file.')
else:
  print('No testing file found.')

Found training file.
Found testing file.


In [30]:
# Format options for the TFRecord image export.
image_export_options = dict(
  patchDimensions=[256, 256],
  maxFileSize=100 * 1024 * 1024,  # 104857600
  compressed=True,
)

# Coordinates of the export rectangle, taken from its GeoJSON form.
export_coordinates = EXPORT_REGION.toGeoJSON()['coordinates']

# Setup the task.
image_task = ee.batch.Export.image.toCloudStorage(
  image=image,
  description='Image Export',
  fileNamePrefix=IMAGE_FILE_PREFIX,
  bucket=OUTPUT_BUCKET,
  scale=30,
  fileFormat='TFRecord',
  region=export_coordinates,
  formatOptions=image_export_options,
)

image_task.start()

# Poll every 30 seconds until the task is no longer active.
while True:
  if not image_task.active():
    break
  print('Polling for task (id: {}).'.format(image_task.id))
  time.sleep(30)
print('Done with image export.')

Polling for task (id: OZDQPWOWRZZXL6HIBPGONJ4H).
Polling for task (id: OZDQPWOWRZZXL6HIBPGONJ4H).
Polling for task (id: OZDQPWOWRZZXL6HIBPGONJ4H).
Polling for task (id: OZDQPWOWRZZXL6HIBPGONJ4H).
Polling for task (id: OZDQPWOWRZZXL6HIBPGONJ4H).
Polling for task (id: OZDQPWOWRZZXL6HIBPGONJ4H).
Polling for task (id: OZDQPWOWRZZXL6HIBPGONJ4H).
Polling for task (id: OZDQPWOWRZZXL6HIBPGONJ4H).
Done with image export.


In [31]:
# Create a dataset from the gzip-compressed TFRecord file in Cloud Storage.
train_dataset = tf.data.TFRecordDataset(TRAIN_FILE_PATH, compression_type='GZIP')
# Print the first record to check.  The builtin next() is used instead of the
# Python 2-style `.next()` method on the iterator object.
print(next(iter(train_dataset)))

tf.Tensor(b'\nw\n\x0e\n\x02B2\x12\x08\x12\x06\n\x04\xab\xcfU=\n\x0e\n\x02B3\x12\x08\x12\x06\n\x04$(~=\n\x0e\n\x02B4\x12\x08\x12\x06\n\x04\xe2\xe9\x15=\n\x0e\n\x02B5\x12\x08\x12\x06\n\x04\x11\xc7:<\n\x0e\n\x02B6\x12\x08\x12\x06\n\x04\x89\xd2\xde;\n\x0e\n\x02B7\x12\x08\x12\x06\n\x041\x08\xac;\n\x15\n\tlandcover\x12\x08\x12\x06\n\x04\x00\x00\x00@', shape=(), dtype=string)


In [32]:
# One fixed-length float32 feature spec (shape [1]) per exported column.
columns = [
  tf.io.FixedLenFeature(shape=[1], dtype=tf.float32)
  for _ in FEATURE_NAMES
]

# Map each feature name to its spec; names and specs are paired by position.
features_dict = {
  name: spec for name, spec in zip(FEATURE_NAMES, columns)
}

pprint(features_dict)

{'B2': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 'B3': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 'B4': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 'B5': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 'B6': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 'B7': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 'landcover': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None)}


In [33]:
def parse_tfrecord(example_proto):
  """Parse one serialized Example into a (predictors, label) pair.

  The record is decoded with the feature spec in `features_dict`; the LABEL
  entry is then separated from the remaining features.

  Args:
    example_proto: a serialized Example.

  Returns:
    A tuple of the predictors dictionary and the label, cast to an `int32`.
  """
  parsed = tf.io.parse_single_example(example_proto, features_dict)
  # Everything except the label is a predictor; key order is preserved.
  predictors = {
    name: value for name, value in parsed.items() if name != LABEL
  }
  label = tf.cast(parsed[LABEL], tf.int32)
  return predictors, label

# Map the parsing function over the dataset.
parsed_dataset = train_dataset.map(parse_tfrecord, num_parallel_calls=5)

# Print the first parsed record to check.  The builtin next() is used instead
# of the Python 2-style `.next()` method on the iterator object.
pprint(next(iter(parsed_dataset)))

({'B2': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.0522], dtype=float32)>,
  'B3': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.06205], dtype=float32)>,
  'B4': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.0366], dtype=float32)>,
  'B5': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.0114], dtype=float32)>,
  'B6': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.0068], dtype=float32)>,
  'B7': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.00525], dtype=float32)>},
 <tf.Tensor: shape=(1,), dtype=int32, numpy=array([2], dtype=int32)>)


In [37]:
# Bare expression: the notebook shows the dataset's repr, which lists the
# shape and dtype of every feature and of the label.
parsed_dataset

<ParallelMapDataset shapes: ({B2: (1,), B3: (1,), B4: (1,), B5: (1,), B6: (1,), B7: (1,)}, (1,)), types: ({B2: tf.float32, B3: tf.float32, B4: tf.float32, B5: tf.float32, B6: tf.float32, B7: tf.float32}, tf.int32)>

In [34]:
def normalized_difference(a, b):
  """Return the normalized difference (a - b) / (a + b) of two inputs.

  Wherever the plain ratio is not finite (for example when the denominator
  is zero), the ratio computed with a small delta (0.000001) added to the
  denominator is used instead.

  Args:
    a: an input tensor with shape=[1]
    b: an input tensor with shape=[1]

  Returns:
    The normalized difference as a tensor.
  """
  numerator = a - b
  denominator = a + b
  plain_ratio = numerator / denominator
  padded_ratio = numerator / (denominator + 0.000001)
  # Keep the plain ratio where it is finite; fall back to the padded one.
  return tf.where(tf.math.is_finite(plain_ratio), plain_ratio, padded_ratio)

def add_NDVI(features, label):
  """Attach an 'NDVI' entry to the feature dictionary.

  NDVI is the normalized difference of the 'B5' and 'B4' features.  The
  input dictionary is updated in place and also returned.

  Args:
    features: a dictionary of input tensors keyed by feature name.
    label: the target label

  Returns:
    A tuple of the input dictionary with an NDVI tensor added and the label.
  """
  b5, b4 = features['B5'], features['B4']
  features['NDVI'] = normalized_difference(b5, b4)
  return features, label


In [43]:
from tensorflow import keras

# Add an 'NDVI' feature to every (features, label) element of the parsed
# training dataset.
input_dataset = parsed_dataset.map(add_NDVI)

# Keras requires inputs as a tuple.  Note that the inputs must be in the
# right shape.  Also note that to use the categorical_crossentropy loss,
# the label needs to be turned into a one-hot vector.
def to_tuple(inputs, label):
  """Convert a (features dict, label) pair into the tuple form used for Keras.

  Args:
    inputs: a dictionary of feature tensors; its values are stacked in the
      dictionary's iteration order.
    label: the class index tensor.

  Returns:
    A tuple of the transposed stack of feature tensors and the label
    one-hot encoded with depth N_CLASSES.
  """
  feature_stack = list(inputs.values())
  one_hot_label = tf.one_hot(indices=label, depth=N_CLASSES)
  return tf.transpose(feature_stack), one_hot_label

# Convert every element to tuple form, then batch by 8.
# Note: no shuffling is applied to the dataset here.
input_dataset = input_dataset.map(to_tuple)
input_dataset = input_dataset.batch(8)

# Build the model layer by layer: a 64-unit ReLU layer, dropout at 0.2, and
# a softmax output with one unit per class.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(64, activation=tf.nn.relu))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(N_CLASSES, activation=tf.nn.softmax))

# Compile with the Adam optimizer and the categorical cross-entropy loss
# (which is why the labels are one-hot encoded).
model.compile(
  loss='categorical_crossentropy',
  metrics=['accuracy'],
  optimizer=tf.keras.optimizers.Adam())

# Fit the model to the training data for 10 epochs.
model.fit(x=input_dataset, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7ff11efeb450>

In [52]:
# Preview only the first few parsed records.  Materializing the whole
# dataset into a list reads every record and floods the cell output.
list(parsed_dataset.take(5).as_numpy_iterator())

[({'B2': array([0.0522], dtype=float32),
   'B3': array([0.06205], dtype=float32),
   'B4': array([0.0366], dtype=float32),
   'B5': array([0.0114], dtype=float32),
   'B6': array([0.0068], dtype=float32),
   'B7': array([0.00525], dtype=float32)},
  array([2], dtype=int32)),
 ({'B2': array([0.04915], dtype=float32),
   'B3': array([0.06965], dtype=float32),
   'B4': array([0.08975], dtype=float32),
   'B5': array([0.1729], dtype=float32),
   'B6': array([0.2126], dtype=float32),
   'B7': array([0.1515], dtype=float32)},
  array([1], dtype=int32)),
 ({'B2': array([0.0583], dtype=float32),
   'B3': array([0.0856], dtype=float32),
   'B4': array([0.1162], dtype=float32),
   'B5': array([0.2439], dtype=float32),
   'B6': array([0.296], dtype=float32),
   'B7': array([0.1982], dtype=float32)},
  array([1], dtype=int32)),
 ({'B2': array([0.0812], dtype=float32),
   'B3': array([0.0972], dtype=float32),
   'B4': array([0.1018], dtype=float32),
   'B5': array([0.1158], dtype=float32),
   'B6'

In [41]:
# Build the test pipeline with the same steps as the training data: parse,
# add NDVI, convert to tuple form.  The batch size is 1.
test_dataset = tf.data.TFRecordDataset(TEST_FILE_PATH, compression_type='GZIP')
test_dataset = test_dataset.map(parse_tfrecord, num_parallel_calls=5)
test_dataset = test_dataset.map(add_NDVI)
test_dataset = test_dataset.map(to_tuple)
test_dataset = test_dataset.batch(1)

# Last expression of the cell: displays the evaluation result (loss, accuracy).
model.evaluate(test_dataset)



[0.8426253795623779, 0.9655172228813171]