In [1]:
import tensorflow as tf

In [2]:
# Name of the single input feature; also used as the Keras input layer's name.
TIMESERIES_COL = 'height'

# In each sequence, values 1-49 are features and value 50 is the label.
N_OUTPUTS = 1

# Placeholders; init() fills these in from hparams['sequence_length'].
SEQ_LEN = DEFAULTS = N_INPUTS = None

In [3]:
def init(hparams):
    """Set the sequence-dependent module globals.

    Args:
        hparams: mapping that provides 'sequence_length', the number of
            values in each CSV row (features plus label).

    Side effects:
        Rebinds the globals SEQ_LEN, DEFAULTS and N_INPUTS.
    """
    global SEQ_LEN, DEFAULTS, N_INPUTS

    SEQ_LEN = hparams['sequence_length']
    # One [0.0] default per CSV column, so every column is parsed as a float.
    DEFAULTS = [[0.0] for _ in range(SEQ_LEN)]
    # Whatever is not an output is an input feature.
    N_INPUTS = SEQ_LEN - N_OUTPUTS

In [4]:
def linear_model(hparams):
    """Build a linear regressor: one Dense unit with no activation.

    Args:
        hparams: hyperparameter dict; accepted so every model builder shares
            the same signature, but not read here.

    Returns:
        An uncompiled tf.keras Sequential model taking N_INPUTS values and
        producing a single output.
    """
    return tf.keras.models.Sequential([
        tf.keras.layers.InputLayer(input_shape=[N_INPUTS], name=TIMESERIES_COL),
        tf.keras.layers.Dense(units=1, activation=None),
    ])

In [5]:
def dnn_model(hparams):
    """Build a fully connected network with hidden layers of 30 and 10 ReLU units.

    Args:
        hparams: hyperparameter dict; accepted so every model builder shares
            the same signature, but not read here.

    Returns:
        An uncompiled tf.keras Sequential model taking N_INPUTS values and
        producing a single linear output.
    """
    return tf.keras.models.Sequential([
        tf.keras.layers.InputLayer(input_shape=[N_INPUTS], name=TIMESERIES_COL),
        tf.keras.layers.Dense(units=30, activation=tf.nn.relu),
        tf.keras.layers.Dense(units=10, activation=tf.nn.relu),
        tf.keras.layers.Dense(units=1, activation=None),
    ])

In [6]:
def cnn_model(hparams):
    """Build a 1-D convolutional regressor: two conv/max-pool stages and a dense head.

    Args:
        hparams: hyperparameter dict; accepted so every model builder shares
            the same signature, but not read here.

    Returns:
        An uncompiled tf.keras Sequential model taking N_INPUTS values and
        producing a single linear output.
    """
    layers = tf.keras.layers
    n_filters = N_INPUTS // 2  # both convolution stages use the same filter count

    return tf.keras.models.Sequential([
        layers.InputLayer(input_shape=[N_INPUTS], name=TIMESERIES_COL),
        # Add a trailing channel axis so Conv1D can run over the sequence.
        layers.Reshape(target_shape=[N_INPUTS, 1]),
        layers.Conv1D(filters=n_filters, kernel_size=3, padding="same", activation=tf.nn.relu),
        layers.MaxPooling1D(pool_size=2, strides=2),
        layers.Conv1D(filters=n_filters, kernel_size=3, padding="same", activation=tf.nn.relu),
        layers.MaxPooling1D(pool_size=2, strides=2),
        layers.Flatten(),
        layers.Dense(units=3, activation=tf.nn.relu),
        layers.Dense(units=1, activation=None),
    ])

In [7]:
def rnn_model(hparams):
    """Build a single-layer LSTM regressor with a small dense head.

    Args:
        hparams: hyperparameter dict; accepted so every model builder shares
            the same signature, but not read here.

    Returns:
        An uncompiled tf.keras Sequential model taking N_INPUTS values and
        producing a single linear output.
    """
    layers = tf.keras.layers
    lstm_units = N_INPUTS // 3  # size of the internal state in each of the cells

    return tf.keras.models.Sequential([
        layers.InputLayer(input_shape=[N_INPUTS], name=TIMESERIES_COL),
        # Treat the row as N_INPUTS time steps of one value each.
        layers.Reshape(target_shape=[N_INPUTS, 1]),
        layers.LSTM(units=lstm_units),
        layers.Dense(units=N_INPUTS // 2, activation=tf.nn.relu),
        layers.Dense(units=1, activation=None),
    ])

In [8]:
def rnn2_model(hparams):
    """Build a 2-layer RNN: two stacked LSTM layers followed by a dense head.

    Args:
        hparams: hyperparameter dict; accepted so every model builder shares
            the same signature, but not read here.

    Returns:
        An uncompiled tf.keras Sequential model taking N_INPUTS values and
        producing a single linear output.
    """
    layers = tf.keras.layers

    return tf.keras.models.Sequential([
        layers.InputLayer(input_shape=[N_INPUTS], name=TIMESERIES_COL),
        # Treat the row as N_INPUTS time steps of one value each.
        layers.Reshape(target_shape=[N_INPUTS, 1]),
        # return_sequences=True hands the full output sequence to the next LSTM.
        layers.LSTM(units=N_INPUTS * 2, return_sequences=True),
        layers.LSTM(units=N_INPUTS // 2),
        layers.Dense(units=(N_INPUTS // 2) // 2, activation=tf.nn.relu),
        layers.Dense(units=1, activation=None),
    ])

In [9]:
def read_dataset(filename, mode, batch_size=512):
    """Create an Estimator input function that reads CSV rows as (features, label).

    Args:
        filename: file name or glob pattern (e.g. data_file_*.csv) of the CSV files.
        mode: a tf.estimator.ModeKeys value; TRAIN shuffles and repeats
            indefinitely, anything else makes a single pass.
        batch_size: number of rows per batch.

    Returns:
        A zero-argument function that yields ({TIMESERIES_COL: features}, label)
        batch tensors.
    """
    def _parse_row(row):
        # row is a string tensor holding one CSV line; decode it into one
        # rank-0 float tensor per column.
        columns = tf.decode_csv(row, record_defaults=DEFAULTS)
        # The last column is the label; the remaining ones are the features.
        label = columns.pop()
        # list of rank-0 tensors -> single rank-1 tensor
        return {TIMESERIES_COL: tf.stack(columns)}, label

    def _input_fn():
        # Expand the pattern into file names, read their lines, parse each line.
        dataset = (
            tf.data.Dataset.list_files(filename)
            .flat_map(tf.data.TextLineDataset)
            .map(_parse_row)
        )

        is_training = mode == tf.estimator.ModeKeys.TRAIN
        if is_training:
            dataset = dataset.shuffle(buffer_size=10 * batch_size)

        # None loops indefinitely; 1 signals end-of-input after a single pass.
        num_epochs = None if is_training else 1

        batched = dataset.repeat(num_epochs).batch(batch_size)
        return batched.make_one_shot_iterator().get_next()

    return _input_fn

In [10]:
def serving_input_fn():
    """Describe the inputs the exported model accepts at serving time.

    Returns:
        A tf.estimator.export.ServingInputReceiver whose receiver tensor is a
        float32 placeholder of shape [None, N_INPUTS] keyed by TIMESERIES_COL.
    """
    receiver = tf.placeholder(tf.float32, [None, N_INPUTS])
    receiver_tensors = {TIMESERIES_COL: receiver}

    # Append a trailing axis and then squeeze that same axis away again; the
    # feature handed to the model ends up with the placeholder's shape.
    expanded = tf.expand_dims(receiver, -1)
    features = {TIMESERIES_COL: tf.squeeze(expanded, axis=[2])}

    return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)

In [11]:
def sequence_regressor(hparams):
    """Build the Keras model type selected by hparams["model"].

    Args:
        hparams: hyperparameter dict; "model" must be one of "linear", "dnn",
            "cnn", "rnn" or "rnn2". The whole dict is forwarded to the builder.

    Returns:
        The uncompiled Keras model returned by the selected builder.

    Raises:
        KeyError: if hparams["model"] is not a known model type.
    """
    # Dispatch table: model type name -> builder function.
    builders = {
        "linear": linear_model,
        "dnn": dnn_model,
        "cnn": cnn_model,
        "rnn": rnn_model,
        "rnn2": rnn2_model,
    }
    return builders[hparams["model"]](hparams)

In [12]:
def train_and_evaluate(output_dir, hparams):
    """Train the selected Keras model as an Estimator, evaluating and exporting as it goes.

    Args:
        output_dir: directory handed to the Estimator as its model_dir.
        hparams: hyperparameter dict. Reads "model" (through
            sequence_regressor), "train_data_path", "eval_data_path",
            "train_batch_size", "train_steps", "eval_delay_secs" and
            "min_eval_frequency". "learning_rate" is optional; when present it
            sets the Adam learning rate, otherwise Keras' default Adam
            configuration is used.
    """
    tf.summary.FileWriterCache.clear()  # ensure filewriter cache is clear for TensorBoard events file

    # Build Keras model
    model = sequence_regressor(hparams)

    # Honor the configured learning rate. Compiling with the bare string "adam"
    # would silently ignore hparams["learning_rate"].
    learning_rate = hparams.get("learning_rate")
    if learning_rate is None:
        optimizer = "adam"
    else:
        optimizer = tf.keras.optimizers.Adam(lr=learning_rate)

    # Compile Keras model with optimizer, loss function, and eval metrics
    model.compile(
        optimizer=optimizer,
        loss="mse",
        metrics=["mse"])

    # Convert Keras model to an Estimator; min_eval_frequency (seconds) also
    # sets how often checkpoints are written.
    estimator = tf.keras.estimator.model_to_estimator(
        keras_model=model,
        model_dir=output_dir,
        config=tf.estimator.RunConfig(save_checkpoints_secs=hparams["min_eval_frequency"]))

    # Set estimator's train_spec to use train_input_fn and train for so many steps
    train_spec = tf.estimator.TrainSpec(
        input_fn=read_dataset(
            filename=hparams['train_data_path'],
            mode=tf.estimator.ModeKeys.TRAIN,
            batch_size=hparams['train_batch_size']),
        max_steps=hparams['train_steps'])

    # Create exporter that uses serving_input_fn to create saved_model for serving
    exporter = tf.estimator.LatestExporter(name='exporter', serving_input_receiver_fn=serving_input_fn)

    # Set estimator's eval_spec to use eval_input_fn and export saved_model.
    # steps=None evaluates until the eval input function is exhausted.
    eval_spec = tf.estimator.EvalSpec(
        input_fn=read_dataset(
            filename=hparams['eval_data_path'],
            mode=tf.estimator.ModeKeys.EVAL,
            batch_size=1000),
        steps=None,
        exporters=exporter,
        start_delay_secs=hparams['eval_delay_secs'],
        throttle_secs=hparams['min_eval_frequency'])

    # Run train_and_evaluate loop
    tf.estimator.train_and_evaluate(
        estimator=estimator,
        train_spec=train_spec,
        eval_spec=eval_spec)

In [13]:
# Experiment: train and evaluate the "linear" model.
BUCKET = 'qwiklabs-gcp-ml-49b827b781ab'
model = "linear"

# Local directories; DATADIR is not used in this cell, OUTDIR is the Estimator's model_dir.
DATADIR = "./data/sines"
OUTDIR = "./trained_{}/sines".format(model)

# Glob patterns for the training and validation CSV files in the bucket.
train_data_path = "gs://{}/sines/train*.csv".format(BUCKET)
eval_data_path = "gs://{}/sines/valid*.csv".format(BUCKET)

# Training and evaluation settings.
learning_rate = 0.01
train_batch_size = 100
eval_delay_secs = 10
min_eval_frequency = 60

hparams = {
    "train_data_path": train_data_path,
    "eval_data_path": eval_data_path,
    "train_batch_size": train_batch_size,
    "learning_rate": learning_rate,
    "train_steps": 1000,
    "sequence_length": 50,
    "output_dir": OUTDIR,
    "model": model,
    "job-dir": OUTDIR,
    "eval_delay_secs": eval_delay_secs,
    "min_eval_frequency": min_eval_frequency,
}

# Size the module globals for this sequence length, then run the experiment.
init(hparams)
train_and_evaluate(OUTDIR, hparams)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Using the Keras model provided.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
INFO:tensorflow:Using config: {'_train_distribute': None, '_global_id_in_cluster': 0, '_protocol': None, '_save_summary_steps': 100, '_keep_checkpoint_every_n_hours': 10000, '_keep_checkpoint_max': 5, '_session_creation_timeout_secs': 7200, '_device_fn': None, '_save_checkpoints_steps': None, '_task_id': 0, '_eval_distribute': None, '_evaluation_master': '', '_master': '', '_num_ps_replicas': 0, '_is_chief': True, '_task_type': 'worker', '_experimental_max_worker_delay_secs': None, '_save_checkpoints_secs': 60, '_service': None, '_model_dir': './trained_linear/sines', '_session_config': allow_soft_placement: true
graph_options 

In [14]:
# Experiment: train and evaluate the "dnn" model.
BUCKET = 'qwiklabs-gcp-ml-49b827b781ab'
model = "dnn"

# Local directories; DATADIR is not used in this cell, OUTDIR is the Estimator's model_dir.
DATADIR = "./data/sines"
OUTDIR = "./trained_{}/sines".format(model)

# Glob patterns for the training and validation CSV files in the bucket.
train_data_path = "gs://{}/sines/train*.csv".format(BUCKET)
eval_data_path = "gs://{}/sines/valid*.csv".format(BUCKET)

# Training and evaluation settings.
learning_rate = 0.01
train_batch_size = 100
eval_delay_secs = 10
min_eval_frequency = 60

hparams = {
    "train_data_path": train_data_path,
    "eval_data_path": eval_data_path,
    "train_batch_size": train_batch_size,
    "learning_rate": learning_rate,
    "train_steps": 1000,
    "sequence_length": 50,
    "output_dir": OUTDIR,
    "model": model,
    "job-dir": OUTDIR,
    "eval_delay_secs": eval_delay_secs,
    "min_eval_frequency": min_eval_frequency,
}

# Size the module globals for this sequence length, then run the experiment.
init(hparams)
train_and_evaluate(OUTDIR, hparams)

INFO:tensorflow:Using the Keras model provided.
INFO:tensorflow:Using config: {'_train_distribute': None, '_global_id_in_cluster': 0, '_protocol': None, '_save_summary_steps': 100, '_keep_checkpoint_every_n_hours': 10000, '_keep_checkpoint_max': 5, '_session_creation_timeout_secs': 7200, '_device_fn': None, '_save_checkpoints_steps': None, '_task_id': 0, '_eval_distribute': None, '_evaluation_master': '', '_master': '', '_num_ps_replicas': 0, '_is_chief': True, '_task_type': 'worker', '_experimental_max_worker_delay_secs': None, '_save_checkpoints_secs': 60, '_service': None, '_model_dir': './trained_dnn/sines', '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_experimental_distribute': None, '_log_step_count_steps': 100, '_tf_random_seed': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f95b05b3e48>, '_num_worker_replicas': 1}
INFO:tensorflow:Not using Distribute Coordin

In [15]:
# Experiment: train and evaluate the "cnn" model.
BUCKET = 'qwiklabs-gcp-ml-49b827b781ab'
model = "cnn"

# Local directories; DATADIR is not used in this cell, OUTDIR is the Estimator's model_dir.
DATADIR = "./data/sines"
OUTDIR = "./trained_{}/sines".format(model)

# Glob patterns for the training and validation CSV files in the bucket.
train_data_path = "gs://{}/sines/train*.csv".format(BUCKET)
eval_data_path = "gs://{}/sines/valid*.csv".format(BUCKET)

# Training and evaluation settings.
learning_rate = 0.01
train_batch_size = 100
eval_delay_secs = 10
min_eval_frequency = 60

hparams = {
    "train_data_path": train_data_path,
    "eval_data_path": eval_data_path,
    "train_batch_size": train_batch_size,
    "learning_rate": learning_rate,
    "train_steps": 1000,
    "sequence_length": 50,
    "output_dir": OUTDIR,
    "model": model,
    "job-dir": OUTDIR,
    "eval_delay_secs": eval_delay_secs,
    "min_eval_frequency": min_eval_frequency,
}

# Size the module globals for this sequence length, then run the experiment.
init(hparams)
train_and_evaluate(OUTDIR, hparams)

INFO:tensorflow:Using the Keras model provided.
INFO:tensorflow:Using config: {'_train_distribute': None, '_global_id_in_cluster': 0, '_protocol': None, '_save_summary_steps': 100, '_keep_checkpoint_every_n_hours': 10000, '_keep_checkpoint_max': 5, '_session_creation_timeout_secs': 7200, '_device_fn': None, '_save_checkpoints_steps': None, '_task_id': 0, '_eval_distribute': None, '_evaluation_master': '', '_master': '', '_num_ps_replicas': 0, '_is_chief': True, '_task_type': 'worker', '_experimental_max_worker_delay_secs': None, '_save_checkpoints_secs': 60, '_service': None, '_model_dir': './trained_cnn/sines', '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_experimental_distribute': None, '_log_step_count_steps': 100, '_tf_random_seed': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f95f52900b8>, '_num_worker_replicas': 1}
INFO:tensorflow:Not using Distribute Coordin

In [16]:
# Experiment: train and evaluate the "rnn" model.
BUCKET = 'qwiklabs-gcp-ml-49b827b781ab'
model = "rnn"

# Local directories; DATADIR is not used in this cell, OUTDIR is the Estimator's model_dir.
DATADIR = "./data/sines"
OUTDIR = "./trained_{}/sines".format(model)

# Glob patterns for the training and validation CSV files in the bucket.
train_data_path = "gs://{}/sines/train*.csv".format(BUCKET)
eval_data_path = "gs://{}/sines/valid*.csv".format(BUCKET)

# Training and evaluation settings.
learning_rate = 0.01
train_batch_size = 100
eval_delay_secs = 10
min_eval_frequency = 60

hparams = {
    "train_data_path": train_data_path,
    "eval_data_path": eval_data_path,
    "train_batch_size": train_batch_size,
    "learning_rate": learning_rate,
    "train_steps": 1000,
    "sequence_length": 50,
    "output_dir": OUTDIR,
    "model": model,
    "job-dir": OUTDIR,
    "eval_delay_secs": eval_delay_secs,
    "min_eval_frequency": min_eval_frequency,
}

# Size the module globals for this sequence length, then run the experiment.
init(hparams)
train_and_evaluate(OUTDIR, hparams)

INFO:tensorflow:Using the Keras model provided.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
INFO:tensorflow:Using config: {'_train_distribute': None, '_global_id_in_cluster': 0, '_protocol': None, '_save_summary_steps': 100, '_keep_checkpoint_every_n_hours': 10000, '_keep_checkpoint_max': 5, '_session_creation_timeout_secs': 7200, '_device_fn': None, '_save_checkpoints_steps': None, '_task_id': 0, '_eval_distribute': None, '_evaluation_master': '', '_master': '', '_num_ps_replicas': 0, '_is_chief': True, '_task_type': 'worker', '_experimental_max_worker_delay_secs': None, '_save_checkpoints_secs': 60, '_service': None, '_model_dir': './trained_rnn/sines', '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_experimental_distribute': None, '_log_step_count_steps': 100, '_tf_random_seed': None, '_cluster_spec': <tensorflow.python.training.ser

In [17]:
# Experiment: train and evaluate the "rnn2" model.
BUCKET = 'qwiklabs-gcp-ml-49b827b781ab'
model = "rnn2"

# Local directories; DATADIR is not used in this cell, OUTDIR is the Estimator's model_dir.
DATADIR = "./data/sines"
OUTDIR = "./trained_{}/sines".format(model)

# Glob patterns for the training and validation CSV files in the bucket.
train_data_path = "gs://{}/sines/train*.csv".format(BUCKET)
eval_data_path = "gs://{}/sines/valid*.csv".format(BUCKET)

# Training and evaluation settings.
learning_rate = 0.01
train_batch_size = 100
eval_delay_secs = 10
min_eval_frequency = 60

hparams = {
    "train_data_path": train_data_path,
    "eval_data_path": eval_data_path,
    "train_batch_size": train_batch_size,
    "learning_rate": learning_rate,
    "train_steps": 1000,
    "sequence_length": 50,
    "output_dir": OUTDIR,
    "model": model,
    "job-dir": OUTDIR,
    "eval_delay_secs": eval_delay_secs,
    "min_eval_frequency": min_eval_frequency,
}

# Size the module globals for this sequence length, then run the experiment.
init(hparams)
train_and_evaluate(OUTDIR, hparams)

INFO:tensorflow:Using the Keras model provided.
INFO:tensorflow:Using config: {'_train_distribute': None, '_global_id_in_cluster': 0, '_protocol': None, '_save_summary_steps': 100, '_keep_checkpoint_every_n_hours': 10000, '_keep_checkpoint_max': 5, '_session_creation_timeout_secs': 7200, '_device_fn': None, '_save_checkpoints_steps': None, '_task_id': 0, '_eval_distribute': None, '_evaluation_master': '', '_master': '', '_num_ps_replicas': 0, '_is_chief': True, '_task_type': 'worker', '_experimental_max_worker_delay_secs': None, '_save_checkpoints_secs': 60, '_service': None, '_model_dir': './trained_rnn2/sines', '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_experimental_distribute': None, '_log_step_count_steps': 100, '_tf_random_seed': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f96564007b8>, '_num_worker_replicas': 1}
INFO:tensorflow:Not using Distribute Coordi