In [31]:
%bash
git clone https://github.com/GoogleCloudPlatform/training-data-analyst

fatal: destination path 'training-data-analyst' already exists and is not an empty directory.


In [32]:
import tensorflow as tf
import numpy as np

In [33]:
path = 'training-data-analyst/courses/machine_learning/deepdive/03_tensorflow/'

CSV_COLUMNS = ['fare_amount', 'pickuplon','pickuplat','dropofflon','dropofflat','passengers', 'key']
LABEL_COLUMN = 'fare_amount'
DEFAULTS = [[0.0], [-74.0], [40.0], [-74.0], [40.7], [1.0], ['nokey']]

def read_dataset(filename, mode, batch_size = 512):
  def _input_fn():
    def decode_csv(value_column):
      columns = tf.decode_csv(value_column, record_defaults = DEFAULTS)
      features = dict(zip(CSV_COLUMNS, columns))
      label = features.pop(LABEL_COLUMN)
      return features, label

    # Create list of file names that match "glob" pattern (i.e. data_file_*.csv)
    filenames_dataset = tf.data.Dataset.list_files(filename)
    # Read lines from text files
    textlines_dataset = filenames_dataset.flat_map(tf.data.TextLineDataset)
    # Parse text lines as comma-separated values (CSV)
    dataset = textlines_dataset.map(decode_csv)
    
    # Note:
    # use tf.data.Dataset.flat_map to apply one to many transformations (here: filename -> text lines)
    # use tf.data.Dataset.map      to apply one to one  transformations (here: text line -> feature list)
    
    if mode == tf.estimator.ModeKeys.TRAIN:
        num_epochs = None # indefinitely
        dataset = dataset.shuffle(buffer_size = 10 * batch_size)
    else:
        num_epochs = 1 # end-of-input after this

    dataset = dataset.repeat(num_epochs).batch(batch_size)
    
    return dataset.make_one_shot_iterator().get_next()
  return _input_fn
    

def get_train():
  return read_dataset(path + 'taxi-train.csv', mode = tf.estimator.ModeKeys.TRAIN)

def get_valid():
  return read_dataset(path + 'taxi-valid.csv', mode = tf.estimator.ModeKeys.EVAL)

def get_test():
  return read_dataset(path + 'taxi-test.csv', mode = tf.estimator.ModeKeys.EVAL)

In [34]:
feature_columns = [tf.feature_column.numeric_column(f) for f in CSV_COLUMNS[1:len(CSV_COLUMNS) - 1]]

In [35]:
def serving_input_fn():
#   feature_placeholders = dict(zip(CSV_COLUMNS[1:len(CSV_COLUMNS) - 1], [tf.placeholder(tf.float32, [None])] * len(CSV_COLUMNS) - 2))
  feature_placeholders = {
    'pickuplon' : tf.placeholder(tf.float32, [None]),
    'pickuplat' : tf.placeholder(tf.float32, [None]),
    'dropofflat' : tf.placeholder(tf.float32, [None]),
    'dropofflon' : tf.placeholder(tf.float32, [None]),
    'passengers' : tf.placeholder(tf.float32, [None]),
  }
  features = feature_placeholders
  return tf.estimator.export.ServingInputReceiver(features, feature_placeholders)

In [36]:
import shutil

log_dir = 'taxi-trained'

shutil.rmtree(log_dir, ignore_errors=True)

model = tf.estimator.LinearRegressor(
  feature_columns=feature_columns,
  model_dir=log_dir
)

train_spec = tf.estimator.TrainSpec(
  input_fn=get_train(),
  max_steps=2000
)

exporter = tf.estimator.LatestExporter(
  name='exporter',
  serving_input_receiver_fn=serving_input_fn
)

eval_spec = tf.estimator.EvalSpec(
  input_fn=get_valid(),
  steps=300,
  exporters=exporter,
  start_delay_secs=20,
  throttle_secs=10
)

tf.estimator.train_and_evaluate(model, train_spec, eval_spec)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_train_distribute': None, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fe3d2d2c0d0>, '_evaluation_master': '', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': 'taxi-trained', '_global_id_in_cluster': 0, '_save_summary_steps': 100}
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after 10 secs (eval_spec.throttle_secs) or training is finished.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow

In [43]:
from google.datalab.ml import TensorBoard

In [45]:
TensorBoard().start(log_dir)

OSError: [Errno 2] No such file or directory: 'taxi-trained'

In [39]:
# TensorBoard().stop(8333)
TensorBoard().list()

Unnamed: 0,logdir,pid,port
0,taxi-trained,17003,38211


In [42]:
%bash
ls

README.md
taxi-trained
train_and_evaluate.ipynb
training-data-analyst
