In [None]:
import tensorflow as tf
import shutil
print(tf.__version__)

In [None]:
# Column order of the taxi-fare CSV files. "fare_amount" is the label;
# the remaining columns are the model inputs.
CSV_COLUMN_NAMES = [
    "fare_amount",
    "dayofweek",
    "hourofday",
    "pickuplon",
    "pickuplat",
    "dropofflon",
    "dropofflat",
]

# One single-element default per column, in the same order as CSV_COLUMN_NAMES.
# parse_row passes this list to tf.decode_csv as `record_defaults`.
CSV_DEFAULTS = [
    [0.0],    # fare_amount
    [1],      # dayofweek
    [0],      # hourofday
    [-74.0],  # pickuplon
    [40.0],   # pickuplat
    [-74.0],  # dropofflon
    [40.7],   # dropofflat
]

In [None]:
def parse_row(row):
    """Decode one CSV text record into a (features, label) pair.

    Args:
        row: the CSV record handed to `tf.decode_csv`.

    Returns:
        A tuple `(features, label)`. `features` is a dict keyed by column
        name holding every decoded column except "fare_amount"; `label` is
        the decoded "fare_amount" column.
    """
    decoded = tf.decode_csv(records=row, record_defaults=CSV_DEFAULTS)
    # Pair each decoded column with its name, preserving CSV column order.
    by_name = {name: column for name, column in zip(CSV_COLUMN_NAMES, decoded)}
    fare = by_name.pop("fare_amount")
    return by_name, fare

def read_dataset(csv_path):
    """Build a dataset of parsed (features, label) pairs from a CSV file.

    Args:
        csv_path: passed to `tf.data.TextLineDataset` as `filenames`.

    Returns:
        The line dataset with its first element skipped and every remaining
        line mapped through `parse_row`.
    """
    lines = tf.data.TextLineDataset(filenames=csv_path)
    # Drop the header line.
    # NOTE(review): the skip is applied to the dataset as a whole, so presumably
    # only the first header is removed if csv_path names several files - confirm
    # before passing a list of files.
    records = lines.skip(count=1)
    return records.map(map_func=parse_row)

def train_input_fn(csv_path, batch_size = 128):
    """Input pipeline for training: shuffled, endlessly repeated batches.

    Args:
        csv_path: CSV file forwarded to `read_dataset`.
        batch_size: number of examples per batch (default 128).

    Returns:
        The parsed dataset after shuffle -> repeat -> batch.
    """
    examples = read_dataset(csv_path)
    examples = examples.shuffle(buffer_size=1000)
    # count=None repeats indefinitely; the caller bounds training by step count.
    examples = examples.repeat(count=None)
    return examples.batch(batch_size=batch_size)

def eval_input_fn(csv_path, batch_size = 128):
    """Input pipeline for evaluation: a single, unshuffled pass in batches.

    Args:
        csv_path: CSV file forwarded to `read_dataset`.
        batch_size: number of examples per batch (default 128).

    Returns:
        The parsed dataset, batched; no shuffling and no repetition.
    """
    return read_dataset(csv_path).batch(batch_size=batch_size)

In [None]:
# Model inputs: every CSV column except the first one.
FEATURE_NAMES = CSV_COLUMN_NAMES[1:]

# One numeric feature column per input, keyed by the column name.
feature_cols = [
    tf.feature_column.numeric_column(key=column_name)
    for column_name in FEATURE_NAMES
]
feature_cols

In [None]:
def serving_input_receiver_fn():
    """Describe the inputs the exported model accepts at serving time.

    One placeholder with shape [None] is created per input: int32 for
    'dayofweek' and 'hourofday', float32 for the four coordinate inputs.

    Returns:
        A `tf.estimator.export.ServingInputReceiver` whose `features` and
        `receiver_tensors` are the same dict of placeholders, i.e. the
        received tensors are handed to the model without any transformation.
    """
    # (name, dtype) pairs, listed in the order the placeholders are created.
    input_spec = [
        ('dayofweek', tf.int32),
        ('hourofday', tf.int32),
        ('pickuplon', tf.float32),
        ('pickuplat', tf.float32),
        ('dropofflon', tf.float32),
        ('dropofflat', tf.float32),
    ]
    placeholders = {}
    for input_name, input_dtype in input_spec:
        placeholders[input_name] = tf.placeholder(shape=[None], dtype=input_dtype)
    return tf.estimator.export.ServingInputReceiver(
        features=placeholders,
        receiver_tensors=placeholders,
    )

In [None]:
# Output directory for the run (used as the estimator's model_dir).
OUTDIR = "taxi_trained"

config = tf.estimator.RunConfig(
    tf_random_seed=1,            # fixed seed
    save_checkpoints_steps=100,  # checkpoint frequency, in steps
    save_summary_steps=10,       # summary frequency, in steps
    model_dir=OUTDIR,
)

In [None]:
# Regressor with two hidden layers of 10 units each, fed by the numeric
# feature columns and configured by the RunConfig defined earlier.
model = tf.estimator.DNNRegressor(
    hidden_units=[10, 10],
    feature_columns=feature_cols,
    config=config,
)

In [None]:
def my_rmse(labels, predictions):
    """Extra evaluation metric: root mean squared error of the predictions.

    Args:
        labels: label tensor supplied by the estimator.
        predictions: dict of prediction tensors; only the "predictions"
            entry is used.

    Returns:
        A dict mapping "rmse" to the result of
        `tf.metrics.root_mean_squared_error`.
    """
    # Remove only the trailing dimension. A bare tf.squeeze drops *every*
    # size-1 dimension, so a batch holding a single example would collapse to
    # a scalar and no longer line up with `labels`.
    # Assumes predictions["predictions"] is (batch, 1) - TODO confirm if the
    # estimator is ever built with a label dimension other than 1.
    pred_values = tf.squeeze(input=predictions["predictions"], axis=-1)
    return {
        "rmse": tf.metrics.root_mean_squared_error(labels=labels, predictions=pred_values)
    }

In [None]:
# Wrap the estimator so that my_rmse is computed in addition to its own metrics.
# NOTE(review): this rebinds `model`, so re-running the cell wraps the already
# wrapped estimator again - presumably harmless only on a fresh kernel; confirm.
model = tf.contrib.estimator.add_metrics(
    estimator=model,
    metric_fn=my_rmse,
)

In [None]:
# Training specification: read ./taxi-train.csv and stop at 500 steps.
# The lambda defers building the input pipeline until it is called.
train_spec = tf.estimator.TrainSpec(
    max_steps=500,
    input_fn=lambda: train_input_fn('./taxi-train.csv'),
)

In [None]:
# Exporter named 'exporter' that uses serving_input_receiver_fn to define the
# inputs of the exported model.
exporter = tf.estimator.FinalExporter(
    serving_input_receiver_fn=serving_input_receiver_fn,
    name='exporter',
)

In [None]:
# Evaluation specification: read ./taxi-valid.csv with no step limit and run
# the exporter defined earlier.
eval_spec = tf.estimator.EvalSpec(
    input_fn=lambda: eval_input_fn('./taxi-valid.csv'),
    exporters=exporter,
    steps=None,
    throttle_secs=1,
    start_delay_secs=1,
)

In [None]:
# Log at INFO level.
tf.logging.set_verbosity(tf.logging.INFO)

# Start every run from scratch: remove any previous output directory
# (ignore_errors=True makes this a no-op when it does not exist yet).
shutil.rmtree(path=OUTDIR, ignore_errors=True)
# ensure filewriter cache is clear for TensorBoard events file
tf.summary.FileWriterCache.clear()

tf.estimator.train_and_evaluate(
    estimator=model,
    train_spec=train_spec,
    eval_spec=eval_spec,
)

## Inspect Export Folder

Now in the output directory, in addition to the checkpoint files, you'll see a subfolder called 'export'. This contains one or more models in the SavedModel format, which is compatible with TF Serving. In the next notebook we will deploy the SavedModel behind a production-grade REST API.

In [None]:
# List the exported models. {OUTDIR} is interpolated by IPython, so this cell
# keeps working if the output directory constant is changed.
!ls -R {OUTDIR}/export

## Monitoring with TensorBoard 

[TensorBoard](https://www.tensorflow.org/guide/summaries_and_tensorboard) is a web UI that allows us to visualize various aspects of our model, including the training and evaluation loss curves. Although you won't see the loss curves yet, it is best to launch TensorBoard *before* you start training so that you may see them update during a long running training process.

To get TensorBoard to work within a Deep Learning VM or Colab, we need to create a tunnel connection to your local machine. To do this we'll use `ngrok` to open a tunnel to port 6006 on our virtual machine, which is the port TensorBoard listens on. We can then view the TensorBoard results by following the link provided by ngrok after executing the following cells.

In [None]:
# Start TensorBoard in the background, reading from OUTDIR and listening on
# all interfaces at port 6006.
tensorboard_cmd = "tensorboard --logdir {} --host 0.0.0.0 --port 6006 &".format(OUTDIR)
get_ipython().system_raw(tensorboard_cmd)

# Start ngrok in the background, tunnelling HTTP traffic to port 6006.
get_ipython().system_raw("../assets/ngrok http 6006 &")

In [27]:
# Query the local ngrok API (port 4040) for its tunnels and print the
# 'public_url' of the first tunnel in the JSON response.
!curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

http://c29b6cd1.ngrok.io


### Tensorboard cleanup

To close the tunnel connection to TensorBoard, we can find the PIDs for ngrok and TensorBoard and stop them.

In [None]:
# this will kill the processes for Tensorboard
# The '[t]ensorboard' pattern still matches "tensorboard" but not the text of
# this pipeline itself, so grep (and the shell running it) are not selected.
!ps aux | grep '[t]ensorboard' | awk '{print $2}' | xargs kill

In [None]:
# this will kill the processes for ngrok
# The '[n]grok' pattern still matches "ngrok" but not the text of this
# pipeline itself, so grep (and the shell running it) are not selected.
!ps aux | grep '[n]grok' | awk '{print $2}' | xargs kill

## Challenge exercise

Modify your solution to the challenge exercise in d_csv_input.ipynb appropriately.

Copyright 2019 Google Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License