In [1]:
import tensorflow as tf
import shutil
print(tf.__version__)

1.15.0


# Data CSV

In [2]:
# Copy every CSV under the taxifare/small bucket path into the current directory
!gsutil cp gs://cloud-training-demos/taxifare/small/*.csv .
# List the local CSV files to confirm the copy succeeded
!ls -l *.csv

Copying gs://cloud-training-demos/taxifare/small/taxi-test.csv...
Copying gs://cloud-training-demos/taxifare/small/taxi-train.csv...              
Copying gs://cloud-training-demos/taxifare/small/taxi-valid.csv...              
/ [3 files][ 10.9 MiB/ 10.9 MiB]                                                
Operation completed over 3 objects/10.9 MiB.                                     
-rw-r--r-- 1 jupyter jupyter 1799474 Dec  5 14:09 taxi-test.csv
-rw-r--r-- 1 jupyter jupyter 7986353 Dec  5 14:09 taxi-train.csv
-rw-r--r-- 1 jupyter jupyter 1673742 Dec  5 14:09 taxi-valid.csv


# Training Process

In [3]:
# CSV schema: column names in file order (the label, fare_amount, comes first)
CSV_COLUMN_NAMES = [
    "fare_amount",
    "dayofweek",
    "hourofday",
    "pickuplon",
    "pickuplat",
    "dropofflon",
    "dropofflat",
]
# Per-column fallback values handed to the CSV decoder as record_defaults,
# listed in the same order as CSV_COLUMN_NAMES
CSV_DEFAULTS = [
    [0.0],    # fare_amount
    [1],      # dayofweek
    [0],      # hourofday
    [-74.0],  # pickuplon
    [40.0],   # pickuplat
    [-74.0],  # dropofflon
    [40.7],   # dropofflat
]

# Functions
def parse_row(row):
    """Decode one CSV record into a feature dict and its label.

    Args:
        row: one line of CSV text, as yielded by the dataset built in
            ``read_dataset``.

    Returns:
        A ``(features, label)`` pair. ``features`` maps every name in
        ``CSV_COLUMN_NAMES`` except ``"fare_amount"`` to its decoded tensor;
        ``label`` is the decoded ``"fare_amount"`` tensor.
    """
    decoded = tf.decode_csv(records = row, record_defaults = CSV_DEFAULTS)
    by_name = dict(zip(CSV_COLUMN_NAMES, decoded))
    # Pull the label out; everything left over is a model input
    label = by_name.pop("fare_amount")
    features = by_name
    return features, label

def read_dataset(csv_path):
    """Build a dataset of ``(features, label)`` pairs from a CSV file.

    Args:
        csv_path: value passed as ``filenames`` to ``tf.data.TextLineDataset``.

    Returns:
        The line dataset with its first line skipped and every remaining
        line run through ``parse_row``.
    """
    lines = tf.data.TextLineDataset(filenames = csv_path)
    body_lines = lines.skip(count = 1)  # first line is the header
    return body_lines.map(map_func = parse_row)

def train_input_fn(csv_path, batch_size = 128):
    """Input pipeline for training: shuffled, repeated, batched.

    Args:
        csv_path: CSV file forwarded to ``read_dataset``.
        batch_size: number of examples per batch (default 128).

    Returns:
        The dataset from ``read_dataset`` after shuffle, repeat and batch.
    """
    examples = read_dataset(csv_path)
    shuffled = examples.shuffle(buffer_size = 1000)
    repeated = shuffled.repeat(count = None)  # no fixed epoch count
    return repeated.batch(batch_size = batch_size)

def eval_input_fn(csv_path, batch_size = 128):
    """Input pipeline for evaluation: batched only, no shuffle or repeat.

    Args:
        csv_path: CSV file forwarded to ``read_dataset``.
        batch_size: number of examples per batch (default 128).

    Returns:
        The dataset from ``read_dataset``, batched.
    """
    return read_dataset(csv_path).batch(batch_size = batch_size)

# Feature columns: one numeric column for each CSV column after the first
FEATURE_NAMES = CSV_COLUMN_NAMES[1:]
feature_cols = [
    tf.feature_column.numeric_column(key = column_name)
    for column_name in FEATURE_NAMES
]
print(feature_cols)

# Serving Function
def serving_input_receiver_fn():
    """Describe the inputs the exported model accepts at serving time.

    Creates one placeholder of shape ``[None]`` per input name and uses the
    same dict as both the received tensors and the model features.

    Returns:
        A ``tf.estimator.export.ServingInputReceiver`` built from those
        placeholders.
    """
    # (input name, dtype), in the order the placeholders are created
    input_specs = [
        ('dayofweek', tf.int32),
        ('hourofday', tf.int32),
        ('pickuplon', tf.float32),
        ('pickuplat', tf.float32),
        ('dropofflat', tf.float32),
        ('dropofflon', tf.float32),
    ]

    receiver_tensors = {}
    for input_name, input_dtype in input_specs:
        receiver_tensors[input_name] = tf.placeholder(dtype = input_dtype, shape = [None])

    # No transformation between what is received and what the model consumes
    return tf.estimator.export.ServingInputReceiver(
        features = receiver_tensors,
        receiver_tensors = receiver_tensors,
    )

# Model Define
OUTDIR = "taxi_trained"

# Start from an empty model directory. Otherwise re-running this cell resumes
# from the checkpoints of the previous run, and once the global step has
# already reached the TrainSpec's max_steps no further training happens.
shutil.rmtree(path = OUTDIR, ignore_errors = True)
# Drop cached summary writers that still reference the deleted event files,
# so TensorBoard summaries of a re-run land in the fresh directory.
tf.summary.FileWriterCache.clear()

# Run configuration: where checkpoints go, a fixed graph-level seed, and
# a checkpoint every 100 training steps
config = tf.estimator.RunConfig(
    model_dir = OUTDIR,
    tf_random_seed = 1,
    save_checkpoints_steps = 100
)
# Regressor with two hidden layers of 10 units each over the numeric feature columns
model = tf.estimator.DNNRegressor(
  hidden_units = [10,10], 
  feature_columns = feature_cols, 
  config = config
)

# Extra evaluation metric: RMSE
def my_rmse(labels, predictions):
    """Metric function that reports root-mean-squared error.

    Args:
        labels: tensor of target values.
        predictions: dict of estimator outputs; only the ``"predictions"``
            entry is read.

    Returns:
        A dict mapping ``"rmse"`` to the result of
        ``tf.metrics.root_mean_squared_error``.
    """
    # Remove the last axis of the predictions before comparing with labels
    # (presumably predictions are (batch, 1) and labels (batch,) -- TODO confirm)
    pred_values = tf.squeeze(input = predictions["predictions"], axis = -1)
    return {
        "rmse": tf.metrics.root_mean_squared_error(labels = labels, predictions = pred_values)
    }

# Use the core API: tf.contrib.estimator.add_metrics is a deprecated contrib alias
model = tf.estimator.add_metrics(estimator = model, metric_fn = my_rmse)

# TrainSpec / Exporter / EvalSpec

# Exporter that uses the serving input function defined above
exporter = tf.estimator.FinalExporter(
    name = "exporter",
    serving_input_receiver_fn = serving_input_receiver_fn,
)

# Training: read the training CSV, stop at global step 500
train_spec = tf.estimator.TrainSpec(
    max_steps = 500,
    input_fn = lambda: train_input_fn("./taxi-train.csv"),
)

# Evaluation: read the validation CSV with no step limit, and attach the exporter
eval_spec = tf.estimator.EvalSpec(
    exporters = exporter,
    input_fn = lambda: eval_input_fn("./taxi-valid.csv"),
    steps = None,
    start_delay_secs = 1,
    throttle_secs = 1,
)

# Run the training / evaluation loop described by the two specs
tf.estimator.train_and_evaluate(model, train_spec, eval_spec)

[NumericColumn(key='dayofweek', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='hourofday', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='pickuplon', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='pickuplat', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='dropofflon', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='dropofflat', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]
INFO:tensorflow:Using config: {'_experimental_distribute': None, '_is_chief': True, '_train_distribute': None, '_experimental_max_worker_delay_secs': None, '_save_checkpoints_secs': None, '_evaluation_master': '', '_device_fn': None, '_model_dir': 'taxi_trained', '_num_worker_replicas': 1, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_opt

(None, None)

In [4]:
!ls -R taxi_trained/export

taxi_trained/export:
exporter

taxi_trained/export/exporter:
1575554041

taxi_trained/export/exporter/1575554041:
saved_model.pb	variables

taxi_trained/export/exporter/1575554041/variables:
variables.data-00000-of-00002  variables.data-00001-of-00002  variables.index


# TensorBoard

In [12]:
# Display the directory name that is passed to TensorBoard as --logdir below
OUTDIR

'taxi_trained'

In [16]:
# Start TensorBoard in the background, reading from OUTDIR and listening on port 6006
tensorboard_cmd = "tensorboard --logdir {} --host 0.0.0.0 --port 6006 &".format(OUTDIR)
get_ipython().system_raw(tensorboard_cmd)

# Start the bundled ngrok binary in the background, pointed at the same port
ngrok_cmd = "./assets/ngrok http 6006 &"
get_ipython().system_raw(ngrok_cmd)

In [17]:
# Query the tunnels endpoint on localhost:4040 and print the first tunnel's public URL
!curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

http://37195a34.ngrok.io


In [16]:
# Check which version the shell's `python` command resolves to
# (the tunnel-lookup cell calls `python3` explicitly)
!python --version

Python 2.7.13


In [None]:
curl -s http://localhost:4040/api/tunnels | python3 -c     "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"