In [1]:
import tensorflow as tf
import shutil
print(tf.__version__)

1.15.0


In [2]:
CSV_COLUMN_NAMES = ["fare_amount","dayofweek","hourofday","pickuplon","pickuplat","dropofflon","dropofflat"]
CSV_DEFAULTS = [[0.0],[1],[0],[-74.0], [40.0], [-74.0], [40.7]]
FEATURE_NAMES = CSV_COLUMN_NAMES[1:]
LABEL = CSV_COLUMN_NAMES[0]
feature_cols = [tf.feature_column.numeric_column(key=feature) for feature in FEATURE_NAMES]

In [3]:
def parse_row(row):
    fields = tf.decode_csv(records=row, record_defaults=CSV_DEFAULTS)
    features = {'dayofweek' : fields[1],'hourofday' : fields[2],'pickuplon' : fields[3],'pickuplat' : fields[4],'dropofflon' : fields[5],'dropofflat' : fields[6]}
    labels = fields[0]
    return features, labels

In [4]:
def read_dataset(csv_path):  
    dataset = tf.data.TextLineDataset(csv_path)
    dataset = dataset.skip(1).map(parse_row)
    return dataset

In [5]:
def train_input_fn(csv_path, batch_size = 128):
    dataset = read_dataset(csv_path)
    dataset = dataset.shuffle(buffer_size=1000,seed=1).repeat(count=None).batch(batch_size)
    return dataset

In [6]:
def eval_input_fn(csv_path, batch_size = 128):
    dataset = read_dataset(csv_path)
    dataset = dataset.batch(batch_size)
    return dataset

In [7]:
OUTDIR = "taxi_trained"

model = tf.estimator.DNNRegressor(
           feature_columns=feature_cols,
           hidden_units=[10,10],
           model_dir=OUTDIR,
           config=tf.estimator.RunConfig(tf_random_seed=1))

INFO:tensorflow:Using config: {'_experimental_distribute': None, '_num_ps_replicas': 0, '_master': '', '_model_dir': 'taxi_trained', '_num_worker_replicas': 1, '_keep_checkpoint_max': 5, '_device_fn': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f716f392240>, '_log_step_count_steps': 100, '_save_checkpoints_secs': 600, '_service': None, '_eval_distribute': None, '_protocol': None, '_keep_checkpoint_every_n_hours': 10000, '_task_id': 0, '_train_distribute': None, '_task_type': 'worker', '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_global_id_in_cluster': 0, '_is_chief': True, '_save_summary_steps': 100, '_tf_random_seed': 1, '_save_checkpoints_steps': None, '_evaluation_master': '', '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200}


In [8]:
model.train(input_fn = lambda: train_input_fn(csv_path='./taxi-train.csv', batch_size = 128),
    steps = 500
)

Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
INFO:tensorflow:Calling model_fn.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use `tf.cast` instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from taxi_trained/model.ckpt-500
Instructions for updating:
Use standard file utilities to get mtimes.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 500 into taxi_trained/model.ckpt.
INFO:tensorflow:loss = 14267.682, step = 500
INFO:tenso

<tensorflow_estimator.python.estimator.canned.dnn.DNNRegressor at 0x7f716f392128>

In [9]:
metrics = model.evaluate(input_fn = lambda: eval_input_fn(csv_path='./taxi-valid.csv'))
print("RMSE on dataset = {}".format(metrics['average_loss']**0.5))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-11-22T14:04:43Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from taxi_trained/model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-11-22-14:04:47
INFO:tensorflow:Saving dict for global step 1000: average_loss = 88.6005, global_step = 1000, label/mean = 11.229713, loss = 11322.361, prediction/mean = 11.484488
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 1000: taxi_trained/model.ckpt-1000
RMSE on dataset = 9.412783967252205
