# Custom Estimator

In [27]:
import shutil
import tensorflow as tf
# Record the TensorFlow version in the cell output; the code below relies on
# TF 1.x APIs (e.g. tf.decode_csv, tf.layers), so the version matters when re-running.
print(tf.__version__)

1.8.0


#### Generate Toy Dataset
The target is the sum of the two features: Y = X1 + X2.

In [28]:
%%writefile data_train.csv
X1,X2,Y
2,3,5
1,3,4
3,-1,2
4,0,4
-2,2,0
2,2,4

Overwriting data_train.csv


In [29]:
%%writefile data_eval.csv
X1,X2,Y
3,2,5
3,1,4
-2,-1,-3

Overwriting data_eval.csv


#### Input Fn

In [30]:
def csv_input_fn(csv_path, batch_size, mode):
    """Build a batched tf.data pipeline over a three-column numeric CSV file.

    Args:
      csv_path: path of the CSV file; its first line is treated as a header
        and skipped.
      batch_size: number of examples per emitted batch. In TRAIN mode the
        shuffle buffer is sized to ten times this value.
      mode: a tf.estimator.ModeKeys value. Only TRAIN changes behaviour:
        the data is shuffled and repeated indefinitely.

    Returns:
      A tf.data.Dataset of (features, label) batches, where features stacks
      every column except the last and label is the last column with a
      trailing axis of size 1.
    """
    # One float default per column: this fixes both the column count and dtype.
    column_defaults = [[0.0], [0.0], [0.0]]

    def split_features_and_label(row):
        columns = tf.decode_csv(row, record_defaults=column_defaults, field_delim=',')

        # The last column is the label; the trailing axis keeps its shape
        # consistent with the model's predictions.
        target = tf.expand_dims(columns[-1], -1)

        # All remaining columns are combined into a single feature tensor.
        inputs = tf.stack(columns[:-1], 0)

        return inputs, target

    # Read the file line by line (header dropped) and parse every line.
    examples = tf.data.TextLineDataset(csv_path).skip(1).map(split_features_and_label)

    # Training shuffles and loops forever; other modes make a single ordered pass.
    if mode == tf.estimator.ModeKeys.TRAIN:
        examples = examples.shuffle(batch_size*10).repeat()

    return examples.batch(batch_size)

#### Custom Estimator

In [31]:
# Create the custom estimator
def custom_estimator(features, labels, mode, params):
  """Model function for an Estimator that fits one linear unit with MSE loss.

  Args:
    features: batch of input features fed to a single dense unit.
    labels: batch of targets, shaped like the predictions; None in PREDICT mode.
    mode: a tf.estimator.ModeKeys value selecting which ops are built.
    params: hyper-parameter dict supplied by the Estimator (unused here).

  Returns:
    A tf.estimator.EstimatorSpec. It always carries the predictions dict
    ('predictions' and the echoed 'features'). Loss and the "rmse" metric are
    added in TRAIN and EVAL modes; the train op is added in TRAIN mode only.
  """
  print('custom_estimator: features: {}'.format(features))
  print('custom_estimator: labels:{}'.format(labels))

  # 1. Model: a single dense unit with no activation, i.e. a linear regressor.
  predictions = tf.layers.dense(features,1,activation=None)
  print('custom_estimator: predictions: {}'.format(predictions))

  # 2. Loss function, training/eval ops
  loss = None
  train_op = None
  eval_metric_ops = None

  if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
    loss = tf.losses.mean_squared_error(labels, predictions)
    eval_metric_ops = {
      "rmse": tf.metrics.root_mean_squared_error(labels, predictions)
    }

  if mode == tf.estimator.ModeKeys.TRAIN:
    # Build the optimizer only when training; evaluation never runs a train op.
    # The optimizer instance carries the learning rate that is actually applied.
    optimizer = tf.train.FtrlOptimizer(learning_rate=0.1)
    train_op = optimizer.minimize(
      loss,
      global_step = tf.train.get_global_step())

  # 3. Echo the input features next to the predictions so they can be compared.
  predictions_dict = {'predictions':predictions,'features':features}

  return tf.estimator.EstimatorSpec(
    mode = mode,
    predictions = predictions_dict,
    loss = loss,
    train_op = train_op,
    eval_metric_ops = eval_metric_ops,
  )

In [32]:
# Create custom estimator's train and evaluate function
def train_and_evaluate(output_dir,args):
  """Train the custom estimator and evaluate it on the held-out CSV.

  Args:
    output_dir: directory passed to the Estimator as model_dir.
    args: dict with keys 'train_path', 'eval_path', 'batch_size' and
      'train_steps'.
  """
  def train_input_fn():
    # Training pipeline: csv_input_fn shuffles and repeats in TRAIN mode.
    return csv_input_fn(
      args['train_path'],
      args['batch_size'],
      tf.estimator.ModeKeys.TRAIN)

  def eval_input_fn():
    # Evaluation pipeline: one ordered pass over the eval file.
    return csv_input_fn(
      args['eval_path'],
      args['batch_size'],
      tf.estimator.ModeKeys.EVAL)

  model = tf.estimator.Estimator(model_fn=custom_estimator,
                                 model_dir=output_dir)

  training = tf.estimator.TrainSpec(input_fn=train_input_fn,
                                    max_steps=args['train_steps'])
  # steps=None: evaluate until the eval input is exhausted.
  evaluation = tf.estimator.EvalSpec(input_fn=eval_input_fn,
                                     steps=None)

  tf.estimator.train_and_evaluate(model, training, evaluation)

In [33]:
# Directory that receives checkpoints and event files.
OUTDIR = 'trained'

# Run configuration consumed by train_and_evaluate and by later cells.
args = dict(
  train_path='data_train.csv',
  eval_path='data_eval.csv',
  batch_size=4,
  train_steps=100,
)

# Start fresh each time: remove any earlier run (a missing directory is fine).
shutil.rmtree(OUTDIR, ignore_errors=True)
train_and_evaluate(OUTDIR, args)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_train_distribute': None, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f892c10cad0>, '_evaluation_master': '', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': 'trained', '_global_id_in_cluster': 0, '_save_summary_steps': 100}
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after 600 secs (eval_spec.throttle_secs) or training is finished.
INFO:tensorflow:Calling model_fn.
custom_estimator: features: Tensor("IteratorGetNext:0", shape=(?, 2), dtype=float32)
custom_estimator:

#### Inspect Weights

The tensors named `dense/kernel` and `dense/bias` hold the model's weights and bias. Since Y = X1 + X2, a perfect fit would have both kernel weights equal to 1 and a bias of 0.

In [34]:
from tensorflow.python.tools import inspect_checkpoint

# Look up the newest checkpoint in the directory training wrote to (OUTDIR)
# instead of hard-coding a path and step number that can drift from the run above.
checkpoint_path = tf.train.latest_checkpoint(OUTDIR)
if checkpoint_path is None:
  raise ValueError('No checkpoint found in {!r}; run the training cell first.'.format(OUTDIR))

# dense/kernel holds the weights and dense/bias the bias of the single dense layer.
for tensor_name in ('dense/kernel', 'dense/bias'):
  inspect_checkpoint.print_tensors_in_checkpoint_file(checkpoint_path, tensor_name=tensor_name, all_tensors=False)

tensor_name:  dense/kernel
[[0.37332565]
 [1.1088971 ]]
tensor_name:  dense/bias
[0.94920313]


#### Get predictions

In [35]:
# Rebuild the estimator on the same model_dir so predict() can restore the
# trained parameters from the checkpoint saved there.
estimator = tf.estimator.Estimator(model_fn=custom_estimator,
                                   model_dir=OUTDIR)

# The input function only special-cases TRAIN, so PREDICT yields the same
# single, unshuffled pass over the eval file; labels are not passed to the model.
predictions = estimator.predict(
  input_fn = lambda:csv_input_fn(
                  args['eval_path'],
                  args['batch_size'],
                  tf.estimator.ModeKeys.PREDICT),
  yield_single_examples=False  # yield a whole batch per item, not one row
  )

# next(...) works on both Python 2 and 3; generator.next() exists only on Python 2.
print(next(predictions))

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_train_distribute': None, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f8949ceaf10>, '_evaluation_master': '', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': 'trained', '_global_id_in_cluster': 0, '_save_summary_steps': 100}
INFO:tensorflow:Calling model_fn.
custom_estimator: features: Tensor("IteratorGetNext:0", shape=(?, 2), dtype=float32)
custom_estimator: labels:None
custom_estimator: predictions: Tensor("dense/BiasAdd:0", shape=(?, 1), dtype=float32)
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters f