In [1]:
import tensorflow as tf
import numpy as np
from tensorflow import data
import shutil
import math
from datetime import datetime
from tensorflow.python.feature_column import feature_column

print(tf.__version__)

1.12.0


## Steps to use the TF Estimator (Train_And_Evaluate) APIs
1. Define dataset **metadata**
2. Define **data input function** to read the data from .tfrecord files + **feature processing**
3. Create TF **feature columns** based on metadata + **extended feature columns**
4. Define an **estimator** (LinearCombinedDNNRegressor) with the required **feature columns (wide/deep) & parameters**
7. Run an experiment using the estimator **train_and_evaluate** function to train, evaluate, and export the model
8. **Evaluate** the model using test data
9. Perform **predictions** & **serving** the exported model

In [2]:
MODEL_NAME = 'reg-model-06'

TRAIN_DATA_FILES_PATTERN = 'data/train-*.tfrecords'
VALID_DATA_FILES_PATTERN = 'data/valid-*.tfrecords'
TEST_DATA_FILES_PATTERN = 'data/test-*.tfrecords'

RESUME_TRAINING = False
PROCESS_FEATURES = True
EXTEND_FEATURE_COLUMNS = True
MULTI_THREADING = True

## 1. Define Dataset Metadata
* tf.example feature names and defaults
* Numeric and categorical feature names
* Target feature name
* Unused features

In [3]:
HEADER = ['key','x','y','alpha','beta','target']
HEADER_DEFAULTS = [[0], [0.0], [0.0], ['NA'], ['NA'], [0.0]]

NUMERIC_FEATURE_NAMES = ['x', 'y']  

CATEGORICAL_FEATURE_NAMES_WITH_VOCABULARY = {'alpha':['ax01', 'ax02'], 'beta':['bx01', 'bx02']}
CATEGORICAL_FEATURE_NAMES = list(CATEGORICAL_FEATURE_NAMES_WITH_VOCABULARY.keys())

FEATURE_NAMES = NUMERIC_FEATURE_NAMES + CATEGORICAL_FEATURE_NAMES

TARGET_NAME = 'target'

UNUSED_FEATURE_NAMES = list(set(HEADER) - set(FEATURE_NAMES) - {TARGET_NAME})

print("Header: {}".format(HEADER))
print("Numeric Features: {}".format(NUMERIC_FEATURE_NAMES))
print("Categorical Features: {}".format(CATEGORICAL_FEATURE_NAMES))
print("Target: {}".format(TARGET_NAME))
print("Unused Features: {}".format(UNUSED_FEATURE_NAMES))

Header: ['key', 'x', 'y', 'alpha', 'beta', 'target']
Numeric Features: ['x', 'y']
Categorical Features: ['alpha', 'beta']
Target: target
Unused Features: ['key']


## 2. Define Data Input Function
* Input .tfrecords files name pattern
* Use TF Dataset APIs to read and process the data
* Parse tf.exmaples  to feature tensors
* Apply feature processing
* Return (features, target) tensors

### a. Parsing and preprocessing logic

In [4]:
def parse_tf_example(example_proto):

    feature_spec = {}

    for feature_name in NUMERIC_FEATURE_NAMES:
        feature_spec[feature_name] = tf.FixedLenFeature(shape=(1), dtype=tf.float32)
    
    for feature_name in CATEGORICAL_FEATURE_NAMES:
        feature_spec[feature_name] = tf.FixedLenFeature(shape=(1), dtype=tf.string)
    
    feature_spec[TARGET_NAME] = tf.FixedLenFeature(shape=(1), dtype=tf.float32)

    parsed_features = tf.parse_example(serialized=example_proto, features=feature_spec)
    
    target = parsed_features.pop(TARGET_NAME)
    
    return parsed_features, target


def process_features(features):
    
    # example of clipping
    features['x'] = tf.clip_by_value(features['x'], clip_value_min=-3, clip_value_max=3)
    features['y'] = tf.clip_by_value(features['y'], clip_value_min=-3, clip_value_max=3)
    
    # example of polynomial expansion
    features["x_2"] = tf.square(features['x'])
    features["y_2"] = tf.square(features['y'])
    
    # example of nonlinearity
    features["xy"] = features['x'] * features['y']
    
    # example of custom logic
    features['dist_xy'] =  tf.sqrt(tf.squared_difference(features['x'],features['y']))
    features["sin_x"] = tf.sin(features['x'])
    features["cos_y"] = tf.sin(features['y'])
    
    
    
    return features

### b. Data pipeline input function

In [5]:
def tfrecods_input_fn(files_name_pattern, mode=tf.estimator.ModeKeys.EVAL, 
                 num_epochs=None, 
                 batch_size=200):
    
    shuffle = True if mode == tf.estimator.ModeKeys.TRAIN else False
    
    print("")
    print("* data input_fn:")
    print("================")
    print("Input file(s): {}".format(files_name_pattern))
    print("Batch size: {}".format(batch_size))
    print("Epoch Count: {}".format(num_epochs))
    print("Mode: {}".format(mode))
    print("Shuffle: {}".format(shuffle))
    print("================")
    print("")

    file_names = tf.matching_files(files_name_pattern)
    dataset = data.TFRecordDataset(filenames=file_names)

    if shuffle:
        dataset = dataset.shuffle(buffer_size=2 * batch_size + 1)
    
    dataset = dataset.batch(batch_size)
    dataset = dataset.map(lambda tf_example: parse_tf_example(tf_example))
    
    if PROCESS_FEATURES:
        dataset = dataset.map(lambda features, target: (process_features(features), target))
        
    dataset = dataset.repeat(num_epochs)
    iterator = dataset.make_one_shot_iterator()
    
    features, target = iterator.get_next()
    return features, target

In [6]:
features, target = tfrecods_input_fn(files_name_pattern="")
print("Feature read from TFRecords: {}".format(list(features.keys())))
print("Target read from TFRecords: {}".format(target))


* data input_fn:
Input file(s): 
Batch size: 200
Epoch Count: None
Mode: eval
Shuffle: False

Feature read from TFRecords: ['alpha', 'beta', 'x', 'y', 'x_2', 'y_2', 'xy', 'dist_xy', 'sin_x', 'cos_y']
Target read from TFRecords: Tensor("IteratorGetNext:10", shape=(?, 1), dtype=float32)


## 3. Define Feature Columns
The input numeric columns are assumed to be normalized (or have the same scale). Otherwise, a normlizer_fn, along with the normlisation params (mean, stdv or min, max) should be passed to tf.feature_column.numeric_column() constructor

In [7]:
def extend_feature_columns(feature_columns, hparams):
    
    num_buckets = hparams.num_buckets
    embedding_size = hparams.embedding_size

    buckets = np.linspace(-3, 3, num_buckets).tolist()

    alpha_X_beta = tf.feature_column.crossed_column(
            [feature_columns['alpha'], feature_columns['beta']], 4)

    x_bucketized = tf.feature_column.bucketized_column(
            feature_columns['x'], boundaries=buckets)

    y_bucketized = tf.feature_column.bucketized_column(
            feature_columns['y'], boundaries=buckets)

    x_bucketized_X_y_bucketized = tf.feature_column.crossed_column(
           [x_bucketized, y_bucketized], num_buckets**2)

    x_bucketized_X_y_bucketized_embedded = tf.feature_column.embedding_column(
            x_bucketized_X_y_bucketized, dimension=embedding_size)


    feature_columns['alpha_X_beta'] = alpha_X_beta
    feature_columns['x_bucketized_X_y_bucketized'] = x_bucketized_X_y_bucketized
    feature_columns['x_bucketized_X_y_bucketized_embedded'] = x_bucketized_X_y_bucketized_embedded
    
    return feature_columns
    

def get_feature_columns(hparams):
    
    CONSTRUCTED_NUMERIC_FEATURES_NAMES = ['x_2', 'y_2', 'xy', 'dist_xy', 'sin_x', 'cos_y']
    all_numeric_feature_names = NUMERIC_FEATURE_NAMES.copy() 
    
    if PROCESS_FEATURES:
        all_numeric_feature_names += CONSTRUCTED_NUMERIC_FEATURES_NAMES

    numeric_columns = {feature_name: tf.feature_column.numeric_column(feature_name)
                       for feature_name in all_numeric_feature_names}

    categorical_column_with_vocabulary = \
        {item[0]: tf.feature_column.categorical_column_with_vocabulary_list(item[0], item[1])
         for item in CATEGORICAL_FEATURE_NAMES_WITH_VOCABULARY.items()}
        
    feature_columns = {}

    if numeric_columns is not None:
        feature_columns.update(numeric_columns)

    if categorical_column_with_vocabulary is not None:
        feature_columns.update(categorical_column_with_vocabulary)
    
    if EXTEND_FEATURE_COLUMNS:
        feature_columns = extend_feature_columns(feature_columns, hparams)
        
    return feature_columns

feature_columns = get_feature_columns(tf.contrib.training.HParams(num_buckets=5,embedding_size=3))
print("Feature Columns: {}".format(feature_columns))

Feature Columns: {'x': _NumericColumn(key='x', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'y': _NumericColumn(key='y', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'x_2': _NumericColumn(key='x_2', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'y_2': _NumericColumn(key='y_2', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'xy': _NumericColumn(key='xy', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'dist_xy': _NumericColumn(key='dist_xy', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'sin_x': _NumericColumn(key='sin_x', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'cos_y': _NumericColumn(key='cos_y', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'alpha': _VocabularyListCategoricalColumn(key='alpha', vocabulary_list=('ax01', 'ax02'), dtype=tf.string, default_value=-1, num_oov_buckets=0), 

## 4. Define an Estimator Creation Function

### a. Get wide and deep feature columns
* dense columns = numeric columns + embedding columns
* categorical columns = vocabolary list columns + bucketized columns
* sparse columns = hashed categorical columns + crossed columns
* categorical columns => indicator columns
* deep columns = dense columns + indicator columns
* wide columns = categorical columns + sparse columns

In [8]:
def get_wide_deep_columns():
    
    feature_columns = list(get_feature_columns(hparams).values())
    
    dense_columns = list(
        filter(lambda column: isinstance(column, feature_column._NumericColumn) |
                              isinstance(column, feature_column._EmbeddingColumn),
               feature_columns
        )
    )

    categorical_columns = list(
        filter(lambda column: isinstance(column, feature_column._VocabularyListCategoricalColumn) |
                              isinstance(column, feature_column._BucketizedColumn),
                   feature_columns)
    )
    
    sparse_columns = list(
        filter(lambda column: isinstance(column,feature_column._HashedCategoricalColumn) |
                              isinstance(column, feature_column._CrossedColumn),
               feature_columns)
    )

    indicator_columns = list(
            map(lambda column: tf.feature_column.indicator_column(column),
                categorical_columns)
    )
    
    deep_feature_columns = dense_columns + indicator_columns
    wide_feature_columns = categorical_columns + sparse_columns
    
    return wide_feature_columns, deep_feature_columns
    

### b. Define the DNNLinearCombinedRegressor

In [9]:
def create_estimator(run_config, hparams, print_desc=False):
    
    wide_feature_columns, deep_feature_columns = get_wide_deep_columns()
    
    estimator = tf.estimator.DNNLinearCombinedRegressor(
        
        dnn_feature_columns = deep_feature_columns,
        linear_feature_columns = wide_feature_columns,
        
        dnn_hidden_units= hparams.hidden_units,
        
        dnn_optimizer= tf.train.AdamOptimizer(),
        
        dnn_activation_fn= tf.nn.elu,
        dnn_dropout= hparams.dropout_prob,
        
        config= run_config
    )
    
    
    if print_desc:
        print("")
        print("*Estimator Type:")
        print("================")
        print(type(estimator))
        print("")
        print("*deep columns:")
        print("==============")
        print(deep_feature_columns)
        print("")
        print("wide columns:")
        print("=============")
        print(wide_feature_columns)
        print("")
    
    return estimator

## 5. Run Experiment

### a. Set HParam and RunConfig

In [10]:
TRAIN_SIZE = 12000
NUM_EPOCHS = 1000
BATCH_SIZE = 500
EVAL_AFTER_SEC = 15
TOTAL_STEPS = (TRAIN_SIZE/BATCH_SIZE)*NUM_EPOCHS

hparams  = tf.contrib.training.HParams(
    num_epochs = NUM_EPOCHS,
    batch_size = BATCH_SIZE,
    hidden_units=[16, 12, 8],
    num_buckets = 6,
    embedding_size = 3,
    max_steps = TOTAL_STEPS,
    dropout_prob = 0.001)

model_dir = 'trained_models/{}'.format(MODEL_NAME)

run_config = tf.estimator.RunConfig(
    tf_random_seed=19830610,
    model_dir=model_dir
)

print(hparams)
print("Model Directory:", run_config.model_dir)
print("")
print("Dataset Size:", TRAIN_SIZE)
print("Batch Size:", BATCH_SIZE)
print("Steps per Epoch:",TRAIN_SIZE/BATCH_SIZE)
print("Total Steps:", TOTAL_STEPS)
print("That is 1 evaluation step after each",EVAL_AFTER_SEC," training seconds")

[('batch_size', 500), ('dropout_prob', 0.001), ('embedding_size', 3), ('hidden_units', [16, 12, 8]), ('max_steps', 24000.0), ('num_buckets', 6), ('num_epochs', 1000)]
Model Directory: trained_models/reg-model-06

Dataset Size: 12000
Batch Size: 500
Steps per Epoch: 24.0
Total Steps: 24000.0
That is 1 evaluation step after each 15  training seconds


###  b. Define Serving Function

In [11]:
def csv_serving_input_fn():
    
    SERVING_HEADER = ['x','y','alpha','beta']
    SERVING_HEADER_DEFAULTS = [[0.0], [0.0], ['NA'], ['NA']]

    rows_string_tensor = tf.placeholder(dtype=tf.string,
                                         shape=[None],
                                         name='csv_rows')
    
    receiver_tensor = {'csv_rows': rows_string_tensor}

    row_columns = tf.expand_dims(rows_string_tensor, -1)
    columns = tf.decode_csv(row_columns, record_defaults=SERVING_HEADER_DEFAULTS)
    features = dict(zip(SERVING_HEADER, columns))
    
    if PROCESS_FEATURES:
        features = process_features(features)

    return tf.estimator.export.ServingInputReceiver(
        features, receiver_tensor)

### c. Define an Early Stopping Monitor (Hook)

In [12]:
class EarlyStoppingHook(tf.train.SessionRunHook):
    
    def __init__(self, early_stopping_rounds=1):
        self._best_loss = None
        self._early_stopping_rounds = early_stopping_rounds
        self._counter = 0
        
        print("")
        print("*** Early Stopping Hook: - Created")
        print("*** Early Stopping Hook:: Early Stopping Rounds: {}".format(self._early_stopping_rounds))
        print("")

    def before_run(self, run_context):
        
        graph = run_context.session.graph
        
#         tensor_name = "dnn/head/weighted_loss/Sum:0" #works!!
#         loss_tensor = graph.get_tensor_by_name(tensor_name)

        loss_tensor = graph.get_collection(tf.GraphKeys.LOSSES)[1]
        return tf.train.SessionRunArgs(loss_tensor)

    def after_run(self, run_context, run_values):
        
        last_loss = run_values.results
        
        print("")
        print("************************")
        print("** Evaluation Monitor - Early Stopping **")
        print("-----------------------------------------")
        print("Early Stopping Hook: Current loss: {}".format(str(last_loss)))
        print("Early Stopping Hook: Best loss: {}".format(str(self._best_loss)))

        if self._best_loss is None:
            self._best_loss = last_loss
            
        elif last_loss > self._best_loss:
            
            self._counter += 1
            print("Early Stopping Hook: No improvment! Counter: {}".format(self._counter))
            
            if self._counter == self._early_stopping_rounds:
                
                run_context.request_stop()
                print("Early Stopping Hook: Stop Requested: {}".format(run_context.stop_requested))
        else:
            
            self._best_loss = last_loss
            self._counter = 0
            
        print("************************")
        print("") 


### d. Define TrainSpec and EvaluSpec

In [13]:
train_spec = tf.estimator.TrainSpec(
    input_fn = lambda: tfrecods_input_fn(
        TRAIN_DATA_FILES_PATTERN,
        mode = tf.estimator.ModeKeys.TRAIN,
        num_epochs=hparams.num_epochs,
        batch_size=hparams.batch_size
    ),
    max_steps=hparams.max_steps,
    hooks=None
)

eval_spec = tf.estimator.EvalSpec(
    input_fn = lambda: tfrecods_input_fn(
        VALID_DATA_FILES_PATTERN,
        mode=tf.estimator.ModeKeys.EVAL,
        num_epochs=1,
        batch_size=hparams.batch_size
    ),
    exporters=[tf.estimator.LatestExporter(
        name="estimate",  # the name of the folder in which the model will be exported to under export
        serving_input_receiver_fn=csv_serving_input_fn,
        exports_to_keep=1,
        as_text=True)],
    steps=None,
    #hooks=[EarlyStoppingHook(15)],
    throttle_secs = EVAL_AFTER_SEC # evalute after each 15 training seconds!
)

### e. Run Experiment via train_and_evaluate

In [27]:
if not RESUME_TRAINING:
    print("Removing previous artifacts...")
    shutil.rmtree(model_dir, ignore_errors=True)
else:
    print("Resuming training...") 
import path
path.Path(model_dir +"/export/estimate").makedirs_p()
    
tf.logging.set_verbosity(tf.logging.INFO)

time_start = datetime.utcnow() 
print("Experiment started at {}".format(time_start.strftime("%H:%M:%S")))
print(".......................................") 

estimator = create_estimator(run_config, hparams, True)

tf.estimator.train_and_evaluate(
    estimator=estimator,
    train_spec=train_spec, 
    eval_spec=eval_spec
)

time_end = datetime.utcnow() 
print(".......................................")
print("Experiment finished at {}".format(time_end.strftime("%H:%M:%S")))
print("")
time_elapsed = time_end - time_start
print("Experiment elapsed time: {} seconds".format(time_elapsed.total_seconds()))
    

Removing previous artifacts...
Experiment started at 08:44:35
.......................................
INFO:tensorflow:Using config: {'_model_dir': 'trained_models/reg-model-06', '_tf_random_seed': 19830610, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000000039B99320>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}

*Estimator Type:
<class 'tensorflow.python.estimator.cann

INFO:tensorflow:loss = 43590.63, step = 3401 (0.315 sec)
INFO:tensorflow:global_step/sec: 304.878
INFO:tensorflow:loss = 44745.953, step = 3501 (0.328 sec)
INFO:tensorflow:global_step/sec: 295.858
INFO:tensorflow:loss = 41627.12, step = 3601 (0.338 sec)
INFO:tensorflow:global_step/sec: 320.513
INFO:tensorflow:loss = 45065.918, step = 3701 (0.312 sec)
INFO:tensorflow:global_step/sec: 325.733
INFO:tensorflow:loss = 45378.86, step = 3801 (0.307 sec)
INFO:tensorflow:global_step/sec: 319.489
INFO:tensorflow:loss = 44808.05, step = 3901 (0.313 sec)
INFO:tensorflow:global_step/sec: 316.456
INFO:tensorflow:loss = 43936.805, step = 4001 (0.316 sec)
INFO:tensorflow:global_step/sec: 319.489
INFO:tensorflow:loss = 41753.086, step = 4101 (0.314 sec)
INFO:tensorflow:global_step/sec: 319.489
INFO:tensorflow:loss = 42697.87, step = 4201 (0.313 sec)
INFO:tensorflow:global_step/sec: 325.733
INFO:tensorflow:loss = 45538.773, step = 4301 (0.306 sec)
INFO:tensorflow:global_step/sec: 321.544
INFO:tensorflow

INFO:tensorflow:loss = 44024.246, step = 11701 (0.313 sec)
INFO:tensorflow:global_step/sec: 318.472
INFO:tensorflow:loss = 40756.01, step = 11801 (0.313 sec)
INFO:tensorflow:global_step/sec: 323.625
INFO:tensorflow:loss = 48742.65, step = 11901 (0.309 sec)
INFO:tensorflow:global_step/sec: 323.624
INFO:tensorflow:loss = 38693.645, step = 12001 (0.309 sec)
INFO:tensorflow:global_step/sec: 320.513
INFO:tensorflow:loss = 35308.13, step = 12101 (0.313 sec)
INFO:tensorflow:global_step/sec: 325.733
INFO:tensorflow:loss = 47512.156, step = 12201 (0.307 sec)
INFO:tensorflow:global_step/sec: 317.46
INFO:tensorflow:loss = 37003.363, step = 12301 (0.314 sec)
INFO:tensorflow:global_step/sec: 322.58
INFO:tensorflow:loss = 48645.54, step = 12401 (0.310 sec)
INFO:tensorflow:global_step/sec: 324.675
INFO:tensorflow:loss = 42497.95, step = 12501 (0.308 sec)
INFO:tensorflow:global_step/sec: 325.733
INFO:tensorflow:loss = 46080.355, step = 12601 (0.307 sec)
INFO:tensorflow:global_step/sec: 319.489
INFO:te

INFO:tensorflow:global_step/sec: 325.733
INFO:tensorflow:loss = 38728.992, step = 20001 (0.307 sec)
INFO:tensorflow:global_step/sec: 321.543
INFO:tensorflow:loss = 44697.434, step = 20101 (0.311 sec)
INFO:tensorflow:global_step/sec: 325.733
INFO:tensorflow:loss = 42821.098, step = 20201 (0.307 sec)
INFO:tensorflow:global_step/sec: 279.33
INFO:tensorflow:loss = 38483.223, step = 20301 (0.358 sec)
INFO:tensorflow:global_step/sec: 295.858
INFO:tensorflow:loss = 38460.777, step = 20401 (0.338 sec)
INFO:tensorflow:global_step/sec: 315.458
INFO:tensorflow:loss = 38134.21, step = 20501 (0.317 sec)
INFO:tensorflow:global_step/sec: 321.543
INFO:tensorflow:loss = 43162.11, step = 20601 (0.312 sec)
INFO:tensorflow:global_step/sec: 320.513
INFO:tensorflow:loss = 48836.35, step = 20701 (0.312 sec)
INFO:tensorflow:global_step/sec: 320.513
INFO:tensorflow:loss = 48543.0, step = 20801 (0.311 sec)
INFO:tensorflow:global_step/sec: 319.489
INFO:tensorflow:loss = 53233.38, step = 20901 (0.313 sec)
INFO:te

## 6. Evaluate the Model

In [28]:
TRAIN_SIZE = 12000
VALID_SIZE = 3000
TEST_SIZE = 5000
train_input_fn = lambda: tfrecods_input_fn(files_name_pattern= TRAIN_DATA_FILES_PATTERN, 
                                      mode= tf.estimator.ModeKeys.EVAL,
                                      batch_size= TRAIN_SIZE)

valid_input_fn = lambda: tfrecods_input_fn(files_name_pattern= VALID_DATA_FILES_PATTERN, 
                                      mode= tf.estimator.ModeKeys.EVAL,
                                      batch_size= VALID_SIZE)

test_input_fn = lambda: tfrecods_input_fn(files_name_pattern= TEST_DATA_FILES_PATTERN, 
                                      mode= tf.estimator.ModeKeys.EVAL,
                                      batch_size= TEST_SIZE)

estimator = create_estimator(run_config, hparams)

train_results = estimator.evaluate(input_fn=train_input_fn, steps=1)
train_rmse = round(math.sqrt(train_results["average_loss"]),5)
print()
print("############################################################################################")
print("# Train RMSE: {} - {}".format(train_rmse, train_results))
print("############################################################################################")

valid_results = estimator.evaluate(input_fn=valid_input_fn, steps=1)
valid_rmse = round(math.sqrt(valid_results["average_loss"]),5)
print()
print("############################################################################################")
print("# Valid RMSE: {} - {}".format(valid_rmse,valid_results))
print("############################################################################################")

test_results = estimator.evaluate(input_fn=test_input_fn, steps=1)
test_rmse = round(math.sqrt(test_results["average_loss"]),5)
print()
print("############################################################################################")
print("# Test RMSE: {} - {}".format(test_rmse, test_results))
print("############################################################################################")

INFO:tensorflow:Using config: {'_model_dir': 'trained_models/reg-model-06', '_tf_random_seed': 19830610, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000000003311A860>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}

* data input_fn:
Input file(s): data/train-*.tfrecords
Batch size: 12000
Epoch Count: None
Mode: eval
Shuffle: False

INFO:tensorflow:Calling model_fn.
INFO:te

## 7. Prediction

In [29]:
import itertools

predict_input_fn = lambda: tfrecods_input_fn(files_name_pattern= TEST_DATA_FILES_PATTERN, 
                                      mode= tf.estimator.ModeKeys.PREDICT,
                                      batch_size= 5)

predictions = estimator.predict(input_fn=predict_input_fn)
values = list(map(lambda item: item["predictions"][0],list(itertools.islice(predictions, 5))))
print()
print("Predicted Values: {}".format(values))


* data input_fn:
Input file(s): data/test-*.tfrecords
Batch size: 5
Epoch Count: None
Mode: infer
Shuffle: False

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from trained_models/reg-model-06\model.ckpt-24000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.

Predicted Values: [50.57643, -3.1678717, 18.039553, 2.8187575, 2.5548038]


## Serving via the Saved Model

In [30]:
import os

export_dir = model_dir +"/export/estimate"

saved_model_dir = export_dir + "/" + os.listdir(path=export_dir)[-1] 

print(saved_model_dir)

predictor_fn = tf.contrib.predictor.from_saved_model(
    export_dir = saved_model_dir,
    signature_def_key="predict"
)

output = predictor_fn({'csv_rows': ["0.5,1,ax01,bx02", "-0.5,-1,ax02,bx02"]})
print(output)

trained_models/reg-model-06/export/estimate/1543049157
INFO:tensorflow:Restoring parameters from trained_models/reg-model-06/export/estimate/1543049157\variables\variables
{'predictions': array([[ 66.432556],
       [-13.668721]], dtype=float32)}


In [21]:
saved_model_dir

NameError: name 'saved_model_dir' is not defined

In [22]:
export_dir

'trained_models/reg-model-06/export/estimate'

In [24]:
 os.listdir(path=export_dir)

[]