In [22]:
import tensorflow as tf
import pandas as pd
import pathlib

In [23]:
# Download the Titanic training and evaluation CSV files, then wrap the
# locations returned by get_file in pathlib.Path objects.
train_data = tf.keras.utils.get_file(
    fname='train.csv',
    origin='https://storage.googleapis.com/tf-datasets/titanic/train.csv',
)
eval_data = tf.keras.utils.get_file(
    fname='eval.csv',
    origin='https://storage.googleapis.com/tf-datasets/titanic/eval.csv',
)
train_data_path = pathlib.Path(train_data)
eval_data_path = pathlib.Path(eval_data)

There are two main steps to define an estimator:
1. Define an input function that returns a TensorFlow Dataset.
2. Create feature columns from the raw data.

In [41]:
#Define input pipeline
def make_input(file, mode='train', lbl='survived', batch_size=32):
  """Build a zero-argument input function for a tf.estimator.

  Args:
    file: Location of the CSV file, in any form pandas.read_csv accepts.
    mode: 'train' or 'eval' to produce (features, label) batches, or
      'predict' to produce feature-only batches.
    lbl: Name of the label column in the CSV.
    batch_size: Number of rows per batch.

  Returns:
    A callable taking no arguments that reads the CSV and returns a batched
    tf.data.Dataset.

  Raises:
    ValueError: If mode is not 'train', 'eval' or 'predict'.
  """
  supported_modes = ('train', 'eval', 'predict')
  # Fail while building the input function: previously an unsupported mode
  # only surfaced later as an UnboundLocalError inside input_function.
  if mode not in supported_modes:
    raise ValueError(
        f"mode must be one of {supported_modes}, got {mode!r}")

  def input_function():
    data = pd.read_csv(file)
    if mode == 'predict':
      # No label is needed for prediction; drop the column if it is present.
      features = data.drop(columns=[lbl], errors='ignore')
      dataset = tf.data.Dataset.from_tensor_slices(dict(features))
    else:
      # pop() removes the label column so only features remain in `data`.
      label = data.pop(lbl)
      dataset = tf.data.Dataset.from_tensor_slices((dict(data), label))
    return dataset.batch(batch_size)
  return input_function

In [25]:
# Build the input functions handed to the estimator for training and evaluation.
# Both use mode='train', the make_input branch that yields (features, label) pairs.
train_function = make_input(file=train_data_path, mode='train')
eval_function = make_input(file=eval_data_path, mode='train')

In [26]:
# Load the training CSV into a DataFrame; it is used below to derive the
# vocabulary of each categorical feature column.
data = pd.read_csv(filepath_or_buffer=train_data_path)

In [27]:
# Names of the CSV columns treated as numeric features.
NUMERIC_COLUMNS = [
    'age',
    'n_siblings_spouses',
    'fare',
]

# Names of the CSV columns treated as categorical features.
CATEGORICAL_COLUMNS = [
    'sex',
    'parch',
    'class',
    'deck',
    'embark_town',
    'alone',
]

In [42]:
# Build the feature columns
def define_faetures(data, NUMERIC_COLUMNS, CATEGORICAL_COLUMNS):
  """Create the list of feature columns for the estimator.

  Args:
    data: Object indexable by column name whose columns expose .unique()
      (a pandas DataFrame in this notebook); used to collect the vocabulary
      of every categorical column.
    NUMERIC_COLUMNS: Iterable of names of numeric features.
    CATEGORICAL_COLUMNS: Iterable of names of categorical features.

  Returns:
    A list with one numeric column per numeric name, followed by one
    indicator column per categorical name.
  """
  numeric_part = [
      tf.feature_column.numeric_column(name) for name in NUMERIC_COLUMNS
  ]
  # Each categorical feature gets a vocabulary made of the distinct values
  # found in `data`, then is wrapped in an indicator column.
  categorical_part = [
      tf.feature_column.indicator_column(
          tf.feature_column.categorical_column_with_vocabulary_list(
              name, data[name].unique()))
      for name in CATEGORICAL_COLUMNS
  ]
  return numeric_part + categorical_part

In [43]:
# Train and evaluate the estimator
def train_and_eval_estimator(model_path):
  """Train a DNNClassifier and evaluate it in a single call.

  The classifier has two hidden layers (64 and 32 units) and uses the feature
  columns built by define_faetures from the module-level `data`,
  NUMERIC_COLUMNS and CATEGORICAL_COLUMNS. Input comes from the module-level
  train_function and eval_function.

  Args:
    model_path: Passed to the estimator as its model_dir.
  """
  columns = define_faetures(data, NUMERIC_COLUMNS, CATEGORICAL_COLUMNS)
  classifier = tf.estimator.DNNClassifier(
      hidden_units=[64, 32],
      feature_columns=columns,
      model_dir=model_path,
  )
  tf.estimator.train_and_evaluate(
      classifier,
      tf.estimator.TrainSpec(train_function),
      tf.estimator.EvalSpec(eval_function),
  )

In [40]:
# Run training and evaluation, using 'titanic_model' as the model directory.
train_and_eval_estimator(model_path='titanic_model')

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'titanic_model', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Not using Distribute Coordinator.
INFO:tensorflow:Running training a